diff --git a/.ci/premerge_advisor_explain.py b/.ci/premerge_advisor_explain.py index e1bc59f389b36..269f75cace266 100644 --- a/.ci/premerge_advisor_explain.py +++ b/.ci/premerge_advisor_explain.py @@ -39,6 +39,7 @@ def get_comment( ) -> dict[str, str]: repo = github.Github(github_token).get_repo("llvm/llvm-project") pr = repo.get_issue(pr_number).as_pull_request() + body = COMMENT_TAG.format(platform=platform.system()) + "\n" + body comment = {"body": body} comment_id = get_comment_id(platform.system(), pr) if comment_id: @@ -128,7 +129,7 @@ def main( ), ) ] - with open("comment", "w") as comment_file_handle: + with open("comments", "w") as comment_file_handle: json.dump(comments, comment_file_handle) else: print(advisor_response.reason) diff --git a/.github/workflows/libc-fullbuild-tests.yml b/.github/workflows/libc-fullbuild-tests.yml index 3a048aeb9405b..01fd895cce7e8 100644 --- a/.github/workflows/libc-fullbuild-tests.yml +++ b/.github/workflows/libc-fullbuild-tests.yml @@ -48,6 +48,42 @@ jobs: cpp_compiler: clang++-22 target: x86_64-unknown-uefi-llvm include_scudo: OFF + - os: ubuntu-24.04 + build_type: MinSizeRel + c_compiler: clang-22 + cpp_compiler: clang++-22 + target: armv6m-none-eabi + include_scudo: OFF + - os: ubuntu-24.04 + build_type: MinSizeRel + c_compiler: clang-22 + cpp_compiler: clang++-22 + target: armv7m-none-eabi + include_scudo: OFF + - os: ubuntu-24.04 + build_type: MinSizeRel + c_compiler: clang-22 + cpp_compiler: clang++-22 + target: armv7em-none-eabi + include_scudo: OFF + - os: ubuntu-24.04 + build_type: MinSizeRel + c_compiler: clang-22 + cpp_compiler: clang++-22 + target: armv8m.main-none-eabi + include_scudo: OFF + - os: ubuntu-24.04 + build_type: MinSizeRel + c_compiler: clang-22 + cpp_compiler: clang++-22 + target: armv8.1m.main-none-eabi + include_scudo: OFF + - os: ubuntu-24.04 + build_type: MinSizeRel + c_compiler: clang-22 + cpp_compiler: clang++-22 + target: riscv32-unknown-elf + include_scudo: OFF # TODO: add back gcc build when it is fixed # - c_compiler: gcc # cpp_compiler: g++ @@ -93,28 +129,39 @@ jobs: run: | export RUNTIMES="libc" + export CMAKE_FLAGS=" + -G Ninja + -S ${{ github.workspace }}/runtimes + -B ${{ steps.strings.outputs.build-output-dir }} + -DCMAKE_ASM_COMPILER=${{ matrix.c_compiler }} + -DCMAKE_C_COMPILER=${{ matrix.c_compiler }} + -DCMAKE_CXX_COMPILER=${{ matrix.cpp_compiler }} + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} + -DCMAKE_C_COMPILER_LAUNCHER=sccache + -DCMAKE_CXX_COMPILER_LAUNCHER=sccache + -DCMAKE_INSTALL_PREFIX=${{ steps.strings.outputs.build-install-dir }}" + if [[ ${{ matrix.include_scudo}} == "ON" ]]; then export RUNTIMES="$RUNTIMES;compiler-rt" - export CMAKE_FLAGS=" + export CMAKE_FLAGS="$CMAKE_FLAGS -DLLVM_LIBC_INCLUDE_SCUDO=ON -DCOMPILER_RT_BUILD_SCUDO_STANDALONE_WITH_LLVM_LIBC=ON -DCOMPILER_RT_BUILD_GWP_ASAN=OFF -DCOMPILER_RT_SCUDO_STANDALONE_BUILD_SHARED=OFF" fi - cmake -B ${{ steps.strings.outputs.build-output-dir }} \ - -DCMAKE_CXX_COMPILER=${{ matrix.cpp_compiler }} \ - -DCMAKE_C_COMPILER=${{ matrix.c_compiler }} \ - -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ - -DCMAKE_C_COMPILER_LAUNCHER=sccache \ - -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \ - -DCMAKE_INSTALL_PREFIX=${{ steps.strings.outputs.build-install-dir }} \ - -DLLVM_RUNTIME_TARGETS=${{ matrix.target }} \ - -DLLVM_ENABLE_RUNTIMES="$RUNTIMES" \ - -DLLVM_LIBC_FULL_BUILD=ON \ - -G Ninja \ - -S ${{ github.workspace }}/runtimes \ - $CMAKE_FLAGS + case "${{ matrix.target }}" in + *-none-eabi|riscv32-unknown-elf) + cmake $CMAKE_FLAGS \ + -C ${{ 
github.workspace }}/libc/cmake/caches/${{ matrix.target }}.cmake
+          ;;
+        *)
+          cmake -DLLVM_RUNTIME_TARGETS=${{ matrix.target }} \
+            -DLLVM_ENABLE_RUNTIMES="$RUNTIMES" \
+            -DLLVM_LIBC_FULL_BUILD=ON \
+            $CMAKE_FLAGS
+          ;;
+      esac

     - name: Build
       run: >
         cmake
@@ -124,8 +171,12 @@ jobs:
         --target install

     - name: Test
-      # Skip UEFI tests until we have testing set up.
-      if: ${{ ! endsWith(matrix.target, '-uefi-llvm') }}
+      # Skip UEFI and baremetal tests until we have testing set up.
+      if: ${{
+        !endsWith(matrix.target, '-uefi-llvm') &&
+        !endsWith(matrix.target, '-none-eabi') &&
+        matrix.target != 'riscv32-unknown-elf'
+        }}
       run: >
         cmake
         --build ${{ steps.strings.outputs.build-output-dir }}
diff --git a/.github/workflows/premerge.yaml b/.github/workflows/premerge.yaml
index 02a6f3b868d85..daf88b5b22125 100644
--- a/.github/workflows/premerge.yaml
+++ b/.github/workflows/premerge.yaml
@@ -119,6 +119,14 @@ jobs:
           path: artifacts/
           retention-days: 5
           include-hidden-files: 'true'
+      - name: Upload Comment
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        if: ${{ always() && !startsWith(matrix.runs-on, 'depot-ubuntu-24.04-arm') }}
+        continue-on-error: true
+        with:
+          name: workflow-args
+          path: |
+            comments

   premerge-checks-windows:
     name: Build and Test Windows
diff --git a/.gitignore b/.gitignore
index 860b8ea12abd4..a9d616286adf1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -54,9 +54,9 @@ autoconf/autom4te.cache
 /cmake-build*
 # Coding assistants' stuff
 /CLAUDE.md
-/.claude/
+.claude/
 /GEMINI.md
-/.gemini/
+.gemini/

 #==============================================================================#
 # Directories to ignore (do not add trailing '/'s, they skip symlinks).
diff --git a/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td
index 1e0fb038b19d8..47ff9389e8028 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td
@@ -1026,4 +1026,29 @@ def CIR_UnwindAttr : CIR_UnitAttr<"Unwind", "unwind"> {
   let storageType = [{ CatchUnwind }];
 }

+//===----------------------------------------------------------------------===//
+// CIR_BlockAddrInfoAttr
+//===----------------------------------------------------------------------===//
+
+def CIR_BlockAddrInfoAttr : CIR_Attr<"BlockAddrInfo", "block_addr_info"> {
+  let summary = "Block Address attribute";
+  let description = [{
+    This attribute is used to represent the address of a basic block
+    within a function. It combines the symbol reference to a function
+    with the name of a label inside that function.
+  }];
+  let parameters = (ins "mlir::FlatSymbolRefAttr":$func,
+                        "mlir::StringAttr":$label);
+
+  let assemblyFormat = "`<` $func `,` $label `>`";
+  let builders = [
+    AttrBuilder<(ins "llvm::StringRef":$func_name,
+                     "llvm::StringRef":$label_name
+    ), [{
+      return $_get($_ctxt, mlir::FlatSymbolRefAttr::get($_ctxt, func_name),
+                   mlir::StringAttr::get($_ctxt, label_name));
+    }]>
+  ];
+}
+
 #endif // CLANG_CIR_DIALECT_IR_CIRATTRS_TD
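As a quick orientation, a hedged sketch of how the generated builder above is meant to be called; the header path and names here are assumptions (the real call site is the VisitAddrLabelExpr change in CIRGenExprScalar.cpp further down):

```c++
#include "mlir/IR/MLIRContext.h"
// Assumed include; the generated attribute class is normally pulled in via
// the CIR dialect headers.
#include "clang/CIR/Dialect/IR/CIRAttrs.h"

// The AttrBuilder wraps the two strings into a FlatSymbolRefAttr plus a
// StringAttr, so a caller can simply write:
cir::BlockAddrInfoAttr makeInfo(mlir::MLIRContext *ctx) {
  // Per the assemblyFormat above, this prints as <@foo, "label1">.
  return cir::BlockAddrInfoAttr::get(ctx, /*func_name=*/"foo",
                                     /*label_name=*/"label1");
}
```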
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index 2124b1dc62a81..e612d6a0ba886 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -3386,6 +3386,10 @@ def CIR_BaseClassAddrOp : CIR_Op<"base_class_addr"> {
     cannot be known by the operation, and that information affects how the
     operation is lowered.

+    The validity of the relationship of derived and base cannot yet be verified.
+    If the target class is not a valid base class for the object, the behavior
+    is undefined.
+
     Example:
     ```c++
     struct Base { };
     struct Derived : Base { };
     Derived d;
     Base &b = d;
     ```
   }];

-  // The validity of the relationship of derived and base cannot yet be
-  // verified, currently not worth adding a verifier.
   let arguments = (ins Arg:$derived_addr, IndexAttr:$offset,
                        UnitAttr:$assume_not_null);

@@ -3414,6 +3416,56 @@
   }];
 }

+//===----------------------------------------------------------------------===//
+// DerivedClassAddrOp
+//===----------------------------------------------------------------------===//
+
+def CIR_DerivedClassAddrOp : CIR_Op<"derived_class_addr"> {
+  let summary = "Get the derived class address for a class/struct";
+  let description = [{
+    The `cir.derived_class_addr` operation gets the address of a particular
+    derived class given a non-virtual base class pointer. The offset in bytes
+    of the base class must be passed in, similar to `cir.base_class_addr`, but
+    in the other direction, which means it lowers to a negative offset.
+
+    The operation contains a flag for whether or not the operand may be nullptr.
+    That depends on the context and cannot be known by the operation, and that
+    information affects how the operation is lowered.
+
+    The validity of the relationship of derived and base cannot yet be verified.
+    If the target class is not a valid derived class for the object, the
+    behavior is undefined.
+
+    Example:
+    ```c++
+    class A {};
+    class B : public A {};
+
+    B *getAsB(A *a) {
+      return static_cast<B *>(a);
+    }
+    ```
+
+    leads to
+    ```mlir
+    %2 = cir.load %0 : !cir.ptr<!cir.ptr<!rec_A>>, !cir.ptr<!rec_A>
+    %3 = cir.derived_class_addr %2 : !cir.ptr<!rec_A> [0] -> !cir.ptr<!rec_B>
+    ```
+  }];
+
+  let arguments = (ins
+    Arg:$base_addr,
+    IndexAttr:$offset, UnitAttr:$assume_not_null);
+
+  let results = (outs Res:$derived_addr);
+
+  let assemblyFormat = [{
+    $base_addr `:` qualified(type($base_addr))
+    (`nonnull` $assume_not_null^)?
+    ` ` `[` $offset `]` `->` qualified(type($derived_addr)) attr-dict
+  }];
+}
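To make the "other direction" concrete, a small sketch; the layout numbers are illustrative (4-byte int, no extra padding assumed) and mirror the castBtoX case in the new base-to-derived.cpp test further down:

```c++
struct A { int a; };           // at offset 0 inside X
struct B { int b; };           // at offset 4 inside X under the assumed layout
struct X : A, B { int x; };

X *asX(B *b) {
  // cir.base_class_addr would add +4 when converting X* -> B*;
  // cir.derived_class_addr subtracts the same 4 bytes for B* -> X*,
  // i.e. it lowers to a GEP with a negative byte offset.
  return static_cast<X *>(b);
}
```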
+
 //===----------------------------------------------------------------------===//
 // ComplexCreateOp
 //===----------------------------------------------------------------------===//
@@ -4845,4 +4897,38 @@ def CIR_AtomicClearOp : CIR_Op<"atomic.clear"> {
   }];
 }

+//===----------------------------------------------------------------------===//
+// BlockAddressOp
+//===----------------------------------------------------------------------===//
+
+def CIR_BlockAddressOp : CIR_Op<"block_address", [Pure]> {
+  let summary = "Get the address of a cir.label within a function";
+  let description = [{
+    The `cir.block_address` operation takes a function name and a label and
+    produces a pointer value that represents the address of that cir.label
+    within the specified function.
+
+    This operation models GCC's "labels as values" extension (`&&label`), which
+    allows taking the address of a local label and using it as a computed
+    jump target (e.g., with `goto *addr;`).
+
+    Example:
+    ```mlir
+    %1 = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["ptr", init]
+      {alignment = 8 : i64}
+    %addr = cir.block_address <@c, "label1"> : !cir.ptr<!void>
+    cir.store align(8) %addr, %1 : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+    cir.br ^bb1
+    ^bb1:
+    cir.label "label1"
+    ```
+  }];
+
+  let arguments = (ins CIR_BlockAddrInfoAttr:$block_addr_info);
+  let results = (outs CIR_VoidPtrType:$addr);
+  let assemblyFormat = [{
+    $block_addr_info `:` qualified(type($addr)) attr-dict
+  }];
+}
+
 #endif // CLANG_CIR_DIALECT_IR_CIROPS_TD
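For context, a minimal source-level sketch of the GNU extension this op models (accepted by Clang and GCC in both C and C++):

```c++
// "Labels as values": &&label yields a void * pointing at the label, and a
// computed goto jumps through it. cir.block_address produces the pointer;
// cir.label marks the jump target.
int parity(int n) {
  void *targets[] = {&&even, &&odd};
  goto *targets[n & 1]; // computed jump
even:
  return 0;
odd:
  return 1;
}
```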
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index f5a368636c43d..da155d31d4a88 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -781,9 +781,25 @@ struct LibcFunNamePrefixSuffixParser {
   }
 };

+// Constant fold a conditional expression 'cond ? A : B' to
+// - 'A', if 'cond' has constant true value;
+// - 'B', if 'cond' has constant false value.
+static const Expr *tryConstantFoldConditionalExpr(const Expr *E,
+                                                  const ASTContext &Ctx) {
+  // FIXME: more places can use this function
+  if (const auto *CE = dyn_cast<ConditionalOperator>(E)) {
+    bool CondEval;
+
+    if (CE->getCond()->EvaluateAsBooleanCondition(CondEval, Ctx))
+      return CondEval ? CE->getLHS() : CE->getRHS();
+  }
+  return E;
+}
+
 // A pointer type expression is known to be null-terminated, if it has the
 // form: E.c_str(), for any expression E of `std::string` type.
-static bool isNullTermPointer(const Expr *Ptr) {
+static bool isNullTermPointer(const Expr *Ptr, ASTContext &Ctx) {
+  Ptr = tryConstantFoldConditionalExpr(Ptr, Ctx);
   if (isa<StringLiteral>(Ptr->IgnoreParenImpCasts()))
     return true;
   if (isa<PredefinedExpr>(Ptr->IgnoreParenImpCasts()))
     return true;
@@ -874,7 +890,7 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,

       const Expr *Arg = Call->getArg(ArgIdx);

-      if (isNullTermPointer(Arg))
+      if (isNullTermPointer(Arg, Ctx))
         // If Arg is a null-terminated pointer, it is safe anyway.
         return true; // continue parsing

@@ -922,8 +938,8 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
     // (including the format argument) is unsafe pointer.
     return llvm::any_of(
         llvm::make_range(Call->arg_begin() + FmtArgIdx, Call->arg_end()),
-        [&UnsafeArg](const Expr *Arg) -> bool {
-          if (Arg->getType()->isPointerType() && !isNullTermPointer(Arg)) {
+        [&UnsafeArg, &Ctx](const Expr *Arg) -> bool {
+          if (Arg->getType()->isPointerType() && !isNullTermPointer(Arg, Ctx)) {
             UnsafeArg = Arg;
             return true;
           }
@@ -1175,7 +1191,7 @@ static bool hasUnsafePrintfStringArg(const CallExpr &Node, ASTContext &Ctx,
   // We don't really recognize this "normal" printf, the only thing we
   // can do is to require all pointers to be null-terminated:
   for (const auto *Arg : Node.arguments())
-    if (Arg->getType()->isPointerType() && !isNullTermPointer(Arg)) {
+    if (Arg->getType()->isPointerType() && !isNullTermPointer(Arg, Ctx)) {
       Result.addNode(Tag, DynTypedNode::create(*Arg));
       return true;
     }
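The motivating pattern appears in the new warn-unsafe-buffer-usage-fold-conditional.cpp test near the end of this patch; a reduced, hedged sketch of what the fold now lets through warning-free under -Wunsafe-buffer-usage:

```c++
#include <cstdio>

// With DEBUG being a compile-time constant, the condition of the ?: folds,
// so the chosen arm is a string literal and isNullTermPointer() can prove
// the %s argument is null-terminated.
constexpr bool DEBUG = false;

void report(long long value) {
  std::printf("value: %s %lld\n", DEBUG ? "debug" : "", value);
}
```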
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
index a391d7e70ace7..5ab1d0e05cf8a 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h
+++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
@@ -405,6 +405,19 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
     return Address(baseAddr, destType, addr.getAlignment());
   }

+  Address createDerivedClassAddr(mlir::Location loc, Address addr,
+                                 mlir::Type destType, unsigned offset,
+                                 bool assumeNotNull) {
+    if (destType == addr.getElementType())
+      return addr;
+
+    cir::PointerType ptrTy = getPointerTo(destType);
+    auto derivedAddr =
+        cir::DerivedClassAddrOp::create(*this, loc, ptrTy, addr.getPointer(),
+                                        mlir::APInt(64, offset), assumeNotNull);
+    return Address(derivedAddr, destType, addr.getAlignment());
+  }
+
   mlir::Value createVTTAddrPoint(mlir::Location loc, mlir::Type retTy,
                                  mlir::Value addr, uint64_t offset) {
     return cir::VTTAddrPointOp::create(*this, loc, retTy,
diff --git a/clang/lib/CIR/CodeGen/CIRGenClass.cpp b/clang/lib/CIR/CodeGen/CIRGenClass.cpp
index a8296782ebc40..89c4696b9da94 100644
--- a/clang/lib/CIR/CodeGen/CIRGenClass.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenClass.cpp
@@ -1110,6 +1110,25 @@ mlir::Value CIRGenFunction::getVTTParameter(GlobalDecl gd, bool forVirtualBase,
   }
 }

+Address CIRGenFunction::getAddressOfDerivedClass(
+    mlir::Location loc, Address baseAddr, const CXXRecordDecl *derived,
+    llvm::iterator_range<CastExpr::path_const_iterator> path,
+    bool nullCheckValue) {
+  assert(!path.empty() && "Base path should not be empty!");
+
+  QualType derivedTy = getContext().getCanonicalTagType(derived);
+  mlir::Type derivedValueTy = convertType(derivedTy);
+  CharUnits nonVirtualOffset =
+      cgm.computeNonVirtualBaseClassOffset(derived, path);
+
+  // Note that in OG, no offset (nonVirtualOffset.getQuantity() == 0) means it
+  // just gives the address back. In CIR a `cir.derived_class_addr` is created
+  // and made into a nop later on during lowering.
+  return builder.createDerivedClassAddr(loc, baseAddr, derivedValueTy,
+                                        nonVirtualOffset.getQuantity(),
+                                        /*assumeNotNull=*/!nullCheckValue);
+}
+
 Address CIRGenFunction::getAddressOfBaseClass(
     Address value, const CXXRecordDecl *derived,
     llvm::iterator_range<CastExpr::path_const_iterator> path,
diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index d35bb0af0de14..8607558c1cf7d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -1301,7 +1301,6 @@ LValue CIRGenFunction::emitCastLValue(const CastExpr *e) {
   case CK_NonAtomicToAtomic:
   case CK_AtomicToNonAtomic:
   case CK_ToUnion:
-  case CK_BaseToDerived:
   case CK_ObjCObjectLValueCast:
   case CK_VectorSplat:
   case CK_ConstructorConversion:
@@ -1336,6 +1335,7 @@ LValue CIRGenFunction::emitCastLValue(const CastExpr *e) {
                     lv.getAddress().getAlignment()),
                 e->getType(), lv.getBaseInfo());
   }
+
   case CK_LValueBitCast: {
     // This must be a reinterpret_cast (or c-style equivalent).
     const auto *ce = cast<ExplicitCastExpr>(e);
@@ -1387,6 +1387,22 @@ LValue CIRGenFunction::emitCastLValue(const CastExpr *e) {
     return makeAddrLValue(baseAddr, e->getType(), lv.getBaseInfo());
   }

+  case CK_BaseToDerived: {
+    const auto *derivedClassDecl = e->getType()->castAsCXXRecordDecl();
+    LValue lv = emitLValue(e->getSubExpr());
+
+    // Perform the base-to-derived conversion
+    Address derived = getAddressOfDerivedClass(
+        getLoc(e->getSourceRange()), lv.getAddress(), derivedClassDecl,
+        e->path(), /*NullCheckValue=*/false);
+    // C++11 [expr.static.cast]p2: Behavior is undefined if a downcast is
+    // performed and the object is not of the derived type.
+    assert(!cir::MissingFeatures::sanitizers());
+
+    assert(!cir::MissingFeatures::opTBAA());
+    return makeAddrLValue(derived, e->getType(), lv.getBaseInfo());
+  }
+
   case CK_ZeroToOCLOpaqueType:
     llvm_unreachable("NULL to OpenCL opaque type lvalue cast is not valid");
   }
@@ -1782,11 +1798,7 @@ CIRGenCallee CIRGenFunction::emitDirectCallee(const GlobalDecl &gd) {
   const auto *fd = cast<FunctionDecl>(gd.getDecl());

   if (unsigned builtinID = fd->getBuiltinID()) {
-    if (fd->getAttr<AsmLabelAttr>()) {
-      cgm.errorNYI("AsmLabelAttr");
-    }
-
-    StringRef ident = fd->getName();
+    StringRef ident = cgm.getMangledName(gd);
     std::string fdInlineName = (ident + ".inline").str();

     bool isPredefinedLibFunction =
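The new CK_BaseToDerived lvalue path above also covers reference downcasts; a minimal sketch of the source pattern it handles, mirroring the castBReftoXRef case in the new base-to-derived.cpp test (layout assumptions as before):

```c++
struct A { int a; };
struct B { int b; };
struct X : A, B { int x; };

// A reference downcast goes through the lvalue CK_BaseToDerived path; since
// a reference cannot be null, the emitted cir.derived_class_addr carries the
// `nonnull` flag and lowers to a plain negative-offset GEP with no select.
X &asX(B &b) { return static_cast<X &>(b); }
```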
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
index ce95607bd468d..f777562ba6309 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
@@ -168,6 +168,15 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> {
     return emitLoadOfLValue(e);
   }

+  mlir::Value VisitAddrLabelExpr(const AddrLabelExpr *e) {
+    auto func = cast<cir::FuncOp>(cgf.curFn);
+    auto blockInfoAttr = cir::BlockAddrInfoAttr::get(
+        &cgf.getMLIRContext(), func.getSymName(), e->getLabel()->getName());
+    return cir::BlockAddressOp::create(builder, cgf.getLoc(e->getSourceRange()),
+                                       cgf.convertType(e->getType()),
+                                       blockInfoAttr);
+  }
+
   mlir::Value VisitIntegerLiteral(const IntegerLiteral *e) {
     mlir::Type type = cgf.convertType(e->getType());
     return cir::ConstantOp::create(builder, cgf.getLoc(e->getExprLoc()),
@@ -1972,6 +1981,20 @@ mlir::Value ScalarExprEmitter::VisitCastExpr(CastExpr *ce) {
     return builder.createIntToPtr(middleVal, destCIRTy);
   }
+  case CK_BaseToDerived: {
+    const CXXRecordDecl *derivedClassDecl = destTy->getPointeeCXXRecordDecl();
+    assert(derivedClassDecl && "BaseToDerived arg isn't a C++ object pointer!");
+    Address base = cgf.emitPointerWithAlignment(subExpr);
+    Address derived = cgf.getAddressOfDerivedClass(
+        cgf.getLoc(ce->getSourceRange()), base, derivedClassDecl, ce->path(),
+        cgf.shouldNullCheckClassCastValue(ce));
+
+    // C++11 [expr.static.cast]p11: Behavior is undefined if a downcast is
+    // performed and the object is not of the derived type.
+    assert(!cir::MissingFeatures::sanitizers());
+
+    return cgf.getAsNaturalPointerTo(derived, ce->getType()->getPointeeType());
+  }
   case CK_UncheckedDerivedToBase:
   case CK_DerivedToBase: {
     // The EmitPointerWithAlignment path does this fine; just discard
@@ -1979,7 +2002,6 @@ mlir::Value ScalarExprEmitter::VisitCastExpr(CastExpr *ce) {
     return cgf.getAsNaturalPointerTo(cgf.emitPointerWithAlignment(ce),
                                      ce->getType()->getPointeeType());
   }
-
   case CK_Dynamic: {
     Address v = cgf.emitPointerWithAlignment(subExpr);
     const auto *dce = cast<CXXDynamicCastExpr>(ce);
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index 2dddf26981105..b22bf2d87fc10 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -823,6 +823,11 @@ class CIRGenFunction : public CIRGenTypeCache {
                                llvm::iterator_range<CastExpr::path_const_iterator> path,
                                bool nullCheckValue, SourceLocation loc);

+  Address getAddressOfDerivedClass(
+      mlir::Location loc, Address baseAddr, const CXXRecordDecl *derived,
+      llvm::iterator_range<CastExpr::path_const_iterator> path,
+      bool nullCheckValue);
+
   /// Return the VTT parameter that should be passed to a base
   /// constructor/destructor with virtual bases.
   /// FIXME: VTTs are Itanium ABI-specific, so the definition should move
diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
index 9ac5efe0e41c7..22aada882defc 100644
--- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
@@ -1912,22 +1912,45 @@ mlir::LogicalResult cir::FuncOp::verify() {

   llvm::SmallSet<llvm::StringRef, 4> labels;
   llvm::SmallSet<llvm::StringRef, 4> gotos;
-
+  llvm::SmallSet<llvm::StringRef, 4> blockAddresses;
+  bool invalidBlockAddress = false;
   getOperation()->walk([&](mlir::Operation *op) {
     if (auto lab = dyn_cast<cir::LabelOp>(op)) {
       labels.insert(lab.getLabel());
     } else if (auto goTo = dyn_cast<cir::GotoOp>(op)) {
       gotos.insert(goTo.getLabel());
+    } else if (auto blkAdd = dyn_cast<cir::BlockAddressOp>(op)) {
+      if (blkAdd.getBlockAddrInfoAttr().getFunc().getAttr() != getSymName()) {
+        // Stop the walk early, no need to continue
+        invalidBlockAddress = true;
+        return mlir::WalkResult::interrupt();
+      }
+      blockAddresses.insert(blkAdd.getBlockAddrInfoAttr().getLabel());
     }
+    return mlir::WalkResult::advance();
   });

+  if (invalidBlockAddress)
+    return emitOpError() << "blockaddress references a different function";
+
+  llvm::SmallSet<llvm::StringRef, 4> mismatched;
   if (!labels.empty() || !gotos.empty()) {
-    llvm::SmallSet<llvm::StringRef, 4> mismatched =
-        llvm::set_difference(gotos, labels);
+    mismatched = llvm::set_difference(gotos, labels);

     if (!mismatched.empty())
       return emitOpError() << "goto/label mismatch";
   }
+
+  mismatched.clear();
+
+  if (!labels.empty() || !blockAddresses.empty()) {
+    mismatched = llvm::set_difference(blockAddresses, labels);
+
+    if (!mismatched.empty())
+      return emitOpError()
+             << "expects an existing label target in the referenced function";
+  }
+
   return success();
 }
diff --git a/clang/lib/CIR/Dialect/Transforms/GotoSolver.cpp b/clang/lib/CIR/Dialect/Transforms/GotoSolver.cpp
index 00972b6976295..d590ccce1f540 100644
--- a/clang/lib/CIR/Dialect/Transforms/GotoSolver.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/GotoSolver.cpp
@@ -8,6 +8,7 @@
 #include "PassDetail.h"
 #include "clang/CIR/Dialect/IR/CIRDialect.h"
 #include "clang/CIR/Dialect/Passes.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/Support/TimeProfiler.h"
"llvm/Support/TimeProfiler.h" #include @@ -30,17 +31,29 @@ static void process(cir::FuncOp func) { mlir::OpBuilder rewriter(func.getContext()); llvm::StringMap labels; llvm::SmallVector gotos; + llvm::SmallSet blockAddrLabel; func.getBody().walk([&](mlir::Operation *op) { if (auto lab = dyn_cast(op)) { - // Will construct a string copy inplace. Safely erase the label labels.try_emplace(lab.getLabel(), lab->getBlock()); - lab.erase(); } else if (auto goTo = dyn_cast(op)) { gotos.push_back(goTo); + } else if (auto blockAddr = dyn_cast(op)) { + blockAddrLabel.insert(blockAddr.getBlockAddrInfo().getLabel()); } }); + for (auto &lab : labels) { + StringRef labelName = lab.getKey(); + Block *block = lab.getValue(); + if (!blockAddrLabel.contains(labelName)) { + // erase the LabelOp inside the block if safe + if (auto lab = dyn_cast(&block->front())) { + lab.erase(); + } + } + } + for (auto goTo : gotos) { mlir::OpBuilder::InsertionGuard guard(rewriter); rewriter.setInsertionPoint(goTo); diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index d88a4ad76f27b..d43a462a25092 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -1360,6 +1360,41 @@ mlir::LogicalResult CIRToLLVMBaseClassAddrOpLowering::matchAndRewrite( return mlir::success(); } +mlir::LogicalResult CIRToLLVMDerivedClassAddrOpLowering::matchAndRewrite( + cir::DerivedClassAddrOp derivedClassOp, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + const mlir::Type resultType = + getTypeConverter()->convertType(derivedClassOp.getType()); + mlir::Value baseAddr = adaptor.getBaseAddr(); + // The offset is set in the operation as an unsigned value, but it must be + // applied as a negative offset. 
+  int64_t offsetVal = -(adaptor.getOffset().getZExtValue());
+  if (offsetVal == 0) {
+    // If the offset is zero, we can just return the base address.
+    rewriter.replaceOp(derivedClassOp, baseAddr);
+    return mlir::success();
+  }
+  llvm::SmallVector<mlir::LLVM::GEPArg> offset = {offsetVal};
+  mlir::Type byteType = mlir::IntegerType::get(resultType.getContext(), 8,
+                                               mlir::IntegerType::Signless);
+  if (derivedClassOp.getAssumeNotNull()) {
+    rewriter.replaceOpWithNewOp<mlir::LLVM::GEPOp>(
+        derivedClassOp, resultType, byteType, baseAddr, offset,
+        mlir::LLVM::GEPNoWrapFlags::inbounds);
+  } else {
+    mlir::Location loc = derivedClassOp.getLoc();
+    mlir::Value isNull = mlir::LLVM::ICmpOp::create(
+        rewriter, loc, mlir::LLVM::ICmpPredicate::eq, baseAddr,
+        mlir::LLVM::ZeroOp::create(rewriter, loc, baseAddr.getType()));
+    mlir::Value adjusted =
+        mlir::LLVM::GEPOp::create(rewriter, loc, resultType, byteType, baseAddr,
+                                  offset, mlir::LLVM::GEPNoWrapFlags::inbounds);
+    rewriter.replaceOpWithNewOp<mlir::LLVM::SelectOp>(derivedClassOp, isNull,
+                                                      baseAddr, adjusted);
+  }
+  return mlir::success();
+}
+
 mlir::LogicalResult CIRToLLVMATanOpLowering::matchAndRewrite(
     cir::ATanOp op, OpAdaptor adaptor,
     mlir::ConversionPatternRewriter &rewriter) const {
@@ -3802,6 +3837,12 @@ mlir::LogicalResult CIRToLLVMVAArgOpLowering::matchAndRewrite(
   return mlir::success();
 }

+mlir::LogicalResult CIRToLLVMBlockAddressOpLowering::matchAndRewrite(
+    cir::BlockAddressOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  // Lowering of cir.block_address is not implemented yet.
+  return mlir::failure();
+}
+
 std::unique_ptr<mlir::Pass> createConvertCIRToLLVMPass() {
   return std::make_unique<ConvertCIRToLLVMPass>();
 }
diff --git a/clang/lib/Headers/__clang_hip_libdevice_declares.h b/clang/lib/Headers/__clang_hip_libdevice_declares.h
index fa8d918248dd0..fad9c6ca7ffc5 100644
--- a/clang/lib/Headers/__clang_hip_libdevice_declares.h
+++ b/clang/lib/Headers/__clang_hip_libdevice_declares.h
@@ -338,6 +338,23 @@ __device__ __attribute__((const)) __2f16 __ocml_sqrt_2f16(__2f16);
 __device__ __attribute__((const)) __2f16 __ocml_trunc_2f16(__2f16);
 __device__ __attribute__((const)) __2f16 __ocml_pown_2f16(__2f16, __2i16);

+__device__ void __asan_poison_memory_region(const void *addr,
+                                            __SIZE_TYPE__ size);
+__device__ void __asan_unpoison_memory_region(const void *addr,
+                                              __SIZE_TYPE__ size);
+__device__ int __asan_address_is_poisoned(const void *addr);
+__device__ void *__asan_region_is_poisoned(void *beg, __SIZE_TYPE__ size);
+
+#if __has_feature(address_sanitizer)
+#define ASAN_POISON_MEMORY_REGION(addr, size) \
+  __asan_poison_memory_region((addr), (size))
+#define ASAN_UNPOISON_MEMORY_REGION(addr, size) \
+  __asan_unpoison_memory_region((addr), (size))
+#else
+#define ASAN_POISON_MEMORY_REGION(addr, size) ((void)(addr), (void)(size))
+#define ASAN_UNPOISON_MEMORY_REGION(addr, size) ((void)(addr), (void)(size))
+#endif
+
 #ifdef __cplusplus
 } // extern "C"
 #endif
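A hedged usage sketch for the newly declared device-side ASan helpers; the function and names are illustrative, not part of this patch, and the declarations are assumed to be in scope via the HIP headers. With ASan disabled the macros expand to harmless no-ops:

```c++
// Illustrative HIP device code: quarantine the unused tail of a device-side
// pool so stray accesses are caught when ASan instrumentation is enabled.
__device__ void carve_pool_tail(char *pool, __SIZE_TYPE__ used,
                                __SIZE_TYPE__ capacity) {
  // Mark the unused tail as off-limits...
  ASAN_POISON_MEMORY_REGION(pool + used, capacity - used);
  // ...and re-allow it once the region is handed out again.
  ASAN_UNPOISON_MEMORY_REGION(pool + used, capacity - used);
}
```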
diff --git a/clang/test/CIR/CodeGen/asm-label-inline-builtins.c b/clang/test/CIR/CodeGen/asm-label-inline-builtins.c
new file mode 100644
index 0000000000000..24c9a32e7c41d
--- /dev/null
+++ b/clang/test/CIR/CodeGen/asm-label-inline-builtins.c
@@ -0,0 +1,58 @@
+// RUN: %clang_cc1 -triple x86_64 -fclangir -emit-cir -disable-llvm-passes -o %t-cir.cir %s
+// RUN: FileCheck --input-file=%t-cir.cir %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64 -fclangir -emit-llvm -disable-llvm-passes -o %t-cir.ll %s
+// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64 -emit-llvm -disable-llvm-passes -o %t.ll %s
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG
+
+
+// Verifies that clang-generated *.inline carry the same name at call and callee
+// site, in spite of asm labels.
+
+typedef struct _IO_FILE FILE;
+extern FILE *stdout;
+extern int vprintf (const char *__restrict __format, __builtin_va_list __arg);
+extern int __vfprintf_chk (FILE *__restrict __stream, int __flag,
+                           const char *__restrict __format, __builtin_va_list __ap);
+extern int __vprintf_chk (int __flag, const char *__restrict __format,
+                          __builtin_va_list __ap);
+
+extern __typeof (vprintf) vprintf __asm ("__vprintfieee128");
+extern __typeof (__vfprintf_chk) __vfprintf_chk __asm ("__vfprintf_chkieee128");
+extern __typeof (__vprintf_chk) __vprintf_chk __asm ("__vprintf_chkieee128");
+
+extern __inline __attribute__ ((__always_inline__)) __attribute__ ((__gnu_inline__)) __attribute__ ((__artificial__)) int
+vprintf (const char *__restrict __fmt, __builtin_va_list __ap)
+{
+  return __vfprintf_chk (stdout, 2 - 1, __fmt, __ap);
+}
+
+void test(const char *fmt, __builtin_va_list ap) {
+  vprintf(fmt, ap);
+}
+
+// CIR: cir.func internal private @__vprintfieee128.inline({{.*}}) -> !s32i inline(always)
+// CIR: cir.call @__vfprintf_chkieee128(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}})
+//
+// CIR: cir.func {{.*}} @test({{.*}})
+// CIR: cir.call @__vprintfieee128.inline(%{{.*}}, %{{.*}})
+
+
+// LLVM: define internal i32 @__vprintfieee128.inline({{.*}}) #[[ALWAYS_INLINE_ATTR:.*]] {
+// LLVM: call i32 @__vfprintf_chkieee128(ptr %{{.*}}, i32 1, ptr %{{.*}}, ptr %{{.*}})
+//
+// LLVM: define {{.*}} void @test{{.*}}
+// LLVM: call i32 @__vprintfieee128.inline(ptr %{{.*}}, ptr %{{.*}})
+//
+// LLVM: attributes #[[ALWAYS_INLINE_ATTR]] = { alwaysinline }
+
+// Note: OGCG emits these in the opposite order, but the content is the same.
+
+
+// OGCG: define {{.*}} void @test{{.*}}
+// OGCG: call i32 @__vprintfieee128.inline(ptr noundef %{{.*}}, ptr noundef %{{.*}})
+//
+// OGCG: define internal i32 @__vprintfieee128.inline({{.*}}) #[[ALWAYS_INLINE_ATTR:.*]] {
+// OGCG: call i32 @__vfprintf_chkieee128(ptr noundef %{{.*}}, i32 noundef 1, ptr noundef %{{.*}}, ptr noundef %{{.*}})
+//
+// OGCG: attributes #[[ALWAYS_INLINE_ATTR]] = { alwaysinline {{.*}} }
diff --git a/clang/test/CIR/CodeGen/base-to-derived.cpp b/clang/test/CIR/CodeGen/base-to-derived.cpp
new file mode 100644
index 0000000000000..af9aa0ffd19c1
--- /dev/null
+++ b/clang/test/CIR/CodeGen/base-to-derived.cpp
@@ -0,0 +1,97 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+class A {
+  int a;
+};
+
+class B {
+  int b;
+public:
+  A *getAsA();
+};
+
+class X : public A, public B {
+  int x;
+};
+
+X *castAtoX(A *a) {
+  return static_cast<X *>(a);
+}
+
+// CIR: cir.func {{.*}} @_Z8castAtoXP1A(%[[ARG0:.*]]: !cir.ptr {{.*}})
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["a", init]
+// CIR: cir.store %[[ARG0]], %[[A_ADDR]] : !cir.ptr, !cir.ptr>
+// CIR: %[[A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr>, !cir.ptr
+// CIR: %[[X:.*]] = cir.derived_class_addr %[[A]] : !cir.ptr [0] -> !cir.ptr
+
+// Note: Because the offset is 0, a null check is not needed.
+
+// LLVM: define {{.*}} ptr @_Z8castAtoXP1A(ptr %[[ARG0:.*]])
+// LLVM: %[[A_ADDR:.*]] = alloca ptr
+// LLVM: store ptr %[[ARG0]], ptr %[[A_ADDR]]
+// LLVM: %[[X:.*]] = load ptr, ptr %[[A_ADDR]]
+
+// OGCG: define {{.*}} ptr @_Z8castAtoXP1A(ptr {{.*}} %[[ARG0:.*]])
+// OGCG: %[[A_ADDR:.*]] = alloca ptr
+// OGCG: store ptr %[[ARG0]], ptr %[[A_ADDR]]
+// OGCG: %[[X:.*]] = load ptr, ptr %[[A_ADDR]]
+
+X *castBtoX(B *b) {
+  return static_cast<X *>(b);
+}
+
+// CIR: cir.func {{.*}} @_Z8castBtoXP1B(%[[ARG0:.*]]: !cir.ptr {{.*}})
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["b", init]
+// CIR: cir.store %[[ARG0]], %[[B_ADDR]] : !cir.ptr, !cir.ptr>
+// CIR: %[[B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr>, !cir.ptr
+// CIR: %[[X:.*]] = cir.derived_class_addr %[[B]] : !cir.ptr [4] -> !cir.ptr
+
+// LLVM: define {{.*}} ptr @_Z8castBtoXP1B(ptr %[[ARG0:.*]])
+// LLVM: %[[B_ADDR:.*]] = alloca ptr, i64 1, align 8
+// LLVM: store ptr %[[ARG0]], ptr %[[B_ADDR]], align 8
+// LLVM: %[[B:.*]] = load ptr, ptr %[[B_ADDR]], align 8
+// LLVM: %[[IS_NULL:.*]] = icmp eq ptr %[[B]], null
+// LLVM: %[[B_NON_NULL:.*]] = getelementptr inbounds i8, ptr %[[B]], i32 -4
+// LLVM: %[[X:.*]] = select i1 %[[IS_NULL]], ptr %[[B]], ptr %[[B_NON_NULL]]
+
+// OGCG: define {{.*}} ptr @_Z8castBtoXP1B(ptr {{.*}} %[[ARG0:.*]])
+// OGCG: entry:
+// OGCG: %[[B_ADDR:.*]] = alloca ptr
+// OGCG: store ptr %[[ARG0]], ptr %[[B_ADDR]]
+// OGCG: %[[B:.*]] = load ptr, ptr %[[B_ADDR]]
+// OGCG: %[[IS_NULL:.*]] = icmp eq ptr %[[B]], null
+// OGCG: br i1 %[[IS_NULL]], label %[[LABEL_NULL:.*]], label %[[LABEL_NOTNULL:.*]]
+// OGCG: [[LABEL_NOTNULL]]:
+// OGCG: %[[B_NON_NULL:.*]] = getelementptr inbounds i8, ptr %[[B]], i64 -4
+// OGCG: br label %[[LABEL_END:.*]]
+// OGCG: [[LABEL_NULL]]:
+// OGCG: br label %[[LABEL_END:.*]]
+// OGCG: [[LABEL_END]]:
+// OGCG: %[[X:.*]] = phi ptr [ %[[B_NON_NULL]], %[[LABEL_NOTNULL]] ], [ null, %[[LABEL_NULL]] ]
+
+X &castBReftoXRef(B &b) {
+  return static_cast<X &>(b);
+}
+
+// CIR: cir.func {{.*}} @_Z14castBReftoXRefR1B(%[[ARG0:.*]]: !cir.ptr {{.*}})
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["b", init, const]
+// CIR: cir.store %[[ARG0]], %[[B_ADDR]] : !cir.ptr, !cir.ptr>
+// CIR: %[[B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr>, !cir.ptr
+// CIR: %[[X:.*]] = cir.derived_class_addr %[[B]] : !cir.ptr nonnull [4] -> !cir.ptr
+
+// LLVM: define {{.*}} ptr @_Z14castBReftoXRefR1B(ptr %[[ARG0:.*]])
+// LLVM: %[[B_ADDR:.*]] = alloca ptr
+// LLVM: store ptr %[[ARG0]], ptr %[[B_ADDR]]
+// LLVM: %[[B:.*]] = load ptr, ptr %[[B_ADDR]]
+// LLVM: %[[X:.*]] = getelementptr inbounds i8, ptr %[[B]], i32 -4
+
+// OGCG: define {{.*}} ptr @_Z14castBReftoXRefR1B(ptr {{.*}} %[[ARG0:.*]])
+// OGCG: %[[B_ADDR:.*]] = alloca ptr
+// OGCG: store ptr %[[ARG0]], ptr %[[B_ADDR]]
+// OGCG: %[[B:.*]] = load ptr, ptr %[[B_ADDR]]
+// OGCG: %[[X:.*]] = getelementptr inbounds i8, ptr %[[B]], i64 -4
diff --git a/clang/test/CIR/CodeGen/label-values.c b/clang/test/CIR/CodeGen/label-values.c
new file mode 100644
index 0000000000000..41178e3f62f20
--- /dev/null
+++ b/clang/test/CIR/CodeGen/label-values.c
@@ -0,0 +1,76 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+
+void A(void) {
+  void *ptr = &&LABEL_A;
+LABEL_A:
+  return;
+}
+// CIR: cir.func dso_local @A
+// CIR: [[PTR:%.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["ptr", init] {alignment = 8 : i64}
+// CIR: [[BLOCK:%.*]] = cir.block_address <@A, "LABEL_A"> : !cir.ptr
+// CIR: cir.store align(8) [[BLOCK]], [[PTR]] : !cir.ptr, !cir.ptr>
+// CIR: cir.br ^bb1
+// CIR: ^bb1: // pred: ^bb0
+// CIR: cir.label "LABEL_A"
+// CIR: cir.return
+
+void B(void) {
+LABEL_B:
+  void *ptr = &&LABEL_B;
+}
+
+// CIR: cir.func dso_local @B()
+// CIR: [[PTR:%.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["ptr", init] {alignment = 8 : i64}
+// CIR: cir.br ^bb1
+// CIR: ^bb1:
+// CIR: cir.label "LABEL_B"
+// CIR: [[BLOCK:%.*]] = cir.block_address <@B, "LABEL_B"> : !cir.ptr
+// CIR: cir.store align(8) [[BLOCK]], [[PTR]] : !cir.ptr, !cir.ptr>
+// CIR: cir.return
+
+void C(int x) {
+  void *ptr = (x == 0) ? &&LABEL_A : &&LABEL_B;
+LABEL_A:
+  return;
+LABEL_B:
+  return;
+}
+
+// CIR: cir.func dso_local @C
+// CIR: [[BLOCK1:%.*]] = cir.block_address <@C, "LABEL_A"> : !cir.ptr
+// CIR: [[BLOCK2:%.*]] = cir.block_address <@C, "LABEL_B"> : !cir.ptr
+// CIR: [[COND:%.*]] = cir.select if [[CMP:%.*]] then [[BLOCK1]] else [[BLOCK2]] : (!cir.bool, !cir.ptr, !cir.ptr) -> !cir.ptr
+// CIR: cir.store align(8) [[COND]], [[PTR:%.*]] : !cir.ptr, !cir.ptr>
+// CIR: cir.br ^bb1
+// CIR: ^bb1: // pred: ^bb0
+// CIR: cir.label "LABEL_A"
+// CIR: cir.br ^bb2
+// CIR: ^bb2: // 2 preds: ^bb1, ^bb3
+// CIR: cir.return
+// CIR: ^bb3: // no predecessors
+// CIR: cir.label "LABEL_B"
+// CIR: cir.br ^bb2
+
+void D(void) {
+  void *ptr = &&LABEL_A;
+  void *ptr2 = &&LABEL_A;
+LABEL_A:
+  void *ptr3 = &&LABEL_A;
+  return;
+}
+
+// CIR: cir.func dso_local @D
+// CIR: %[[PTR:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["ptr", init]
+// CIR: %[[PTR2:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["ptr2", init]
+// CIR: %[[PTR3:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["ptr3", init]
+// CIR: %[[BLK1:.*]] = cir.block_address <@D, "LABEL_A"> : !cir.ptr
+// CIR: cir.store align(8) %[[BLK1]], %[[PTR]] : !cir.ptr, !cir.ptr>
+// CIR: %[[BLK2:.*]] = cir.block_address <@D, "LABEL_A"> : !cir.ptr
+// CIR: cir.store align(8) %[[BLK2]], %[[PTR2]] : !cir.ptr, !cir.ptr>
+// CIR: cir.br ^bb1
+// CIR: ^bb1: // pred: ^bb0
+// CIR: cir.label "LABEL_A"
+// CIR: %[[BLK3:.*]] = cir.block_address <@D, "LABEL_A"> : !cir.ptr
+// CIR: cir.store align(8) %[[BLK3]], %[[PTR3]] : !cir.ptr, !cir.ptr>
+// CIR: cir.return
diff --git a/clang/test/CIR/IR/block-address.cir b/clang/test/CIR/IR/block-address.cir
new file mode 100644
index 0000000000000..9d6840819c2d4
--- /dev/null
+++ b/clang/test/CIR/IR/block-address.cir
@@ -0,0 +1,34 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+!void = !cir.void
+
+module {
+  cir.func @block_address() {
+    %0 = cir.block_address <@block_address, "label"> : !cir.ptr
+    cir.br ^bb1
+  ^bb1:
+    cir.label "label"
+    cir.return
+  }
+// CHECK: cir.func @block_address
+// CHECK: %0 = cir.block_address <@block_address, "label"> : !cir.ptr
+// CHECK: cir.br ^bb1
+// CHECK: ^bb1:
+// CHECK: cir.label "label"
+// CHECK: cir.return
+
+cir.func @block_address_inside_scope() -> () {
+  cir.scope {
+    %0 = cir.block_address <@block_address_inside_scope, "label"> : !cir.ptr
+  }
+  cir.br ^bb1
+^bb1:
+  cir.label "label"
+  cir.return
+}
+// CHECK: cir.func @block_address_inside_scope
+// CHECK: cir.scope
+// CHECK: %0 = cir.block_address <@block_address_inside_scope, "label"> : !cir.ptr
+// CHECK: cir.label "label"
+// CHECK: cir.return
+}
diff --git a/clang/test/CIR/IR/invalid-block-address.cir b/clang/test/CIR/IR/invalid-block-address.cir
new file mode 100644
index 0000000000000..4519485c28803
--- /dev/null
+++ b/clang/test/CIR/IR/invalid-block-address.cir
@@ -0,0 +1,21 @@
+// RUN: cir-opt %s -verify-diagnostics -split-input-file
+
+!void = !cir.void
+
+// 
expected-error@+1 {{expects an existing label target in the referenced function}} +cir.func @bad_block_address() -> () { + %0 = cir.block_address <@bad_block_address, "label"> : !cir.ptr + cir.br ^bb1 + ^bb1: + cir.label "wrong_label" + cir.return +} + +// expected-error@+1 {{blockaddress references a different function}} +cir.func @bad_block_func() -> () { + %0 = cir.block_address <@mismatch_func, "label"> : !cir.ptr + cir.br ^bb1 + ^bb1: + cir.label "label" + cir.return +} diff --git a/clang/test/CIR/Transforms/goto_solver.cir b/clang/test/CIR/Transforms/goto_solver.cir new file mode 100644 index 0000000000000..6ae019b44a39e --- /dev/null +++ b/clang/test/CIR/Transforms/goto_solver.cir @@ -0,0 +1,62 @@ +// RUN: cir-opt %s -cir-goto-solver --verify-roundtrip -o - | FileCheck %s + +!void = !cir.void + +cir.func @a(){ + %0 = cir.alloca !cir.ptr, !cir.ptr>, ["ptr", init] {alignment = 8 : i64} + %1 = cir.block_address <@a, "label1"> : !cir.ptr + cir.store align(8) %1, %0 : !cir.ptr, !cir.ptr> + cir.br ^bb1 +^bb1: + cir.label "label1" + cir.br ^bb2 +^bb2: + // This label is not referenced by any blockaddressOp, so it should be removed + cir.label "label2" + cir.return +} + +// CHECK: cir.func @a() +// CHECK: %1 = cir.block_address <@a, "label1"> : !cir.ptr +// CHECK: ^bb1: +// CHECK: cir.label "label1" +// CHECK: cir.br ^bb2 +// CHECK: ^bb2: +// CHECK-NOT: cir.label "label2" + +cir.func @b(){ + %0 = cir.alloca !cir.ptr, !cir.ptr>, ["ptr", init] {alignment = 8 : i64} + %1 = cir.block_address <@b, "label1"> : !cir.ptr + cir.store align(8) %1, %0 : !cir.ptr, !cir.ptr> + cir.goto "label2" +^bb1: + cir.label "label1" + cir.br ^bb2 +^bb2: + // This label is not referenced by any blockaddressOp, so it should be removed + cir.label "label2" + cir.return +} + +// CHECK: cir.func @b() { +// CHECK: %1 = cir.block_address <@b, "label1"> : !cir.ptr +// CHECK: cir.store align(8) %1, {{.*}} : !cir.ptr, !cir.ptr> +// CHECK: cir.br ^bb2 +// CHECK: ^bb1: +// CHECK: cir.label "label1" +// CHECK: cir.br ^bb2 +// CHECK: ^bb2: +// CHECK-NOT: cir.label "label2" + +cir.func @c() { + cir.label "label1" + %0 = cir.alloca !cir.ptr, !cir.ptr>, ["ptr", init] {alignment = 8 : i64} + %1 = cir.block_address <@c, "label1"> : !cir.ptr + cir.store align(8) %1, %0 : !cir.ptr, !cir.ptr> + cir.return +} + +// CHECK: cir.func @c +// CHECK: cir.label "label1" +// CHECK: %1 = cir.block_address <@c, "label1"> : !cir.ptr +// CHECK: cir.store align(8) %1, {{.*}} : !cir.ptr, !cir.ptr> diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-fold-conditional.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fold-conditional.cpp new file mode 100644 index 0000000000000..b4f30b533bc4b --- /dev/null +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fold-conditional.cpp @@ -0,0 +1,31 @@ +// RUN: %clang_cc1 -fsyntax-only -Wno-all -Wunsafe-buffer-usage -verify %s -std=c++20 +// RUN: %clang_cc1 -fsyntax-only -Wno-all -Wunsafe-buffer-usage -verify %s -x c +// expected-no-diagnostics + +typedef struct {} FILE; +int fprintf( FILE* stream, const char* format, ... ); +FILE * stderr; + +#define DEBUG_ASSERT_MESSAGE(name, assertion, label, message, file, line, value) \ + fprintf(stderr, "AssertMacros: %s, %s file: %s, line: %d, value: %lld\n", \ + assertion, (message!=0) ? 
message : "", file, line, (long long) (value)); + + +#define Require(assertion, exceptionLabel) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) { \ + DEBUG_ASSERT_MESSAGE( \ + "DEBUG_ASSERT_COMPONENT_NAME_STRING", \ + #assertion, #exceptionLabel, 0, __FILE__, __LINE__, 0); \ + goto exceptionLabel; \ + } \ + } while ( 0 ) + + +void f(int x, int y) { + Require(x == y, L1); + L1: + return; +} + diff --git a/clang/tools/cir-opt/cir-opt.cpp b/clang/tools/cir-opt/cir-opt.cpp index c4d29a2117c75..ee42015bb38e9 100644 --- a/clang/tools/cir-opt/cir-opt.cpp +++ b/clang/tools/cir-opt/cir-opt.cpp @@ -58,6 +58,10 @@ int main(int argc, char **argv) { return mlir::createHoistAllocasPass(); }); + ::mlir::registerPass([]() -> std::unique_ptr<::mlir::Pass> { + return mlir::createGotoSolverPass(); + }); + mlir::registerTransformsPasses(); return mlir::asMainReturnCode(MlirOptMain( diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h index 747b1a2233d32..c2401c86671d0 100644 --- a/compiler-rt/lib/scudo/standalone/primary64.h +++ b/compiler-rt/lib/scudo/standalone/primary64.h @@ -1394,7 +1394,7 @@ uptr SizeClassAllocator64::releaseToOSMaybe(RegionInfo *Region, Region->FreeListInfo.PushedBlocks) * BlockSize; if (UNLIKELY(BytesInFreeList == 0)) - return false; + return 0; // ==================================================================== // // 1. Check if we have enough free blocks and if it's worth doing a page diff --git a/compiler-rt/test/fuzzer/merge-posix.test b/compiler-rt/test/fuzzer/merge-posix.test index 2721668fb9706..5e342142216f8 100644 --- a/compiler-rt/test/fuzzer/merge-posix.test +++ b/compiler-rt/test/fuzzer/merge-posix.test @@ -14,7 +14,7 @@ RUN: echo ....U. > %tmp/T2/2 RUN: echo ...Z.. > %tmp/T2/3 RUN: echo ...Z.. > %tmp/T2/4 RUN: echo ....E. 
> %tmp/T2/5 -RUN: echo .....R > %tmp/T2/6 +RUN: %python -c "print('.....R' + 'X' * 1024, end='')" > %tmp/T2/6 # Check that we can report an error if file size exceeded RUN: (ulimit -f 1; not %run %t-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=SIGXFSZ) diff --git a/compiler-rt/test/profile/Darwin/instrprof-debug-info-correlate.c b/compiler-rt/test/profile/Darwin/instrprof-debug-info-correlate.c index 46d25a4e386dc..1e9bd11d3f49c 100644 --- a/compiler-rt/test/profile/Darwin/instrprof-debug-info-correlate.c +++ b/compiler-rt/test/profile/Darwin/instrprof-debug-info-correlate.c @@ -7,7 +7,9 @@ // RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t.normal // RUN: llvm-profdata merge -o %t.normal.profdata %t.profraw -// RUN: diff <(llvm-profdata show --all-functions --counts %t.normal.profdata) <(llvm-profdata show --all-functions --counts %t.profdata) +// RUN: llvm-profdata show --all-functions --counts %t.normal.profdata > %t.normal.functions +// RUN: llvm-profdata show --all-functions --counts %t.profdata > %t.functions +// RUN: diff %t.normal.functions %t.functions // RUN: %clang_pgogen -o %t.cov -g -mllvm --profile-correlate=debug-info -mllvm -pgo-function-entry-coverage -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp // RUN: env LLVM_PROFILE_FILE=%t.cov.proflite %run %t.cov @@ -17,7 +19,9 @@ // RUN: env LLVM_PROFILE_FILE=%t.cov.profraw %run %t.cov.normal // RUN: llvm-profdata merge -o %t.cov.normal.profdata %t.cov.profraw -// RUN: diff <(llvm-profdata show --all-functions --counts %t.cov.normal.profdata) <(llvm-profdata show --all-functions --counts %t.cov.profdata) +// RUN: llvm-profdata show --all-functions --counts %t.cov.normal.profdata > %t.cov.normal.functions +// RUN: llvm-profdata show --all-functions --counts %t.cov.profdata > %t.cov.functions +// RUN: diff %t.cov.normal.functions %t.cov.functions // Test debug info correlate with online merging. 
@@ -30,11 +34,15 @@ // RUN: env LLVM_PROFILE_FILE=%t.profdir/%m.proflite %run %t // RUN: llvm-profdata merge -o %t.profdata --debug-info=%t.dSYM %t.profdir/ -// RUN: diff <(llvm-profdata show --all-functions --counts %t.normal.profdata) <(llvm-profdata show --all-functions --counts %t.profdata) +// RUN: llvm-profdata show --all-functions --counts %t.normal.profdata > %t.normal.functions +// RUN: llvm-profdata show --all-functions --counts %t.profdata > %t.functions +// RUN: diff %t.normal.functions %t.functions // RUN: rm -rf %t.profdir && mkdir %t.profdir // RUN: env LLVM_PROFILE_FILE=%t.profdir/%m.cov.proflite %run %t.cov // RUN: env LLVM_PROFILE_FILE=%t.profdir/%m.cov.proflite %run %t.cov // RUN: llvm-profdata merge -o %t.cov.profdata --debug-info=%t.cov.dSYM %t.profdir/ -// RUN: diff <(llvm-profdata show --all-functions --counts %t.cov.normal.profdata) <(llvm-profdata show --all-functions --counts %t.cov.profdata) +// RUN: llvm-profdata show --all-functions --counts %t.cov.normal.profdata > %t.cov.normal.functions +// RUN: llvm-profdata show --all-functions --counts %t.cov.profdata > %t.cov.functions +// RUN: diff %t.cov.normal.functions %t.cov.functions diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index ca66aab3140ee..915c8b4a5c6ce 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -1244,7 +1244,8 @@ void ClauseProcessor::processMapObjects( std::string mapperIdName = typeSpec->name().ToString() + llvm::omp::OmpDefaultMapperName; if (auto *sym = converter.getCurrentScope().FindSymbol(mapperIdName)) { - mapperIdName = converter.mangleName(mapperIdName, sym->owner()); + mapperIdName = + converter.mangleName(mapperIdName, sym->GetUltimate().owner()); } else { mapperIdName = converter.mangleName(mapperIdName, *typeSpec->GetScope()); } diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index ff72d09edeaf3..5af673001f07c 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -2623,8 +2623,8 @@ genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable, typeSpec->name().ToString() + llvm::omp::OmpDefaultMapperName; if (auto *mapperSym = converter.getCurrentScope().FindSymbol(mapperIdName)) - mapperIdName = - converter.mangleName(mapperIdName, mapperSym->owner()); + mapperIdName = converter.mangleName( + mapperIdName, mapperSym->GetUltimate().owner()); else mapperIdName = converter.mangleName(mapperIdName, *typeSpec->GetScope()); diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 4b3c0903b95ec..e9ecec5aae693 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -2038,8 +2038,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { if (beginName.v == llvm::omp::OMPD_master_taskloop || beginName.v == llvm::omp::OMPD_master_taskloop_simd || beginName.v == llvm::omp::OMPD_parallel_master_taskloop || - beginName.v == llvm::omp::OMPD_parallel_master_taskloop_simd || - beginName.v == llvm::omp::Directive::OMPD_target_loop) { + beginName.v == llvm::omp::OMPD_parallel_master_taskloop_simd) { unsigned version{context_.langOptions().OpenMPVersion}; IssueNonConformanceWarning(beginName.v, beginName.source, version); } @@ -3623,8 +3622,8 @@ void OmpAttributeVisitor::IssueNonConformanceWarning(llvm::omp::Directive D, case llvm::omp::OMPD_allocate: setAlternativeStr("ALLOCATORS"); break; - 
case llvm::omp::OMPD_target_loop: - default:; + default: + break; } context_.Warn(common::UsageWarning::OpenMPUsage, source, "%s"_warn_en_US, warnStrOS.str()); diff --git a/flang/test/Lower/OpenMP/declare-mapper.f90 b/flang/test/Lower/OpenMP/declare-mapper.f90 index 9122661a2869a..70aaa6567597f 100644 --- a/flang/test/Lower/OpenMP/declare-mapper.f90 +++ b/flang/test/Lower/OpenMP/declare-mapper.f90 @@ -9,6 +9,8 @@ ! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 %t/omp-declare-mapper-6.f90 -o - | FileCheck %t/omp-declare-mapper-6.f90 ! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 -module-dir %t %t/omp-declare-mapper-7.mod.f90 -o - >/dev/null ! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 -J %t %t/omp-declare-mapper-7.use.f90 -o - | FileCheck %t/omp-declare-mapper-7.use.f90 +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 -module-dir %t %t/omp-declare-mapper-8.mod.f90 -o - >/dev/null +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 -J %t %t/omp-declare-mapper-8.use.f90 -o - | FileCheck %t/omp-declare-mapper-8.use.f90 !--- omp-declare-mapper-1.f90 subroutine declare_mapper_1 @@ -26,7 +28,7 @@ subroutine declare_mapper_1 end type type(my_type2) :: t real :: x, y(nvals) - !CHECK:omp.declare_mapper @[[MY_TYPE_MAPPER:_QQFdeclare_mapper_1my_type\.omp\.default\.mapper]] : [[MY_TYPE:!fir\.type<_QFdeclare_mapper_1Tmy_type\{num_vals:i32,values:!fir\.box>>\}>]] { + !CHECK:omp.declare_mapper @[[MY_TYPE_MAPPER:_QQFdeclare_mapper_1my_type_omp_default_mapper]] : [[MY_TYPE:!fir\.type<_QFdeclare_mapper_1Tmy_type\{num_vals:i32,values:!fir\.box>>\}>]] { !CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<[[MY_TYPE]]>): !CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFdeclare_mapper_1Evar"} : (!fir.ref<[[MY_TYPE]]>) -> (!fir.ref<[[MY_TYPE]]>, !fir.ref<[[MY_TYPE]]>) !CHECK: %[[VAL_2:.*]] = hlfir.designate %[[VAL_1]]#0{"values"} {fortran_attrs = #fir.var_attrs} : (!fir.ref<[[MY_TYPE]]>) -> !fir.ref>>> @@ -153,7 +155,7 @@ subroutine declare_mapper_4 integer :: num end type - !CHECK: omp.declare_mapper @[[MY_TYPE_MAPPER:_QQFdeclare_mapper_4my_type.omp.default.mapper]] : [[MY_TYPE:!fir\.type<_QFdeclare_mapper_4Tmy_type\{num:i32\}>]] + !CHECK: omp.declare_mapper @[[MY_TYPE_MAPPER:_QQFdeclare_mapper_4my_type_omp_default_mapper]] : [[MY_TYPE:!fir\.type<_QFdeclare_mapper_4Tmy_type\{num:i32\}>]] !$omp declare mapper (my_type :: var) map (var%num) type(my_type) :: a @@ -185,9 +187,9 @@ program declare_mapper_5 end type !CHECK: omp.declare_mapper @[[INNER_MAPPER_NAMED:_QQFFuse_innermy_mapper]] : [[MY_TYPE:!fir\.type<_QFTmytype\{x:i32,y:i32\}>]] - !CHECK: omp.declare_mapper @[[INNER_MAPPER_DEFAULT:_QQFFuse_innermytype.omp.default.mapper]] : [[MY_TYPE]] + !CHECK: omp.declare_mapper @[[INNER_MAPPER_DEFAULT:_QQFFuse_innermytype_omp_default_mapper]] : [[MY_TYPE]] !CHECK: omp.declare_mapper @[[OUTER_MAPPER_NAMED:_QQFmy_mapper]] : [[MY_TYPE]] - !CHECK: omp.declare_mapper @[[OUTER_MAPPER_DEFAULT:_QQFmytype.omp.default.mapper]] : [[MY_TYPE]] + !CHECK: omp.declare_mapper @[[OUTER_MAPPER_DEFAULT:_QQFmytype_omp_default_mapper]] : [[MY_TYPE]] !$omp declare mapper(mytype :: var) map(tofrom: var%x) !$omp declare mapper(my_mapper : mytype :: var) map(tofrom: var%y) @@ -325,3 +327,36 @@ program use_module_mapper a%x = 42 !$omp end target end program use_module_mapper + +!--- omp-declare-mapper-8.mod.f90 +! Module with a default DECLARE MAPPER to be compiled separately. 
+module default_mapper_mod + implicit none + type :: dtype + integer :: x + end type dtype + !$omp declare mapper(dtype :: v) map(tofrom: v%x) +end module default_mapper_mod + +!--- omp-declare-mapper-8.use.f90 +! Consumer program that USEs the module and relies on the default mapper. +! CHECK: omp.declare_mapper @{{.*dtype_omp_default_mapper}} : !fir.type<_QMdefault_mapper_modTdtype{x:i32}> +! CHECK: %{{.*}} = omp.map.info {{.*}} map_clauses(tofrom) {{.*}} mapper(@{{.*dtype_omp_default_mapper}}) {{.*}} {name = "a"} +! CHECK: %{{.*}} = omp.map.info {{.*}} map_clauses(tofrom) {{.*}} mapper(@{{.*dtype_omp_default_mapper}}) {{.*}} {name = "a"} +! CHECK: %{{.*}} = omp.map.info {{.*}} map_clauses(implicit, tofrom) {{.*}} mapper(@{{.*dtype_omp_default_mapper}}) {{.*}} {name = "a"} +program use_module_default_mapper + use default_mapper_mod + implicit none + type(dtype) :: a + !$omp target map(a) + a%x = 7 + !$omp end target + + !$omp target map(mapper(default) : a) + a%x = 8 + !$omp end target + + !$omp target + a%x = 8 + !$omp end target +end program use_module_default_mapper diff --git a/flang/test/Lower/OpenMP/map-mapper.f90 b/flang/test/Lower/OpenMP/map-mapper.f90 index 91564bfc7bc46..8934fbb5d6edf 100644 --- a/flang/test/Lower/OpenMP/map-mapper.f90 +++ b/flang/test/Lower/OpenMP/map-mapper.f90 @@ -8,7 +8,7 @@ program p !$omp declare mapper(xx : t1 :: nn) map(to: nn, nn%x) !$omp declare mapper(t1 :: nn) map(from: nn) - !CHECK-LABEL: omp.declare_mapper @_QQFt1.omp.default.mapper : !fir.type<_QFTt1{x:!fir.array<256xi32>}> + !CHECK-LABEL: omp.declare_mapper @_QQFt1_omp_default_mapper : !fir.type<_QFTt1{x:!fir.array<256xi32>}> !CHECK-LABEL: omp.declare_mapper @_QQFxx : !fir.type<_QFTt1{x:!fir.array<256xi32>}> type(t1) :: a, b @@ -20,7 +20,7 @@ program p end do !$omp end target - !CHECK: %[[MAP_B:.*]] = omp.map.info var_ptr(%{{.*}} : {{.*}}, {{.*}}) map_clauses(tofrom) capture(ByRef) mapper(@_QQFt1.omp.default.mapper) -> {{.*}} {name = "b"} + !CHECK: %[[MAP_B:.*]] = omp.map.info var_ptr(%{{.*}} : {{.*}}, {{.*}}) map_clauses(tofrom) capture(ByRef) mapper(@_QQFt1_omp_default_mapper) -> {{.*}} {name = "b"} !CHECK: omp.target map_entries(%[[MAP_B]] -> %{{.*}}, %{{.*}} -> %{{.*}} : {{.*}}, {{.*}}) { !$omp target map(mapper(default) : b) do i = 1, n diff --git a/flang/test/Lower/OpenMP/target.f90 b/flang/test/Lower/OpenMP/target.f90 index 4fa0c1a21c731..8b8a4f50279bb 100644 --- a/flang/test/Lower/OpenMP/target.f90 +++ b/flang/test/Lower/OpenMP/target.f90 @@ -530,7 +530,7 @@ subroutine omp_target_device_ptr use iso_c_binding, only : c_ptr, c_loc type(c_ptr) :: a integer, target :: b - !CHECK: %[[MAP:.*]] = omp.map.info var_ptr({{.*}}) map_clauses(tofrom) capture(ByRef) mapper(@[[CPTR_DEFAULT:_QQM__fortran_builtinsc_ptr\.omp\.default\.mapper]]) -> {{.*}} {name = "a"} + !CHECK: %[[MAP:.*]] = omp.map.info var_ptr({{.*}}) map_clauses(tofrom) capture(ByRef) mapper(@[[CPTR_DEFAULT:_QQM__fortran_builtinsc_ptr_omp_default_mapper]]) -> {{.*}} {name = "a"} !CHECK: omp.target_data map_entries(%[[MAP]]{{.*}}) use_device_ptr({{.*}} -> %[[VAL_1:.*]] : !fir.ref>) !$omp target data map(tofrom: a) use_device_ptr(a) !CHECK: {{.*}} = fir.coordinate_of %[[VAL_1:.*]], __address : (!fir.ref>) -> !fir.ref diff --git a/flang/test/Parser/OpenMP/declare-mapper-unparse.f90 b/flang/test/Parser/OpenMP/declare-mapper-unparse.f90 index b53bf5ce10557..9da6674c3a58d 100644 --- a/flang/test/Parser/OpenMP/declare-mapper-unparse.f90 +++ b/flang/test/Parser/OpenMP/declare-mapper-unparse.f90 @@ -29,7 +29,7 @@ program main !PARSE-TREE: 
OpenMPDeclareMapperConstruct !PARSE-TREE: OmpMapperSpecifier -!PARSE-TREE: string = 'ty.omp.default.mapper' +!PARSE-TREE: string = 'ty_omp_default_mapper' !PARSE-TREE: TypeSpec -> DerivedTypeSpec !PARSE-TREE: Name = 'ty' !PARSE-TREE: Name = 'mapped' diff --git a/flang/test/Parser/OpenMP/openmp6-directive-spellings.f90 b/flang/test/Parser/OpenMP/openmp6-directive-spellings.f90 index 50a38c6494aa6..7a627913f9555 100644 --- a/flang/test/Parser/OpenMP/openmp6-directive-spellings.f90 +++ b/flang/test/Parser/OpenMP/openmp6-directive-spellings.f90 @@ -57,7 +57,7 @@ subroutine f01 !PARSE-TREE: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OpenMPDeclareMapperConstruct -> OmpDirectiveSpecification !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare mapper !PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpMapperSpecifier -!PARSE-TREE: | | string = 't.omp.default.mapper' +!PARSE-TREE: | | string = 't_omp_default_mapper' !PARSE-TREE: | | TypeSpec -> DerivedTypeSpec !PARSE-TREE: | | | Name = 't' !PARSE-TREE: | | Name = 'v' diff --git a/flang/test/Semantics/OpenMP/declare-mapper-symbols.f90 b/flang/test/Semantics/OpenMP/declare-mapper-symbols.f90 index 5d77540aa6453..9a1b86758357f 100644 --- a/flang/test/Semantics/OpenMP/declare-mapper-symbols.f90 +++ b/flang/test/Semantics/OpenMP/declare-mapper-symbols.f90 @@ -13,7 +13,7 @@ program main !! Note, symbols come out in their respective scope, but not in declaration order. !CHECK: mymapper: MapperDetails !CHECK: ty: DerivedType components: x -!CHECK: ty.omp.default.mapper: MapperDetails +!CHECK: ty_omp_default_mapper: MapperDetails !CHECK: DerivedType scope: ty !CHECK: OtherConstruct scope: !CHECK: mapped (OmpMapToFrom) {{.*}} ObjectEntity type: TYPE(ty) diff --git a/flang/test/Semantics/OpenMP/target-loop-still-there.f90 b/flang/test/Semantics/OpenMP/target-loop-still-there.f90 new file mode 100644 index 0000000000000..2d3b1820e23d4 --- /dev/null +++ b/flang/test/Semantics/OpenMP/target-loop-still-there.f90 @@ -0,0 +1,10 @@ +!RUN: %flang_fc1 -fsyntax-only -fopenmp -fopenmp-version=60 -Werror %s | FileCheck --allow-empty %s + +!CHECK-NOT: deprecated +subroutine f00 + implicit none + integer :: i + !$omp target loop + do i = 1, 10 + end do +end diff --git a/libc/cmake/caches/armv6m-none-eabi.cmake b/libc/cmake/caches/armv6m-none-eabi.cmake new file mode 100644 index 0000000000000..1f463ae5c0ead --- /dev/null +++ b/libc/cmake/caches/armv6m-none-eabi.cmake @@ -0,0 +1,8 @@ +set(CMAKE_SYSTEM_PROCESSOR arm CACHE STRING "") +set(RUNTIMES_TARGET_TRIPLE "armv6m-none-eabi" CACHE STRING "") + +foreach(lang C;CXX;ASM) + set(CMAKE_${lang}_FLAGS "-march=armv6m -mcpu=cortex-m0plus -mfloat-abi=soft -Wno-atomic-alignment \"-Dvfprintf(stream, format, vlist)=vprintf(format, vlist)\" \"-Dfprintf(stream, format, ...)=printf(format)\" \"-Dfputs(string, stream)=puts(string)\" -D_LIBCPP_PRINT=1" CACHE STRING "") +endforeach() + +include(${CMAKE_CURRENT_LIST_DIR}/baremetal_common.cmake) diff --git a/libc/cmake/caches/armv7em-none-eabi.cmake b/libc/cmake/caches/armv7em-none-eabi.cmake new file mode 100644 index 0000000000000..afbe9c87dffe1 --- /dev/null +++ b/libc/cmake/caches/armv7em-none-eabi.cmake @@ -0,0 +1,8 @@ +set(CMAKE_SYSTEM_PROCESSOR arm CACHE STRING "") +set(RUNTIMES_TARGET_TRIPLE "armv7em-none-eabi" CACHE STRING "") + +foreach(lang C;CXX;ASM) + set(CMAKE_${lang}_FLAGS "-march=armv7em -mcpu=cortex-m4 -mfloat-abi=soft -Wno-atomic-alignment \"-Dvfprintf(stream, format, vlist)=vprintf(format, vlist)\" \"-Dfprintf(stream, 
format, ...)=printf(format)\" \"-Dfputs(string, stream)=puts(string)\" -D_LIBCPP_PRINT=1" CACHE STRING "") +endforeach() + +include(${CMAKE_CURRENT_LIST_DIR}/baremetal_common.cmake) diff --git a/libc/cmake/caches/armv7m-none-eabi.cmake b/libc/cmake/caches/armv7m-none-eabi.cmake new file mode 100644 index 0000000000000..796adb2f31148 --- /dev/null +++ b/libc/cmake/caches/armv7m-none-eabi.cmake @@ -0,0 +1,8 @@ +set(CMAKE_SYSTEM_PROCESSOR arm CACHE STRING "") +set(RUNTIMES_TARGET_TRIPLE "armv7m-none-eabi" CACHE STRING "") + +foreach(lang C;CXX;ASM) + set(CMAKE_${lang}_FLAGS "-march=armv7m -mcpu=cortex-m4 -mfloat-abi=soft -Wno-atomic-alignment \"-Dvfprintf(stream, format, vlist)=vprintf(format, vlist)\" \"-Dfprintf(stream, format, ...)=printf(format)\" \"-Dfputs(string, stream)=puts(string)\" -D_LIBCPP_PRINT=1" CACHE STRING "") +endforeach() + +include(${CMAKE_CURRENT_LIST_DIR}/baremetal_common.cmake) diff --git a/libc/cmake/caches/armv8.1m.main-none-eabi.cmake b/libc/cmake/caches/armv8.1m.main-none-eabi.cmake new file mode 100644 index 0000000000000..4095facce46ac --- /dev/null +++ b/libc/cmake/caches/armv8.1m.main-none-eabi.cmake @@ -0,0 +1,8 @@ +set(CMAKE_SYSTEM_PROCESSOR arm CACHE STRING "") +set(RUNTIMES_TARGET_TRIPLE "armv8.1m.main-none-eabi" CACHE STRING "") + +foreach(lang C;CXX;ASM) + set(CMAKE_${lang}_FLAGS "-mfloat-abi=hard -march=armv8.1-m.main+mve.fp+fp.dp -mcpu=cortex-m55" CACHE STRING "") +endforeach() + +include(${CMAKE_CURRENT_LIST_DIR}/baremetal_common.cmake) diff --git a/libc/cmake/caches/armv8m.main-none-eabi.cmake b/libc/cmake/caches/armv8m.main-none-eabi.cmake new file mode 100644 index 0000000000000..4b69f6a822e71 --- /dev/null +++ b/libc/cmake/caches/armv8m.main-none-eabi.cmake @@ -0,0 +1,8 @@ +set(CMAKE_SYSTEM_PROCESSOR arm CACHE STRING "") +set(RUNTIMES_TARGET_TRIPLE "armv8m.main-none-eabi" CACHE STRING "") + +foreach(lang C;CXX;ASM) + set(CMAKE_${lang}_FLAGS "-mfloat-abi=softfp -march=armv8m.main+fp+dsp -mcpu=cortex-m33" CACHE STRING "") +endforeach() + +include(${CMAKE_CURRENT_LIST_DIR}/baremetal_common.cmake) diff --git a/libc/cmake/caches/baremetal_common.cmake b/libc/cmake/caches/baremetal_common.cmake new file mode 100644 index 0000000000000..c0d665d790393 --- /dev/null +++ b/libc/cmake/caches/baremetal_common.cmake @@ -0,0 +1,21 @@ +# Expects target triple to be passed as `RUNTIMES_TARGET_TRIPLE` + +set(CMAKE_SYSTEM_NAME Generic CACHE STRING "") +set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY CACHE STRING "") +set(LLVM_ENABLE_RUNTIMES "libc" CACHE STRING "") +set(LLVM_INCLUDE_TESTS OFF CACHE BOOL "") +set(CMAKE_C_COMPILER_WORKS ON CACHE BOOL "") +set(CMAKE_CXX_COMPILER_WORKS ON CACHE BOOL "") +set(CMAKE_SYSROOT "" CACHE STRING "") +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) + +set(CMAKE_C_COMPILER_TARGET ${RUNTIMES_TARGET_TRIPLE} CACHE STRING "") +set(CMAKE_CXX_COMPILER_TARGET ${RUNTIMES_TARGET_TRIPLE} CACHE STRING "") +set(CMAKE_ASM_COMPILER_TARGET ${RUNTIMES_TARGET_TRIPLE} CACHE STRING "") +set(LLVM_DEFAULT_TARGET_TRIPLE ${RUNTIMES_TARGET_TRIPLE} CACHE STRING "") +set(LIBC_TARGET_TRIPLE ${RUNTIMES_TARGET_TRIPLE} CACHE STRING "") + +set(LLVM_LIBC_FULL_BUILD "ON" CACHE BOOL "") diff --git a/libc/cmake/caches/riscv32-unknown-elf.cmake b/libc/cmake/caches/riscv32-unknown-elf.cmake new file mode 100644 index 0000000000000..960fb2bb51a4f --- /dev/null +++ b/libc/cmake/caches/riscv32-unknown-elf.cmake @@ -0,0 +1,4 @@ 
+set(CMAKE_SYSTEM_PROCESSOR RISCV CACHE STRING "")
+set(RUNTIMES_TARGET_TRIPLE "riscv32-unknown-elf" CACHE STRING "")
+
+include(${CMAKE_CURRENT_LIST_DIR}/baremetal_common.cmake)
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index d3bcad470b3e1..5036c9438a503 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -398,9 +398,11 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.wchar.wmemchr
     libc.src.wchar.wcpcpy
     libc.src.wchar.wcpncpy
+    libc.src.wchar.wcstod
     libc.src.wchar.wcstof
     libc.src.wchar.wcstok
     libc.src.wchar.wcstol
+    libc.src.wchar.wcstold
     libc.src.wchar.wcstoll
     libc.src.wchar.wcstoul
     libc.src.wchar.wcstoull
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index faceb9bb4e12d..a524c7f56bed0 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -367,3 +367,17 @@ functions:
     arguments:
       - type: const wchar_t *__restrict
       - type: wchar_t **__restrict
+  - name: wcstod
+    standards:
+      - stdc
+    return_type: double
+    arguments:
+      - type: const wchar_t *__restrict
+      - type: wchar_t **__restrict
+  - name: wcstold
+    standards:
+      - stdc
+    return_type: long double
+    arguments:
+      - type: const wchar_t *__restrict
+      - type: wchar_t **__restrict
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index e3fac9fb80529..e6d9af9eacf73 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -110,6 +110,28 @@ add_entrypoint_object(
     libc.src.errno.errno
 )
 
+add_entrypoint_object(
+  wcstod
+  SRCS
+    wcstod.cpp
+  HDRS
+    wcstod.h
+  DEPENDS
+    libc.src.__support.str_to_float
+    libc.src.errno.errno
+)
+
+add_entrypoint_object(
+  wcstold
+  SRCS
+    wcstold.cpp
+  HDRS
+    wcstold.h
+  DEPENDS
+    libc.src.__support.str_to_float
+    libc.src.errno.errno
+)
+
 add_entrypoint_object(
   wcstok
   SRCS
diff --git a/libc/src/wchar/wcstod.cpp b/libc/src/wchar/wcstod.cpp
new file mode 100644
index 0000000000000..95351c304c0ff
--- /dev/null
+++ b/libc/src/wchar/wcstod.cpp
@@ -0,0 +1,30 @@
+//===-- Implementation of wcstod ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcstod.h"
+#include "src/__support/common.h"
+#include "src/__support/libc_errno.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/str_to_float.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(double, wcstod,
+                   (const wchar_t *__restrict str,
+                    wchar_t **__restrict str_end)) {
+  auto result = internal::strtofloatingpoint<double>(str);
+  if (result.has_error())
+    libc_errno = result.error;
+
+  if (str_end != nullptr)
+    *str_end = const_cast<wchar_t *>(str + result.parsed_len);
+
+  return result.value;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
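The new `wcstod` returns the parsed value, advances `*str_end` past the consumed prefix, and touches `errno` only on error. Below is a minimal standalone sketch of that caller-facing contract, not part of the patch; it exercises the standard `<cwchar>` entry point rather than the internal namespace, and the input literal is purely illustrative.

```cpp
#include <cassert>
#include <cerrno>
#include <cwchar>

int main() {
  const wchar_t *input = L"3.14xyz";
  wchar_t *end = nullptr;
  errno = 0;
  double value = std::wcstod(input, &end);
  assert(value > 3.13 && value < 3.15); // the numeric prefix "3.14" is parsed
  assert(end == input + 4);             // end points at the first unconsumed char, 'x'
  assert(errno == 0);                   // no overflow/underflow, so errno is untouched
  return 0;
}
```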
diff --git a/libc/src/wchar/wcstod.h b/libc/src/wchar/wcstod.h
new file mode 100644
index 0000000000000..ff397b93d405d
--- /dev/null
+++ b/libc/src/wchar/wcstod.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for wcstod ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSTOD_H
+#define LLVM_LIBC_SRC_WCHAR_WCSTOD_H
+
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+double wcstod(const wchar_t *__restrict str, wchar_t **__restrict str_end);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSTOD_H
diff --git a/libc/src/wchar/wcstold.cpp b/libc/src/wchar/wcstold.cpp
new file mode 100644
index 0000000000000..ffbc3f248b883
--- /dev/null
+++ b/libc/src/wchar/wcstold.cpp
@@ -0,0 +1,30 @@
+//===-- Implementation of wcstold -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcstold.h"
+#include "src/__support/common.h"
+#include "src/__support/libc_errno.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/str_to_float.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(long double, wcstold,
+                   (const wchar_t *__restrict str,
+                    wchar_t **__restrict str_end)) {
+  auto result = internal::strtofloatingpoint<long double>(str);
+  if (result.has_error())
+    libc_errno = result.error;
+
+  if (str_end != nullptr)
+    *str_end = const_cast<wchar_t *>(str + result.parsed_len);
+
+  return result.value;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
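`wcstold` is deliberately identical to `wcstod` apart from the `long double` instantiation of the shared `internal::strtofloatingpoint` parser. One observable consequence is the ERANGE behavior on overflow, sketched below as a standalone example against the standard `<cwchar>` API (not part of the patch; the input literal is illustrative).

```cpp
#include <cerrno>
#include <cmath>
#include <cstdio>
#include <cwchar>

int main() {
  // Overflow: the conversion reports ERANGE and returns an infinity,
  // mirroring the libc_errno handling in the implementations above.
  errno = 0;
  long double huge = std::wcstold(L"1e99999", nullptr);
  std::printf("isinf=%d erange=%d\n", std::isinf(huge) ? 1 : 0,
              errno == ERANGE ? 1 : 0);
  return 0;
}
```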
diff --git a/libc/src/wchar/wcstold.h b/libc/src/wchar/wcstold.h
new file mode 100644
index 0000000000000..1525362b33571
--- /dev/null
+++ b/libc/src/wchar/wcstold.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for wcstold -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSTOLD_H
+#define LLVM_LIBC_SRC_WCHAR_WCSTOLD_H
+
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+long double wcstold(const wchar_t *__restrict str,
+                    wchar_t **__restrict str_end);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSTOLD_H
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index 122cad2575327..a62a30fe00124 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -538,5 +538,32 @@ add_libc_test(
   DEPENDS
     libc.src.wchar.wcstof
     libc.test.UnitTest.ErrnoCheckingTest
-    libc.test.UnitTest.LibcFPTestHelpers
+  LINK_LIBRARIES
+    LibcFPTestHelpers
+)
+
+add_libc_test(
+  wcstod_test
+  SUITE
+    libc_wchar_unittests
+  SRCS
+    wcstod_test.cpp
+  DEPENDS
+    libc.src.wchar.wcstod
+    libc.test.UnitTest.ErrnoCheckingTest
+  LINK_LIBRARIES
+    LibcFPTestHelpers
+)
+
+add_libc_test(
+  wcstold_test
+  SUITE
+    libc_wchar_unittests
+  SRCS
+    wcstold_test.cpp
+  DEPENDS
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.uint128
+    libc.src.wchar.wcstold
+    libc.test.UnitTest.ErrnoCheckingTest
 )
diff --git a/libc/test/src/wchar/wcstod_test.cpp b/libc/test/src/wchar/wcstod_test.cpp
new file mode 100644
index 0000000000000..0c2b82cfba898
--- /dev/null
+++ b/libc/test/src/wchar/wcstod_test.cpp
@@ -0,0 +1,586 @@
+//===-- Unittests for wcstod ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcstod.h"
+
+#include "src/__support/FPUtil/FPBits.h"
+#include "test/UnitTest/ErrnoCheckingTest.h"
+#include "test/UnitTest/ErrnoSetterMatcher.h"
+#include "test/UnitTest/RoundingModeUtils.h"
+#include "test/UnitTest/Test.h"
+
+#include <stddef.h>
+
+using LIBC_NAMESPACE::fputil::testing::ForceRoundingModeTest;
+using LIBC_NAMESPACE::fputil::testing::RoundingMode;
+
+using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Fails;
+using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Succeeds;
+
+class LlvmLibcWcstodTest : public LIBC_NAMESPACE::testing::ErrnoCheckingTest,
+                           ForceRoundingModeTest {
+public:
+  void run_test(const wchar_t *inputString, const ptrdiff_t expectedStrLen,
+                const uint64_t expectedRawData, const int expectedErrno = 0) {
+    // expectedRawData is the expected double result as a uint64_t, organized
+    // according to IEEE754:
+    //
+    // +-- 1 Sign Bit                        +-- 52 Mantissa bits
+    // |                                     |
+    // |           +-------------------------+------------------------+
+    // |           |                                                  |
+    // SEEEEEEEEEEEMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM
+    //  |         |
+    //  +----+----+
+    //       |
+    //       +-- 11 Exponent Bits
+    //
+    // This is so that the result can be compared in parts.
+ wchar_t *str_end = nullptr; + + LIBC_NAMESPACE::fputil::FPBits expected_fp = + LIBC_NAMESPACE::fputil::FPBits(expectedRawData); + + double result = LIBC_NAMESPACE::wcstod(inputString, &str_end); + if (expectedErrno == 0) + EXPECT_THAT(result, Succeeds(expected_fp.get_val())); + else + EXPECT_THAT(result, Fails(expectedErrno, expected_fp.get_val())); + EXPECT_EQ(str_end - inputString, expectedStrLen); + } +}; + +TEST_F(LlvmLibcWcstodTest, SimpleTest) { + run_test(L"123", 3, uint64_t(0x405ec00000000000)); + + // This should fail on Eisel-Lemire, forcing a fallback to simple decimal + // conversion. + run_test(L"12345678901234549760", 20, uint64_t(0x43e56a95319d63d8)); + + // Found while looking for difficult test cases here: + // https://github.com/nigeltao/parse-number-fxx-test-data/blob/main/more-test-cases/golang-org-issue-36657.txt + run_test(L"1090544144181609348835077142190", 31, + uint64_t(0x462b8779f2474dfb)); + + run_test(L"0x123", 5, uint64_t(0x4072300000000000)); +} + +// These are tests that have caused problems in the past. +TEST_F(LlvmLibcWcstodTest, SpecificFailures) { + run_test(L"3E70000000000000", 16, uint64_t(0x7FF0000000000000), ERANGE); + run_test(L"358416272e-33", 13, uint64_t(0x3adbbb2a68c9d0b9)); + run_test(L"2.16656806400000023841857910156251e9", 36, + uint64_t(0x41e0246690000001)); + run_test(L"27949676547093071875", 20, uint64_t(0x43f83e132bc608c9)); + run_test( + L"10000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "0000000000e-800", + 806, 0x3ff0000000000000); + run_test( + L"10000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "0000000000e-799", + 806, 0x4024000000000000); + run_test( + L"10000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + 
"000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "00000000000e-800", + 807, 0x4024000000000000); + run_test( + L"10000000000000000000000000000000000000000000000000000000000000000e-64", + 69, 0x3ff0000000000000); + run_test( + L"10000000000000000000000000000000000000000000000000000000000000000000000" + "0000000000000000000000000000000000000000000000000000000000e-128", + 134, 0x3ff0000000000000); + run_test(L"100000000000000000000000000000000000000000000000000000000000000000" + "0000000000000000000000000000000000000000000000000000000000000000000" + "0000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000e-256", + 262, 0x3ff0000000000000); + run_test(L"100000000000000000000000000000000000000000000000000000000000000000" + "0000000000000000000000000000000000000000000000000000000000000000000" + "0000000000000000000000000000000000000000000000000000000000000000000" + "0000000000000000000000000000000000000000000000000000000000000000000" + "0000000000000000000000000000000000000000000000000000000000000000000" + "0000000000000000000000000000000000000000000000000000000000000000000" + "0000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000e-512", + 518, 0x3ff0000000000000); + run_test( + L"10000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000e-1024", + 1031, 0x3ff0000000000000); + run_test( + L"0" + "100000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + 
"000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "00000000000000000e-1024", + 1032, 0x3ff0000000000000); +} + +TEST_F(LlvmLibcWcstodTest, FuzzFailures) { + run_test(L"-\xff\xff\xff\xff\xff\xff\xff\x01", 0, uint64_t(0)); + run_test(L"-.????", 0, uint64_t(0)); + run_test( + L"44444444444444444444444444444444444444444444444444A44444444444444444" + "44444444444*\x99\xff\xff\xff\xff", + 50, uint64_t(0x4a3e68fdd0e0b2d8)); + run_test(L"-NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNKNNNNNNNNNNNNNNNNNN?" + "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN?", + 0, uint64_t(0)); + run_test(L"0x.666E40", 9, uint64_t(0x3fd99b9000000000)); + + // glibc version 2.36 and higher (not tested with lower versions) disagrees + // with this result, but ours is correct for the nearest rounding mode. See + // this bug: https://sourceware.org/bugzilla/show_bug.cgi?id=30220 + run_test(L"0x30000002222225p-1077", 22, uint64_t(0x0006000000444445), ERANGE); + + // This value triggered a bug by having an exponent exactly equal to the + // maximum. The overflow checks would accept a value less than the max value + // as valid and greater than the max value as invalid (and set it to the max), + // but an exponent of exactly max value hit the else condition which is + // intended for underflow and set the exponent to the min exponent. + run_test( + L"18477446000000000000000000000000000005230000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000935166201543003765631683711878842" + "388777446000000000000430037600000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000005238581124701719460000000" + "000000000017194600000000000000000070046000000000000000000000000100000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000002000000000000000" + "000000000000056316837118788423887774460000000000000000000000000000052385" + "811247017194600000000000000000171946000000000000000000700460000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000002000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000523858112470171946000000" + "000000000001719460000000000000000007004600000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "0200000000000000000E608", + 1462, uint64_t(0x7ff0000000000000), ERANGE); + + // Same as above but for hex. 
+ run_test(L"0x0164810157p2047", 17, uint64_t(0x7ff0000000000000), ERANGE); + + // This test ensures that only the correct number of characters is accepted. + // An exponent symbol followed by a sign isn't a valid exponent. + run_test(L"2e+", 1, uint64_t(0x4000000000000000)); + run_test(L"0x2p+", 3, uint64_t(0x4000000000000000)); + + // This bug was in the handling of very large exponents in the exponent + // marker. Previously anything greater than 10,000 would be set to 10,000. + // This caused incorrect behavior if there were more than 10,000 '0's in the + // input number, and then a correspondingly large exponent. This test case has + // 24,744 zeroes. + run_test( + L"0x." + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + 
"000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + 
"000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + 
"000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + 
"000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + 
"000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + 
"000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + 
"000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000fp551615", + 24755, uint64_t(0x7ff0000000000000), ERANGE); +} diff --git a/libc/test/src/wchar/wcstold_test.cpp b/libc/test/src/wchar/wcstold_test.cpp new file mode 100644 index 0000000000000..3a7fdfce3e732 --- /dev/null +++ b/libc/test/src/wchar/wcstold_test.cpp @@ -0,0 +1,262 @@ +//===-- Unittests for wcstold ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/uint128.h"
+#include "src/wchar/wcstold.h"
+
+#include "test/UnitTest/ErrnoCheckingTest.h"
+#include "test/UnitTest/Test.h"
+
+#include <stddef.h>
+
+#if defined(LIBC_TYPES_LONG_DOUBLE_IS_FLOAT64)
+#define SELECT_CONST(val, _, __) val
+#elif defined(LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80)
+#define SELECT_CONST(_, val, __) val
+#elif defined(LIBC_TYPES_LONG_DOUBLE_IS_FLOAT128)
+#define SELECT_CONST(_, __, val) val
+#else
+#error "Unknown long double type"
+#endif
+
+class LlvmLibcWcstoldTest : public LIBC_NAMESPACE::testing::ErrnoCheckingTest {
+public:
+#if defined(LIBC_TYPES_LONG_DOUBLE_IS_FLOAT64)
+  void run_test(const wchar_t *inputString, const ptrdiff_t expectedStrLen,
+                const uint64_t expectedRawData, const int expectedErrno = 0)
+#else
+  void run_test(const wchar_t *inputString, const ptrdiff_t expectedStrLen,
+                const UInt128 expectedRawData, const int expectedErrno = 0)
+#endif
+  {
+    // expectedRawData64 is the expected long double result as a uint64_t,
+    // organized according to the IEEE754 double precision format:
+    //
+    // +-- 1 Sign Bit                        +-- 52 Mantissa bits
+    // |                                     |
+    // |           +-------------------------+------------------------+
+    // |           |                                                  |
+    // SEEEEEEEEEEEMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM
+    //  |         |
+    //  +----+----+
+    //       |
+    //       +-- 11 Exponent Bits
+
+    // expectedRawData80 is the expected long double result as a UInt128,
+    // organized according to the x86 extended precision format:
+    //
+    // +-- 1 Sign Bit
+    // |
+    // |               +-- 1 Integer part bit (1 unless this is a subnormal)
+    // |               |
+    // SEEEEEEEEEEEEEEEIMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM...M
+    //  |             |  |                                                      |
+    //  +------+------+  +----------------------------+-------------------------+
+    //         |                                      |
+    //         +-- 15 Exponent Bits                   +-- 63 Mantissa bits
+
+    // expectedRawData128 is the expected long double result as a UInt128,
+    // organized according to IEEE754 quadruple precision format:
+    //
+    // +-- 1 Sign Bit                   +-- 112 Mantissa bits
+    // |                                |
+    // |               +----------------+---------------------------------------+
+    // |               |                                                        |
+    // SEEEEEEEEEEEEEEEMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM...M
+    //  |             |
+    //  +------+------+
+    //         |
+    //         +-- 15 Exponent Bits
+    wchar_t *str_end = nullptr;
+
+    using FPBits = LIBC_NAMESPACE::fputil::FPBits<long double>;
+    FPBits expected_fp =
+        FPBits(static_cast<FPBits::StorageType>(expectedRawData));
+    const int expected_errno = expectedErrno;
+
+    long double result = LIBC_NAMESPACE::wcstold(inputString, &str_end);
+
+    LIBC_NAMESPACE::fputil::FPBits<long double> actual_fp =
+        LIBC_NAMESPACE::fputil::FPBits<long double>();
+    actual_fp = LIBC_NAMESPACE::fputil::FPBits<long double>(result);
+
+    EXPECT_EQ(str_end - inputString, expectedStrLen);
+
+    EXPECT_EQ(actual_fp.uintval(), expected_fp.uintval());
+    EXPECT_EQ(actual_fp.is_neg(), expected_fp.is_neg());
+    EXPECT_EQ(actual_fp.get_exponent(), expected_fp.get_exponent());
+    EXPECT_EQ(actual_fp.get_mantissa(), expected_fp.get_mantissa());
+    ASSERT_ERRNO_EQ(expected_errno);
+  }
+};
+
+TEST_F(LlvmLibcWcstoldTest, SimpleTest) {
+  run_test(L"123", 3,
+           SELECT_CONST(uint64_t(0x405ec00000000000),
+                        UInt128(0x4005f60000) << 40,
+                        UInt128(0x4005ec0000000000) << 64));
+
+  // This should fail on Eisel-Lemire, forcing a fallback to simple decimal
+  // conversion.
+ run_test(L"12345678901234549760", 20, + SELECT_CONST(uint64_t(0x43e56a95319d63d8), + (UInt128(0x403eab54a9) << 40) + UInt128(0x8ceb1ec400), + (UInt128(0x403e56a95319d63d) << 64) + + UInt128(0x8800000000000000))); + + // Found while looking for difficult test cases here: + // https://github.com/nigeltao/parse-number-fxx-test-data/blob/main/more-test-cases/golang-org-issue-36657.txt + run_test(L"1090544144181609348835077142190", 31, + SELECT_CONST(uint64_t(0x462b8779f2474dfb), + (UInt128(0x4062dc3bcf) << 40) + UInt128(0x923a6fd402), + (UInt128(0x4062b8779f2474df) << 64) + + UInt128(0xa804bfd8c6d5c000))); + + run_test(L"0x123", 5, + SELECT_CONST(uint64_t(0x4072300000000000), + (UInt128(0x4007918000) << 40), + (UInt128(0x4007230000000000) << 64))); +} + +// These are tests that have caused problems for doubles in the past. +TEST_F(LlvmLibcWcstoldTest, Float64SpecificFailures) { + run_test(L"3E70000000000000", 16, + SELECT_CONST(uint64_t(0x7FF0000000000000), + (UInt128(0x7fff800000) << 40), + (UInt128(0x7fff000000000000) << 64)), + ERANGE); + run_test(L"358416272e-33", 13, + SELECT_CONST(uint64_t(0x3adbbb2a68c9d0b9), + (UInt128(0x3fadddd953) << 40) + UInt128(0x464e85c400), + (UInt128(0x3fadbbb2a68c9d0b) << 64) + + UInt128(0x8800e7969e1c5fc8))); + run_test(L"2.16656806400000023841857910156251e9", 36, + SELECT_CONST(uint64_t(0x41e0246690000001), + (UInt128(0x401e812334) << 40) + UInt128(0x8000000400), + (UInt128(0x401e024669000000) << 64) + + UInt128(0x800000000000018))); + run_test(L"27949676547093071875", 20, + SELECT_CONST(uint64_t(0x43f83e132bc608c9), + (UInt128(0x403fc1f099) << 40) + UInt128(0x5e30464402), + (UInt128(0x403f83e132bc608c) << 64) + + UInt128(0x8803000000000000))); +} + +TEST_F(LlvmLibcWcstoldTest, Float80SpecificFailures) { + run_test(L"777777777777777777777777777777777777777777777777777777777777777777" + "7777777777777777777777777777777777", + 100, + SELECT_CONST(uint64_t(0x54ac729b8fcaf734), + (UInt128(0x414ae394dc) << 40) + UInt128(0x7e57b9a0c2), + (UInt128(0x414ac729b8fcaf73) << 64) + + UInt128(0x4184a3d793224129))); +} + +TEST_F(LlvmLibcWcstoldTest, MaxSizeNumbers) { + run_test(L"1.1897314953572317650e4932", 26, + SELECT_CONST(uint64_t(0x7FF0000000000000), + (UInt128(0x7ffeffffff) << 40) + UInt128(0xffffffffff), + (UInt128(0x7ffeffffffffffff) << 64) + + UInt128(0xfffd57322e3f8675)), + SELECT_CONST(ERANGE, 0, 0)); + run_test(L"1.18973149535723176508e4932", 27, + SELECT_CONST(uint64_t(0x7FF0000000000000), + (UInt128(0x7fff800000) << 40), + (UInt128(0x7ffeffffffffffff) << 64) + + UInt128(0xffffd2478338036c)), + SELECT_CONST(ERANGE, ERANGE, 0)); +} + +// These tests check subnormal behavior for 80 bit and 128 bit floats. They will +// be too small for 64 bit floats. 
+TEST_F(LlvmLibcWcstoldTest, SubnormalTests) {
+  run_test(L"1e-4950", 7,
+           SELECT_CONST(uint64_t(0), (UInt128(0x00000000000000000003)),
+                        (UInt128(0x000000000000000000057c9647e1a018))),
+           ERANGE);
+  run_test(L"1.89e-4951", 10,
+           SELECT_CONST(uint64_t(0), (UInt128(0x00000000000000000001)),
+                        (UInt128(0x0000000000000000000109778a006738))),
+           ERANGE);
+  run_test(L"4e-4966", 7,
+           SELECT_CONST(uint64_t(0), (UInt128(0)),
+                        (UInt128(0x00000000000000000000000000000001))),
+           ERANGE);
+}
+
+TEST_F(LlvmLibcWcstoldTest, SmallNormalTests) {
+  run_test(L"3.37e-4932", 10,
+           SELECT_CONST(
+               uint64_t(0), (UInt128(0x1804cf7) << 40) + UInt128(0x908850712),
+               (UInt128(0x10099ee12110a) << 64) + UInt128(0xe24b75c0f50dc0c)),
+           SELECT_CONST(ERANGE, 0, 0));
+}
+
+TEST_F(LlvmLibcWcstoldTest, ComplexHexadecimalTests) {
+  run_test(L"0x1p16383", 9,
+           SELECT_CONST(0x7ff0000000000000, (UInt128(0x7ffe800000) << 40),
+                        (UInt128(0x7ffe000000000000) << 64)),
+           SELECT_CONST(ERANGE, 0, 0));
+  run_test(L"0x123456789abcdef", 17,
+           SELECT_CONST(0x43723456789abcdf,
+                        (UInt128(0x403791a2b3) << 40) + UInt128(0xc4d5e6f780),
+                        (UInt128(0x403723456789abcd) << 64) +
+                            UInt128(0xef00000000000000)));
+  run_test(L"0x123456789abcdef0123456789ABCDEF", 33,
+           SELECT_CONST(0x47723456789abcdf,
+                        (UInt128(0x407791a2b3) << 40) + UInt128(0xc4d5e6f781),
+                        (UInt128(0x407723456789abcd) << 64) +
+                            UInt128(0xef0123456789abce)));
+}
+
+TEST_F(LlvmLibcWcstoldTest, InfTests) {
+  run_test(L"INF", 3,
+           SELECT_CONST(0x7ff0000000000000, (UInt128(0x7fff800000) << 40),
+                        (UInt128(0x7fff000000000000) << 64)));
+  run_test(L"INFinity", 8,
+           SELECT_CONST(0x7ff0000000000000, (UInt128(0x7fff800000) << 40),
+                        (UInt128(0x7fff000000000000) << 64)));
+  run_test(L"-inf", 4,
+           SELECT_CONST(0xfff0000000000000, (UInt128(0xffff800000) << 40),
+                        (UInt128(0xffff000000000000) << 64)));
+}
+
+TEST_F(LlvmLibcWcstoldTest, NaNTests) {
+  run_test(L"NaN", 3,
+           SELECT_CONST(0x7ff8000000000000, (UInt128(0x7fffc00000) << 40),
+                        (UInt128(0x7fff800000000000) << 64)));
+  run_test(L"-nAn", 4,
+           SELECT_CONST(0xfff8000000000000, (UInt128(0xffffc00000) << 40),
+                        (UInt128(0xffff800000000000) << 64)));
+  run_test(L"NaN()", 5,
+           SELECT_CONST(0x7ff8000000000000, (UInt128(0x7fffc00000) << 40),
+                        (UInt128(0x7fff800000000000) << 64)));
+  run_test(L"NaN(1234)", 9,
+           SELECT_CONST(0x7ff80000000004d2,
+                        (UInt128(0x7fffc00000) << 40) + UInt128(0x4d2),
+                        (UInt128(0x7fff800000000000) << 64) + UInt128(0x4d2)));
+  run_test(L"NaN(0xffffffffffff)", 19,
+           SELECT_CONST(0x7ff8ffffffffffff,
+                        (UInt128(0x7fffc000ff) << 40) + UInt128(0xffffffffff),
+                        (UInt128(0x7fff800000000000) << 64) +
+                            UInt128(0xffffffffffff)));
+  run_test(L"NaN(0xfffffffffffff)", 20,
+           SELECT_CONST(0x7fffffffffffffff,
+                        (UInt128(0x7fffc00fff) << 40) + UInt128(0xffffffffff),
+                        (UInt128(0x7fff800000000000) << 64) +
+                            UInt128(0xfffffffffffff)));
+  run_test(L"NaN(0xffffffffffffffff)", 23,
+           SELECT_CONST(0x7fffffffffffffff,
+                        (UInt128(0x7fffffffff) << 40) + UInt128(0xffffffffff),
+                        (UInt128(0x7fff800000000000) << 64) +
+                            UInt128(0xffffffffffffffff)));
+  run_test(L"NaN( 1234)", 3,
+           SELECT_CONST(0x7ff8000000000000, (UInt128(0x7fffc00000) << 40),
+                        (UInt128(0x7fff800000000000) << 64)));
+}
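The tests above express expected results as raw bit patterns so that sign, exponent, and mantissa can be compared independently. As a standalone illustration (not part of the patch), here is how one such binary64 pattern decodes; the constant 0x405ec00000000000 is the expected value the SimpleTest case uses for L"123", and the field extraction mirrors what the FPBits comparisons do.

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  uint64_t raw = 0x405ec00000000000; // expected bit pattern for 123.0
  double value;
  static_assert(sizeof(value) == sizeof(raw), "IEEE754 binary64 assumed");
  std::memcpy(&value, &raw, sizeof(value)); // reinterpret the bits as a double
  uint64_t sign = raw >> 63;                  // 1 sign bit
  uint64_t exponent = (raw >> 52) & 0x7ff;    // 11 biased exponent bits
  uint64_t mantissa = raw & ((1ull << 52) - 1); // 52 mantissa bits
  // For 123.0: exponent 0x405 = 1029 (bias 1023 -> 2^6), mantissa 1.921875.
  std::printf("%g sign=%llu exp=%llu mant=%#llx\n", value,
              (unsigned long long)sign, (unsigned long long)exponent,
              (unsigned long long)mantissa);
  return 0;
}
```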
code-block:: bash $ mkdir build - $ cmake -G Ninja -S llvm -B build -DLLVM_ENABLE_PROJECTS="clang" \ # Configure - -DLLVM_ENABLE_RUNTIMES="libcxx;libcxxabi;libunwind" \ - -DLLVM_RUNTIME_TARGETS="" - $ ninja -C build runtimes # Build - $ ninja -C build check-runtimes # Test - $ ninja -C build install-runtimes # Install + $ cmake -G Ninja -S llvm -B build \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DLLVM_ENABLE_PROJECTS="clang" \ # Configure + -DLLVM_ENABLE_RUNTIMES="libcxx;libcxxabi;libunwind;compiler-rt" \ + -DLLVM_RUNTIME_TARGETS="" + $ ninja -C build runtimes # Build + $ ninja -C build check-runtimes # Test + $ ninja -C build install-runtimes # Install .. note:: - This type of build is also commonly called a "Runtimes build", but we would like to move diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table index e1897949a47e6..ef487fb06dd5e 100644 --- a/libcxx/include/__hash_table +++ b/libcxx/include/__hash_table @@ -1910,6 +1910,8 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __first, const_it __bucket_list_[__next_chash] = __before_first; __chash = __next_chash; } + } else { // When __next is a nullptr we've fully erased the last bucket. Update the bucket list accordingly. + __bucket_list_[__chash] = nullptr; } } diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_range.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_range.pass.cpp index 532413437f6be..81371638143c9 100644 --- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_range.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_range.pass.cpp @@ -57,6 +57,28 @@ int main(int, char**) { assert(c.size() == 0); assert(k == c.end()); } + { // Make sure that we're properly updating the bucket list when we're erasing to the end + std::unordered_map<int, int> m; + m.insert(std::make_pair(1, 1)); + m.insert(std::make_pair(2, 2)); + + { + auto pair = m.equal_range(1); + assert(pair.first != pair.second); + m.erase(pair.first, pair.second); + } + + { + auto pair = m.equal_range(2); + assert(pair.first != pair.second); + m.erase(pair.first, pair.second); + } + + m.insert(std::make_pair(3, 3)); + assert(m.size() == 1); + assert(*m.begin() == std::make_pair(3, 3)); + assert(++m.begin() == m.end()); + } #if TEST_STD_VER >= 11 { typedef std::unordered_map m; + m.insert(std::make_pair(1, 1)); + m.insert(std::make_pair(2, 2)); + + { + auto pair = m.equal_range(1); + assert(pair.first != pair.second); + m.erase(pair.first, pair.second); + } + + { + auto pair = m.equal_range(2); + assert(pair.first != pair.second); + m.erase(pair.first, pair.second); + } + + m.insert(std::make_pair(3, 3)); + assert(m.size() == 1); + assert(*m.begin() == std::make_pair(3, 3)); + assert(++m.begin() == m.end()); + } #if TEST_STD_VER >= 11 { typedef std::unordered_multimap m; + m.insert(1); + m.insert(2); + + { + auto pair = m.equal_range(1); + assert(pair.first != pair.second); + m.erase(pair.first, pair.second); + } + + { + auto pair = m.equal_range(2); + assert(pair.first != pair.second); + m.erase(pair.first, pair.second); + } + + m.insert(3); + assert(m.size() == 1); + assert(*m.begin() == 3); + assert(++m.begin() == m.end()); + } #if TEST_STD_VER >= 11 { typedef std::unordered_multiset<int, std::hash<int>, std::equal_to<int>, min_allocator<int>> C; diff --git a/libcxx/test/std/containers/unord/unord.set/erase_range.pass.cpp b/libcxx/test/std/containers/unord/unord.set/erase_range.pass.cpp index 5fa6e4199f756..1f049a295b8c3 100644 --- a/libcxx/test/std/containers/unord/unord.set/erase_range.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/erase_range.pass.cpp @@ -47,6 +47,28 @@ int main(int, char**) { assert(c.size() == 0); assert(k == c.end()); } + { // Make sure that we're properly updating the bucket list when we're erasing to the end + std::unordered_set<int> m; + m.insert(1); + m.insert(2); + + { + auto pair = m.equal_range(1); + assert(pair.first != pair.second); + m.erase(pair.first, pair.second); + } + + { + auto pair = m.equal_range(2); + assert(pair.first != pair.second); + m.erase(pair.first, pair.second); + } + + m.insert(3); + assert(m.size() == 1); + assert(*m.begin() == 3); + assert(++m.begin() == m.end()); + } #if TEST_STD_VER >= 11 { typedef std::unordered_set<int, std::hash<int>, std::equal_to<int>, min_allocator<int>> C; diff --git a/libcxx/test/std/input.output/file.streams/c.files/gets.compile.fail.cpp b/libcxx/test/std/input.output/file.streams/c.files/gets-removed.verify.cpp similarity index 70% rename from libcxx/test/std/input.output/file.streams/c.files/gets.compile.fail.cpp rename to libcxx/test/std/input.output/file.streams/c.files/gets-removed.verify.cpp index 1a92cc925e2aa..281ef37e92d27 100644 --- a/libcxx/test/std/input.output/file.streams/c.files/gets.compile.fail.cpp +++ b/libcxx/test/std/input.output/file.streams/c.files/gets-removed.verify.cpp @@ -7,15 +7,11 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11 -// test -// gets +// Verify that std::gets has been removed in C++14 and later #include <cstdio> -int main(int, char**) -{ - (void) std::gets((char *) NULL); - - return 0; +void f(char const* str) { + (void)std::gets(str); // expected-error {{no member named 'gets' in namespace 'std'}} } diff --git a/libcxx/test/std/re/re.iter/re.tokiter/re.tokiter.cnstr/array.compile.fail.cpp b/libcxx/test/std/re/re.iter/re.tokiter/re.tokiter.cnstr/array.compile.fail.cpp deleted file mode 100644 index a03fd52c03562..0000000000000 --- a/libcxx/test/std/re/re.iter/re.tokiter/re.tokiter.cnstr/array.compile.fail.cpp +++ /dev/null @@ -1,40 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// - -// class regex_iterator - -// template -// regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, -// const regex_type&& re, -// const int (&submatches)[N], -// regex_constants::match_flag_type m = -// regex_constants::match_default); - -#include -#include -#include -#include "test_macros.h" - -#if TEST_STD_VER < 14 -#error -#endif - -int main(int, char**) -{ - { - std::regex phone_numbers("\\d{3}-(\\d{4})"); - const char phone_book[] = "start 555-1234, 555-2345, 555-3456 end"; - const int indices[] = {-1, 0, 1}; - std::cregex_token_iterator i(std::begin(phone_book), std::end(phone_book)-1, - std::regex("\\d{3}-\\d{4}"), indices); - } - - return 0; -} diff --git a/libcxx/test/std/re/re.iter/re.tokiter/re.tokiter.cnstr/init.compile.fail.cpp b/libcxx/test/std/re/re.iter/re.tokiter/re.tokiter.cnstr/init.compile.fail.cpp deleted file mode 100644 index b6913e6b32d12..0000000000000 --- a/libcxx/test/std/re/re.iter/re.tokiter/re.tokiter.cnstr/init.compile.fail.cpp +++ /dev/null @@ -1,37 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// - -// class regex_iterator - -// regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, -// const regex_type&& re, -// initializer_list submatches, -// regex_constants::match_flag_type m = -// regex_constants::match_default); - -#include -#include -#include "test_macros.h" - -#if TEST_STD_VER < 14 -#error -#endif - -int main(int, char**) -{ - { - std::regex phone_numbers("\\d{3}-(\\d{4})"); - const char phone_book[] = "start 555-1234, 555-2345, 555-3456 end"; - std::cregex_token_iterator i(std::begin(phone_book), std::end(phone_book)-1, - std::regex("\\d{3}-\\d{4}"), {-1, 0, 1}); - } - - return 0; -} diff --git a/libcxx/test/std/re/re.iter/re.tokiter/re.tokiter.cnstr/int.compile.fail.cpp b/libcxx/test/std/re/re.iter/re.tokiter/re.tokiter.cnstr/int.compile.fail.cpp deleted file mode 100644 index 3c39d4983e26c..0000000000000 --- a/libcxx/test/std/re/re.iter/re.tokiter/re.tokiter.cnstr/int.compile.fail.cpp +++ /dev/null @@ -1,36 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// <regex> - -// class regex_iterator - -// regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, -// const regex_type&& re, int submatch = 0, -// regex_constants::match_flag_type m = -// regex_constants::match_default); - -#include <regex> -#include <cassert> -#include "test_macros.h" - -#if TEST_STD_VER < 14 -#error -#endif - -int main(int, char**) -{ - { - std::regex phone_numbers("\\d{3}-\\d{4}"); - const char phone_book[] = "start 555-1234, 555-2345, 555-3456 end"; - std::cregex_token_iterator i(std::begin(phone_book), std::end(phone_book)-1, - std::regex("\\d{3}-\\d{4}"), {-1, 0, 1}); - } - - return 0; -} diff --git a/libcxx/test/std/re/re.iter/re.tokiter/re.tokiter.cnstr/temporary-objects.verify.cpp b/libcxx/test/std/re/re.iter/re.tokiter/re.tokiter.cnstr/temporary-objects.verify.cpp new file mode 100644 index 0000000000000..b1ab0f337de2f --- /dev/null +++ b/libcxx/test/std/re/re.iter/re.tokiter/re.tokiter.cnstr/temporary-objects.verify.cpp @@ -0,0 +1,72 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11 + +// Ensure that we don't allow iterators into temporary std::regex objects. + +// <regex> +// +// class regex_iterator +// +// regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, +// const regex_type&& re, int submatch = 0, +// regex_constants::match_flag_type m = +// regex_constants::match_default); +// +// template <std::size_t N> +// regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, +// const regex_type&& re, +// const int (&submatches)[N], +// regex_constants::match_flag_type m = +// regex_constants::match_default); +// +// regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, +// const regex_type&& re, +// initializer_list<int> submatches, +// regex_constants::match_flag_type m = +// regex_constants::match_default); +// +// template +// regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, +// const regex_type&& re, +// const std::vector<int>& submatches, +// regex_constants::match_flag_type m = +// regex_constants::match_default); + +#include <iterator> +#include <regex> +#include <vector> + +void f() { + std::regex phone_numbers("\\d{3}-\\d{4}"); + const char phone_book[] = "start 555-1234, 555-2345, 555-3456 end"; + + { // int submatch + std::cregex_token_iterator i(std::begin(phone_book), std::end(phone_book) - 1, std::regex("\\d{3}-\\d{4}"), -1); + // expected-error@-1 {{call to deleted constructor of 'std::cregex_token_iterator'}} + } + { // const int (&submatches)[N] + const int indices[] = {-1, 0, 1}; + std::cregex_token_iterator i( + std::begin(phone_book), std::end(phone_book) - 1, std::regex("\\d{3}-\\d{4}"), indices); + // expected-error@-2 {{call to deleted constructor of 'std::cregex_token_iterator'}} + } + { // initializer_list<int> submatches + std::cregex_token_iterator i( + std::begin(phone_book), std::end(phone_book) - 1, std::regex("\\d{3}-\\d{4}"), {-1, 0, 1}); + // expected-error@-2 {{call to deleted constructor of 'std::cregex_token_iterator'}} + } + { // const std::vector<int>& submatches + std::vector<int> v; + v.push_back(-1); + v.push_back(-1);
std::cregex_token_iterator i(std::begin(phone_book), std::end(phone_book) - 1, std::regex("\\d{3}-\\d{4}"), v); + // expected-error@-1 {{call to deleted constructor of 'std::cregex_token_iterator'}} + } +} diff --git a/libcxx/test/std/re/re.iter/re.tokiter/re.tokiter.cnstr/vector.compile.fail.cpp b/libcxx/test/std/re/re.iter/re.tokiter/re.tokiter.cnstr/vector.compile.fail.cpp deleted file mode 100644 index 9b07df9d1a783..0000000000000 --- a/libcxx/test/std/re/re.iter/re.tokiter/re.tokiter.cnstr/vector.compile.fail.cpp +++ /dev/null @@ -1,41 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// - -// class regex_iterator - -// template -// regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, -// const regex_type&& re, -// const std::vector& submatches, -// regex_constants::match_flag_type m = -// regex_constants::match_default); - -#include -#include -#include "test_macros.h" - -#if TEST_STD_VER < 14 -#error -#endif - -int main(int, char**) -{ - { - std::regex phone_numbers("\\d{3}-(\\d{4})"); - const char phone_book[] = "start 555-1234, 555-2345, 555-3456 end"; - std::vector v; - v.push_back(-1); - v.push_back(-1); - std::cregex_token_iterator i(std::begin(phone_book), std::end(phone_book)-1, - std::regex("\\d{3}-\\d{4}"), v); - } - - return 0; -} diff --git a/libcxx/utils/ci/run-buildbot b/libcxx/utils/ci/run-buildbot index d265dddebe11f..7442361627104 100755 --- a/libcxx/utils/ci/run-buildbot +++ b/libcxx/utils/ci/run-buildbot @@ -366,11 +366,12 @@ bootstrapping-build) -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX="${INSTALL_DIR}" \ -DLLVM_ENABLE_PROJECTS="clang;lldb" \ - -DLLVM_ENABLE_RUNTIMES="libcxx;libcxxabi;libunwind" \ + -DLLVM_ENABLE_RUNTIMES="libcxx;libcxxabi;libunwind;compiler-rt" \ -DLLVM_RUNTIME_TARGETS="$(${CXX} --print-target-triple)" \ -DLLVM_HOST_TRIPLE="$(${CXX} --print-target-triple)" \ -DLLVM_TARGETS_TO_BUILD="host" \ -DRUNTIMES_BUILD_ALLOW_DARWIN=ON \ + -DCOMPILER_RT_INCLUDE_TESTS=OFF \ -DLLVM_ENABLE_ASSERTIONS=ON \ -DLLVM_LIT_ARGS="-sv --xunit-xml-output test-results.xml --timeout=1500 --time-tests" diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 9a70c0d19c41d..19b08152ae081 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -2747,9 +2747,9 @@ RelroPaddingSection::RelroPaddingSection(Ctx &ctx) : SyntheticSection(ctx, ".relro_padding", SHT_NOBITS, SHF_ALLOC | SHF_WRITE, 1) {} -PaddingSection::PaddingSection(Ctx &ctx, uint64_t size, OutputSection *parent) - : SyntheticSection(ctx, ".padding", SHT_PROGBITS, SHF_ALLOC, 1), - size(size) { +PaddingSection::PaddingSection(Ctx &ctx, uint64_t amount, OutputSection *parent) + : SyntheticSection(ctx, ".padding", SHT_PROGBITS, SHF_ALLOC, 1) { + size = amount; this->parent = parent; } diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index 38e68110e4bc0..66c866d7e8cde 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -78,8 +78,6 @@ class EhFrameSection final : public SyntheticSection { // allocating one for each EhInputSection. 
llvm::DenseMap offsetToCie; - uint64_t size = 0; - template void addRecords(EhInputSection *s); template void iterateFDEWithLSDAAux(EhInputSection &sec, @@ -127,7 +125,6 @@ class GotSection final : public SyntheticSection { protected: size_t numEntries = 0; uint32_t tlsIndexOff = -1; - uint64_t size = 0; struct AuthEntryInfo { size_t offset; bool isSymbolFunc; @@ -182,7 +179,6 @@ class BssSection final : public SyntheticSection { static bool classof(const SectionBase *s) { return isa(s) && cast(s)->bss; } - uint64_t size; }; class MipsGotSection final : public SyntheticSection { @@ -312,8 +308,6 @@ class MipsGotSection final : public SyntheticSection { // Number of "Header" entries. static const unsigned headerEntriesNum = 2; - uint64_t size = 0; - // Symbol and addend. using GotEntry = std::pair; @@ -407,8 +401,6 @@ class StringTableSection final : public SyntheticSection { private: const bool dynamic; - uint64_t size = 0; - llvm::DenseMap stringMap; SmallVector strings; }; @@ -475,7 +467,6 @@ template class DynamicSection final : public SyntheticSection { private: std::vector> computeContents(); - uint64_t size = 0; }; class RelocationBaseSection : public SyntheticSection { @@ -780,10 +771,8 @@ class RelroPaddingSection final : public SyntheticSection { }; class PaddingSection final : public SyntheticSection { - uint64_t size; - public: - PaddingSection(Ctx &ctx, uint64_t size, OutputSection *parent); + PaddingSection(Ctx &ctx, uint64_t amount, OutputSection *parent); size_t getSize() const override { return size; } void writeTo(uint8_t *buf) override; }; diff --git a/lld/MachO/SectionPriorities.cpp b/lld/MachO/SectionPriorities.cpp index cf657aad5d145..b652d1ee8325f 100644 --- a/lld/MachO/SectionPriorities.cpp +++ b/lld/MachO/SectionPriorities.cpp @@ -27,6 +27,7 @@ #include "llvm/Support/Path.h" #include "llvm/Support/TimeProfiler.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/xxhash.h" #include @@ -246,33 +247,45 @@ DenseMap CallGraphSort::run() { return orderMap; } -std::optional -macho::PriorityBuilder::getSymbolOrCStringPriority(const StringRef key, - InputFile *f) { +void macho::PriorityBuilder::SymbolPriorityEntry::setPriority( + int priority, StringRef objectFile) { + if (!objectFile.empty()) + objectFiles.try_emplace(objectFile, priority); + else + anyObjectFile = std::min(anyObjectFile, priority); +} - auto it = priorities.find(key); - if (it == priorities.end()) - return std::nullopt; - const SymbolPriorityEntry &entry = it->second; +int macho::PriorityBuilder::SymbolPriorityEntry::getPriority( + const InputFile *f) const { if (!f) - return entry.anyObjectFile; + return anyObjectFile; // We don't use toString(InputFile *) here because it returns the full path // for object files, and we only want the basename. - StringRef filename; - if (f->archiveName.empty()) - filename = path::filename(f->getName()); - else - filename = saver().save(path::filename(f->archiveName) + "(" + - path::filename(f->getName()) + ")"); - return std::min(entry.objectFiles.lookup(filename), entry.anyObjectFile); + StringRef basename = path::filename(f->getName()); + StringRef filename = + f->archiveName.empty() + ? 
basename + : saver().save(path::filename(f->archiveName) + "(" + basename + ")"); + return std::min(objectFiles.lookup(filename), anyObjectFile); } std::optional<int> -macho::PriorityBuilder::getSymbolPriority(const Defined *sym) { +macho::PriorityBuilder::getCStringPriority(uint32_t hash, + const InputFile *f) const { + auto it = cStringPriorities.find(hash); + if (it == cStringPriorities.end()) + return std::nullopt; + return it->second.getPriority(f); +} + +std::optional<int> +macho::PriorityBuilder::getSymbolPriority(const Defined *sym) const { if (sym->isAbsolute()) return std::nullopt; - return getSymbolOrCStringPriority(utils::getRootSymbol(sym->getName()), - sym->isec()->getFile()); + auto it = priorities.find(utils::getRootSymbol(sym->getName())); + if (it == priorities.end()) + return std::nullopt; + return it->second.getPriority(sym->isec()->getFile()); } void macho::PriorityBuilder::extractCallGraphProfile() { @@ -307,7 +320,7 @@ void macho::PriorityBuilder::parseOrderFile(StringRef path) { int prio = std::numeric_limits<int>::min(); MemoryBufferRef mbref = *buffer; for (StringRef line : args::getLines(mbref)) { - StringRef objectFile, symbolOrCStrHash; + StringRef objectFile; line = line.take_until([](char c) { return c == '#'; }); // ignore comments line = line.ltrim(); @@ -338,22 +351,16 @@ void macho::PriorityBuilder::parseOrderFile(StringRef path) { } // The rest of the line is either <symbol name> or - // CStringEntryPrefix <cstring hash> + // cStringEntryPrefix <cstring hash> line = line.trim(); - if (line.starts_with(CStringEntryPrefix)) { - StringRef possibleHash = line.drop_front(CStringEntryPrefix.size()); + if (line.consume_front(cStringEntryPrefix)) { uint32_t hash = 0; - if (to_integer(possibleHash, hash)) - symbolOrCStrHash = possibleHash; - } else - symbolOrCStrHash = utils::getRootSymbol(line); - - if (!symbolOrCStrHash.empty()) { - SymbolPriorityEntry &entry = priorities[symbolOrCStrHash]; - if (!objectFile.empty()) - entry.objectFiles.insert(std::make_pair(objectFile, prio)); - else - entry.anyObjectFile = std::min(entry.anyObjectFile, prio); + if (to_integer(line, hash)) + cStringPriorities[hash].setPriority(prio, objectFile); + } else { + StringRef symbol = utils::getRootSymbol(line); + if (!symbol.empty()) + priorities[symbol].setPriority(prio, objectFile); } ++prio; @@ -405,40 +412,39 @@ macho::PriorityBuilder::buildInputSectionPriorities() { return sectionPriorities; } -std::vector<StringPiecePair> macho::PriorityBuilder::buildCStringPriorities( - ArrayRef<CStringInputSection *> inputs) { - // Split the input strings into hold and cold sets. - // Order hot set based on -order_file_cstring for performance improvement; - // TODO: Order cold set of cstrings for compression via BP. - std::vector<std::pair<int, StringPiecePair>> - hotStringPrioritiesAndStringPieces; - std::vector<StringPiecePair> coldStringPieces; - std::vector<StringPiecePair> orderedStringPieces; - +void macho::PriorityBuilder::forEachStringPiece( + ArrayRef<CStringInputSection *> inputs, + std::function<void(CStringInputSection &, StringPiece &, size_t)> f, + bool forceInputOrder, bool computeHash) const { + std::vector<std::tuple<int, CStringInputSection *, size_t>> orderedPieces; + std::vector<std::pair<CStringInputSection *, size_t>> unorderedPieces; for (CStringInputSection *isec : inputs) { for (const auto &[stringPieceIdx, piece] : llvm::enumerate(isec->pieces)) { if (!piece.live) continue; - - std::optional<int> priority = getSymbolOrCStringPriority( - std::to_string(piece.hash), isec->getFile()); - if (!priority) - coldStringPieces.emplace_back(isec, stringPieceIdx); + // Process pieces in input order if we have no cstrings in our orderfile + if (forceInputOrder || cStringPriorities.empty()) { + f(*isec, piece, stringPieceIdx); + continue; + } + uint32_t hash = + computeHash + ?
(xxh3_64bits(isec->getStringRef(stringPieceIdx)) & 0x7fffffff) + : piece.hash; + if (auto priority = getCStringPriority(hash, isec->getFile())) + orderedPieces.emplace_back(*priority, isec, stringPieceIdx); else - hotStringPrioritiesAndStringPieces.emplace_back( - *priority, std::make_pair(isec, stringPieceIdx)); + unorderedPieces.emplace_back(isec, stringPieceIdx); } } - - // Order hot set for perf - llvm::stable_sort(hotStringPrioritiesAndStringPieces); - for (auto &[priority, stringPiecePair] : hotStringPrioritiesAndStringPieces) - orderedStringPieces.push_back(stringPiecePair); - - // TODO: Order cold set for compression - - orderedStringPieces.insert(orderedStringPieces.end(), - coldStringPieces.begin(), coldStringPieces.end()); - - return orderedStringPieces; + if (orderedPieces.empty() && unorderedPieces.empty()) + return; + llvm::stable_sort(orderedPieces, [](const auto &left, const auto &right) { + return std::get<0>(left) < std::get<0>(right); + }); + for (auto &[priority, isec, pieceIdx] : orderedPieces) + f(*isec, isec->pieces[pieceIdx], pieceIdx); + // TODO: Add option to order the remaining cstrings for compression + for (auto &[isec, pieceIdx] : unorderedPieces) + f(*isec, isec->pieces[pieceIdx], pieceIdx); } diff --git a/lld/MachO/SectionPriorities.h b/lld/MachO/SectionPriorities.h index cc4e30fffc600..24d2dbc47e498 100644 --- a/lld/MachO/SectionPriorities.h +++ b/lld/MachO/SectionPriorities.h @@ -16,7 +16,6 @@ namespace lld::macho { using SectionPair = std::pair; -using StringPiecePair = std::pair; class PriorityBuilder { public: @@ -29,7 +28,7 @@ class PriorityBuilder { // // An order file has one entry per line, in the following format: // - // ::[ | CStringEntryPrefix ] + // ::[ | cStringEntryPrefix ] // // and are optional. // If not specified, then that entry tries to match either, @@ -42,7 +41,7 @@ class PriorityBuilder { // lowest-ordered entry (the one nearest to the front of the list.) // // or 2) any cstring literal with the given hash, if the entry has the - // CStringEntryPrefix prefix defined below in the file. is the + // cStringEntryPrefix prefix defined below in the file. is the // hash of cstring literal content. // // Cstring literals are not symbolized, we can't identify them by name @@ -54,6 +53,16 @@ class PriorityBuilder { // The file can also have line comments that start with '#'. void parseOrderFile(StringRef path); + /// Call \p f for each string piece in \p inputs. If there are any cstring + /// literals in the orderfile (and \p forceInputOrder is false) then string + /// pieces are ordered by the orderfile. \p computeHash must be set when + /// \p deduplicateLiterals is false because then the string piece hash is not + /// set. + void forEachStringPiece( + ArrayRef inputs, + std::function f, + bool forceInputOrder = false, bool computeHash = false) const; + // Returns layout priorities for some or all input sections. Sections are laid // out in decreasing order; that is, a higher priority section will be closer // to the beginning of its output section. @@ -66,8 +75,6 @@ class PriorityBuilder { // Each section gets assigned the priority of the highest-priority symbol it // contains. llvm::DenseMap buildInputSectionPriorities(); - std::vector - buildCStringPriorities(ArrayRef); private: // The symbol with the smallest priority should be ordered first in the output @@ -78,13 +85,16 @@ class PriorityBuilder { int anyObjectFile = 0; // The priority given to a matching symbol from a particular object file. 
llvm::DenseMap objectFiles; + void setPriority(int priority, StringRef objectFile); + int getPriority(const InputFile *f) const; }; - const llvm::StringRef CStringEntryPrefix = "CSTR;"; + const llvm::StringRef cStringEntryPrefix = "CSTR;"; - std::optional getSymbolPriority(const Defined *sym); - std::optional getSymbolOrCStringPriority(const StringRef key, - InputFile *f); + std::optional getSymbolPriority(const Defined *sym) const; + std::optional getCStringPriority(uint32_t hash, + const InputFile *f) const; llvm::DenseMap priorities; + llvm::DenseMap cStringPriorities; llvm::MapVector callGraphProfile; }; diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index 187cccbe90dbc..fecc51f912b08 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -1721,26 +1721,24 @@ void CStringSection::writeTo(uint8_t *buf) const { // and don't need this alignment. They will be emitted at some arbitrary address // `A`, but ld64 will treat them as being 16-byte aligned with an offset of // `16 % A`. -static Align getStringPieceAlignment(const CStringInputSection *isec, +static Align getStringPieceAlignment(const CStringInputSection &isec, const StringPiece &piece) { - return llvm::Align(1ULL << llvm::countr_zero(isec->align | piece.inSecOff)); + return llvm::Align(1ULL << llvm::countr_zero(isec.align | piece.inSecOff)); } void CStringSection::finalizeContents() { size = 0; - // TODO: Call buildCStringPriorities() to support cstring ordering when - // deduplication is off, although this may negatively impact build - // performance. - for (CStringInputSection *isec : inputs) { - for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) { - if (!piece.live) - continue; - piece.outSecOff = alignTo(size, getStringPieceAlignment(isec, piece)); - StringRef string = isec->getStringRef(i); - size = piece.outSecOff + string.size() + 1; // account for null terminator - } + priorityBuilder.forEachStringPiece( + inputs, + [&](CStringInputSection &isec, StringPiece &piece, size_t pieceIdx) { + piece.outSecOff = alignTo(size, getStringPieceAlignment(isec, piece)); + StringRef string = isec.getStringRef(pieceIdx); + size = + piece.outSecOff + string.size() + 1; // account for null terminator + }, + /*forceInputOrder=*/false, /*computeHash=*/true); + for (CStringInputSection *isec : inputs) isec->isFinal = true; - } } void DeduplicatedCStringSection::finalizeContents() { @@ -1748,20 +1746,19 @@ void DeduplicatedCStringSection::finalizeContents() { DenseMap strToAlignment; // Used for tail merging only std::vector deduplicatedStrs; - for (const CStringInputSection *isec : inputs) { - for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) { - if (!piece.live) - continue; - auto s = isec->getCachedHashStringRef(i); - assert(isec->align != 0); - auto align = getStringPieceAlignment(isec, piece); - auto [it, wasInserted] = strToAlignment.try_emplace(s, align); - if (config->tailMergeStrings && wasInserted) - deduplicatedStrs.push_back(s); - if (!wasInserted && it->second < align) - it->second = align; - } - } + priorityBuilder.forEachStringPiece( + inputs, + [&](CStringInputSection &isec, StringPiece &piece, size_t pieceIdx) { + auto s = isec.getCachedHashStringRef(pieceIdx); + assert(isec.align != 0); + auto align = getStringPieceAlignment(isec, piece); + auto [it, wasInserted] = strToAlignment.try_emplace(s, align); + if (config->tailMergeStrings && wasInserted) + deduplicatedStrs.push_back(s); + if (!wasInserted && it->second < align) + it->second = align; 
+ }, + /*forceInputOrder=*/true); // Like lexigraphical sort, except we read strings in reverse and take the // longest string first @@ -1801,9 +1798,10 @@ void DeduplicatedCStringSection::finalizeContents() { // Sort the strings for performance and compression size win, and then // assign an offset for each string and save it to the corresponding // StringPieces for easy access. - for (auto &[isec, i] : priorityBuilder.buildCStringPriorities(inputs)) { - auto &piece = isec->pieces[i]; - auto s = isec->getCachedHashStringRef(i); + priorityBuilder.forEachStringPiece(inputs, [&](CStringInputSection &isec, + StringPiece &piece, + size_t pieceIdx) { + auto s = isec.getCachedHashStringRef(pieceIdx); // Any string can be tail merged with itself with an offset of zero uint64_t tailMergeOffset = 0; auto mergeIt = @@ -1829,7 +1827,7 @@ void DeduplicatedCStringSection::finalizeContents() { stringOffsetMap[tailMergedString] = piece.outSecOff; assert(isAligned(strToAlignment.at(tailMergedString), piece.outSecOff)); } - } + }); for (CStringInputSection *isec : inputs) isec->isFinal = true; } diff --git a/lld/test/MachO/order-file-cstring.s b/lld/test/MachO/order-file-cstring.s index 3c6d2a377dc38..d6734308fffdf 100644 --- a/lld/test/MachO/order-file-cstring.s +++ b/lld/test/MachO/order-file-cstring.s @@ -4,32 +4,34 @@ # RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/test.s -o %t/test.o # RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/more-cstrings.s -o %t/more-cstrings.o -# RUN: %lld --deduplicate-strings -arch arm64 -lSystem -e _main -o %t/test-0 %t/test.o %t/more-cstrings.o +# RUN: %lld -arch arm64 -lSystem -e _main -o %t/test-0 %t/test.o %t/more-cstrings.o # RUN: llvm-nm --numeric-sort --format=just-symbols %t/test-0 | FileCheck %s --check-prefix=ORIGIN_SYM # RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/test-0 | FileCheck %s --check-prefix=ORIGIN_SEC -# RUN: %lld --deduplicate-strings -arch arm64 -lSystem -e _main -o %t/test-1 %t/test.o %t/more-cstrings.o -order_file %t/ord-1 +# RUN: %lld -arch arm64 -lSystem -e _main -o %t/test-1 %t/test.o %t/more-cstrings.o -order_file %t/ord-1 # RUN: llvm-nm --numeric-sort --format=just-symbols %t/test-1 | FileCheck %s --check-prefix=ONE_SYM # RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/test-1 | FileCheck %s --check-prefix=ONE_SEC +# RUN: %lld --no-deduplicate-strings -arch arm64 -lSystem -e _main -o %t/test-1-dup %t/test.o %t/more-cstrings.o -order_file %t/ord-1 +# RUN: llvm-nm --numeric-sort --format=just-symbols %t/test-1-dup | FileCheck %s --check-prefix=ONE_SYM +# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/test-1-dup | FileCheck %s --check-prefix=ONE_SEC -# RUN: %lld --deduplicate-strings -arch arm64 -lSystem -e _main -o %t/test-2 %t/test.o %t/more-cstrings.o -order_file %t/ord-2 +# RUN: %lld -arch arm64 -lSystem -e _main -o %t/test-2 %t/test.o %t/more-cstrings.o -order_file %t/ord-2 # RUN: llvm-nm --numeric-sort --format=just-symbols %t/test-2 | FileCheck %s --check-prefix=TWO_SYM # RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/test-2 | FileCheck %s --check-prefix=TWO_SEC -# RUN: %lld --deduplicate-strings -arch arm64 -lSystem -e _main -o %t/test-3 %t/test.o %t/more-cstrings.o -order_file %t/ord-3 +# RUN: %lld -arch arm64 -lSystem -e _main -o %t/test-3 %t/test.o %t/more-cstrings.o -order_file %t/ord-3 # RUN: llvm-nm --numeric-sort --format=just-symbols %t/test-3 | FileCheck %s --check-prefix=THREE_SYM # RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/test-3 | FileCheck %s 
--check-prefix=THREE_SEC -# RUN: %lld --deduplicate-strings -arch arm64 -lSystem -e _main -o %t/test-4 %t/test.o %t/more-cstrings.o -order_file %t/ord-4 +# RUN: %lld -arch arm64 -lSystem -e _main -o %t/test-4 %t/test.o %t/more-cstrings.o -order_file %t/ord-4 # RUN: llvm-nm --numeric-sort --format=just-symbols %t/test-4 | FileCheck %s --check-prefix=FOUR_SYM # RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/test-4 | FileCheck %s --check-prefix=FOUR_SEC # RUN: llvm-readobj --string-dump=__cstring %t/test-4 | FileCheck %s --check-prefix=FOUR_SEC_ESCAPE - # We expect: -# 1) Covered cstring symbols are reordered -# 2) the rest of the cstring symbols remain original relative order within the cstring section +# 1) Covered cstring symbols to be reordered +# 2) the rest of the cstring symbols to remain in the original relative order within the cstring section # ORIGIN_SYM: _local_foo1 # ORIGIN_SYM: _globl_foo2 @@ -58,8 +60,8 @@ CSTR;1496286555 #foo3 CSTR;1343999025 -# ONE_SYM: _globl_foo2 -# ONE_SYM: _local_foo2 +# ONE_SYM-DAG: _globl_foo2 +# ONE_SYM-DAG: _local_foo2 # ONE_SYM: _bar # ONE_SYM: _bar2 # ONE_SYM: _globl_foo3 diff --git a/lldb/bindings/python/CMakeLists.txt b/lldb/bindings/python/CMakeLists.txt index 28a8af8f06319..2ebcf5a8e7aca 100644 --- a/lldb/bindings/python/CMakeLists.txt +++ b/lldb/bindings/python/CMakeLists.txt @@ -60,8 +60,10 @@ endfunction() function(finish_swig_python swig_target lldb_python_bindings_dir lldb_python_target_dir) # Add a Post-Build Event to copy over Python files and create the symlink to # liblldb.so for the Python API(hardlink on Windows). + # Note that Swig-generated code is located one level deeper in the `native` + # module, in order to avoid cyclic imports. add_custom_target(${swig_target} ALL VERBATIM - COMMAND ${CMAKE_COMMAND} -E make_directory ${lldb_python_target_dir} + COMMAND ${CMAKE_COMMAND} -E make_directory ${lldb_python_target_dir}/native/ DEPENDS ${lldb_python_bindings_dir}/lldb.py COMMENT "Python script sym-linking LLDB Python API") @@ -75,6 +77,8 @@ function(finish_swig_python swig_target lldb_python_bindings_dir lldb_python_tar "${LLDB_SOURCE_DIR}/source/Interpreter/embedded_interpreter.py" "${lldb_python_target_dir}") + create_python_package(${swig_target} ${lldb_python_target_dir} "native" FILES) + # Distribute the examples as python packages. create_python_package( ${swig_target} @@ -143,7 +147,7 @@ function(finish_swig_python swig_target lldb_python_bindings_dir lldb_python_tar endif() set(LIBLLDB_SYMLINK_OUTPUT_FILE "_lldb${LLDB_PYTHON_EXT_SUFFIX}") create_relative_symlink(${swig_target} ${LIBLLDB_SYMLINK_DEST} - ${lldb_python_target_dir} ${LIBLLDB_SYMLINK_OUTPUT_FILE}) + ${lldb_python_target_dir}/native/ ${LIBLLDB_SYMLINK_OUTPUT_FILE}) if (NOT WIN32) diff --git a/lldb/bindings/python/python.swig b/lldb/bindings/python/python.swig index b2823f98acac8..3d2caa65f1658 100644 --- a/lldb/bindings/python/python.swig +++ b/lldb/bindings/python/python.swig @@ -50,7 +50,12 @@ Older swig versions will simply ignore this setting. import $module except ImportError: # Relative import should work if we are being loaded by Python. - from . import $module" + # The cpython module built by swig is pushed one level down into + # the native submodule, because at this point the interpreter + # is still constructing the lldb module itself. + # Simply importing anything using `from . import` constitutes + # a cyclic import. + from .native import $module" %enddef // The name of the module to be created.
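The swig fragment above expands `$module` to the generated extension module, which for LLDB is `_lldb`. Combined with the new `native` package created in the CMake change, the generated import logic behaves roughly like the following sketch (a simplified illustration of the pattern, not the verbatim generated code):

    try:
        # Succeeds when the extension module is already known to the
        # interpreter, e.g. when this code runs embedded inside lldb itself.
        import _lldb
    except ImportError:
        # When `lldb` is imported as an ordinary Python package, the package
        # is still being constructed at this point, so `from . import _lldb`
        # would be a cyclic import; the swig-built extension instead lives
        # one level down, in the `lldb.native` submodule.
        from .native import _lldb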
diff --git a/lldb/include/lldb/API/SBFile.h b/lldb/include/lldb/API/SBFile.h index ebdc5607b7942..8cf4fe1b405fa 100644 --- a/lldb/include/lldb/API/SBFile.h +++ b/lldb/include/lldb/API/SBFile.h @@ -27,7 +27,10 @@ class LLDB_API SBFile { SBFile(FileSP file_sp); #ifndef SWIG SBFile(const SBFile &rhs); + LLDB_DEPRECATED_FIXME("Use the constructor that specifies mode instead", + "SBFile(FILE*, const char*, bool)") SBFile(FILE *file, bool transfer_ownership); + SBFile(FILE *file, const char *mode, bool transfer_ownership); #endif SBFile(int fd, const char *mode, bool transfer_ownership); ~SBFile(); diff --git a/lldb/include/lldb/Host/File.h b/lldb/include/lldb/Host/File.h index 7402a2231735a..590c9fa523b29 100644 --- a/lldb/include/lldb/Host/File.h +++ b/lldb/include/lldb/Host/File.h @@ -66,6 +66,9 @@ class File : public IOObject { LLVM_MARK_AS_BITMASK_ENUM(/* largest_value= */ eOpenOptionInvalid) }; + static constexpr OpenOptions OpenOptionsModeMask = + eOpenOptionReadOnly | eOpenOptionWriteOnly | eOpenOptionReadWrite; + static mode_t ConvertOpenOptionsForPOSIXOpen(OpenOptions open_options); static llvm::Expected<OpenOptions> GetOptionsFromMode(llvm::StringRef mode); static bool DescriptorIsValid(int descriptor) { return descriptor >= 0; }; @@ -384,7 +387,7 @@ class NativeFile : public File { NativeFile(); - NativeFile(FILE *fh, bool transfer_ownership); + NativeFile(FILE *fh, OpenOptions options, bool transfer_ownership); NativeFile(int fd, OpenOptions options, bool transfer_ownership); diff --git a/lldb/include/lldb/Host/StreamFile.h b/lldb/include/lldb/Host/StreamFile.h index e37661a9938c0..8b01eeab6f586 100644 --- a/lldb/include/lldb/Host/StreamFile.h +++ b/lldb/include/lldb/Host/StreamFile.h @@ -81,7 +81,8 @@ class LockableStreamFile { LockableStreamFile(StreamFile &stream_file, Mutex &mutex) : m_file_sp(stream_file.GetFileSP()), m_mutex(mutex) {} LockableStreamFile(FILE *fh, bool transfer_ownership, Mutex &mutex) - : m_file_sp(std::make_shared<NativeFile>(fh, transfer_ownership)), + : m_file_sp(std::make_shared<NativeFile>(fh, File::eOpenOptionWriteOnly, + transfer_ownership)), m_mutex(mutex) {} LockableStreamFile(std::shared_ptr<File> file_sp, Mutex &mutex) : m_file_sp(file_sp), m_mutex(mutex) {} diff --git a/lldb/packages/Python/lldbsuite/test/make/Makefile.rules b/lldb/packages/Python/lldbsuite/test/make/Makefile.rules index 0122fe8409c29..55dbd3934860f 100644 --- a/lldb/packages/Python/lldbsuite/test/make/Makefile.rules +++ b/lldb/packages/Python/lldbsuite/test/make/Makefile.rules @@ -339,9 +339,11 @@ endif # library to make ASAN tests work for most users, including the bots.
ifeq "$(OS)" "Darwin" ifneq "$(ASAN_OPTIONS)" "" -LDFLAGS += -Wl,-lto_library -Wl,$(shell dirname $(shell xcrun -find clang))/../lib/libLTO.dylib +ASAN_LDFLAGS = -Wl,-lto_library -Wl,$(shell dirname $(shell xcrun -find clang))/../lib/libLTO.dylib endif endif +LDFLAGS += $(ASAN_LDFLAGS) + OBJECTS = EXE ?= a.out diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py index ac550962cfb85..f85ab1910a2eb 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py @@ -191,6 +191,11 @@ class NotSupportedError(KeyError): class DebugCommunication(object): + @property + def is_stopped(self) -> bool: + """Returns True if the debuggee is stopped, otherwise False.""" + return len(self.thread_stop_reasons) > 0 or self.exit_status is not None + def __init__( self, recv: BinaryIO, @@ -860,7 +865,17 @@ def request_configurationDone(self): response = self._send_recv(command_dict) if response: self.configuration_done_sent = True + stopped_on_entry = self.is_stopped self.request_threads() + if not stopped_on_entry: + # Drop the initial cached threads if we did not stop-on-entry. + # In VSCode, immediately following 'configurationDone', a + # 'threads' request is made to get the initial set of threads, + # specifically the main threads id and name. + # We issue the threads request to mimic this pattern but in our + # tests we don't want to cache the result unless the process is + # actually stopped. + self.threads = None return response def _process_stopped(self): @@ -978,9 +993,10 @@ def request_evaluate(self, expression, frameIndex=0, threadId=None, context=None return [] args_dict = { "expression": expression, - "context": context, "frameId": stackFrame["id"], } + if context: + args_dict["context"] = context command_dict = { "command": "evaluate", "type": "request", diff --git a/lldb/source/API/SBCommandReturnObject.cpp b/lldb/source/API/SBCommandReturnObject.cpp index e78e213aa23af..da7e288e38d28 100644 --- a/lldb/source/API/SBCommandReturnObject.cpp +++ b/lldb/source/API/SBCommandReturnObject.cpp @@ -15,6 +15,7 @@ #include "lldb/API/SBValue.h" #include "lldb/API/SBValueList.h" #include "lldb/Core/StructuredDataImpl.h" +#include "lldb/Host/File.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Utility/ConstString.h" #include "lldb/Utility/Instrumentation.h" @@ -275,14 +276,16 @@ void SBCommandReturnObject::SetImmediateErrorFile(FILE *fh) { void SBCommandReturnObject::SetImmediateOutputFile(FILE *fh, bool transfer_ownership) { LLDB_INSTRUMENT_VA(this, fh, transfer_ownership); - FileSP file = std::make_shared(fh, transfer_ownership); + FileSP file = std::make_shared(fh, File::eOpenOptionWriteOnly, + transfer_ownership); ref().SetImmediateOutputFile(file); } void SBCommandReturnObject::SetImmediateErrorFile(FILE *fh, bool transfer_ownership) { LLDB_INSTRUMENT_VA(this, fh, transfer_ownership); - FileSP file = std::make_shared(fh, transfer_ownership); + FileSP file = std::make_shared(fh, File::eOpenOptionWriteOnly, + transfer_ownership); ref().SetImmediateErrorFile(file); } diff --git a/lldb/source/API/SBDebugger.cpp b/lldb/source/API/SBDebugger.cpp index 5c4c653d95a81..7a4bebfdf998e 100644 --- a/lldb/source/API/SBDebugger.cpp +++ b/lldb/source/API/SBDebugger.cpp @@ -327,8 +327,8 @@ void SBDebugger::SkipAppInitFiles(bool b) { void SBDebugger::SetInputFileHandle(FILE *fh, bool transfer_ownership) { 
LLDB_INSTRUMENT_VA(this, fh, transfer_ownership); if (m_opaque_sp) - m_opaque_sp->SetInputFile( - (FileSP)std::make_shared<NativeFile>(fh, transfer_ownership)); + m_opaque_sp->SetInputFile((FileSP)std::make_shared<NativeFile>( + fh, File::eOpenOptionReadOnly, transfer_ownership)); } SBError SBDebugger::SetInputString(const char *data) { @@ -385,7 +385,8 @@ SBError SBDebugger::SetOutputFile(FileSP file_sp) { void SBDebugger::SetOutputFileHandle(FILE *fh, bool transfer_ownership) { LLDB_INSTRUMENT_VA(this, fh, transfer_ownership); - SetOutputFile((FileSP)std::make_shared<NativeFile>(fh, transfer_ownership)); + SetOutputFile((FileSP)std::make_shared<NativeFile>( + fh, File::eOpenOptionWriteOnly, transfer_ownership)); } SBError SBDebugger::SetOutputFile(SBFile file) { @@ -405,7 +406,8 @@ SBError SBDebugger::SetOutputFile(SBFile file) { void SBDebugger::SetErrorFileHandle(FILE *fh, bool transfer_ownership) { LLDB_INSTRUMENT_VA(this, fh, transfer_ownership); - SetErrorFile((FileSP)std::make_shared<NativeFile>(fh, transfer_ownership)); + SetErrorFile((FileSP)std::make_shared<NativeFile>( + fh, File::eOpenOptionWriteOnly, transfer_ownership)); } SBError SBDebugger::SetErrorFile(FileSP file_sp) { @@ -576,8 +578,10 @@ void SBDebugger::HandleProcessEvent(const SBProcess &process, FILE *err) { LLDB_INSTRUMENT_VA(this, process, event, out, err); - FileSP outfile = std::make_shared<NativeFile>(out, false); - FileSP errfile = std::make_shared<NativeFile>(err, false); + FileSP outfile = + std::make_shared<NativeFile>(out, File::eOpenOptionWriteOnly, false); + FileSP errfile = + std::make_shared<NativeFile>(err, File::eOpenOptionWriteOnly, false); return HandleProcessEvent(process, event, outfile, errfile); } diff --git a/lldb/source/API/SBFile.cpp b/lldb/source/API/SBFile.cpp index 2ae4b1481afbf..56909923d4b2d 100644 --- a/lldb/source/API/SBFile.cpp +++ b/lldb/source/API/SBFile.cpp @@ -39,7 +39,22 @@ SBFile::SBFile() { LLDB_INSTRUMENT_VA(this); } SBFile::SBFile(FILE *file, bool transfer_ownership) { LLDB_INSTRUMENT_VA(this, file, transfer_ownership); - m_opaque_sp = std::make_shared<NativeFile>(file, transfer_ownership); + // For backwards compatibility, this defaults to ReadOnly, as it did previously.
+ m_opaque_sp = std::make_shared(file, File::eOpenOptionReadOnly, + transfer_ownership); +} + +SBFile::SBFile(FILE *file, const char *mode, bool transfer_ownership) { + LLDB_INSTRUMENT_VA(this, file, transfer_ownership); + + auto options = File::GetOptionsFromMode(mode); + if (!options) { + llvm::consumeError(options.takeError()); + return; + } + + m_opaque_sp = + std::make_shared(file, options.get(), transfer_ownership); } SBFile::SBFile(int fd, const char *mode, bool transfer_owndership) { diff --git a/lldb/source/API/SBInstruction.cpp b/lldb/source/API/SBInstruction.cpp index 6755089af39a4..5921511f3b239 100644 --- a/lldb/source/API/SBInstruction.cpp +++ b/lldb/source/API/SBInstruction.cpp @@ -10,8 +10,8 @@ #include "lldb/Utility/Instrumentation.h" #include "lldb/API/SBAddress.h" -#include "lldb/API/SBFrame.h" #include "lldb/API/SBFile.h" +#include "lldb/API/SBFrame.h" #include "lldb/API/SBStream.h" #include "lldb/API/SBTarget.h" @@ -268,7 +268,8 @@ bool SBInstruction::GetDescription(lldb::SBStream &s) { void SBInstruction::Print(FILE *outp) { LLDB_INSTRUMENT_VA(this, outp); - FileSP out = std::make_shared(outp, /*take_ownership=*/false); + FileSP out = std::make_shared(outp, File::eOpenOptionWriteOnly, + /*take_ownership=*/false); Print(out); } diff --git a/lldb/source/API/SBProcess.cpp b/lldb/source/API/SBProcess.cpp index d4be64b815369..14aa9432eed83 100644 --- a/lldb/source/API/SBProcess.cpp +++ b/lldb/source/API/SBProcess.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBProcess.h" +#include "lldb/Host/File.h" #include "lldb/Utility/Instrumentation.h" #include @@ -310,7 +311,8 @@ void SBProcess::ReportEventState(const SBEvent &event, SBFile out) const { void SBProcess::ReportEventState(const SBEvent &event, FILE *out) const { LLDB_INSTRUMENT_VA(this, event, out); - FileSP outfile = std::make_shared(out, false); + FileSP outfile = + std::make_shared(out, File::eOpenOptionWriteOnly, false); return ReportEventState(event, outfile); } diff --git a/lldb/source/API/SBStream.cpp b/lldb/source/API/SBStream.cpp index fc8f09a7bb9ae..2fc5fcfa8b0c4 100644 --- a/lldb/source/API/SBStream.cpp +++ b/lldb/source/API/SBStream.cpp @@ -116,7 +116,8 @@ void SBStream::RedirectToFile(const char *path, bool append) { void SBStream::RedirectToFileHandle(FILE *fh, bool transfer_fh_ownership) { LLDB_INSTRUMENT_VA(this, fh, transfer_fh_ownership); - FileSP file = std::make_unique(fh, transfer_fh_ownership); + FileSP file = std::make_unique(fh, File::eOpenOptionReadWrite, + transfer_fh_ownership); return RedirectToFile(file); } diff --git a/lldb/source/Core/Debugger.cpp b/lldb/source/Core/Debugger.cpp index b37d9d3ed85e3..02f38e9094ec5 100644 --- a/lldb/source/Core/Debugger.cpp +++ b/lldb/source/Core/Debugger.cpp @@ -965,7 +965,8 @@ llvm::StringRef Debugger::GetStaticBroadcasterClass() { Debugger::Debugger(lldb::LogOutputCallback log_callback, void *baton) : UserID(g_unique_id++), Properties(std::make_shared()), - m_input_file_sp(std::make_shared(stdin, NativeFile::Unowned)), + m_input_file_sp(std::make_shared( + stdin, File::eOpenOptionReadOnly, NativeFile::Unowned)), m_output_stream_sp(std::make_shared( stdout, NativeFile::Unowned, m_output_mutex)), m_error_stream_sp(std::make_shared( @@ -1172,7 +1173,8 @@ Status Debugger::SetInputString(const char *data) { return result; } - SetInputFile((FileSP)std::make_shared(commands_file, true)); + SetInputFile((FileSP)std::make_shared( + commands_file, File::eOpenOptionReadOnly, true)); return 
result; } @@ -1378,7 +1380,8 @@ void Debugger::AdoptTopIOHandlerFilesIfInvalid(FileSP &in, in = GetInputFileSP(); // If there is nothing, use stdin if (!in) - in = std::make_shared(stdin, NativeFile::Unowned); + in = std::make_shared(stdin, File::eOpenOptionReadOnly, + NativeFile::Unowned); } // If no STDOUT has been set, then set it appropriately if (!out || !out->GetUnlockedFile().IsValid()) { diff --git a/lldb/source/Host/common/File.cpp b/lldb/source/Host/common/File.cpp index 65b75bd647c5d..4fad93fca9ea3 100644 --- a/lldb/source/Host/common/File.cpp +++ b/lldb/source/Host/common/File.cpp @@ -249,8 +249,8 @@ uint32_t File::GetPermissions(Status &error) const { NativeFile::NativeFile() = default; -NativeFile::NativeFile(FILE *fh, bool transfer_ownership) - : m_stream(fh), m_own_stream(transfer_ownership) { +NativeFile::NativeFile(FILE *fh, OpenOptions options, bool transfer_ownership) + : m_stream(fh), m_options(options), m_own_stream(transfer_ownership) { #ifdef _WIN32 // In order to properly display non ASCII characters in Windows, we need to // use Windows APIs to print to the console. This is only required if the @@ -258,6 +258,26 @@ NativeFile::NativeFile(FILE *fh, bool transfer_ownership) int fd = _fileno(fh); is_windows_console = ::GetFileType((HANDLE)::_get_osfhandle(fd)) == FILE_TYPE_CHAR; +#else +#ifndef NDEBUG + int fd = fileno(fh); + if (fd != -1) { + int required_mode = ConvertOpenOptionsForPOSIXOpen(options) & O_ACCMODE; + int mode = fcntl(fd, F_GETFL); + if (mode != -1) { + mode &= O_ACCMODE; + // Check that the file is open with a valid subset of the requested file + // access mode, e.g. if we expected the file to be writable then ensure it + // was opened with O_WRONLY or O_RDWR. + assert( + (required_mode == O_RDWR && mode == O_RDWR) || + (required_mode == O_RDONLY && (mode == O_RDWR || mode == O_RDONLY) || + (required_mode == O_WRONLY && + (mode == O_RDWR || mode == O_WRONLY))) && + "invalid file access mode"); + } + } +#endif #endif } @@ -274,7 +294,8 @@ NativeFile::NativeFile(int fd, OpenOptions options, bool transfer_ownership) } bool NativeFile::IsValid() const { - std::scoped_lock lock(m_descriptor_mutex, m_stream_mutex); + std::scoped_lock lock(m_descriptor_mutex, + m_stream_mutex); return DescriptorIsValidUnlocked() || StreamIsValidUnlocked(); } @@ -343,7 +364,8 @@ FILE *NativeFile::GetStream() { } Status NativeFile::Close() { - std::scoped_lock lock(m_descriptor_mutex, m_stream_mutex); + std::scoped_lock lock(m_descriptor_mutex, + m_stream_mutex); Status error; @@ -548,6 +570,10 @@ Status NativeFile::Sync() { Status NativeFile::Read(void *buf, size_t &num_bytes) { Status error; + // Ensure the file is open for reading. + if ((m_options & File::OpenOptionsModeMask) == eOpenOptionWriteOnly) + return Status(std::make_error_code(std::errc::bad_file_descriptor)); + #if defined(MAX_READ_SIZE) if (num_bytes > MAX_READ_SIZE) { uint8_t *p = (uint8_t *)buf; @@ -612,6 +638,10 @@ Status NativeFile::Read(void *buf, size_t &num_bytes) { Status NativeFile::Write(const void *buf, size_t &num_bytes) { Status error; + // Ensure the file is open for writing. 
+ if ((m_options & File::OpenOptionsModeMask) == File::eOpenOptionReadOnly) + return Status(std::make_error_code(std::errc::bad_file_descriptor)); + #if defined(MAX_WRITE_SIZE) if (num_bytes > MAX_WRITE_SIZE) { const uint8_t *p = (const uint8_t *)buf; @@ -776,8 +806,8 @@ Status NativeFile::Write(const void *buf, size_t &num_bytes, off_t &offset) { int fd = GetDescriptor(); if (fd != kInvalidDescriptor) { #ifndef _WIN32 - ssize_t bytes_written = - llvm::sys::RetryAfterSignal(-1, ::pwrite, m_descriptor, buf, num_bytes, offset); + ssize_t bytes_written = llvm::sys::RetryAfterSignal( + -1, ::pwrite, m_descriptor, buf, num_bytes, offset); if (bytes_written < 0) { num_bytes = 0; error = Status::FromErrno(); diff --git a/lldb/source/Host/common/StreamFile.cpp b/lldb/source/Host/common/StreamFile.cpp index 099980a0993c6..131412d81983b 100644 --- a/lldb/source/Host/common/StreamFile.cpp +++ b/lldb/source/Host/common/StreamFile.cpp @@ -27,7 +27,8 @@ StreamFile::StreamFile(int fd, bool transfer_ownership) : Stream() { } StreamFile::StreamFile(FILE *fh, bool transfer_ownership) : Stream() { - m_file_sp = std::make_shared(fh, transfer_ownership); + m_file_sp = std::make_shared(fh, File::eOpenOptionWriteOnly, + transfer_ownership); } StreamFile::StreamFile(const char *path, File::OpenOptions options, diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp index 3493fa9fef635..35a772c1454df 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp @@ -272,6 +272,7 @@ void ScriptInterpreterPython::SharedLibraryDirectoryHelper( // does. if (this_file.GetFileNameExtension() == ".pyd") { this_file.RemoveLastPathComponent(); // _lldb.pyd or _lldb_d.pyd + this_file.RemoveLastPathComponent(); // native this_file.RemoveLastPathComponent(); // lldb llvm::StringRef libdir = LLDB_PYTHON_RELATIVE_LIBDIR; for (auto it = llvm::sys::path::begin(libdir), diff --git a/lldb/source/Symbol/Symtab.cpp b/lldb/source/Symbol/Symtab.cpp index 6080703998ff2..9964ae492bc00 100644 --- a/lldb/source/Symbol/Symtab.cpp +++ b/lldb/source/Symbol/Symtab.cpp @@ -722,15 +722,11 @@ Symtab::AppendSymbolIndexesWithNameAndType(ConstString symbol_name, std::vector &indexes) { std::lock_guard guard(m_mutex); - if (AppendSymbolIndexesWithName(symbol_name, indexes) > 0) { - std::vector::iterator pos = indexes.begin(); - while (pos != indexes.end()) { - if (symbol_type == eSymbolTypeAny || - m_symbols[*pos].GetType() == symbol_type) - ++pos; - else - pos = indexes.erase(pos); - } + if (AppendSymbolIndexesWithName(symbol_name, indexes) > 0 && + symbol_type != eSymbolTypeAny) { + llvm::erase_if(indexes, [this, symbol_type](uint32_t index) { + return m_symbols[index].GetType() != symbol_type; + }); } return indexes.size(); } @@ -742,15 +738,11 @@ uint32_t Symtab::AppendSymbolIndexesWithNameAndType( std::lock_guard guard(m_mutex); if (AppendSymbolIndexesWithName(symbol_name, symbol_debug_type, - symbol_visibility, indexes) > 0) { - std::vector::iterator pos = indexes.begin(); - while (pos != indexes.end()) { - if (symbol_type == eSymbolTypeAny || - m_symbols[*pos].GetType() == symbol_type) - ++pos; - else - pos = indexes.erase(pos); - } + symbol_visibility, indexes) > 0 && + symbol_type != eSymbolTypeAny) { + llvm::erase_if(indexes, [this, symbol_type](uint32_t index) { + return m_symbols[index].GetType() != symbol_type; + }); } return 
indexes.size(); } diff --git a/lldb/test/API/commands/target/auto-install-main-executable/Makefile b/lldb/test/API/commands/target/auto-install-main-executable/Makefile index 07e6c9a1d0f15..d0578fb699d1b 100644 --- a/lldb/test/API/commands/target/auto-install-main-executable/Makefile +++ b/lldb/test/API/commands/target/auto-install-main-executable/Makefile @@ -6,4 +6,4 @@ a.out: a.device.out include Makefile.rules a.device.out: - $(CXX) $(CXXFLAGS) -DBUILD=74 -o $@ $(SRCDIR)/main.cpp + $(CXX) $(ASAN_LDFLAGS) $(CXXFLAGS) -DBUILD=74 -o $@ $(SRCDIR)/main.cpp diff --git a/lldb/test/API/macosx/find-dsym/bundle-with-dot-in-filename/Makefile b/lldb/test/API/macosx/find-dsym/bundle-with-dot-in-filename/Makefile index 12781fd847768..f13584041fb51 100644 --- a/lldb/test/API/macosx/find-dsym/bundle-with-dot-in-filename/Makefile +++ b/lldb/test/API/macosx/find-dsym/bundle-with-dot-in-filename/Makefile @@ -5,7 +5,7 @@ all: clean $(EXE) include Makefile.rules $(EXE): - $(CC) $(CFLAGS) -dynamiclib -o com.apple.sbd $(SRCDIR)/bundle.c + $(CC) $(ASAN_LDFLAGS) $(CFLAGS) -dynamiclib -o com.apple.sbd $(SRCDIR)/bundle.c mkdir com.apple.sbd.xpc mv com.apple.sbd com.apple.sbd.xpc/ mkdir -p com.apple.sbd.xpc.dSYM/Contents/Resources/DWARF @@ -13,7 +13,7 @@ $(EXE): rm -rf com.apple.sbd.dSYM mkdir hide.app tar cf - com.apple.sbd.xpc com.apple.sbd.xpc.dSYM | ( cd hide.app;tar xBpf -) - $(CC) $(CFLAGS) -o find-bundle-with-dots-in-fn $(SRCDIR)/main.c + $(CC) $(ASAN_LDFLAGS) $(CFLAGS) -o find-bundle-with-dots-in-fn $(SRCDIR)/main.c clean:: rm -rf a.out a.out.dSYM hide.app com.apple.sbd com.apple.sbd.dSYM com.apple.sbd.xpc com.apple.sbd.xpc.dSYM find-bundle-with-dots-in-fn find-bundle-with-dots-in-fn.dSYM diff --git a/lldb/test/API/macosx/find-dsym/deep-bundle/Makefile b/lldb/test/API/macosx/find-dsym/deep-bundle/Makefile index 806c840c9f2ee..c041d9e7a0e95 100644 --- a/lldb/test/API/macosx/find-dsym/deep-bundle/Makefile +++ b/lldb/test/API/macosx/find-dsym/deep-bundle/Makefile @@ -4,7 +4,7 @@ all: clean $(EXE) include Makefile.rules $(EXE): - $(CC) $(CFLAGS) -install_name $(shell pwd)/MyFramework.framework/Versions/A/MyFramework -dynamiclib -o MyFramework $(SRCDIR)/myframework.c + $(CC) $(ASAN_LDFLAGS) $(CFLAGS) -install_name $(shell pwd)/MyFramework.framework/Versions/A/MyFramework -dynamiclib -o MyFramework $(SRCDIR)/myframework.c mkdir -p MyFramework.framework/Versions/A/Headers mkdir -p MyFramework.framework/Versions/A/Resources cp MyFramework MyFramework.framework/Versions/A @@ -18,7 +18,7 @@ $(EXE): mkdir hide.app rm -f MyFramework tar cf - MyFramework.framework MyFramework.framework.dSYM | ( cd hide.app;tar xBpf -) - $(CC) $(CFLAGS) -o deep-bundle $(SRCDIR)/main.c -F. -framework MyFramework + $(CC) $(ASAN_LDFLAGS) $(CFLAGS) -o deep-bundle $(SRCDIR)/main.c -F. 
-framework MyFramework clean:: rm -rf a.out a.out.dSYM deep-bundle deep-bundle.dSYM MyFramework.framework MyFramework.framework.dSYM MyFramework MyFramework.dSYM hide.app diff --git a/lldb/test/API/macosx/posix_spawn/Makefile b/lldb/test/API/macosx/posix_spawn/Makefile index 7ae46ca95828d..cbdee9122e3f2 100644 --- a/lldb/test/API/macosx/posix_spawn/Makefile +++ b/lldb/test/API/macosx/posix_spawn/Makefile @@ -6,13 +6,13 @@ include Makefile.rules all: fat.out x86_64.out: x86_64.c - $(CC) -isysroot $(SDKROOT) -target x86_64-apple-macosx10.9 -o x86_64.out $< + $(CC) $(ASAN_LDFLAGS) -isysroot $(SDKROOT) -target x86_64-apple-macosx10.9 -o x86_64.out $< x86_64h.out: x86_64h.c - $(CC) -isysroot $(SDKROOT) -target x86_64h-apple-macosx10.9 -o x86_64h.out $< + $(CC) $(ASAN_LDFLAGS) -isysroot $(SDKROOT) -target x86_64h-apple-macosx10.9 -o x86_64h.out $< arm64.out: arm64.c - $(CC) -isysroot $(SDKROOT) -target arm64-apple-macosx10.9 -o arm64.out $< + $(CC) $(ASAN_LDFLAGS) -isysroot $(SDKROOT) -target arm64-apple-macosx10.9 -o arm64.out $< fat.out: x86_64.out x86_64h.out arm64.out $(LIPO) -o fat.out -create $^ diff --git a/lldb/test/API/tools/lldb-dap/evaluate/TestDAP_evaluate.py b/lldb/test/API/tools/lldb-dap/evaluate/TestDAP_evaluate.py index 20a75f4076e42..3c233a5b43ebb 100644 --- a/lldb/test/API/tools/lldb-dap/evaluate/TestDAP_evaluate.py +++ b/lldb/test/API/tools/lldb-dap/evaluate/TestDAP_evaluate.py @@ -1,5 +1,5 @@ """ -Test lldb-dap completions request +Test lldb-dap evaluate request """ import re @@ -7,16 +7,67 @@ import lldbdap_testcase from lldbsuite.test.decorators import skipIfWindows from lldbsuite.test.lldbtest import line_number +from typing import TypedDict, Optional + + +class EvaluateResponseBody(TypedDict, total=False): + result: str + variablesReference: int + type: Optional[str] + memoryReference: Optional[str] + valueLocationReference: Optional[int] class TestDAP_evaluate(lldbdap_testcase.DAPTestCaseBase): - def assertEvaluate(self, expression, regex): + def assertEvaluate( + self, + expression, + result: str, + want_type="", + want_varref=False, + want_memref=True, + want_locref=False, + ): + resp = self.dap_server.request_evaluate(expression, context=self.context) + self.assertTrue( + resp["success"], f"Failed to evaluate expression {expression!r}" + ) + body: EvaluateResponseBody = resp["body"] self.assertRegex( - self.dap_server.request_evaluate(expression, context=self.context)["body"][ - "result" - ], - regex, + body["result"], + result, + f"Unexpected 'result' for expression {expression!r} in response body {body}", ) + if want_varref: + self.assertNotEqual( + body["variablesReference"], + 0, + f"Unexpected 'variablesReference' for expression {expression!r} in response body {body}", + ) + else: + self.assertEqual( + body["variablesReference"], + 0, + f"Unexpected 'variablesReference' for expression {expression!r} in response body {body}", + ) + if want_type: + self.assertEqual( + body["type"], + want_type, + f"Unexpected 'type' for expression {expression!r} in response body {body}", + ) + if want_memref: + self.assertIn( + "memoryReference", + body, + f"Unexpected 'memoryReference' for expression {expression!r} in response body {body}", + ) + if want_locref: + self.assertIn( + "valueLocationReference", + body, + f"Unexpected 'valueLocationReference' for expression {expression!r} in response body {body}", + ) def assertEvaluateFailure(self, expression): self.assertNotIn( @@ -71,29 +122,39 @@ def run_test_evaluate_expressions( self.continue_to_breakpoint(breakpoint_1) # 
Expressions at breakpoint 1, which is in main - self.assertEvaluate("var1", "20") + self.assertEvaluate("var1", "20", want_type="int") # Empty expression should equate to the previous expression. if context == "repl": self.assertEvaluate("", "20") else: self.assertEvaluateFailure("") - self.assertEvaluate("var2", "21") + self.assertEvaluate("var2", "21", want_type="int") if context == "repl": - self.assertEvaluate("", "21") - self.assertEvaluate("", "21") - self.assertEvaluate("static_int", "42") - self.assertEvaluate("non_static_int", "43") - self.assertEvaluate("struct1.foo", "15") - self.assertEvaluate("struct2->foo", "16") + self.assertEvaluate("", "21", want_type="int") + self.assertEvaluate("", "21", want_type="int") + self.assertEvaluate("static_int", "42", want_type="int") + self.assertEvaluate("non_static_int", "43", want_type="int") + self.assertEvaluate("struct1.foo", "15", want_type="int") + self.assertEvaluate("struct2->foo", "16", want_type="int") if self.isResultExpandedDescription(): self.assertEvaluate( "struct1", r"\(my_struct\) (struct1|\$\d+) = \(foo = 15\)", + want_type="my_struct", + want_varref=True, + ) + self.assertEvaluate( + "struct2", + r"\(my_struct \*\) (struct2|\$\d+) = 0x.*", + want_type="my_struct *", + want_varref=True, ) - self.assertEvaluate("struct2", r"\(my_struct \*\) (struct2|\$\d+) = 0x.*") self.assertEvaluate( - "struct3", r"\(my_struct \*\) (struct3|\$\d+) = nullptr" + "struct3", + r"\(my_struct \*\) (struct3|\$\d+) = nullptr", + want_type="my_struct *", + want_varref=True, ) else: self.assertEvaluate( @@ -103,16 +164,22 @@ def run_test_evaluate_expressions( if enableAutoVariableSummaries else "my_struct @ 0x" ), + want_varref=True, + ) + self.assertEvaluate( + "struct2", + "0x.* {foo:16}" if enableAutoVariableSummaries else "0x.*", + want_varref=True, + want_type="my_struct *", ) self.assertEvaluate( - "struct2", "0x.* {foo:16}" if enableAutoVariableSummaries else "0x.*" + "struct3", "0x.*0", want_varref=True, want_type="my_struct *" ) - self.assertEvaluate("struct3", "0x.*0") if context == "repl": # In the repl context expressions may be interpreted as lldb # commands since no variables have the same name as the command. 
- self.assertEvaluate("list", r".*") + self.assertEvaluate("list", r".*", want_memref=False) else: self.assertEvaluateFailure("list") # local variable of a_function @@ -121,10 +188,26 @@ def run_test_evaluate_expressions( self.assertEvaluateFailure("foo") # member of my_struct if self.isExpressionParsedExpected(): - self.assertEvaluate("a_function", "0x.*a.out`a_function.*") - self.assertEvaluate("a_function(1)", "1") - self.assertEvaluate("var2 + struct1.foo", "36") - self.assertEvaluate("foo_func", "0x.*a.out`foo_func.*") + self.assertEvaluate( + "a_function", + "0x.*a.out`a_function.*", + want_type="int (*)(int)", + want_varref=True, + want_memref=False, + want_locref=True, + ) + self.assertEvaluate( + "a_function(1)", "1", want_memref=False, want_type="int" + ) + self.assertEvaluate("var2 + struct1.foo", "36", want_memref=False) + self.assertEvaluate( + "foo_func", + "0x.*a.out`foo_func.*", + want_type="int (*)()", + want_varref=True, + want_memref=False, + want_locref=True, + ) self.assertEvaluate("foo_var", "44") else: self.assertEvaluateFailure("a_function") @@ -145,6 +228,8 @@ def run_test_evaluate_expressions( self.assertEvaluate( "struct1", r"\(my_struct\) (struct1|\$\d+) = \(foo = 15\)", + want_type="my_struct", + want_varref=True, ) else: self.assertEvaluate( @@ -154,15 +239,26 @@ def run_test_evaluate_expressions( if enableAutoVariableSummaries else "my_struct @ 0x" ), + want_type="my_struct", + want_varref=True, ) self.assertEvaluate("struct1.foo", "15") self.assertEvaluate("struct2->foo", "16") if self.isExpressionParsedExpected(): - self.assertEvaluate("a_function", "0x.*a.out`a_function.*") - self.assertEvaluate("a_function(1)", "1") - self.assertEvaluate("var2 + struct1.foo", "17") - self.assertEvaluate("foo_func", "0x.*a.out`foo_func.*") + self.assertEvaluate( + "a_function", + "0x.*a.out`a_function.*", + want_type="int (*)(int)", + want_varref=True, + want_memref=False, + want_locref=True, + ) + self.assertEvaluate("a_function(1)", "1", want_memref=False) + self.assertEvaluate("var2 + struct1.foo", "17", want_memref=False) + self.assertEvaluate( + "foo_func", "0x.*a.out`foo_func.*", want_varref=True, want_memref=False + ) self.assertEvaluate("foo_var", "44") else: self.assertEvaluateFailure("a_function") @@ -185,10 +281,18 @@ def run_test_evaluate_expressions( self.assertEvaluateFailure("var2 + struct1.foo") if self.isExpressionParsedExpected(): - self.assertEvaluate("a_function", "0x.*a.out`a_function.*") - self.assertEvaluate("a_function(1)", "1") - self.assertEvaluate("list + 1", "43") - self.assertEvaluate("foo_func", "0x.*a.out`foo_func.*") + self.assertEvaluate( + "a_function", + "0x.*a.out`a_function.*", + want_varref=True, + want_memref=False, + want_locref=True, + ) + self.assertEvaluate("a_function(1)", "1", want_memref=False) + self.assertEvaluate("list + 1", "43", want_memref=False) + self.assertEvaluate( + "foo_func", "0x.*a.out`foo_func.*", want_varref=True, want_memref=False + ) self.assertEvaluate("foo_var", "44") else: self.assertEvaluateFailure("a_function") @@ -199,26 +303,28 @@ def run_test_evaluate_expressions( # Now we check that values are updated after stepping self.continue_to_breakpoint(breakpoint_4) - self.assertEvaluate("my_vec", "size=2") + self.assertEvaluate("my_vec", "size=2", want_varref=True) self.continue_to_breakpoint(breakpoint_5) - self.assertEvaluate("my_vec", "size=3") + self.assertEvaluate("my_vec", "size=3", want_varref=True) - self.assertEvaluate("my_map", "size=2") + self.assertEvaluate("my_map", "size=2", want_varref=True) 
self.continue_to_breakpoint(breakpoint_6) - self.assertEvaluate("my_map", "size=3") + self.assertEvaluate("my_map", "size=3", want_varref=True) - self.assertEvaluate("my_bool_vec", "size=1") + self.assertEvaluate("my_bool_vec", "size=1", want_varref=True) self.continue_to_breakpoint(breakpoint_7) - self.assertEvaluate("my_bool_vec", "size=2") + self.assertEvaluate("my_bool_vec", "size=2", want_varref=True) self.continue_to_breakpoint(breakpoint_8) # Test memory read, especially with 'empty' repeat commands. if context == "repl": - self.assertEvaluate("memory read -c 1 &my_ints", ".* 05 .*\n") - self.assertEvaluate("", ".* 0a .*\n") - self.assertEvaluate("", ".* 0f .*\n") - self.assertEvaluate("", ".* 14 .*\n") - self.assertEvaluate("", ".* 19 .*\n") + self.assertEvaluate( + "memory read -c 1 &my_ints", ".* 05 .*\n", want_memref=False + ) + self.assertEvaluate("", ".* 0a .*\n", want_memref=False) + self.assertEvaluate("", ".* 0f .*\n", want_memref=False) + self.assertEvaluate("", ".* 14 .*\n", want_memref=False) + self.assertEvaluate("", ".* 19 .*\n", want_memref=False) self.continue_to_exit() @@ -245,4 +351,6 @@ def test_hover_evaluate_expressions(self): @skipIfWindows def test_variable_evaluate_expressions(self): # Tests expression evaluations that are triggered in the variable explorer - self.run_test_evaluate_expressions("variable", enableAutoVariableSummaries=True) + self.run_test_evaluate_expressions( + "variables", enableAutoVariableSummaries=True + ) diff --git a/lldb/tools/lldb-dap/Handler/EvaluateRequestHandler.cpp b/lldb/tools/lldb-dap/Handler/EvaluateRequestHandler.cpp index e1556846dff19..ea8c3a2a4a296 100644 --- a/lldb/tools/lldb-dap/Handler/EvaluateRequestHandler.cpp +++ b/lldb/tools/lldb-dap/Handler/EvaluateRequestHandler.cpp @@ -10,148 +10,31 @@ #include "EventHelper.h" #include "JSONUtils.h" #include "LLDBUtils.h" +#include "Protocol/ProtocolRequests.h" +#include "Protocol/ProtocolTypes.h" #include "RequestHandler.h" +#include "lldb/lldb-enumerations.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" + +using namespace llvm; +using namespace lldb_dap; +using namespace lldb_dap::protocol; namespace lldb_dap { -// "EvaluateRequest": { -// "allOf": [ { "$ref": "#/definitions/Request" }, { -// "type": "object", -// "description": "Evaluate request; value of command field is 'evaluate'. -// Evaluates the given expression in the context of the -// top most stack frame. The expression has access to any -// variables and arguments that are in scope.", -// "properties": { -// "command": { -// "type": "string", -// "enum": [ "evaluate" ] -// }, -// "arguments": { -// "$ref": "#/definitions/EvaluateArguments" -// } -// }, -// "required": [ "command", "arguments" ] -// }] -// }, -// "EvaluateArguments": { -// "type": "object", -// "description": "Arguments for 'evaluate' request.", -// "properties": { -// "expression": { -// "type": "string", -// "description": "The expression to evaluate." -// }, -// "frameId": { -// "type": "integer", -// "description": "Evaluate the expression in the scope of this stack -// frame. If not specified, the expression is evaluated -// in the global scope." -// }, -// "context": { -// "type": "string", -// "_enum": [ "watch", "repl", "hover" ], -// "enumDescriptions": [ -// "evaluate is run in a watch.", -// "evaluate is run from REPL console.", -// "evaluate is run from a data hover." -// ], -// "description": "The context in which the evaluate request is run." 
-// }, -// "format": { -// "$ref": "#/definitions/ValueFormat", -// "description": "Specifies details on how to format the Evaluate -// result." -// } -// }, -// "required": [ "expression" ] -// }, -// "EvaluateResponse": { -// "allOf": [ { "$ref": "#/definitions/Response" }, { -// "type": "object", -// "description": "Response to 'evaluate' request.", -// "properties": { -// "body": { -// "type": "object", -// "properties": { -// "result": { -// "type": "string", -// "description": "The result of the evaluate request." -// }, -// "type": { -// "type": "string", -// "description": "The optional type of the evaluate result." -// }, -// "presentationHint": { -// "$ref": "#/definitions/VariablePresentationHint", -// "description": "Properties of a evaluate result that can be -// used to determine how to render the result in -// the UI." -// }, -// "variablesReference": { -// "type": "number", -// "description": "If variablesReference is > 0, the evaluate -// result is structured and its children can be -// retrieved by passing variablesReference to the -// VariablesRequest." -// }, -// "namedVariables": { -// "type": "number", -// "description": "The number of named child variables. The -// client can use this optional information to -// present the variables in a paged UI and fetch -// them in chunks." -// }, -// "indexedVariables": { -// "type": "number", -// "description": "The number of indexed child variables. The -// client can use this optional information to -// present the variables in a paged UI and fetch -// them in chunks." -// }, -// "valueLocationReference": { -// "type": "integer", -// "description": "A reference that allows the client to request -// the location where the returned value is -// declared. For example, if a function pointer is -// returned, the adapter may be able to look up the -// function's location. This should be present only -// if the adapter is likely to be able to resolve -// the location.\n\nThis reference shares the same -// lifetime as the `variablesReference`. See -// 'Lifetime of Object References' in the -// Overview section for details." -// } -// "memoryReference": { -// "type": "string", -// "description": "A memory reference to a location appropriate -// for this result. For pointer type eval -// results, this is generally a reference to the -// memory address contained in the pointer. This -// attribute may be returned by a debug adapter -// if corresponding capability -// `supportsMemoryReferences` is true." -// }, -// }, -// "required": [ "result", "variablesReference" ] -// } -// }, -// "required": [ "body" ] -// }] -// } -void EvaluateRequestHandler::operator()( - const llvm::json::Object &request) const { - llvm::json::Object response; - FillResponse(request, response); - llvm::json::Object body; - const auto *arguments = request.getObject("arguments"); - lldb::SBFrame frame = dap.GetLLDBFrame(*arguments); - std::string expression = - GetString(arguments, "expression").value_or("").str(); - const llvm::StringRef context = GetString(arguments, "context").value_or(""); +/// Evaluates the given expression in the context of a stack frame. +/// +/// The expression has access to any variables and arguments that are in scope. 
+Expected +EvaluateRequestHandler::Run(const EvaluateArguments &arguments) const { + EvaluateResponseBody body; + lldb::SBFrame frame = dap.GetLLDBFrame(arguments.frameId); + std::string expression = arguments.expression; bool repeat_last_command = expression.empty() && dap.last_nonempty_var_expression.empty(); - if (context == "repl" && + if (arguments.context == protocol::eEvaluateContextRepl && (repeat_last_command || (!expression.empty() && dap.DetectReplMode(frame, expression, false) == ReplMode::Command))) { @@ -165,70 +48,60 @@ void EvaluateRequestHandler::operator()( } bool required_command_failed = false; - std::string result = RunLLDBCommands( + body.result = RunLLDBCommands( dap.debugger, llvm::StringRef(), {expression}, required_command_failed, /*parse_command_directives=*/false, /*echo_commands=*/false); + return body; + } - EmplaceSafeString(body, "result", result); - body.try_emplace("variablesReference", (int64_t)0); - } else { - if (context == "repl") { - // If the expression is empty and the last expression was for a - // variable, set the expression to the previous expression (repeat the - // evaluation); otherwise save the current non-empty expression for the - // next (possibly empty) variable expression. - if (expression.empty()) - expression = dap.last_nonempty_var_expression; - else - dap.last_nonempty_var_expression = expression; - } - // Always try to get the answer from the local variables if possible. If - // this fails, then if the context is not "hover", actually evaluate an - // expression using the expression parser. - // - // "frame variable" is more reliable than the expression parser in - // many cases and it is faster. - lldb::SBValue value = frame.GetValueForVariablePath( - expression.data(), lldb::eDynamicDontRunTarget); - - // Freeze dry the value in case users expand it later in the debug console - if (value.GetError().Success() && context == "repl") - value = value.Persist(); - - if (value.GetError().Fail() && context != "hover") - value = frame.EvaluateExpression(expression.data()); - - if (value.GetError().Fail()) { - response["success"] = llvm::json::Value(false); - // This error object must live until we're done with the pointer returned - // by GetCString(). 
- lldb::SBError error = value.GetError(); - const char *error_cstr = error.GetCString(); - if (error_cstr && error_cstr[0]) - EmplaceSafeString(response, "message", error_cstr); - else - EmplaceSafeString(response, "message", "evaluate failed"); - } else { - VariableDescription desc(value, - dap.configuration.enableAutoVariableSummaries); - EmplaceSafeString(body, "result", desc.GetResult(context)); - EmplaceSafeString(body, "type", desc.display_type_name); - int64_t var_ref = 0; - if (value.MightHaveChildren() || ValuePointsToCode(value)) - var_ref = dap.variables.InsertVariable( - value, /*is_permanent=*/context == "repl"); - if (value.MightHaveChildren()) - body.try_emplace("variablesReference", var_ref); - else - body.try_emplace("variablesReference", (int64_t)0); - if (lldb::addr_t addr = value.GetLoadAddress(); - addr != LLDB_INVALID_ADDRESS) - body.try_emplace("memoryReference", EncodeMemoryReference(addr)); - if (ValuePointsToCode(value)) - body.try_emplace("valueLocationReference", var_ref); - } + if (arguments.context == eEvaluateContextRepl) { + // If the expression is empty and the last expression was for a + // variable, set the expression to the previous expression (repeat the + // evaluation); otherwise save the current non-empty expression for the + // next (possibly empty) variable expression. + if (expression.empty()) + expression = dap.last_nonempty_var_expression; + else + dap.last_nonempty_var_expression = expression; } - response.try_emplace("body", std::move(body)); - dap.SendJSON(llvm::json::Value(std::move(response))); + + // Always try to get the answer from the local variables if possible. If + // this fails, then if the context is not "hover", actually evaluate an + // expression using the expression parser. + // + // "frame variable" is more reliable than the expression parser in + // many cases and it is faster. 
+ lldb::SBValue value = frame.GetValueForVariablePath( + expression.data(), lldb::eDynamicDontRunTarget); + + // Freeze dry the value in case users expand it later in the debug console + if (value.GetError().Success() && arguments.context == eEvaluateContextRepl) + value = value.Persist(); + + if (value.GetError().Fail() && arguments.context != eEvaluateContextHover) + value = frame.EvaluateExpression(expression.data()); + + if (value.GetError().Fail()) + return ToError(value.GetError(), /*show_user=*/false); + + VariableDescription desc(value, + dap.configuration.enableAutoVariableSummaries); + + body.result = desc.GetResult(arguments.context); + body.type = desc.display_type_name; + + if (value.MightHaveChildren() || ValuePointsToCode(value)) + body.variablesReference = dap.variables.InsertVariable( + value, /*is_permanent=*/arguments.context == eEvaluateContextRepl); + + if (lldb::addr_t addr = value.GetLoadAddress(); addr != LLDB_INVALID_ADDRESS) + body.memoryReference = EncodeMemoryReference(addr); + + if (ValuePointsToCode(value) && + body.variablesReference != LLDB_DAP_INVALID_VARRERF) + body.valueLocationReference = PackLocation(body.variablesReference, true); + + return body; } + } // namespace lldb_dap diff --git a/lldb/tools/lldb-dap/Handler/RequestHandler.h b/lldb/tools/lldb-dap/Handler/RequestHandler.h index bc22133d92453..65a52075ebd79 100644 --- a/lldb/tools/lldb-dap/Handler/RequestHandler.h +++ b/lldb/tools/lldb-dap/Handler/RequestHandler.h @@ -292,11 +292,14 @@ class DisconnectRequestHandler Run(const std::optional &args) const override; }; -class EvaluateRequestHandler : public LegacyRequestHandler { +class EvaluateRequestHandler + : public RequestHandler> { public: - using LegacyRequestHandler::LegacyRequestHandler; + using RequestHandler::RequestHandler; static llvm::StringLiteral GetCommand() { return "evaluate"; } - void operator()(const llvm::json::Object &request) const override; + llvm::Expected + Run(const protocol::EvaluateArguments &) const override; FeatureSet GetSupportedFeatures() const override { return {protocol::eAdapterFeatureEvaluateForHovers}; } diff --git a/lldb/tools/lldb-dap/JSONUtils.cpp b/lldb/tools/lldb-dap/JSONUtils.cpp index 1a3a6701b194d..81eadae03bb48 100644 --- a/lldb/tools/lldb-dap/JSONUtils.cpp +++ b/lldb/tools/lldb-dap/JSONUtils.cpp @@ -11,6 +11,7 @@ #include "ExceptionBreakpoint.h" #include "LLDBUtils.h" #include "Protocol/ProtocolBase.h" +#include "Protocol/ProtocolRequests.h" #include "ProtocolUtils.h" #include "lldb/API/SBAddress.h" #include "lldb/API/SBCompileUnit.h" @@ -817,10 +818,10 @@ VariableDescription::VariableDescription(lldb::SBValue v, evaluate_name = llvm::StringRef(evaluateStream.GetData()).str(); } -std::string VariableDescription::GetResult(llvm::StringRef context) { +std::string VariableDescription::GetResult(protocol::EvaluateContext context) { // In repl context, the results can be displayed as multiple lines so more // detailed descriptions can be returned. 
- if (context != "repl") + if (context != protocol::eEvaluateContextRepl) return display_value; if (!v.IsValid()) diff --git a/lldb/tools/lldb-dap/JSONUtils.h b/lldb/tools/lldb-dap/JSONUtils.h index 0c865a33a6ce4..329dc8ab02f99 100644 --- a/lldb/tools/lldb-dap/JSONUtils.h +++ b/lldb/tools/lldb-dap/JSONUtils.h @@ -10,7 +10,7 @@ #define LLDB_TOOLS_LLDB_DAP_JSONUTILS_H #include "DAPForward.h" -#include "Protocol/ProtocolTypes.h" +#include "Protocol/ProtocolRequests.h" #include "lldb/API/SBCompileUnit.h" #include "lldb/API/SBFormat.h" #include "lldb/API/SBType.h" @@ -28,7 +28,7 @@ namespace lldb_dap { -/// Emplace a StringRef in a json::Object after enusring that the +/// Emplace a StringRef in a json::Object after ensuring that the /// string is valid UTF8. If not, first call llvm::json::fixUTF8 /// before emplacing. /// @@ -351,7 +351,7 @@ struct VariableDescription { std::optional custom_name = {}); /// Returns a description of the value appropriate for the specified context. - std::string GetResult(llvm::StringRef context); + std::string GetResult(protocol::EvaluateContext context); }; /// Does the given variable have an associated value location? diff --git a/lldb/tools/lldb-dap/LLDBUtils.cpp b/lldb/tools/lldb-dap/LLDBUtils.cpp index 4db6caa1af38b..e2ba2ee64103d 100644 --- a/lldb/tools/lldb-dap/LLDBUtils.cpp +++ b/lldb/tools/lldb-dap/LLDBUtils.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "LLDBUtils.h" +#include "DAPError.h" #include "JSONUtils.h" #include "lldb/API/SBCommandInterpreter.h" #include "lldb/API/SBCommandReturnObject.h" @@ -17,6 +18,7 @@ #include "lldb/API/SBThread.h" #include "lldb/lldb-enumerations.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/Error.h" #include "llvm/Support/JSON.h" #include "llvm/Support/raw_ostream.h" @@ -214,13 +216,14 @@ GetStopDisassemblyDisplay(lldb::SBDebugger &debugger) { return result; } -llvm::Error ToError(const lldb::SBError &error) { +llvm::Error ToError(const lldb::SBError &error, bool show_user) { if (error.Success()) return llvm::Error::success(); - return llvm::createStringError( - std::error_code(error.GetError(), std::generic_category()), - error.GetCString()); + return llvm::make_error( + /*message=*/error.GetCString(), + /*EC=*/std::error_code(error.GetError(), std::generic_category()), + /*show_user=*/show_user); } std::string GetStringValue(const lldb::SBStructuredData &data) { diff --git a/lldb/tools/lldb-dap/LLDBUtils.h b/lldb/tools/lldb-dap/LLDBUtils.h index 9db721a47ccf7..a29d3d88789a0 100644 --- a/lldb/tools/lldb-dap/LLDBUtils.h +++ b/lldb/tools/lldb-dap/LLDBUtils.h @@ -243,7 +243,7 @@ class ScopeSyncMode { lldb::StopDisassemblyType GetStopDisassemblyDisplay(lldb::SBDebugger &debugger); /// Take ownership of the stored error. -llvm::Error ToError(const lldb::SBError &error); +llvm::Error ToError(const lldb::SBError &error, bool show_user = true); /// Provides the string value if this data structure is a string type. 
std::string GetStringValue(const lldb::SBStructuredData &data); diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.cpp b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.cpp index 44ae79f8b9f43..ac01cfb95dd41 100644 --- a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.cpp +++ b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.cpp @@ -8,6 +8,7 @@ #include "Protocol/ProtocolRequests.h" #include "JSONUtils.h" +#include "Protocol/ProtocolTypes.h" #include "lldb/lldb-defines.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringMap.h" @@ -639,6 +640,54 @@ json::Value toJSON(const ExceptionInfoResponseBody &ERB) { result.insert({"description", ERB.description}); if (ERB.details.has_value()) result.insert({"details", *ERB.details}); + return result; +} + +static bool fromJSON(const llvm::json::Value &Params, EvaluateContext &C, + llvm::json::Path P) { + auto rawContext = Params.getAsString(); + if (!rawContext) { + P.report("expected a string"); + return false; + } + C = StringSwitch(*rawContext) + .Case("watch", EvaluateContext::eEvaluateContextWatch) + .Case("repl", EvaluateContext::eEvaluateContextRepl) + .Case("hover", EvaluateContext::eEvaluateContextHover) + .Case("clipboard", EvaluateContext::eEvaluateContextClipboard) + .Case("variables", EvaluateContext::eEvaluateContextVariables) + .Default(eEvaluateContextUnknown); + return true; +} + +bool fromJSON(const llvm::json::Value &Params, EvaluateArguments &Args, + llvm::json::Path P) { + json::ObjectMapper O(Params, P); + return O && O.map("expression", Args.expression) && + O.mapOptional("frameId", Args.frameId) && + O.mapOptional("line", Args.line) && + O.mapOptional("column", Args.column) && + O.mapOptional("source", Args.source) && + O.mapOptional("context", Args.context) && + O.mapOptional("format", Args.format); +} + +llvm::json::Value toJSON(const EvaluateResponseBody &Body) { + json::Object result{{"result", Body.result}, + {"variablesReference", Body.variablesReference}}; + + if (!Body.type.empty()) + result.insert({"type", Body.type}); + if (Body.presentationHint) + result.insert({"presentationHint", Body.presentationHint}); + if (Body.namedVariables) + result.insert({"namedVariables", Body.namedVariables}); + if (Body.indexedVariables) + result.insert({"indexedVariables", Body.indexedVariables}); + if (!Body.memoryReference.empty()) + result.insert({"memoryReference", Body.memoryReference}); + if (Body.valueLocationReference != LLDB_DAP_INVALID_VALUE_LOC) + result.insert({"valueLocationReference", Body.valueLocationReference}); return result; } diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h index b894f2b4ed44d..c1e1e93f1e44a 100644 --- a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h +++ b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h @@ -1061,6 +1061,123 @@ struct ExceptionInfoResponseBody { }; llvm::json::Value toJSON(const ExceptionInfoResponseBody &); +/// The context in which the evaluate request is used. +enum EvaluateContext : unsigned { + /// An unspecified or unknown evaluate context. + eEvaluateContextUnknown = 0, + /// 'watch': evaluate is called from a watch view context. + eEvaluateContextWatch = 1, + /// 'repl': evaluate is called from a REPL context. + eEvaluateContextRepl = 2, + /// 'hover': evaluate is called to generate the debug hover contents. + /// This value should only be used if the corresponding capability + /// `supportsEvaluateForHovers` is true. 
+ eEvaluateContextHover = 3, + /// 'clipboard': evaluate is called to generate clipboard contents. + /// This value should only be used if the corresponding capability + /// `supportsClipboardContext` is true. + eEvaluateContextClipboard = 4, + /// 'variables': evaluate is called from a variables view context. + eEvaluateContextVariables = 5, +}; + +/// Arguments for `evaluate` request. +struct EvaluateArguments { + /// The expression to evaluate. + std::string expression; + + /// Evaluate the expression in the scope of this stack frame. If not + /// specified, the expression is evaluated in the global scope. + uint64_t frameId = LLDB_DAP_INVALID_FRAME_ID; + + /// The contextual line where the expression should be evaluated. In the + /// 'hover' context, this should be set to the start of the expression being + /// hovered. + uint32_t line = LLDB_INVALID_LINE_NUMBER; + + /// The contextual column where the expression should be evaluated. This may + /// be provided if `line` is also provided. + /// + /// It is measured in UTF-16 code units and the client capability + /// `columnsStartAt1` determines whether it is 0- or 1-based. + uint32_t column = LLDB_INVALID_COLUMN_NUMBER; + + /// The contextual source in which the `line` is found. This must be provided + /// if `line` is provided. + std::optional source; + + /// The context in which the evaluate request is used. + /// Values: + /// 'watch': evaluate is called from a watch view context. + /// 'repl': evaluate is called from a REPL context. + /// 'hover': evaluate is called to generate the debug hover contents. + /// This value should only be used if the corresponding capability + /// `supportsEvaluateForHovers` is true. + /// 'clipboard': evaluate is called to generate clipboard contents. + /// This value should only be used if the corresponding capability + /// `supportsClipboardContext` is true. + /// 'variables': evaluate is called from a variables view context. + /// etc. + EvaluateContext context = eEvaluateContextUnknown; + + /// Specifies details on how to format the result. + /// The attribute is only honored by a debug adapter if the corresponding + /// capability `supportsValueFormattingOptions` is true. + std::optional format; +}; +bool fromJSON(const llvm::json::Value &, EvaluateArguments &, llvm::json::Path); + +/// Response to 'evaluate' request. +struct EvaluateResponseBody { + /// The result of the evaluate request. + std::string result; + + /// The type of the evaluate result. + /// This attribute should only be returned by a debug adapter if the + /// corresponding capability `supportsVariableType` is true. + std::string type; + + /// Properties of an evaluate result that can be used to determine how to + /// render the result in the UI. + std::optional presentationHint; + + /// If `variablesReference` is > 0, the evaluate result is structured and its + /// children can be retrieved by passing `variablesReference` to the + /// `variables` request as long as execution remains suspended. See 'Lifetime + /// of Object References' in the Overview section for details. + int64_t variablesReference = 0; + + /// The number of named child variables. + /// The client can use this information to present the variables in a paged + /// UI and fetch them in chunks. + /// The value should be less than or equal to 2147483647 (2^31-1). + uint32_t namedVariables = 0; + + /// The number of indexed child variables. + /// The client can use this information to present the variables in a paged + /// UI and fetch them in chunks. 
+ /// The value should be less than or equal to 2147483647 (2^31-1). + uint32_t indexedVariables = 0; + + /// A memory reference to a location appropriate for this result. + /// For pointer type eval results, this is generally a reference to the + /// memory address contained in the pointer. + /// This attribute may be returned by a debug adapter if corresponding + /// capability `supportsMemoryReferences` is true. + std::string memoryReference; + + /// A reference that allows the client to request the location where the + /// returned value is declared. For example, if a function pointer is + /// returned, the adapter may be able to look up the function's location. + /// This should be present only if the adapter is likely to be able to + /// resolve the location. + /// + /// This reference shares the same lifetime as the `variablesReference`. See + /// 'Lifetime of Object References' in the Overview section for details. + uint64_t valueLocationReference = LLDB_DAP_INVALID_VALUE_LOC; +}; +llvm::json::Value toJSON(const EvaluateResponseBody &); + } // namespace lldb_dap::protocol #endif diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolTypes.h b/lldb/tools/lldb-dap/Protocol/ProtocolTypes.h index 6d85c74377bd3..690a1d684d0e9 100644 --- a/lldb/tools/lldb-dap/Protocol/ProtocolTypes.h +++ b/lldb/tools/lldb-dap/Protocol/ProtocolTypes.h @@ -28,8 +28,9 @@ #include #include -#define LLDB_DAP_INVALID_VARRERF UINT64_MAX +#define LLDB_DAP_INVALID_VARRERF INT64_MAX #define LLDB_DAP_INVALID_SRC_REF 0 +#define LLDB_DAP_INVALID_VALUE_LOC 0 namespace lldb_dap::protocol { diff --git a/lldb/unittests/DAP/ProtocolRequestsTest.cpp b/lldb/unittests/DAP/ProtocolRequestsTest.cpp index 498195dc09325..ba9aef1e5fcc5 100644 --- a/lldb/unittests/DAP/ProtocolRequestsTest.cpp +++ b/lldb/unittests/DAP/ProtocolRequestsTest.cpp @@ -67,3 +67,54 @@ TEST(ProtocolRequestsTest, ExceptionInfoResponseBody) { ASSERT_THAT_EXPECTED(expected_opt, llvm::Succeeded()); EXPECT_EQ(PrettyPrint(*expected_opt), PrettyPrint(body)); } + +TEST(ProtocolRequestsTest, EvaluateArguments) { + llvm::Expected expected = parse(R"({ + "expression": "hello world", + "context": "repl" + })"); + ASSERT_THAT_EXPECTED(expected, llvm::Succeeded()); + EXPECT_EQ(expected->expression, "hello world"); + EXPECT_EQ(expected->context, eEvaluateContextRepl); + + // Check required keys; + EXPECT_THAT_EXPECTED(parse(R"({})"), + FailedWithMessage("missing value at (root).expression")); +} + +TEST(ProtocolRequestsTest, EvaluateResponseBody) { + EvaluateResponseBody body; + body.result = "hello world"; + body.variablesReference = 7; + + // Check required keys. + Expected expected = parse(R"({ + "result": "hello world", + "variablesReference": 7 + })"); + + ASSERT_THAT_EXPECTED(expected, llvm::Succeeded()); + EXPECT_EQ(PrettyPrint(*expected), PrettyPrint(body)); + + // Check optional keys. 
+ body.result = "'abc'"; + body.type = "string"; + body.variablesReference = 42; + body.namedVariables = 1; + body.indexedVariables = 2; + body.memoryReference = "0x123"; + body.valueLocationReference = 22; + + Expected expected_opt = parse(R"({ + "result": "'abc'", + "type": "string", + "variablesReference": 42, + "namedVariables": 1, + "indexedVariables": 2, + "memoryReference": "0x123", + "valueLocationReference": 22 + })"); + + ASSERT_THAT_EXPECTED(expected_opt, llvm::Succeeded()); + EXPECT_EQ(PrettyPrint(*expected_opt), PrettyPrint(body)); +} diff --git a/lldb/unittests/Host/FileTest.cpp b/lldb/unittests/Host/FileTest.cpp index d973d19430596..85697c49f6fce 100644 --- a/lldb/unittests/Host/FileTest.cpp +++ b/lldb/unittests/Host/FileTest.cpp @@ -8,6 +8,7 @@ #include "lldb/Host/File.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/FileUtilities.h" #include "llvm/Support/Path.h" @@ -35,7 +36,7 @@ TEST(File, GetWaitableHandleFileno) { FILE *stream = fdopen(fd, "r"); ASSERT_TRUE(stream); - NativeFile file(stream, true); + NativeFile file(stream, File::eOpenOptionReadWrite, true); #ifdef _WIN32 EXPECT_EQ(file.GetWaitableHandle(), (HANDLE)_get_osfhandle(fd)); #else @@ -67,3 +68,22 @@ TEST(File, GetStreamFromDescriptor) { EXPECT_EQ(file.GetWaitableHandle(), (file_t)fd); #endif } + +TEST(File, ReadOnlyModeNotWritable) { + const auto *Info = testing::UnitTest::GetInstance()->current_test_info(); + llvm::SmallString<128> name; + int fd; + llvm::sys::fs::createTemporaryFile(llvm::Twine(Info->test_case_name()) + "-" + + Info->name(), + "test", fd, name); + + llvm::FileRemover remover(name); + ASSERT_GE(fd, 0); + + NativeFile file(fd, File::eOpenOptionReadOnly, true); + ASSERT_TRUE(file.IsValid()); + llvm::StringLiteral buf = "Hello World"; + size_t bytes_written = buf.size(); + Status error = file.Write(buf.data(), bytes_written); + EXPECT_EQ(error.Fail(), true); +} diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake index 6a4610397967a..9bbc75ff7700c 100644 --- a/llvm/cmake/modules/HandleLLVMOptions.cmake +++ b/llvm/cmake/modules/HandleLLVMOptions.cmake @@ -450,13 +450,16 @@ if( LLVM_ENABLE_PIC ) # Enable interprocedural optimizations for non-inline functions which would # otherwise be disabled due to GCC -fPIC's default. # Note: GCC<10.3 has a bug on SystemZ. - # + # Note: Default on AIX is "no semantic interposition". # Note: Clang allows IPO for -fPIC so this optimization is less effective. # Clang 13 has a bug related to -fsanitize-coverage # -fno-semantic-interposition (https://reviews.llvm.org/D117183). 
- if ((CMAKE_COMPILER_IS_GNUCXX AND - NOT (LLVM_NATIVE_ARCH STREQUAL "SystemZ" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 10.3)) - OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION GREATER_EQUAL 14)) + if ((NOT ("${CMAKE_SYSTEM_NAME}" MATCHES "AIX")) + AND ((CMAKE_COMPILER_IS_GNUCXX AND + NOT (LLVM_NATIVE_ARCH STREQUAL "SystemZ" + AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 10.3)) + OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" + AND CMAKE_CXX_COMPILER_VERSION GREATER_EQUAL 14))) add_flag_if_supported("-fno-semantic-interposition" FNO_SEMANTIC_INTERPOSITION) endif() endif() diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h index 511cb56f73dcb..557dbf8c7ca39 100644 --- a/llvm/include/llvm/CodeGen/SDPatternMatch.h +++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h @@ -903,6 +903,11 @@ template inline BinaryOpc_match m_Srl(const LHS &L, const RHS &R) { return BinaryOpc_match(ISD::SRL, L, R); } +template +inline auto m_ExactSr(const LHS &L, const RHS &R) { + return m_AnyOf(BinaryOpc_match(ISD::SRA, L, R, SDNodeFlags::Exact), + BinaryOpc_match(ISD::SRL, L, R, SDNodeFlags::Exact)); +} template inline BinaryOpc_match m_Rotl(const LHS &L, const RHS &R) { diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h index 7c97afd5a7f5a..59848c582f6d5 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h @@ -201,7 +201,7 @@ enum class OMPDynGroupprivateFallbackType : uint64_t { }; // Default OpenMP mapper name suffix. -inline constexpr const char *OmpDefaultMapperName = ".omp.default.mapper"; +inline constexpr const char *OmpDefaultMapperName = "_omp_default_mapper"; /// Values for bit flags used to specify the mapping type for /// offloading. diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h index 9577d0141f168..c91fc254ebe11 100644 --- a/llvm/include/llvm/IR/Intrinsics.h +++ b/llvm/include/llvm/IR/Intrinsics.h @@ -30,6 +30,8 @@ class LLVMContext; class Module; class AttributeList; class AttributeSet; +class raw_ostream; +class Constant; /// This namespace contains an enum with a value for every intrinsic/builtin /// function known by LLVM. The enum values are returned by @@ -81,6 +83,9 @@ namespace Intrinsic { /// Returns true if the intrinsic can be overloaded. LLVM_ABI bool isOverloaded(ID id); + /// Returns true if the intrinsic has pretty printed immediate arguments. + LLVM_ABI bool hasPrettyPrintedArgs(ID id); + /// isTargetIntrinsic - Returns true if IID is an intrinsic specific to a /// certain target. If it is a generic intrinsic false is returned. LLVM_ABI bool isTargetIntrinsic(ID IID); @@ -284,6 +289,10 @@ namespace Intrinsic { /// N. LLVM_ABI Intrinsic::ID getDeinterleaveIntrinsicID(unsigned Factor); + /// Print the argument info for the arguments with ArgInfo. + LLVM_ABI void printImmArg(ID IID, unsigned ArgIdx, raw_ostream &OS, + const Constant *ImmArgVal); + } // namespace Intrinsic } // namespace llvm diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 9413c3a4d5b32..adec819432534 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -142,6 +142,25 @@ class Range : IntrinsicProperty { int Upper = upper; } +// ArgProperty - Base class for argument properties that can be specified in ArgInfo. +class ArgProperty; + +// ArgName - Specifies the name of an argument for pretty-printing. 
+class ArgName : ArgProperty { + string Name = name; +} + +// ImmArgPrinter - Specifies a custom printer function for immediate arguments. +class ImmArgPrinter : ArgProperty { + string FuncName = funcname; +} + +// ArgInfo - The specified argument has properties defined by a list of ArgProperty objects. +class ArgInfo arg_properties> : IntrinsicProperty { + int ArgNo = idx.Value; + list Properties = arg_properties; +} + def IntrNoReturn : IntrinsicProperty; // Applied by default. diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 21badc2692037..1b485dc8ccd1e 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -2955,7 +2955,14 @@ foreach sp = [0, 1] in { defvar nargs = !size(args); defvar scale_d_imm = ArgIndex; defvar scale_d_imm_range = [ImmArg, Range]; - defvar intrinsic_properties = !listconcat( + + // Check if this is the specific llvm.nvvm.tcgen05.mma.tensor intrinsic. + defvar is_target_intrinsic = !and(!eq(sp, 0), + !eq(space, "tensor"), + !eq(scale_d, 0), + !eq(ashift, 0)); + + defvar base_properties = !listconcat( mma.common_intr_props, !if(!eq(scale_d, 1), scale_d_imm_range, []), [Range, 0, !if(!eq(scale_d, 1), 2, 4)>, // kind @@ -2965,6 +2972,13 @@ foreach sp = [0, 1] in { ] ); + defvar intrinsic_properties = !if(is_target_intrinsic, + !listconcat(base_properties, + [ArgInfo, [ArgName<"kind">, ImmArgPrinter<"printTcgen05MMAKind">]>, + ArgInfo, [ArgName<"cta_group">]>, + ArgInfo, [ArgName<"collector">, ImmArgPrinter<"printTcgen05CollectorUsageOp">]>]), + base_properties); + def mma.record_name: DefaultAttrsIntrinsicFlags<[], args, flags, intrinsic_properties, mma.intr_name>; diff --git a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h index d55100e5e709d..d383769043605 100644 --- a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h +++ b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h @@ -18,8 +18,11 @@ #include #include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsNVPTX.h" +#include "llvm/Support/raw_ostream.h" namespace llvm { namespace nvvm { @@ -659,6 +662,51 @@ inline APFloat::roundingMode GetFMARoundingMode(Intrinsic::ID IntrinsicID) { llvm_unreachable("Invalid FP instrinsic rounding mode for NVVM fma"); } +inline void printTcgen05MMAKind(raw_ostream &OS, const Constant *ImmArgVal) { + if (const auto *CI = dyn_cast(ImmArgVal)) { + uint64_t Val = CI->getZExtValue(); + switch (static_cast(Val)) { + case Tcgen05MMAKind::F16: + OS << "f16"; + return; + case Tcgen05MMAKind::TF32: + OS << "tf32"; + return; + case Tcgen05MMAKind::F8F6F4: + OS << "f8f6f4"; + return; + case Tcgen05MMAKind::I8: + OS << "i8"; + return; + } + } + llvm_unreachable( + "printTcgen05MMAKind called with invalid value for immediate argument"); +} + +inline void printTcgen05CollectorUsageOp(raw_ostream &OS, + const Constant *ImmArgVal) { + if (const auto *CI = dyn_cast(ImmArgVal)) { + uint64_t Val = CI->getZExtValue(); + switch (static_cast(Val)) { + case Tcgen05CollectorUsageOp::DISCARD: + OS << "discard"; + return; + case Tcgen05CollectorUsageOp::LASTUSE: + OS << "lastuse"; + return; + case Tcgen05CollectorUsageOp::FILL: + OS << "fill"; + return; + case Tcgen05CollectorUsageOp::USE: + OS << "use"; + return; + } + } + llvm_unreachable("printTcgen05CollectorUsageOp called with invalid value for " + "immediate argument"); +} + } // namespace nvvm } // namespace llvm #endif // 
LLVM_IR_NVVMINTRINSICUTILS_H diff --git a/llvm/include/llvm/MC/MCRegisterInfo.h b/llvm/include/llvm/MC/MCRegisterInfo.h index 6e36e580358e7..f4897b6a406fb 100644 --- a/llvm/include/llvm/MC/MCRegisterInfo.h +++ b/llvm/include/llvm/MC/MCRegisterInfo.h @@ -438,7 +438,7 @@ class LLVM_ABI MCRegisterInfo { /// number. Returns -1 if there is no equivalent value. The second /// parameter allows targets to use different numberings for EH info and /// debugging info. - virtual int64_t getDwarfRegNum(MCRegister RegNum, bool isEH) const; + virtual int64_t getDwarfRegNum(MCRegister Reg, bool isEH) const; /// Map a dwarf register back to a target register. Returns std::nullopt if /// there is no mapping. @@ -450,11 +450,11 @@ class LLVM_ABI MCRegisterInfo { /// Map a target register to an equivalent SEH register /// number. Returns LLVM register number if there is no equivalent value. - int getSEHRegNum(MCRegister RegNum) const; + int getSEHRegNum(MCRegister Reg) const; /// Map a target register to an equivalent CodeView register /// number. - int getCodeViewRegNum(MCRegister RegNum) const; + int getCodeViewRegNum(MCRegister Reg) const; regclass_iterator regclass_begin() const { return Classes; } regclass_iterator regclass_end() const { return Classes+NumClasses; } diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index 530fa9518f40e..a3e9b039f9225 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -317,9 +317,9 @@ Value *llvm::findScalarElement(Value *V, unsigned EltNo) { if (InsertElementInst *III = dyn_cast(V)) { // If this is an insert to a variable element, we don't know what it is. - if (!isa(III->getOperand(2))) + uint64_t IIElt; + if (!match(III->getOperand(2), m_ConstantInt(IIElt))) return nullptr; - unsigned IIElt = cast(III->getOperand(2))->getZExtValue(); // If this is an insert to the element we are looking for, return the // inserted value. diff --git a/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp index 32b6c46303828..34531dd7ab17f 100644 --- a/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp +++ b/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp @@ -133,10 +133,6 @@ INITIALIZE_PASS(RegAllocScoring, "regallocscoringpass", // Common ML Advisor declarations // =================================== namespace { -// The model can only accept a specified number of opcodes and will error it if -// fed an opcode it hasn't seen before. This constant sets the current cutoff. -static const int OpcodeValueCutoff = 17716; - // Most features are as described above, so we'll reuse this vector in defining // them. static const std::vector PerLiveRangeShape{1, NumberOfInterferences}; @@ -948,139 +944,6 @@ void MLEvictAdvisor::extractFeatures( #undef SET } -void llvm::extractInstructionFeatures( - SmallVectorImpl &LRPosInfo, MLModelRunner *RegallocRunner, - function_ref GetOpcode, - function_ref GetMBBFreq, - function_ref GetMBBReference, - const int InstructionsIndex, const int InstructionsMappingIndex, - const int MBBFreqIndex, const int MBBMappingIndex, - const SlotIndex LastIndex) { - // This function extracts instruction based features relevant to the eviction - // problem currently being solved. This function ends up extracting two - // tensors. - // 1 - A vector of size max instruction count. It contains the opcodes of the - // instructions spanned by all the intervals in the current instance of the - // eviction problem. 
- // 2 - A binary mapping matrix of size (LR count * max - // instruction count) which maps where the LRs are live to the actual opcodes - // for which they are live. - // 3 - A vector of size max supported MBB count storing MBB frequencies, - // encompassing all of the MBBs covered by the eviction problem. - // 4 - A vector of size max instruction count of indices to members of the MBB - // frequency vector, mapping each instruction to its associated MBB. - - // Start off by sorting the segments based on the beginning slot index. - std::sort( - LRPosInfo.begin(), LRPosInfo.end(), - [](LRStartEndInfo A, LRStartEndInfo B) { return A.Begin < B.Begin; }); - size_t InstructionIndex = 0; - size_t CurrentSegmentIndex = 0; - SlotIndex CurrentIndex = LRPosInfo[0].Begin; - std::map VisitedMBBs; - size_t CurrentMBBIndex = 0; - // This loop processes all the segments sequentially by starting at the - // beginning slot index of the first segment, iterating through all the slot - // indices before the end slot index of that segment (while checking for - // overlaps with segments that start at greater slot indices). After hitting - // that end index, the current segment being processed gets bumped until they - // are all processed or the max instruction count is hit, where everything is - // just truncated. - while (true) { - // If the index that we are currently at is within the current segment and - // we haven't hit the max instruction count, continue processing the current - // segment. - while (CurrentIndex <= LRPosInfo[CurrentSegmentIndex].End && - InstructionIndex < ModelMaxSupportedInstructionCount) { - int CurrentOpcode = GetOpcode(CurrentIndex); - // If the current machine instruction is null, skip it - if (CurrentOpcode == -1) { - // If we're currently at the last index in the SlotIndex analysis, - // we can't go any further, so return from the function - if (CurrentIndex >= LastIndex) { - return; - } - CurrentIndex = CurrentIndex.getNextIndex(); - continue; - } - MachineBasicBlock *CurrentMBBReference = GetMBBReference(CurrentIndex); - if (VisitedMBBs.count(CurrentMBBReference) == 0) { - VisitedMBBs[CurrentMBBReference] = CurrentMBBIndex; - ++CurrentMBBIndex; - } - extractMBBFrequency(CurrentIndex, InstructionIndex, VisitedMBBs, - GetMBBFreq, CurrentMBBReference, RegallocRunner, - MBBFreqIndex, MBBMappingIndex); - // Current code assumes we're not going to get any disjointed segments - assert(LRPosInfo[CurrentSegmentIndex].Begin <= CurrentIndex); - RegallocRunner->getTensor(InstructionsIndex)[InstructionIndex] = - CurrentOpcode < OpcodeValueCutoff ? CurrentOpcode : 0; - // set value in the binary mapping matrix for the current instruction - auto CurrentSegmentPosition = LRPosInfo[CurrentSegmentIndex].Pos; - RegallocRunner->getTensor( - InstructionsMappingIndex)[CurrentSegmentPosition * - ModelMaxSupportedInstructionCount + - InstructionIndex] = 1; - // All of the segments are sorted based on the beginning slot index, but - // this doesn't mean that the beginning slot index of the next segment is - // after the end segment of the one being currently processed. This while - // loop checks for overlapping segments and modifies the portion of the - // column in the mapping matrix for the currently processed instruction - // for the LR it is checking. Also make sure that the beginning of the - // current segment we're checking for overlap in is less than the current - // index, otherwise we're done checking overlaps. 
- size_t OverlapCheckCurrentSegment = CurrentSegmentIndex + 1; - while (OverlapCheckCurrentSegment < LRPosInfo.size() && - LRPosInfo[OverlapCheckCurrentSegment].Begin <= CurrentIndex) { - auto OverlapCurrentSegmentPosition = - LRPosInfo[OverlapCheckCurrentSegment].Pos; - if (LRPosInfo[OverlapCheckCurrentSegment].End >= CurrentIndex) { - RegallocRunner->getTensor( - InstructionsMappingIndex)[OverlapCurrentSegmentPosition * - ModelMaxSupportedInstructionCount + - InstructionIndex] = 1; - } - ++OverlapCheckCurrentSegment; - } - ++InstructionIndex; - if (CurrentIndex >= LastIndex) { - return; - } - CurrentIndex = CurrentIndex.getNextIndex(); - } - // if we've just finished processing through the last segment or if we've - // hit the maximum number of instructions, break out of the loop. - if (CurrentSegmentIndex == LRPosInfo.size() - 1 || - InstructionIndex >= ModelMaxSupportedInstructionCount) { - break; - } - // If the segments are not overlapping, we need to move to the beginning - // index of the next segment to avoid having instructions not attached to - // any register. - if (LRPosInfo[CurrentSegmentIndex + 1].Begin > - LRPosInfo[CurrentSegmentIndex].End) { - CurrentIndex = LRPosInfo[CurrentSegmentIndex + 1].Begin; - } - ++CurrentSegmentIndex; - } -} - -void llvm::extractMBBFrequency( - const SlotIndex CurrentIndex, const size_t CurrentInstructionIndex, - std::map &VisitedMBBs, - function_ref GetMBBFreq, - MachineBasicBlock *CurrentMBBReference, MLModelRunner *RegallocRunner, - const int MBBFreqIndex, const int MBBMappingIndex) { - size_t CurrentMBBIndex = VisitedMBBs[CurrentMBBReference]; - float CurrentMBBFreq = GetMBBFreq(CurrentIndex); - if (CurrentMBBIndex < ModelMaxSupportedMBBCount) { - RegallocRunner->getTensor(MBBFreqIndex)[CurrentMBBIndex] = - CurrentMBBFreq; - RegallocRunner->getTensor( - MBBMappingIndex)[CurrentInstructionIndex] = CurrentMBBIndex; - } -} - // Development mode-specific implementations #ifdef LLVM_HAVE_TFLITE diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index ea69d47b234e2..b9c724156631e 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -53,6 +53,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" @@ -4675,12 +4676,38 @@ void AssemblyWriter::printInstruction(const Instruction &I) { Out << ' '; writeOperand(Operand, false); Out << '('; + bool HasPrettyPrintedArgs = + isa(CI) && + Intrinsic::hasPrettyPrintedArgs(CI->getIntrinsicID()); + ListSeparator LS; - for (unsigned op = 0, Eop = CI->arg_size(); op < Eop; ++op) { - Out << LS; - writeParamOperand(CI->getArgOperand(op), PAL.getParamAttrs(op)); + Function *CalledFunc = CI->getCalledFunction(); + auto PrintArgComment = [&](unsigned ArgNo) { + const auto *ConstArg = dyn_cast(CI->getArgOperand(ArgNo)); + if (!ConstArg) + return; + std::string ArgComment; + raw_string_ostream ArgCommentStream(ArgComment); + Intrinsic::ID IID = CalledFunc->getIntrinsicID(); + Intrinsic::printImmArg(IID, ArgNo, ArgCommentStream, ConstArg); + if (ArgComment.empty()) + return; + Out << "/* " << ArgComment << " */ "; + }; + if (HasPrettyPrintedArgs) { + for (unsigned ArgNo = 0, NumArgs = CI->arg_size(); ArgNo < NumArgs; + ++ArgNo) { + Out << LS; + PrintArgComment(ArgNo); + writeParamOperand(CI->getArgOperand(ArgNo), PAL.getParamAttrs(ArgNo)); + } + } else { + for (unsigned ArgNo = 0, NumArgs = CI->arg_size(); ArgNo < 
NumArgs; + ++ArgNo) { + Out << LS; + writeParamOperand(CI->getArgOperand(ArgNo), PAL.getParamAttrs(ArgNo)); + } } - // Emit an ellipsis if this is a musttail call in a vararg function. This // is only to aid readability, musttail calls forward varargs by default. if (CI->isMustTailCall() && CI->getParent() && @@ -5104,12 +5131,10 @@ void AssemblyWriter::printUseLists(const Function *F) { //===----------------------------------------------------------------------===// void Function::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW, - bool ShouldPreserveUseListOrder, - bool IsForDebug) const { + bool ShouldPreserveUseListOrder, bool IsForDebug) const { SlotTracker SlotTable(this->getParent()); formatted_raw_ostream OS(ROS); - AssemblyWriter W(OS, SlotTable, this->getParent(), AAW, - IsForDebug, + AssemblyWriter W(OS, SlotTable, this->getParent(), AAW, IsForDebug, ShouldPreserveUseListOrder); W.printFunction(this); } diff --git a/llvm/lib/IR/Intrinsics.cpp b/llvm/lib/IR/Intrinsics.cpp index 526800e217399..859689b9cf168 100644 --- a/llvm/lib/IR/Intrinsics.cpp +++ b/llvm/lib/IR/Intrinsics.cpp @@ -32,6 +32,7 @@ #include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/IntrinsicsXCore.h" #include "llvm/IR/Module.h" +#include "llvm/IR/NVVMIntrinsicUtils.h" #include "llvm/IR/Type.h" using namespace llvm; @@ -601,6 +602,12 @@ bool Intrinsic::isOverloaded(ID id) { #undef GET_INTRINSIC_OVERLOAD_TABLE } +bool Intrinsic::hasPrettyPrintedArgs(ID id){ +#define GET_INTRINSIC_PRETTY_PRINT_TABLE +#include "llvm/IR/IntrinsicImpl.inc" +#undef GET_INTRINSIC_PRETTY_PRINT_TABLE +} + /// Table of per-target intrinsic name tables. #define GET_INTRINSIC_TARGET_DATA #include "llvm/IR/IntrinsicImpl.inc" @@ -1142,3 +1149,7 @@ Intrinsic::ID Intrinsic::getDeinterleaveIntrinsicID(unsigned Factor) { assert(Factor >= 2 && Factor <= 8 && "Unexpected factor"); return InterleaveIntrinsics[Factor - 2].Deinterleave; } + +#define GET_INTRINSIC_PRETTY_PRINT_ARGUMENTS +#include "llvm/IR/IntrinsicImpl.inc" +#undef GET_INTRINSIC_PRETTY_PRINT_ARGUMENTS diff --git a/llvm/lib/MC/MCInst.cpp b/llvm/lib/MC/MCInst.cpp index 46a6a18e15963..61eeb5e5a5c71 100644 --- a/llvm/lib/MC/MCInst.cpp +++ b/llvm/lib/MC/MCInst.cpp @@ -29,7 +29,7 @@ void MCOperand::print(raw_ostream &OS, const MCContext *Ctx) const { if (Ctx && Ctx->getRegisterInfo()) OS << Ctx->getRegisterInfo()->getName(getReg()); else - OS << getReg(); + OS << getReg().id(); } else if (isImm()) OS << "Imm:" << getImm(); else if (isSFPImm()) diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index 233176ebe2b1f..dc482210d873c 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -46,6 +46,7 @@ #include "llvm/MC/MCSymbolMachO.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/Base64.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" @@ -532,6 +533,7 @@ class AsmParser : public MCAsmParser { DK_LTO_SET_CONDITIONAL, DK_CFI_MTE_TAGGED_FRAME, DK_MEMTAG, + DK_BASE64, DK_END }; @@ -554,6 +556,7 @@ class AsmParser : public MCAsmParser { // ".ascii", ".asciz", ".string" bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated); + bool parseDirectiveBase64(); // ".base64" bool parseDirectiveReloc(SMLoc DirectiveLoc); // ".reloc" bool parseDirectiveValue(StringRef IDVal, unsigned Size); // ".byte", ".long", ... 
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index 233176ebe2b1f..dc482210d873c 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -46,6 +46,7 @@ #include "llvm/MC/MCSymbolMachO.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/Base64.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" @@ -532,6 +533,7 @@ class AsmParser : public MCAsmParser { DK_LTO_SET_CONDITIONAL, DK_CFI_MTE_TAGGED_FRAME, DK_MEMTAG, + DK_BASE64, DK_END }; @@ -554,6 +556,7 @@ class AsmParser : public MCAsmParser { // ".ascii", ".asciz", ".string" bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated); + bool parseDirectiveBase64(); // ".base64" bool parseDirectiveReloc(SMLoc DirectiveLoc); // ".reloc" bool parseDirectiveValue(StringRef IDVal, unsigned Size); // ".byte", ".long", ... @@ -1959,6 +1962,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, case DK_ASCIZ: case DK_STRING: return parseDirectiveAscii(IDVal, true); + case DK_BASE64: + return parseDirectiveBase64(); case DK_BYTE: case DK_DC_B: return parseDirectiveValue(IDVal, 1); @@ -3088,6 +3093,37 @@ bool AsmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) { return parseMany(parseOp); } +/// parseDirectiveBase64: +/// ::= .base64 "string" (, "string" )* +bool AsmParser::parseDirectiveBase64() { + auto parseOp = [&]() -> bool { + if (checkForValidSection()) + return true; + + if (getTok().isNot(AsmToken::String)) { + return true; + } + + std::vector<char> Decoded; + std::string const str = getTok().getStringContents().str(); + if (check(str.empty(), "expected nonempty string")) { + return true; + } + + llvm::Error e = decodeBase64(str, Decoded); + if (e) { + consumeError(std::move(e)); + return Error(Lexer.getLoc(), "failed to base64 decode string data"); + } + + getStreamer().emitBytes(std::string(Decoded.begin(), Decoded.end())); + Lex(); + return false; + }; + + return check(parseMany(parseOp), "expected string"); +} + /// parseDirectiveReloc /// ::= .reloc expression , identifier [ , expression ] bool AsmParser::parseDirectiveReloc(SMLoc DirectiveLoc) { @@ -5442,6 +5478,7 @@ void AsmParser::initializeDirectiveKindMap() { DirectiveKindMap[".asciz"] = DK_ASCIZ; DirectiveKindMap[".string"] = DK_STRING; DirectiveKindMap[".byte"] = DK_BYTE; + DirectiveKindMap[".base64"] = DK_BASE64; DirectiveKindMap[".short"] = DK_SHORT; DirectiveKindMap[".value"] = DK_VALUE; DirectiveKindMap[".2byte"] = DK_2BYTE;
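For reference, the semantics the new directive implements: each quoted operand is base64-decoded and the raw bytes are emitted, so `.base64 "SGVsbG8="` has the same effect as `.ascii "Hello"`. A minimal sketch of the same decoding step using the existing llvm::decodeBase64 helper the parser calls:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/Base64.h"
    #include "llvm/Support/raw_ostream.h"
    #include <vector>

    int main() {
      std::vector<char> Decoded;
      // Malformed input yields an llvm::Error, which the directive surfaces
      // as "failed to base64 decode string data".
      if (llvm::Error E = llvm::decodeBase64("SGVsbG8=", Decoded)) {
        llvm::consumeError(std::move(E));
        return 1;
      }
      // Prints "Hello", the decoded byte sequence the streamer would emit.
      llvm::outs() << llvm::StringRef(Decoded.data(), Decoded.size()) << "\n";
      return 0;
    }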
diff --git a/llvm/lib/MC/MCRegisterInfo.cpp b/llvm/lib/MC/MCRegisterInfo.cpp index 7fd92bf974b95..77fb7332619cd 100644 --- a/llvm/lib/MC/MCRegisterInfo.cpp +++ b/llvm/lib/MC/MCRegisterInfo.cpp @@ -89,7 +89,7 @@ ArrayRef<MCPhysReg> MCRegisterInfo::getCachedAliasesOf(MCRegister R) const { return Aliases; for (MCRegAliasIteratorImpl It(R, this); It.isValid(); ++It) - Aliases.push_back(*It); + Aliases.push_back((*It).id()); sort(Aliases); Aliases.erase(unique(Aliases), Aliases.end()); @@ -141,15 +141,15 @@ unsigned MCRegisterInfo::getSubRegIndex(MCRegister Reg, return 0; } -int64_t MCRegisterInfo::getDwarfRegNum(MCRegister RegNum, bool isEH) const { +int64_t MCRegisterInfo::getDwarfRegNum(MCRegister Reg, bool isEH) const { const DwarfLLVMRegPair *M = isEH ? EHL2DwarfRegs : L2DwarfRegs; unsigned Size = isEH ? EHL2DwarfRegsSize : L2DwarfRegsSize; if (!M) return -1; - DwarfLLVMRegPair Key = { RegNum, 0 }; + DwarfLLVMRegPair Key = {Reg.id(), 0}; const DwarfLLVMRegPair *I = std::lower_bound(M, M+Size, Key); - if (I == M+Size || I->FromReg != RegNum) + if (I == M + Size || I->FromReg != Reg) return -1; // Consumers need to be able to detect -1 and -2, but at various points // the numbers move between unsigned and signed representations, as well as @@ -191,20 +191,21 @@ int64_t MCRegisterInfo::getDwarfRegNumFromDwarfEHRegNum(uint64_t RegNum) const { return RegNum; } -int MCRegisterInfo::getSEHRegNum(MCRegister RegNum) const { - const DenseMap<unsigned, int>::const_iterator I = L2SEHRegs.find(RegNum); - if (I == L2SEHRegs.end()) return (int)RegNum; +int MCRegisterInfo::getSEHRegNum(MCRegister Reg) const { + const DenseMap<unsigned, int>::const_iterator I = L2SEHRegs.find(Reg); + if (I == L2SEHRegs.end()) + return (int)Reg.id(); return I->second; } -int MCRegisterInfo::getCodeViewRegNum(MCRegister RegNum) const { +int MCRegisterInfo::getCodeViewRegNum(MCRegister Reg) const { if (L2CVRegs.empty()) report_fatal_error("target does not implement codeview register mapping"); - const DenseMap<unsigned, int>::const_iterator I = L2CVRegs.find(RegNum); + const DenseMap<unsigned, int>::const_iterator I = L2CVRegs.find(Reg); if (I == L2CVRegs.end()) - report_fatal_error("unknown codeview register " + (RegNum < getNumRegs() ? getName(RegNum) : Twine(RegNum))); + report_fatal_error("unknown codeview register " + (Reg.id() < getNumRegs() ? getName(Reg) : Twine(Reg.id()))); return I->second; } diff --git a/llvm/lib/Support/InitLLVM.cpp b/llvm/lib/Support/InitLLVM.cpp index b8fbfd21c4f28..b90f4e0714458 100644 --- a/llvm/lib/Support/InitLLVM.cpp +++ b/llvm/lib/Support/InitLLVM.cpp @@ -32,6 +32,34 @@ #endif #endif +static void RaiseLimits() { +#ifdef _AIX + // AIX has restrictive memory soft-limits out of the box, so raise them if needed. + auto RaiseLimit = [](int resource) { + struct rlimit r; + getrlimit(resource, &r); + + // Increase the soft limit to the hard limit, if necessary and + // possible. + if (r.rlim_cur != RLIM_INFINITY && r.rlim_cur != r.rlim_max) { + r.rlim_cur = r.rlim_max; + setrlimit(resource, &r); + } + }; + + // Address space size. + RaiseLimit(RLIMIT_AS); + // Heap size. + RaiseLimit(RLIMIT_DATA); + // Stack size. + RaiseLimit(RLIMIT_STACK); +#ifdef RLIMIT_RSS + // Resident set size. + RaiseLimit(RLIMIT_RSS); +#endif +#endif +} void CleanupStdHandles(void *Cookie) { llvm::raw_ostream *Outs = &llvm::outs(), *Errs = &llvm::errs(); Outs->flush(); @@ -67,6 +95,7 @@ InitLLVM::InitLLVM(int &Argc, const char **&Argv, StackPrinter.emplace(Argc, Argv); sys::PrintStackTraceOnErrorSignal(Argv[0]); install_out_of_memory_new_handler(); + RaiseLimits(); #ifdef __MVS__ diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp index c3869c3fb9a5a..3330b70cdc2e1 100644 --- a/llvm/lib/TableGen/Main.cpp +++ b/llvm/lib/TableGen/Main.cpp @@ -167,12 +167,11 @@ int llvm::TableGenMain(const char *argv0, // Write output to memory. Timer.startBackendTimer("Backend overall"); - SmallString<128> FilenamePrefix(OutputFilename); - sys::path::replace_extension(FilenamePrefix, ""); TableGenOutputFiles OutFiles; unsigned status = 0; // ApplyCallback will return true if it did not apply any callback. In that // case, attempt to apply the MainFn. + StringRef FilenamePrefix(sys::path::stem(OutputFilename)); if (TableGen::Emitter::ApplyCallback(Records, OutFiles, FilenamePrefix)) status = MainFn ?
MainFn(OutFiles, Records) : 1; Timer.stopBackendTimer(); @@ -195,7 +194,7 @@ int llvm::TableGenMain(const char *argv0, SmallString<128> Filename(OutputFilename); // TODO: Format using the split-file convention when writing to stdout? if (Filename != "-") { - Filename = FilenamePrefix; + sys::path::replace_extension(Filename, ""); Filename.append(Suffix); } if (int Ret = WriteOutput(Parser, argv0, Filename, Content)) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 35836af3c874b..42567883b2594 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1427,12 +1427,24 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::BITCAST, MVT::v2i16, Custom); setOperationAction(ISD::BITCAST, MVT::v4i8, Custom); - setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, MVT::v2i8, Custom); + setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, MVT::v2i8, Custom); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, MVT::v2i8, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i8, Custom); + setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i8, Custom); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i8, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom); setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Custom); setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Custom); - setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Custom); setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Custom); setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, MVT::v2i16, Custom); + setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, MVT::v2i16, Custom); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, MVT::v2i16, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i16, Custom); + setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i16, Custom); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i16, Custom); // ADDP custom lowering for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) @@ -6728,8 +6740,34 @@ bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend, return DataVT.isFixedLengthVector() || DataVT.getVectorMinNumElements() > 2; } +/// Helper function to check if a small vector load can be optimized. +static bool isEligibleForSmallVectorLoadOpt(LoadSDNode *LD, + const AArch64Subtarget &Subtarget) { + if (!Subtarget.isNeonAvailable()) + return false; + if (LD->isVolatile()) + return false; + + EVT MemVT = LD->getMemoryVT(); + if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8 && MemVT != MVT::v2i16) + return false; + + Align Alignment = LD->getAlign(); + Align RequiredAlignment = Align(MemVT.getStoreSize().getFixedValue()); + if (Subtarget.requiresStrictAlign() && Alignment < RequiredAlignment) + return false; + + return true; +} + bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { EVT ExtVT = ExtVal.getValueType(); + // Small, illegal vectors can be extended inreg. 
+ if (auto *Load = dyn_cast<LoadSDNode>(ExtVal.getOperand(0))) { + if (ExtVT.isFixedLengthVector() && ExtVT.getStoreSizeInBits() <= 128 && + isEligibleForSmallVectorLoadOpt(Load, *Subtarget)) + return true; + } if (!ExtVT.isScalableVector() && !Subtarget->useSVEForFixedLengthVectors()) return false; @@ -7188,12 +7226,86 @@ SDValue AArch64TargetLowering::LowerStore128(SDValue Op, return Result; } +/// Helper function to optimize loads of extended small vectors. +/// These patterns would otherwise get scalarized into inefficient sequences. +static SDValue tryLowerSmallVectorExtLoad(LoadSDNode *Load, SelectionDAG &DAG) { + const AArch64Subtarget &Subtarget = DAG.getSubtarget<AArch64Subtarget>(); + if (!isEligibleForSmallVectorLoadOpt(Load, Subtarget)) + return SDValue(); + + EVT MemVT = Load->getMemoryVT(); + EVT ResVT = Load->getValueType(0); + unsigned NumElts = ResVT.getVectorNumElements(); + unsigned DstEltBits = ResVT.getScalarSizeInBits(); + unsigned SrcEltBits = MemVT.getScalarSizeInBits(); + + unsigned ExtOpcode; + switch (Load->getExtensionType()) { + case ISD::EXTLOAD: + case ISD::ZEXTLOAD: + ExtOpcode = ISD::ZERO_EXTEND; + break; + case ISD::SEXTLOAD: + ExtOpcode = ISD::SIGN_EXTEND; + break; + case ISD::NON_EXTLOAD: + return SDValue(); + } + + SDLoc DL(Load); + SDValue Chain = Load->getChain(); + SDValue BasePtr = Load->getBasePtr(); + const MachinePointerInfo &PtrInfo = Load->getPointerInfo(); + Align Alignment = Load->getAlign(); + + // Load the data as an FP scalar to avoid issues with integer loads. + unsigned LoadBits = MemVT.getStoreSizeInBits(); + MVT ScalarLoadType = MVT::getFloatingPointVT(LoadBits); + SDValue ScalarLoad = + DAG.getLoad(ScalarLoadType, DL, Chain, BasePtr, PtrInfo, Alignment); + + MVT ScalarToVecTy = MVT::getVectorVT(ScalarLoadType, 128 / LoadBits); + SDValue ScalarToVec = + DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ScalarToVecTy, ScalarLoad); + MVT BitcastTy = + MVT::getVectorVT(MVT::getIntegerVT(SrcEltBits), 128 / SrcEltBits); + SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, BitcastTy, ScalarToVec); + + SDValue Res = Bitcast; + unsigned CurrentEltBits = Res.getValueType().getScalarSizeInBits(); + unsigned CurrentNumElts = Res.getValueType().getVectorNumElements(); + while (CurrentEltBits < DstEltBits) { + if (Res.getValueSizeInBits() >= 128) { + CurrentNumElts = CurrentNumElts / 2; + MVT ExtractVT = + MVT::getVectorVT(MVT::getIntegerVT(CurrentEltBits), CurrentNumElts); + Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, Res, + DAG.getConstant(0, DL, MVT::i64)); + } + CurrentEltBits = CurrentEltBits * 2; + MVT ExtVT = + MVT::getVectorVT(MVT::getIntegerVT(CurrentEltBits), CurrentNumElts); + Res = DAG.getNode(ExtOpcode, DL, ExtVT, Res); + } + + if (CurrentNumElts != NumElts) { + MVT FinalVT = MVT::getVectorVT(MVT::getIntegerVT(CurrentEltBits), NumElts); + Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, FinalVT, Res, + DAG.getConstant(0, DL, MVT::i64)); + } + + return DAG.getMergeValues({Res, ScalarLoad.getValue(1)}, DL); +} + SDValue AArch64TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); LoadSDNode *LoadNode = cast<LoadSDNode>(Op); assert(LoadNode && "Expected custom lowering of a load node"); + if (SDValue Result = tryLowerSmallVectorExtLoad(LoadNode, DAG)) + return Result; + if (LoadNode->getMemoryVT() == MVT::i64x8) { SmallVector<SDValue, 8> Ops; SDValue Base = LoadNode->getBasePtr(); @@ -7212,37 +7324,7 @@ SDValue AArch64TargetLowering::LowerLOAD(SDValue Op, return DAG.getMergeValues({Loaded, Chain}, DL); } - // Custom lowering for extending v4i8 vector loads. - EVT VT = Op->getValueType(0); - assert((VT == MVT::v4i16 || VT == MVT::v4i32) && "Expected v4i16 or v4i32"); - - if (LoadNode->getMemoryVT() != MVT::v4i8) - return SDValue(); - - // Avoid generating unaligned loads. - if (Subtarget->requiresStrictAlign() && LoadNode->getAlign() < Align(4)) - return SDValue(); - - unsigned ExtType; - if (LoadNode->getExtensionType() == ISD::SEXTLOAD) - ExtType = ISD::SIGN_EXTEND; - else if (LoadNode->getExtensionType() == ISD::ZEXTLOAD || - LoadNode->getExtensionType() == ISD::EXTLOAD) - ExtType = ISD::ZERO_EXTEND; - else - return SDValue(); - - SDValue Load = DAG.getLoad(MVT::f32, DL, LoadNode->getChain(), - LoadNode->getBasePtr(), MachinePointerInfo()); - SDValue Chain = Load.getValue(1); - SDValue Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f32, Load); - SDValue BC = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Vec); - SDValue Ext = DAG.getNode(ExtType, DL, MVT::v8i16, BC); - Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Ext, - DAG.getConstant(0, DL, MVT::i64)); - if (VT == MVT::v4i32) - Ext = DAG.getNode(ExtType, DL, MVT::v4i32, Ext); - return DAG.getMergeValues({Ext, Chain}, DL); + return SDValue(); } SDValue AArch64TargetLowering::LowerVECTOR_COMPRESS(SDValue Op,
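To make the widening loop concrete, here is the type progression the helper produces for one of the newly-customized combinations, a sextload of v2i64 from v2i8 (this trace is derived by stepping through the code above, not taken from the patch):

    f16 load                      // 16 bits covers both i8 lanes
    SCALAR_TO_VECTOR  -> v8f16
    BITCAST           -> v16i8
    EXTRACT_SUBVECTOR -> v8i8
    SIGN_EXTEND       -> v8i16
    EXTRACT_SUBVECTOR -> v4i16
    SIGN_EXTEND       -> v4i32
    EXTRACT_SUBVECTOR -> v2i32
    SIGN_EXTEND       -> v2i64    // matches NumElts, so no final extract

In other words, roughly a single h-register load followed by a chain of sshll-style extends, rather than the per-lane scalar loads the generic legalizer would otherwise produce.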
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 221812f1ebc7b..00fe8ee8b9b4d 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1144,6 +1144,28 @@ static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) { return Is.size() <= 2; } +// Check if a COPY instruction is cheap. +static bool isCheapCopy(const MachineInstr &MI, const AArch64RegisterInfo &RI) { + assert(MI.isCopy() && "Expected COPY instruction"); + const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); + + // Cross-bank copies (e.g., between GPR and FPR) are expensive on AArch64, + // typically requiring an FMOV instruction with a 2-6 cycle latency. + auto GetRegClass = [&](Register Reg) -> const TargetRegisterClass * { + if (Reg.isVirtual()) + return MRI.getRegClass(Reg); + if (Reg.isPhysical()) + return RI.getMinimalPhysRegClass(Reg); + return nullptr; + }; + const TargetRegisterClass *DstRC = GetRegClass(MI.getOperand(0).getReg()); + const TargetRegisterClass *SrcRC = GetRegClass(MI.getOperand(1).getReg()); + if (DstRC && SrcRC && !RI.getCommonSubClass(DstRC, SrcRC)) + return false; + + return MI.isAsCheapAsAMove(); +} + // FIXME: this implementation should be micro-architecture dependent, so a // micro-architecture target hook should be introduced here in future. bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const { @@ -1157,6 +1179,9 @@ default: return MI.isAsCheapAsAMove(); + case TargetOpcode::COPY: + return isCheapCopy(MI, RI); + case AArch64::ADDWrs: case AArch64::ADDXrs: case AArch64::SUBWrs: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 29f8f9bc8b54c..8bfdbb7c5c310 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -358,6 +358,32 @@ bool AMDGPUAsmPrinter::doInitialization(Module &M) { return AsmPrinter::doInitialization(M); } +/// Mimics GCNSubtarget::computeOccupancy for MCExpr. +/// +/// Remove dependency on GCNSubtarget and depend only on the necessary values +/// for said occupancy computation.
Should match computeOccupancy implementation +/// without passing \p STM on. +const AMDGPUMCExpr *createOccupancy(unsigned InitOcc, const MCExpr *NumSGPRs, + const MCExpr *NumVGPRs, + unsigned DynamicVGPRBlockSize, + const GCNSubtarget &STM, MCContext &Ctx) { + unsigned MaxWaves = IsaInfo::getMaxWavesPerEU(&STM); + unsigned Granule = IsaInfo::getVGPRAllocGranule(&STM, DynamicVGPRBlockSize); + unsigned TargetTotalNumVGPRs = IsaInfo::getTotalNumVGPRs(&STM); + unsigned Generation = STM.getGeneration(); + + auto CreateExpr = [&Ctx](unsigned Value) { + return MCConstantExpr::create(Value, Ctx); + }; + + return AMDGPUMCExpr::create(AMDGPUMCExpr::AGVK_Occupancy, + {CreateExpr(MaxWaves), CreateExpr(Granule), + CreateExpr(TargetTotalNumVGPRs), + CreateExpr(Generation), CreateExpr(InitOcc), + NumSGPRs, NumVGPRs}, + Ctx); +} + void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) { if (F.isDeclaration() || !AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())) return; @@ -459,7 +485,7 @@ void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) { MaxWaves, MFI.getDynamicVGPRBlockSize())}); uint64_t NumSGPRsForWavesPerEU = std::max( {NumSgpr, (uint64_t)1, (uint64_t)STM.getMinNumSGPRs(MaxWaves)}); - const MCExpr *OccupancyExpr = AMDGPUMCExpr::createOccupancy( + const MCExpr *OccupancyExpr = createOccupancy( STM.getOccupancyWithWorkGroupSizes(*MF).second, MCConstantExpr::create(NumSGPRsForWavesPerEU, OutContext), MCConstantExpr::create(NumVGPRsForWavesPerEU, OutContext), @@ -1270,7 +1296,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT, amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT); - ProgInfo.Occupancy = AMDGPUMCExpr::createOccupancy( + ProgInfo.Occupancy = createOccupancy( STM.computeOccupancy(F, ProgInfo.LDSSize).second, ProgInfo.NumSGPRsForWavesPerEU, ProgInfo.NumVGPRsForWavesPerEU, MFI->getDynamicVGPRBlockSize(), STM, Ctx); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index 90114e44f1a48..b81a08de383d9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -935,7 +935,7 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, bool hasSALUFloat = ST->hasSALUFloatInsts(); - addRulesForGOpcs({G_FADD}, Standard) + addRulesForGOpcs({G_FADD, G_FMUL}, Standard) .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat) .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat) .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}}) diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp index c27be0250e386..093c85ecabab0 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp @@ -7,9 +7,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPUMCExpr.h" -#include "GCNSubtarget.h" #include "Utils/AMDGPUBaseInfo.h" -#include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -317,30 +315,6 @@ const AMDGPUMCExpr *AMDGPUMCExpr::createTotalNumVGPR(const MCExpr *NumAGPR, return create(AGVK_TotalNumVGPRs, {NumAGPR, NumVGPR}, Ctx); } -/// Mimics GCNSubtarget::computeOccupancy for MCExpr. -/// -/// Remove dependency on GCNSubtarget and depend only only the necessary values -/// for said occupancy computation. 
Should match computeOccupancy implementation -/// without passing \p STM on. -const AMDGPUMCExpr *AMDGPUMCExpr::createOccupancy( - unsigned InitOcc, const MCExpr *NumSGPRs, const MCExpr *NumVGPRs, - unsigned DynamicVGPRBlockSize, const GCNSubtarget &STM, MCContext &Ctx) { - unsigned MaxWaves = IsaInfo::getMaxWavesPerEU(&STM); - unsigned Granule = IsaInfo::getVGPRAllocGranule(&STM, DynamicVGPRBlockSize); - unsigned TargetTotalNumVGPRs = IsaInfo::getTotalNumVGPRs(&STM); - unsigned Generation = STM.getGeneration(); - - auto CreateExpr = [&Ctx](unsigned Value) { - return MCConstantExpr::create(Value, Ctx); - }; - - return create(AGVK_Occupancy, - {CreateExpr(MaxWaves), CreateExpr(Granule), - CreateExpr(TargetTotalNumVGPRs), CreateExpr(Generation), - CreateExpr(InitOcc), NumSGPRs, NumVGPRs}, - Ctx); -} - const AMDGPUMCExpr *AMDGPUMCExpr::createLit(LitModifier Lit, int64_t Value, MCContext &Ctx) { assert(Lit == LitModifier::Lit || Lit == LitModifier::Lit64); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h index 246a3f88ebce4..bf7b40b1851da 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h @@ -98,11 +98,6 @@ class AMDGPUMCExpr : public MCTargetExpr { return create(VariantKind::AGVK_AlignTo, {Value, Align}, Ctx); } - static const AMDGPUMCExpr * - createOccupancy(unsigned InitOcc, const MCExpr *NumSGPRs, - const MCExpr *NumVGPRs, unsigned DynamicVGPRBlockSize, - const GCNSubtarget &STM, MCContext &Ctx); - static const AMDGPUMCExpr *createLit(LitModifier Lit, int64_t Value, MCContext &Ctx); diff --git a/llvm/lib/Target/PowerPC/CMakeLists.txt b/llvm/lib/Target/PowerPC/CMakeLists.txt index 2182039e0eef8..53d565013c4bc 100644 --- a/llvm/lib/Target/PowerPC/CMakeLists.txt +++ b/llvm/lib/Target/PowerPC/CMakeLists.txt @@ -11,6 +11,7 @@ tablegen(LLVM PPCGenFastISel.inc -gen-fast-isel) tablegen(LLVM PPCGenInstrInfo.inc -gen-instr-info) tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM PPCGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM PPCGenSDNodeInfo.inc -gen-sd-node-info) tablegen(LLVM PPCGenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM PPCGenExegesis.inc -gen-exegesis) tablegen(LLVM PPCGenRegisterBank.inc -gen-register-bank) diff --git a/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/llvm/lib/Target/PowerPC/PPCFastISel.cpp index ea4e597d0fd7d..ca3fe18273ff5 100644 --- a/llvm/lib/Target/PowerPC/PPCFastISel.cpp +++ b/llvm/lib/Target/PowerPC/PPCFastISel.cpp @@ -17,6 +17,7 @@ #include "PPCCallingConv.h" #include "PPCISelLowering.h" #include "PPCMachineFunctionInfo.h" +#include "PPCSelectionDAGInfo.h" #include "PPCSubtarget.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/FastISel.h" diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 89165fa8f8fdb..dd537c204cec1 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -16,6 +16,7 @@ #include "PPC.h" #include "PPCISelLowering.h" #include "PPCMachineFunctionInfo.h" +#include "PPCSelectionDAGInfo.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" #include "llvm/ADT/APInt.h" diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index f55336bafd251..220010c4d3d34 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -20,6 +20,7 @@ #include "PPCMachineFunctionInfo.h" 
#include "PPCPerfectShuffle.h" #include "PPCRegisterInfo.h" +#include "PPCSelectionDAGInfo.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" #include "llvm/ADT/APFloat.h" @@ -1678,190 +1679,6 @@ bool PPCTargetLowering::shallExtractConstSplatVectorElementToStore( return false; } -const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { - switch ((PPCISD::NodeType)Opcode) { - case PPCISD::FIRST_NUMBER: break; - case PPCISD::FSEL: return "PPCISD::FSEL"; - case PPCISD::XSMAXC: return "PPCISD::XSMAXC"; - case PPCISD::XSMINC: return "PPCISD::XSMINC"; - case PPCISD::FCFID: return "PPCISD::FCFID"; - case PPCISD::FCFIDU: return "PPCISD::FCFIDU"; - case PPCISD::FCFIDS: return "PPCISD::FCFIDS"; - case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS"; - case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ"; - case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ"; - case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ"; - case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ"; - case PPCISD::FRE: return "PPCISD::FRE"; - case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE"; - case PPCISD::FTSQRT: - return "PPCISD::FTSQRT"; - case PPCISD::FSQRT: - return "PPCISD::FSQRT"; - case PPCISD::STFIWX: return "PPCISD::STFIWX"; - case PPCISD::VPERM: return "PPCISD::VPERM"; - case PPCISD::XXSPLT: return "PPCISD::XXSPLT"; - case PPCISD::XXSPLTI_SP_TO_DP: - return "PPCISD::XXSPLTI_SP_TO_DP"; - case PPCISD::XXSPLTI32DX: - return "PPCISD::XXSPLTI32DX"; - case PPCISD::VECINSERT: return "PPCISD::VECINSERT"; - case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI"; - case PPCISD::XXPERM: - return "PPCISD::XXPERM"; - case PPCISD::VECSHL: return "PPCISD::VECSHL"; - case PPCISD::VSRQ: - return "PPCISD::VSRQ"; - case PPCISD::CMPB: return "PPCISD::CMPB"; - case PPCISD::Hi: return "PPCISD::Hi"; - case PPCISD::Lo: return "PPCISD::Lo"; - case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY"; - case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8"; - case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16"; - case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC"; - case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET"; - case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA"; - case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg"; - case PPCISD::SRL: return "PPCISD::SRL"; - case PPCISD::SRA: return "PPCISD::SRA"; - case PPCISD::SHL: return "PPCISD::SHL"; - case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE"; - case PPCISD::CALL: return "PPCISD::CALL"; - case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP"; - case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC"; - case PPCISD::CALL_RM: - return "PPCISD::CALL_RM"; - case PPCISD::CALL_NOP_RM: - return "PPCISD::CALL_NOP_RM"; - case PPCISD::CALL_NOTOC_RM: - return "PPCISD::CALL_NOTOC_RM"; - case PPCISD::MTCTR: return "PPCISD::MTCTR"; - case PPCISD::BCTRL: return "PPCISD::BCTRL"; - case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC"; - case PPCISD::BCTRL_RM: - return "PPCISD::BCTRL_RM"; - case PPCISD::BCTRL_LOAD_TOC_RM: - return "PPCISD::BCTRL_LOAD_TOC_RM"; - case PPCISD::RET_GLUE: return "PPCISD::RET_GLUE"; - case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE"; - case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP"; - case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP"; - case PPCISD::MFOCRF: return "PPCISD::MFOCRF"; - case PPCISD::MFVSR: return "PPCISD::MFVSR"; - case PPCISD::MTVSRA: return "PPCISD::MTVSRA"; - case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ"; - case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP"; - case 
PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP"; - case PPCISD::SCALAR_TO_VECTOR_PERMUTED: - return "PPCISD::SCALAR_TO_VECTOR_PERMUTED"; - case PPCISD::ANDI_rec_1_EQ_BIT: - return "PPCISD::ANDI_rec_1_EQ_BIT"; - case PPCISD::ANDI_rec_1_GT_BIT: - return "PPCISD::ANDI_rec_1_GT_BIT"; - case PPCISD::VCMP: return "PPCISD::VCMP"; - case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec"; - case PPCISD::LBRX: return "PPCISD::LBRX"; - case PPCISD::STBRX: return "PPCISD::STBRX"; - case PPCISD::LFIWAX: return "PPCISD::LFIWAX"; - case PPCISD::LFIWZX: return "PPCISD::LFIWZX"; - case PPCISD::LXSIZX: return "PPCISD::LXSIZX"; - case PPCISD::STXSIX: return "PPCISD::STXSIX"; - case PPCISD::VEXTS: return "PPCISD::VEXTS"; - case PPCISD::LXVD2X: return "PPCISD::LXVD2X"; - case PPCISD::STXVD2X: return "PPCISD::STXVD2X"; - case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE"; - case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE"; - case PPCISD::ST_VSR_SCAL_INT: - return "PPCISD::ST_VSR_SCAL_INT"; - case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; - case PPCISD::BDNZ: return "PPCISD::BDNZ"; - case PPCISD::BDZ: return "PPCISD::BDZ"; - case PPCISD::MFFS: return "PPCISD::MFFS"; - case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ"; - case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; - case PPCISD::CR6SET: return "PPCISD::CR6SET"; - case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET"; - case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT"; - case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT"; - case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA"; - case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L"; - case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS"; - case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA"; - case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L"; - case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR"; - case PPCISD::GET_TLS_MOD_AIX: return "PPCISD::GET_TLS_MOD_AIX"; - case PPCISD::GET_TPOINTER: return "PPCISD::GET_TPOINTER"; - case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR"; - case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX"; - case PPCISD::TLSLD_AIX: return "PPCISD::TLSLD_AIX"; - case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA"; - case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L"; - case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR"; - case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR"; - case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; - case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; - case PPCISD::PADDI_DTPREL: - return "PPCISD::PADDI_DTPREL"; - case PPCISD::VADD_SPLAT: - return "PPCISD::VADD_SPLAT"; - case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD"; - case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN"; - case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128"; - case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64"; - case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE"; - case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI"; - case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH"; - case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF"; - case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR"; - case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR: - return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR"; - case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR: - return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR"; - case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD"; - case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD"; - case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG"; - 
case PPCISD::XXMFACC: return "PPCISD::XXMFACC"; - case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT"; - case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT"; - case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT"; - case PPCISD::FNMSUB: return "PPCISD::FNMSUB"; - case PPCISD::STRICT_FADDRTZ: - return "PPCISD::STRICT_FADDRTZ"; - case PPCISD::STRICT_FCTIDZ: - return "PPCISD::STRICT_FCTIDZ"; - case PPCISD::STRICT_FCTIWZ: - return "PPCISD::STRICT_FCTIWZ"; - case PPCISD::STRICT_FCTIDUZ: - return "PPCISD::STRICT_FCTIDUZ"; - case PPCISD::STRICT_FCTIWUZ: - return "PPCISD::STRICT_FCTIWUZ"; - case PPCISD::STRICT_FCFID: - return "PPCISD::STRICT_FCFID"; - case PPCISD::STRICT_FCFIDU: - return "PPCISD::STRICT_FCFIDU"; - case PPCISD::STRICT_FCFIDS: - return "PPCISD::STRICT_FCFIDS"; - case PPCISD::STRICT_FCFIDUS: - return "PPCISD::STRICT_FCFIDUS"; - case PPCISD::LXVRZX: return "PPCISD::LXVRZX"; - case PPCISD::STORE_COND: - return "PPCISD::STORE_COND"; - case PPCISD::SETBC: - return "PPCISD::SETBC"; - case PPCISD::SETBCR: - return "PPCISD::SETBCR"; - case PPCISD::ADDC: - return "PPCISD::ADDC"; - case PPCISD::ADDE: - return "PPCISD::ADDE"; - case PPCISD::SUBC: - return "PPCISD::SUBC"; - case PPCISD::SUBE: - return "PPCISD::SUBE"; - } - return nullptr; -} - EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C, EVT VT) const { if (!VT.isVector()) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index d967018982734..680b529b4e2e5 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -34,580 +34,6 @@ namespace llvm { - namespace PPCISD { - - // When adding a NEW PPCISD node please add it to the correct position in - // the enum. The order of elements in this enum matters! - // Values that are added between FIRST_MEMORY_OPCODE and LAST_MEMORY_OPCODE - // are considered memory opcodes and are treated differently than other - // entries. - enum NodeType : unsigned { - // Start the numbering where the builtin ops and target ops leave off. - FIRST_NUMBER = ISD::BUILTIN_OP_END, - - /// FSEL - Traditional three-operand fsel node. - /// - FSEL, - - /// XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions. - XSMAXC, - XSMINC, - - /// FCFID - The FCFID instruction, taking an f64 operand and producing - /// and f64 value containing the FP representation of the integer that - /// was temporarily in the f64 operand. - FCFID, - - /// Newer FCFID[US] integer-to-floating-point conversion instructions for - /// unsigned integers and single-precision outputs. - FCFIDU, - FCFIDS, - FCFIDUS, - - /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 - /// operand, producing an f64 value containing the integer representation - /// of that FP value. - FCTIDZ, - FCTIWZ, - - /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for - /// unsigned integers with round toward zero. - FCTIDUZ, - FCTIWUZ, - - /// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in - /// VSFRC that is sign-extended from ByteWidth to a 64-byte integer. - VEXTS, - - /// Reciprocal estimate instructions (unary FP ops). - FRE, - FRSQRTE, - - /// Test instruction for software square root. - FTSQRT, - - /// Square root instruction. - FSQRT, - - /// VPERM - The PPC VPERM Instruction. 
- /// - VPERM, - - /// XXSPLT - The PPC VSX splat instructions - /// - XXSPLT, - - /// XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for - /// converting immediate single precision numbers to double precision - /// vector or scalar. - XXSPLTI_SP_TO_DP, - - /// XXSPLTI32DX - The PPC XXSPLTI32DX instruction. - /// - XXSPLTI32DX, - - /// VECINSERT - The PPC vector insert instruction - /// - VECINSERT, - - /// VECSHL - The PPC vector shift left instruction - /// - VECSHL, - - /// XXPERMDI - The PPC XXPERMDI instruction - /// - XXPERMDI, - XXPERM, - - /// The CMPB instruction (takes two operands of i32 or i64). - CMPB, - - /// Hi/Lo - These represent the high and low 16-bit parts of a global - /// address respectively. These nodes have two operands, the first of - /// which must be a TargetGlobalAddress, and the second of which must be a - /// Constant. Selected naively, these turn into 'lis G+C' and 'li G+C', - /// though these are usually folded into other nodes. - Hi, - Lo, - - /// The following two target-specific nodes are used for calls through - /// function pointers in the 64-bit SVR4 ABI. - - /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX) - /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to - /// compute an allocation on the stack. - DYNALLOC, - - /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to - /// compute an offset from native SP to the address of the most recent - /// dynamic alloca. - DYNAREAOFFSET, - - /// To avoid stack clash, allocation is performed by block and each block is - /// probed. - PROBED_ALLOCA, - - /// The result of the mflr at function entry, used for PIC code. - GlobalBaseReg, - - /// These nodes represent PPC shifts. - /// - /// For scalar types, only the last `n + 1` bits of the shift amounts - /// are used, where n is log2(sizeof(element) * 8). See sld/slw, etc. - /// for exact behaviors. - /// - /// For vector types, only the last n bits are used. See vsld. - SRL, - SRA, - SHL, - - /// These nodes represent PPC arithmetic operations with carry. - ADDC, - ADDE, - SUBC, - SUBE, - - /// FNMSUB - Negated multiply-subtract instruction. - FNMSUB, - - /// EXTSWSLI = The PPC extswsli instruction, which does an extend-sign - /// word and shift left immediate. - EXTSWSLI, - - /// The combination of sra[wd]i and addze used to implemented signed - /// integer division by a power of 2. The first operand is the dividend, - /// and the second is the constant shift amount (representing the - /// divisor). - SRA_ADDZE, - - /// CALL - A direct function call. - /// CALL_NOP is a call with the special NOP which follows 64-bit - /// CALL_NOTOC the caller does not use the TOC. - /// SVR4 calls and 32-bit/64-bit AIX calls. - CALL, - CALL_NOP, - CALL_NOTOC, - - /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a - /// MTCTR instruction. - MTCTR, - - /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a - /// BCTRL instruction. - BCTRL, - - /// CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl - /// instruction and the TOC reload required on 64-bit ELF, 32-bit AIX - /// and 64-bit AIX. - BCTRL_LOAD_TOC, - - /// The variants that implicitly define rounding mode for calls with - /// strictfp semantics. - CALL_RM, - CALL_NOP_RM, - CALL_NOTOC_RM, - BCTRL_RM, - BCTRL_LOAD_TOC_RM, - - /// Return with a glue operand, matched by 'blr' - RET_GLUE, - - /// R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction. 
- /// This copies the bits corresponding to the specified CRREG into the - /// resultant GPR. Bits corresponding to other CR regs are undefined. - MFOCRF, - - /// Direct move from a VSX register to a GPR - MFVSR, - - /// Direct move from a GPR to a VSX register (algebraic) - MTVSRA, - - /// Direct move from a GPR to a VSX register (zero) - MTVSRZ, - - /// Direct move of 2 consecutive GPR to a VSX register. - BUILD_FP128, - - /// BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and - /// EXTRACT_ELEMENT but take f64 arguments instead of i64, as i64 is - /// unsupported for this target. - /// Merge 2 GPRs to a single SPE register. - BUILD_SPE64, - - /// Extract SPE register component, second argument is high or low. - EXTRACT_SPE, - - /// Extract a subvector from signed integer vector and convert to FP. - /// It is primarily used to convert a (widened) illegal integer vector - /// type to a legal floating point vector type. - /// For example v2i32 -> widened to v4i32 -> v2f64 - SINT_VEC_TO_FP, - - /// Extract a subvector from unsigned integer vector and convert to FP. - /// As with SINT_VEC_TO_FP, used for converting illegal types. - UINT_VEC_TO_FP, - - /// PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to - /// place the value into the least significant element of the most - /// significant doubleword in the vector. This is not element zero for - /// anything smaller than a doubleword on either endianness. This node has - /// the same semantics as SCALAR_TO_VECTOR except that the value remains in - /// the aforementioned location in the vector register. - SCALAR_TO_VECTOR_PERMUTED, - - // FIXME: Remove these once the ANDI glue bug is fixed: - /// i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the - /// eq or gt bit of CR0 after executing andi. x, 1. This is used to - /// implement truncation of i32 or i64 to i1. - ANDI_rec_1_EQ_BIT, - ANDI_rec_1_GT_BIT, - - // READ_TIME_BASE - A read of the 64-bit time-base register on a 32-bit - // target (returns (Lo, Hi)). It takes a chain operand. - READ_TIME_BASE, - - // EH_SJLJ_SETJMP - SjLj exception handling setjmp. - EH_SJLJ_SETJMP, - - // EH_SJLJ_LONGJMP - SjLj exception handling longjmp. - EH_SJLJ_LONGJMP, - - /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* - /// instructions. For lack of better number, we use the opcode number - /// encoding for the OPC field to identify the compare. For example, 838 - /// is VCMPGTSH. - VCMP, - - /// RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the - /// altivec VCMP*_rec instructions. For lack of better number, we use the - /// opcode number encoding for the OPC field to identify the compare. For - /// example, 838 is VCMPGTSH. - VCMP_rec, - - /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This - /// corresponds to the COND_BRANCH pseudo instruction. CRRC is the - /// condition register to branch on, OPC is the branch opcode to use (e.g. - /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is - /// an optional input flag argument. - COND_BRANCH, - - /// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based - /// loops. - BDNZ, - BDZ, - - /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding - /// towards zero. Used only as part of the long double-to-int - /// conversion sequence. - FADDRTZ, - - /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register. - MFFS, - - /// TC_RETURN - A tail call return. 
- /// operand #0 chain - /// operand #1 callee (register or absolute) - /// operand #2 stack adjustment - /// operand #3 optional in flag - TC_RETURN, - - /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls - CR6SET, - CR6UNSET, - - /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS - /// for non-position independent code on PPC32. - PPC32_GOT, - - /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and - /// local dynamic TLS and position indendepent code on PPC32. - PPC32_PICGOT, - - /// G8RC = ADDIS_GOT_TPREL_HA %x2, Symbol - Used by the initial-exec - /// TLS model, produces an ADDIS8 instruction that adds the GOT - /// base to sym\@got\@tprel\@ha. - ADDIS_GOT_TPREL_HA, - - /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec - /// TLS model, produces a LD instruction with base register G8RReg - /// and offset sym\@got\@tprel\@l. This completes the addition that - /// finds the offset of "sym" relative to the thread pointer. - LD_GOT_TPREL_L, - - /// G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec - /// and local-exec TLS models, produces an ADD instruction that adds - /// the contents of G8RReg to the thread pointer. Symbol contains a - /// relocation sym\@tls which is to be replaced by the thread pointer - /// and identifies to the linker that the instruction is part of a - /// TLS sequence. - ADD_TLS, - - /// G8RC = ADDIS_TLSGD_HA %x2, Symbol - For the general-dynamic TLS - /// model, produces an ADDIS8 instruction that adds the GOT base - /// register to sym\@got\@tlsgd\@ha. - ADDIS_TLSGD_HA, - - /// %x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS - /// model, produces an ADDI8 instruction that adds G8RReg to - /// sym\@got\@tlsgd\@l and stores the result in X3. Hidden by - /// ADDIS_TLSGD_L_ADDR until after register assignment. - ADDI_TLSGD_L, - - /// %x3 = GET_TLS_ADDR %x3, Symbol - For the general-dynamic TLS - /// model, produces a call to __tls_get_addr(sym\@tlsgd). Hidden by - /// ADDIS_TLSGD_L_ADDR until after register assignment. - GET_TLS_ADDR, - - /// %x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on - /// 32-bit AIX, produces a call to .__get_tpointer to retrieve the thread - /// pointer. At the end of the call, the thread pointer is found in R3. - GET_TPOINTER, - - /// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that - /// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following - /// register assignment. - ADDI_TLSGD_L_ADDR, - - /// GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY - /// G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY - /// Op that combines two register copies of TOC entries - /// (region handle into R3 and variable offset into R4) followed by a - /// GET_TLS_ADDR node which will be expanded to a call to .__tls_get_addr. - /// This node is used in 64-bit mode as well (in which case the result is - /// G8RC and inputs are X3/X4). - TLSGD_AIX, - - /// %x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model, - /// produces a call to .__tls_get_mod(_$TLSML\@ml). - GET_TLS_MOD_AIX, - - /// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(module handle) - /// Op that requires a single input of the module handle TOC entry in R3, - /// and generates a GET_TLS_MOD_AIX node which will be expanded into a call - /// to .__tls_get_mod. This node is used in both 32-bit and 64-bit modes. - /// The only difference is the register class. 
- TLSLD_AIX, - - /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS - /// model, produces an ADDIS8 instruction that adds the GOT base - /// register to sym\@got\@tlsld\@ha. - ADDIS_TLSLD_HA, - - /// %x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS - /// model, produces an ADDI8 instruction that adds G8RReg to - /// sym\@got\@tlsld\@l and stores the result in X3. Hidden by - /// ADDIS_TLSLD_L_ADDR until after register assignment. - ADDI_TLSLD_L, - - /// %x3 = GET_TLSLD_ADDR %x3, Symbol - For the local-dynamic TLS - /// model, produces a call to __tls_get_addr(sym\@tlsld). Hidden by - /// ADDIS_TLSLD_L_ADDR until after register assignment. - GET_TLSLD_ADDR, - - /// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that - /// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion - /// following register assignment. - ADDI_TLSLD_L_ADDR, - - /// G8RC = ADDIS_DTPREL_HA %x3, Symbol - For the local-dynamic TLS - /// model, produces an ADDIS8 instruction that adds X3 to - /// sym\@dtprel\@ha. - ADDIS_DTPREL_HA, - - /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS - /// model, produces an ADDI8 instruction that adds G8RReg to - /// sym\@got\@dtprel\@l. - ADDI_DTPREL_L, - - /// G8RC = PADDI_DTPREL %x3, Symbol - For the pc-rel based local-dynamic TLS - /// model, produces a PADDI8 instruction that adds X3 to sym\@dtprel. - PADDI_DTPREL, - - /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded - /// during instruction selection to optimize a BUILD_VECTOR into - /// operations on splats. This is necessary to avoid losing these - /// optimizations due to constant folding. - VADD_SPLAT, - - /// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little - /// endian. Maps to an xxswapd instruction that corrects an lxvd2x - /// or stxvd2x instruction. The chain is necessary because the - /// sequence replaces a load and needs to provide the same number - /// of outputs. - XXSWAPD, - - /// An SDNode for swaps that are not associated with any loads/stores - /// and thereby have no chain. - SWAP_NO_CHAIN, - - /// FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or - /// lower (IDX=1) half of v4f32 to v2f64. - FP_EXTEND_HALF, - - /// MAT_PCREL_ADDR = Materialize a PC Relative address. This can be done - /// either through an add like PADDI or through a PC Relative load like - /// PLD. - MAT_PCREL_ADDR, - - /// TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for - /// TLS global address when using dynamic access models. This can be done - /// through an add like PADDI. - TLS_DYNAMIC_MAT_PCREL_ADDR, - - /// TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address - /// when using local exec access models, and when prefixed instructions are - /// available. This is used with ADD_TLS to produce an add like PADDI. - TLS_LOCAL_EXEC_MAT_ADDR, - - /// ACC_BUILD = Build an accumulator register from 4 VSX registers. - ACC_BUILD, - - /// PAIR_BUILD = Build a vector pair register from 2 VSX registers. - PAIR_BUILD, - - /// EXTRACT_VSX_REG = Extract one of the underlying vsx registers of - /// an accumulator or pair register. This node is needed because - /// EXTRACT_SUBVECTOR expects the input and output vectors to have the same - /// element type. - EXTRACT_VSX_REG, - - /// XXMFACC = This corresponds to the xxmfacc instruction. 
- XXMFACC, - - // Constrained conversion from floating point to int - FIRST_STRICTFP_OPCODE, - STRICT_FCTIDZ = FIRST_STRICTFP_OPCODE, - STRICT_FCTIWZ, - STRICT_FCTIDUZ, - STRICT_FCTIWUZ, - - /// Constrained integer-to-floating-point conversion instructions. - STRICT_FCFID, - STRICT_FCFIDU, - STRICT_FCFIDS, - STRICT_FCFIDUS, - - /// Constrained floating point add in round-to-zero mode. - STRICT_FADDRTZ, - LAST_STRICTFP_OPCODE = STRICT_FADDRTZ, - - /// SETBC - The ISA 3.1 (P10) SETBC instruction. - SETBC, - - /// SETBCR - The ISA 3.1 (P10) SETBCR instruction. - SETBCR, - - /// VSRQ - The ISA 3.1 (P10) Vector Shift right quadword instruction - VSRQ, - - // NOTE: The nodes below may require PC-Rel specific patterns if the - // address could be PC-Relative. When adding new nodes below, consider - // whether or not the address can be PC-Relative and add the corresponding - // PC-relative patterns and tests. - - /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a - /// byte-swapping store instruction. It byte-swaps the low "Type" bits of - /// the GPRC input, then stores it through Ptr. Type can be either i16 or - /// i32. - FIRST_MEMORY_OPCODE, - STBRX = FIRST_MEMORY_OPCODE, - - /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a - /// byte-swapping load instruction. It loads "Type" bits, byte swaps it, - /// then puts it in the bottom bits of the GPRC. TYPE can be either i16 - /// or i32. - LBRX, - - /// STFIWX - The STFIWX instruction. The first operand is an input token - /// chain, then an f64 value to store, then an address to store it to. - STFIWX, - - /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point - /// load which sign-extends from a 32-bit integer value into the - /// destination 64-bit register. - LFIWAX, - - /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point - /// load which zero-extends from a 32-bit integer value into the - /// destination 64-bit register. - LFIWZX, - - /// GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an - /// integer smaller than 64 bits into a VSR. The integer is zero-extended. - /// This can be used for converting loaded integers to floating point. - LXSIZX, - - /// STXSIX - The STXSI[bh]X instruction. The first operand is an input - /// chain, then an f64 value to store, then an address to store it to, - /// followed by a byte-width for the store. - STXSIX, - - /// VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian. - /// Maps directly to an lxvd2x instruction that will be followed by - /// an xxswapd. - LXVD2X, - - /// LXVRZX - Load VSX Vector Rightmost and Zero Extend - /// This node represents v1i128 BUILD_VECTOR of a zero extending load - /// instruction from to i128. - /// Allows utilization of the Load VSX Vector Rightmost Instructions. - LXVRZX, - - /// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian. - /// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x depending on - /// the vector type to load vector in big-endian element order. - LOAD_VEC_BE, - - /// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a - /// v2f32 value into the lower half of a VSR register. - LD_VSX_LH, - - /// VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory - /// instructions such as LXVDSX, LXVWSX. - LD_SPLAT, - - /// VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory - /// that zero-extends. - ZEXT_LD_SPLAT, - - /// VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory - /// that sign-extends. 
- SEXT_LD_SPLAT, - - /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian. - /// Maps directly to an stxvd2x instruction that will be preceded by - /// an xxswapd. - STXVD2X, - - /// CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian. - /// Maps directly to one of stxvd2x/stxvw4x/stxvh8x/stxvb16x depending on - /// the vector type to store vector in big-endian element order. - STORE_VEC_BE, - - /// Store scalar integers from VSR. - ST_VSR_SCAL_INT, - - /// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes - /// except they ensure that the compare input is zero-extended for - /// sub-word versions because the atomic loads zero-extend. - ATOMIC_CMP_SWAP_8, - ATOMIC_CMP_SWAP_16, - - /// CHAIN,Glue = STORE_COND CHAIN, GPR, Ptr - /// The store conditional instruction ST[BHWD]ARX that produces a glue - /// result to attach it to a conditional branch. - STORE_COND, - - /// GPRC = TOC_ENTRY GA, TOC - /// Loads the entry for GA from the TOC, where the TOC base is given by - /// the last operand. - TOC_ENTRY, - LAST_MEMORY_OPCODE = TOC_ENTRY, - }; - - } // end namespace PPCISD - /// Define some predicates that are used for node matching. namespace PPC { @@ -752,10 +178,6 @@ namespace llvm { explicit PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI); - /// getTargetNodeName() - This method returns the name of a target specific - /// DAG node. - const char *getTargetNodeName(unsigned Opcode) const override; - bool isSelectSupported(SelectSupportKind Kind) const override { // PowerPC does not support scalar condition selects on vectors. return (Kind != SelectSupportKind::ScalarCondVectorVal); diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index f3998113ddd52..3ecc58c04e378 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -149,28 +149,49 @@ def SDT_PPCBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, [ def PPCfre : SDNode<"PPCISD::FRE", SDTFPUnaryOp, []>; def PPCfrsqrte: SDNode<"PPCISD::FRSQRTE", SDTFPUnaryOp, []>; + +// Square root instruction. def PPCfsqrt : SDNode<"PPCISD::FSQRT", SDTFPUnaryOp, []>; + +// Test instruction for software square root. def PPCftsqrt : SDNode<"PPCISD::FTSQRT", SDT_PPCFtsqrt,[]>; +// FCFID - The FCFID instruction, taking an f64 operand and producing +// an f64 value containing the FP representation of the integer that +// was temporarily in the f64 operand. def PPCfcfid : SDNode<"PPCISD::FCFID", SDTFPUnaryOp, []>; + +// Newer FCFID[US] integer-to-floating-point conversion instructions for +// unsigned integers and single-precision outputs. def PPCfcfidu : SDNode<"PPCISD::FCFIDU", SDTFPUnaryOp, []>; def PPCfcfids : SDNode<"PPCISD::FCFIDS", SDTFPRoundOp, []>; def PPCfcfidus: SDNode<"PPCISD::FCFIDUS", SDTFPRoundOp, []>; + +// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 +// operand, producing an f64 value containing the integer representation +// of that FP value. def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>; def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>; + +// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for +// unsigned integers with round toward zero. def PPCfctiduz: SDNode<"PPCISD::FCTIDUZ",SDTFPUnaryOp, []>; def PPCfctiwuz: SDNode<"PPCISD::FCTIWUZ",SDTFPUnaryOp, []>; +// VSRQ - The ISA 3.1 (P10) Vector Shift right quadword instruction def PPCvsrq: SDNode<"PPCISD::VSRQ", SDT_PPCVecShiftQuad, []>;
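A note on the pattern across these PowerPC hunks (inferred from the PPCGenSDNodeInfo.inc line added to CMakeLists.txt above; the patch does not spell this out): with the hand-written PPCISD enum and getTargetNodeName deleted, the -gen-sd-node-info TableGen backend now derives the node enum and its debug names from these SDNode defs, which is why each doc comment from the old enum is preserved by moving it onto the corresponding def, and why properties such as the IsStrictFP flag used just below are now expressed directly in TableGen.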
-def PPCstrict_fcfid : SDNode<"PPCISD::STRICT_FCFID", - SDTFPUnaryOp, [SDNPHasChain]>; -def PPCstrict_fcfidu : SDNode<"PPCISD::STRICT_FCFIDU", - SDTFPUnaryOp, [SDNPHasChain]>; -def PPCstrict_fcfids : SDNode<"PPCISD::STRICT_FCFIDS", - SDTFPRoundOp, [SDNPHasChain]>; -def PPCstrict_fcfidus : SDNode<"PPCISD::STRICT_FCFIDUS", - SDTFPRoundOp, [SDNPHasChain]>; +// Constrained integer-to-floating-point conversion instructions. +let IsStrictFP = true in { + def PPCstrict_fcfid : SDNode<"PPCISD::STRICT_FCFID", + SDTFPUnaryOp, [SDNPHasChain]>; + def PPCstrict_fcfidu : SDNode<"PPCISD::STRICT_FCFIDU", + SDTFPUnaryOp, [SDNPHasChain]>; + def PPCstrict_fcfids : SDNode<"PPCISD::STRICT_FCFIDS", + SDTFPRoundOp, [SDNPHasChain]>; + def PPCstrict_fcfidus : SDNode<"PPCISD::STRICT_FCFIDUS", + SDTFPRoundOp, [SDNPHasChain]>; +} def PPCany_fcfid : PatFrags<(ops node:$op), [(PPCfcfid node:$op), @@ -185,28 +206,56 @@ def PPCany_fcfidus : PatFrags<(ops node:$op), [(PPCfcfidus node:$op), (PPCstrict_fcfidus node:$op)]>; +// Store scalar integers from VSR. def PPCstore_scal_int_from_vsr: SDNode<"PPCISD::ST_VSR_SCAL_INT", SDT_PPCstore_scal_int_from_vsr, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +// STFIWX - The STFIWX instruction. The first operand is an input token +// chain, then an f64 value to store, then an address to store it to. def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point +// load which sign-extends from a 32-bit integer value into the +// destination 64-bit register. def PPClfiwax : SDNode<"PPCISD::LFIWAX", SDT_PPClfiwx, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point +// load which zero-extends from a 32-bit integer value into the +// destination 64-bit register. def PPClfiwzx : SDNode<"PPCISD::LFIWZX", SDT_PPClfiwx, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +// GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an +// integer smaller than 64 bits into a VSR. The integer is zero-extended. +// This can be used for converting loaded integers to floating point. def PPClxsizx : SDNode<"PPCISD::LXSIZX", SDT_PPCLxsizx, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +// STXSIX - The STXSI[bh]X instruction. The first operand is an input +// chain, then an f64 value to store, then an address to store it to, +// followed by a byte-width for the store. def PPCstxsix : SDNode<"PPCISD::STXSIX", SDT_PPCstxsix, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in +// VSFRC that is sign-extended from ByteWidth to a 64-bit integer. def PPCVexts : SDNode<"PPCISD::VEXTS", SDT_PPCVexts, []>; -// Extract FPSCR (not modeled at the DAG level). +// F8RC = MFFS - This moves the FPSCR (not modeled) into the register. def PPCmffs : SDNode<"PPCISD::MFFS", SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, [SDNPHasChain]>; -// Perform FADD in round-to-zero mode. +// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding +// towards zero. Used only as part of the long double-to-int +// conversion sequence.
def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>; + +// Constrained floating point add in round-to-zero mode. +let IsStrictFP = true in def PPCstrict_faddrtz: SDNode<"PPCISD::STRICT_FADDRTZ", SDTFPBinOp, [SDNPHasChain]>; @@ -214,72 +263,194 @@ def PPCany_faddrtz: PatFrags<(ops node:$lhs, node:$rhs), [(PPCfaddrtz node:$lhs, node:$rhs), (PPCstrict_faddrtz node:$lhs, node:$rhs)]>; +// FSEL - Traditional three-operand fsel node. def PPCfsel : SDNode<"PPCISD::FSEL", // Type constraint for fsel. SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisFP<0>, SDTCisVT<1, f64>]>, []>; + +// XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions. def PPCxsmaxc : SDNode<"PPCISD::XSMAXC", SDT_PPCFPMinMax, []>; def PPCxsminc : SDNode<"PPCISD::XSMINC", SDT_PPCFPMinMax, []>; + +// Hi/Lo - These represent the high and low 16-bit parts of a global +// address respectively. These nodes have two operands, the first of +// which must be a TargetGlobalAddress, and the second of which must be a +// Constant. Selected naively, these turn into 'lis G+C' and 'li G+C', +// though these are usually folded into other nodes. def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>; def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>; + +// GPRC = TOC_ENTRY GA, TOC +// Loads the entry for GA from the TOC, where the TOC base is given by +// the last operand. def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad, SDNPMemOperand]>; +// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS +// for non-position independent code on PPC32. def PPCppc32GOT : SDNode<"PPCISD::PPC32_GOT", SDTIntLeaf, []>; +// G8RC = ADDIS_GOT_TPREL_HA %x2, Symbol - Used by the initial-exec +// TLS model, produces an ADDIS8 instruction that adds the GOT +// base to sym\@got\@tprel\@ha. def PPCaddisGotTprelHA : SDNode<"PPCISD::ADDIS_GOT_TPREL_HA", SDTIntBinOp>; + +// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec +// TLS model, produces a LD instruction with base register G8RReg +// and offset sym\@got\@tprel\@l. This completes the addition that +// finds the offset of "sym" relative to the thread pointer. def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp, [SDNPMayLoad]>; + +// G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec +// and local-exec TLS models, produces an ADD instruction that adds +// the contents of G8RReg to the thread pointer. Symbol contains a +// relocation sym\@tls which is to be replaced by the thread pointer +// and identifies to the linker that the instruction is part of a +// TLS sequence. def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>; + +// G8RC = ADDIS_TLSGD_HA %x2, Symbol - For the general-dynamic TLS +// model, produces an ADDIS8 instruction that adds the GOT base +// register to sym\@got\@tlsgd\@ha. def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>; + +// %x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS +// model, produces an ADDI8 instruction that adds G8RReg to +// sym\@got\@tlsgd\@l and stores the result in X3. Hidden by +// ADDIS_TLSGD_L_ADDR until after register assignment. def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>; + +// %x3 = GET_TLS_ADDR %x3, Symbol - For the general-dynamic TLS +// model, produces a call to __tls_get_addr(sym\@tlsgd). Hidden by +// ADDIS_TLSGD_L_ADDR until after register assignment. 
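The Hi/Lo split described above follows the usual PowerPC convention that the low 16-bit immediate is sign-extended when consumed, so the high half carries a +0x8000 adjustment in its "@ha" form. A small sketch of that arithmetic (helper names are mine, not part of the patch):

#include <cstdint>
#include <cstdio>

// "@ha": high 16 bits, adjusted so that (ha << 16) + lo reproduces the
// address even though "@l" is sign-extended by addi-style instructions.
uint32_t ha(uint32_t Addr) { return (Addr + 0x8000) >> 16; }
int32_t lo(uint32_t Addr) { return (int16_t)(Addr & 0xffff); }

int main() {
  uint32_t Addr = 0x1234ABCD;
  printf("%#x\n", (ha(Addr) << 16) + lo(Addr)); // 0x1234abcd again
}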
def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>; + +// %x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model, +// produces a call to .__tls_get_mod(_$TLSML\@ml). def PPCgetTlsMod : SDNode<"PPCISD::GET_TLS_MOD_AIX", SDTIntUnaryOp>; + +// %x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on +// 32-bit AIX, produces a call to .__get_tpointer to retrieve the thread +// pointer. At the end of the call, the thread pointer is found in R3. def PPCgetTpointer : SDNode<"PPCISD::GET_TPOINTER", SDTIntLeaf, []>; + +// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that +// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following +// register assignment. def PPCaddiTlsgdLAddr : SDNode<"PPCISD::ADDI_TLSGD_L_ADDR", SDTypeProfile<1, 3, [ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>; + +// GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY +// G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY +// Op that combines two register copies of TOC entries +// (region handle into R3 and variable offset into R4) followed by a +// GET_TLS_ADDR node which will be expanded to a call to .__tls_get_addr. +// This node is used in 64-bit mode as well (in which case the result is +// G8RC and inputs are X3/X4). def PPCTlsgdAIX : SDNode<"PPCISD::TLSGD_AIX", SDTIntBinOp>; + +// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(module handle) +// Op that requires a single input of the module handle TOC entry in R3, +// and generates a GET_TLS_MOD_AIX node which will be expanded into a call +// to .__tls_get_mod. This node is used in both 32-bit and 64-bit modes. +// The only difference is the register class. def PPCTlsldAIX : SDNode<"PPCISD::TLSLD_AIX", SDTIntUnaryOp>; + +// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS +// model, produces an ADDIS8 instruction that adds the GOT base +// register to sym\@got\@tlsld\@ha. def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>; + +// %x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS +// model, produces an ADDI8 instruction that adds G8RReg to +// sym\@got\@tlsld\@l and stores the result in X3. Hidden by +// ADDIS_TLSLD_L_ADDR until after register assignment. def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>; + +// %x3 = GET_TLSLD_ADDR %x3, Symbol - For the local-dynamic TLS +// model, produces a call to __tls_get_addr(sym\@tlsld). Hidden by +// ADDIS_TLSLD_L_ADDR until after register assignment. def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>; + +// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that +// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion +// following register assignment. def PPCaddiTlsldLAddr : SDNode<"PPCISD::ADDI_TLSLD_L_ADDR", SDTypeProfile<1, 3, [ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>; + +// G8RC = ADDIS_DTPREL_HA %x3, Symbol - For the local-dynamic TLS +// model, produces an ADDIS8 instruction that adds X3 to +// sym\@dtprel\@ha. def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp>; + +// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS +// model, produces an ADDI8 instruction that adds G8RReg to +// sym\@got\@dtprel\@l. def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>; + +// G8RC = PADDI_DTPREL %x3, Symbol - For the pc-rel based local-dynamic TLS +// model, produces a PADDI8 instruction that adds X3 to sym\@dtprel. def PPCpaddiDtprel : SDNode<"PPCISD::PADDI_DTPREL", SDTIntBinOp>; +// VPERM - The PPC VPERM Instruction. 
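For reference, the byte-select behavior of VPERM can be modeled in a few lines. This is an emulation sketch, not the backend's lowering, and it uses plain array order rather than the instruction's big-endian element numbering:

#include <cstdint>
#include <cstdio>

// Each mask byte selects one byte out of the 32-byte concatenation of the
// two source vectors; only the low 5 bits of a mask byte are significant.
void vperm(const uint8_t A[16], const uint8_t B[16], const uint8_t Mask[16],
           uint8_t Out[16]) {
  uint8_t Cat[32];
  for (int I = 0; I < 16; ++I) {
    Cat[I] = A[I];
    Cat[16 + I] = B[I];
  }
  for (int I = 0; I < 16; ++I)
    Out[I] = Cat[Mask[I] & 0x1f];
}

int main() {
  uint8_t A[16], B[16], M[16], Out[16];
  for (int I = 0; I < 16; ++I) { A[I] = I; B[I] = 100 + I; M[I] = 31 - I; }
  vperm(A, B, M, Out);
  printf("%d %d\n", Out[0], Out[15]); // 115 100
}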
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; + +// XXSPLT - The PPC VSX splat instructions. def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>; + +// XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for +// converting immediate single precision numbers to double precision +// vector or scalar. def PPCxxspltidp : SDNode<"PPCISD::XXSPLTI_SP_TO_DP", SDT_PPCSpToDp, []>; + +// VECINSERT - The PPC vector insert instruction. def PPCvecinsert : SDNode<"PPCISD::VECINSERT", SDT_PPCVecInsert, []>; + +// XXPERMDI - The PPC XXPERMDI instruction. def PPCxxpermdi : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>; + +// VECSHL - The PPC vector shift left instruction. def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>; +// The CMPB instruction (takes two operands of i32 or i64). def PPCcmpb : SDNode<"PPCISD::CMPB", SDTIntBinOp, []>; // These nodes represent the 32-bit PPC shifts that operate on 6-bit shift // amounts. These nodes are generated by the multi-precision shift code. +// +// For scalar types, only the last `n + 1` bits of the shift amounts +// are used, where n is log2(sizeof(element) * 8). See sld/slw, etc. +// for exact behaviors. +// +// For vector types, only the last n bits are used. See vsld. def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>; def PPCsra : SDNode<"PPCISD::SRA" , SDTIntShiftOp>; def PPCshl : SDNode<"PPCISD::SHL" , SDTIntShiftOp>; +// FNMSUB - Negated multiply-subtract instruction. def PPCfnmsub : SDNode<"PPCISD::FNMSUB" , SDTFPTernaryOp>; +// EXTSWSLI = The PPC extswsli instruction, which does an extend-sign +// word and shift left immediate. def PPCextswsli : SDNode<"PPCISD::EXTSWSLI" , SDT_PPCextswsli>; -def PPCstrict_fctidz : SDNode<"PPCISD::STRICT_FCTIDZ", - SDTFPUnaryOp, [SDNPHasChain]>; -def PPCstrict_fctiwz : SDNode<"PPCISD::STRICT_FCTIWZ", - SDTFPUnaryOp, [SDNPHasChain]>; -def PPCstrict_fctiduz : SDNode<"PPCISD::STRICT_FCTIDUZ", - SDTFPUnaryOp, [SDNPHasChain]>; -def PPCstrict_fctiwuz : SDNode<"PPCISD::STRICT_FCTIWUZ", +// Constrained conversion from floating point to int +let IsStrictFP = true in { + def PPCstrict_fctidz : SDNode<"PPCISD::STRICT_FCTIDZ", SDTFPUnaryOp, [SDNPHasChain]>; + def PPCstrict_fctiwz : SDNode<"PPCISD::STRICT_FCTIWZ", + SDTFPUnaryOp, [SDNPHasChain]>; + def PPCstrict_fctiduz : SDNode<"PPCISD::STRICT_FCTIDUZ", + SDTFPUnaryOp, [SDNPHasChain]>; + def PPCstrict_fctiwuz : SDNode<"PPCISD::STRICT_FCTIWUZ", + SDTFPUnaryOp, [SDNPHasChain]>; +} def PPCany_fctidz : PatFrags<(ops node:$op), [(PPCstrict_fctidz node:$op), @@ -294,19 +465,24 @@ def PPCany_fctiwuz : PatFrags<(ops node:$op), [(PPCstrict_fctiwuz node:$op), (PPCfctiwuz node:$op)]>; -// Move 2 i64 values into a VSX register +// Direct move of 2 consecutive GPRs to a VSX register. def PPCbuild_fp128: SDNode<"PPCISD::BUILD_FP128", SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisSameSizeAs<1,2>, SDTCisSameAs<1,2>]>, []>; +// BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and +// EXTRACT_ELEMENT but take f64 arguments instead of i64, as i64 is +// unsupported for this target. +// Merge 2 GPRs to a single SPE register. def PPCbuild_spe64: SDNode<"PPCISD::BUILD_SPE64", SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1,i32>, SDTCisVT<1,i32>]>, []>; +// Extract SPE register component, second argument is high or low.
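The shift-amount note above (a 6-bit amount for 32-bit shifts, zeroing the result for amounts in [32, 63]) matches slw; a quick sketch of the semantics, not of the DAG node itself:

#include <cstdint>
#include <cstdio>

// slw-style shift: only the last n + 1 = 6 bits of the amount are consumed,
// and any amount of 32 or more produces zero rather than wrapping mod 32.
uint32_t slw(uint32_t Value, uint32_t Amount) {
  Amount &= 63;
  return Amount < 32 ? Value << Amount : 0;
}

int main() {
  printf("%u %u\n", slw(1, 31), slw(1, 32)); // 2147483648 0
}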
def PPCextract_spe : SDNode<"PPCISD::EXTRACT_SPE", SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, f64>, @@ -320,6 +496,11 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeqEnd, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>; + +// CALL - A direct function call. +// CALL_NOP is a call with the special NOP which follows 64-bit +// SVR4 calls and 32-bit/64-bit AIX calls. +// CALL_NOTOC - the caller does not use the TOC. def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; @@ -329,17 +510,28 @@ def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall, def PPCcall_notoc : SDNode<"PPCISD::CALL_NOTOC", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; + +// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a +// MTCTR instruction. def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + +// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a +// BCTRL instruction. def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; + +// CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl +// instruction and the TOC reload required on 64-bit ELF, 32-bit AIX +// and 64-bit AIX. def PPCbctrl_load_toc : SDNode<"PPCISD::BCTRL_LOAD_TOC", SDTypeProfile<0, 1, []>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; -// Call nodes for strictfp calls (that define RM). +// The variants that implicitly define rounding mode for calls with +// strictfp semantics. def PPCcall_rm : SDNode<"PPCISD::CALL_RM", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; @@ -357,42 +549,81 @@ def PPCbctrl_load_toc_rm : SDNode<"PPCISD::BCTRL_LOAD_TOC_RM", [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +// Return with a glue operand, matched by 'blr'. def PPCretglue : SDNode<"PPCISD::RET_GLUE", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +// TC_RETURN - A tail call return. +// operand #0 chain +// operand #1 callee (register or absolute) +// operand #2 stack adjustment +// operand #3 optional in flag def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +// EH_SJLJ_SETJMP - SjLj exception handling setjmp. def PPCeh_sjlj_setjmp : SDNode<"PPCISD::EH_SJLJ_SETJMP", SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>, [SDNPHasChain, SDNPSideEffect]>; + +// EH_SJLJ_LONGJMP - SjLj exception handling longjmp. def PPCeh_sjlj_longjmp : SDNode<"PPCISD::EH_SJLJ_LONGJMP", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPSideEffect]>; +// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* +// instructions. For lack of better number, we use the opcode number +// encoding for the OPC field to identify the compare. For example, 838 +// is VCMPGTSH. def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>; + +// RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the +// altivec VCMP*_rec instructions. For lack of better number, we use the +// opcode number encoding for the OPC field to identify the compare. For +// example, 838 is VCMPGTSH. def PPCvcmp_rec : SDNode<"PPCISD::VCMP_rec", SDT_PPCvcmp, [SDNPOutGlue]>; +// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This +// corresponds to the COND_BRANCH pseudo instruction. CRRC is the +// condition register to branch on, OPC is the branch opcode to use (e.g.
+// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is +// an optional input flag argument. def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr, [SDNPHasChain, SDNPOptInGlue]>; -// PPC-specific atomic operations. +// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes +// except they ensure that the compare input is zero-extended for +// sub-word versions because the atomic loads zero-extend. def PPCatomicCmpSwap_8 : SDNode<"PPCISD::ATOMIC_CMP_SWAP_8", SDTAtomic3, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def PPCatomicCmpSwap_16 : SDNode<"PPCISD::ATOMIC_CMP_SWAP_16", SDTAtomic3, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; + +// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a +// byte-swapping load instruction. It loads "Type" bits, byte-swaps it, +// then puts it in the bottom bits of the GPRC. Type can be either i16 +// or i32. def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a +// byte-swapping store instruction. It byte-swaps the low "Type" bits of +// the GPRC input, then stores it through Ptr. Type can be either i16 or +// i32. def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +// CHAIN,Glue = STORE_COND CHAIN, GPR, Ptr +// The store conditional instruction ST[BHWD]ARX that produces a glue +// result to attach it to a conditional branch. def PPCStoreCond : SDNode<"PPCISD::STORE_COND", SDT_StoreCond, [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPOutGlue]>; -// Instructions to set/unset CR bit 6 for SVR4 vararg calls +// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls def PPCcr6set : SDNode<"PPCISD::CR6SET", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone, @@ -401,17 +632,44 @@ def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone, // Instructions to support dynamic alloca. def SDTDynOp : SDTypeProfile<1, 2, []>; def SDTDynAreaOp : SDTypeProfile<1, 1, []>; + +// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX) +// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to +// compute an allocation on the stack. def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>; + +// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to +// compute an offset from native SP to the address of the most recent +// dynamic alloca. def PPCdynareaoffset : SDNode<"PPCISD::DYNAREAOFFSET", SDTDynAreaOp, [SDNPHasChain]>; + +// To avoid stack clash, allocation is performed by block and each block is +// probed. def PPCprobedalloca : SDNode<"PPCISD::PROBED_ALLOCA", SDTDynOp, [SDNPHasChain]>; // PC Relative Specific Nodes + +// MAT_PCREL_ADDR = Materialize a PC Relative address. This can be done +// either through an add like PADDI or through a PC Relative load like +// PLD. def PPCmatpcreladdr : SDNode<"PPCISD::MAT_PCREL_ADDR", SDTIntUnaryOp, []>; + +// TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for +// TLS global address when using dynamic access models. This can be done +// through an add like PADDI.
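To make the PROBED_ALLOCA note above concrete, here is a schematic of block-wise probing. Everything in it is an assumption for illustration (the 4 KiB block size, the helper name, and the use of an ordinary buffer instead of the real stack pointer):

#include <cstddef>
#include <cstdint>

constexpr size_t kProbeSize = 4096; // assumed guard-page-sized block

// Carve the allocation out one block at a time, touching each block so the
// stack pointer can never skip past an unmapped guard page (stack clash).
void probedAlloca(volatile uint8_t *SP, size_t Bytes) {
  while (Bytes >= kProbeSize) {
    SP -= kProbeSize;
    *SP = 0; // probe the newly exposed block
    Bytes -= kProbeSize;
  }
  if (Bytes) {
    SP -= Bytes;
    *SP = 0; // probe the residual block
  }
}

int main() {
  static uint8_t Buffer[16384];
  probedAlloca(Buffer + sizeof(Buffer), 10000); // stays inside Buffer
}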
def PPCtlsdynamatpcreladdr : SDNode<"PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR", SDTIntUnaryOp, []>; + +// TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address +// when using local exec access models, and when prefixed instructions are +// available. This is used with ADD_TLS to produce an add like PADDI. def PPCtlslocalexecmataddr : SDNode<"PPCISD::TLS_LOCAL_EXEC_MAT_ADDR", SDTIntUnaryOp, []>; +// These nodes represent PPC arithmetic operations with carry. def PPCaddc : SDNode<"PPCISD::ADDC", SDT_PPCBinaryArithWithFlagsOut, [SDNPCommutative]>; def PPCadde : SDNode<"PPCISD::ADDE", SDT_PPCBinaryArithWithFlagsInOut, @@ -2535,6 +2793,7 @@ defm FCPSGND : XForm_28r<63, 8, (outs f8rc:$RST), (ins f8rc:$RA, f8rc:$RB), // Reciprocal estimates. let mayRaiseFPException = 1 in { +// Reciprocal estimate instructions (unary FP ops). defm FRE : XForm_26r<63, 24, (outs f8rc:$RST), (ins f8rc:$RB), "fre", "$RST, $RB", IIC_FPGeneral, [(set f64:$RST, (PPCfre f64:$RB))]>; diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td index 2d8c633b9fef6..bd9a999237c09 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrP10.td +++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td @@ -83,15 +83,31 @@ def SDT_PPCsetbc : SDTypeProfile<1, 1, [ // ISA 3.1 specific PPCISD nodes. // +// XXSPLTI32DX - The PPC XXSPLTI32DX instruction. def PPCxxsplti32dx : SDNode<"PPCISD::XXSPLTI32DX", SDT_PPCSplat32, []>; + +// ACC_BUILD = Build an accumulator register from 4 VSX registers. def PPCAccBuild : SDNode<"PPCISD::ACC_BUILD", SDT_PPCAccBuild, []>; + +// PAIR_BUILD = Build a vector pair register from 2 VSX registers. def PPCPairBuild : SDNode<"PPCISD::PAIR_BUILD", SDT_PPCPairBuild, []>; + +// EXTRACT_VSX_REG = Extract one of the underlying vsx registers of +// an accumulator or pair register. This node is needed because +// EXTRACT_SUBVECTOR expects the input and output vectors to have the same +// element type. def PPCAccExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCAccExtractVsx, []>; def PPCPairExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCPairExtractVsx, []>; + +// XXMFACC = This corresponds to the xxmfacc instruction. def PPCxxmfacc : SDNode<"PPCISD::XXMFACC", SDT_PPCxxmfacc, []>; + +// SETBC - The ISA 3.1 (P10) SETBC instruction. def PPCsetbc : SDNode<"PPCISD::SETBC", SDT_PPCsetbc, []>; + +// SETBCR - The ISA 3.1 (P10) SETBCR instruction. def PPCsetbcr : SDNode<"PPCISD::SETBCR", SDT_PPCsetbc, []>; //===----------------------------------------------------------------------===// @@ -105,7 +121,10 @@ def SDT_PPCLXVRZX : SDTypeProfile<1, 2, [ SDTCisVT<0, v1i128>, SDTCisPtrTy<1>, SDTCisPtrTy<2> ]>; -// PPC Specific DAG Nodes. +// LXVRZX - Load VSX Vector Rightmost and Zero Extend +// This node represents v1i128 BUILD_VECTOR of a zero-extending load +// instruction from a byte, halfword, word, or doubleword to i128. +// Allows utilization of the Load VSX Vector Rightmost Instructions. def PPClxvrzx : SDNode<"PPCISD::LXVRZX", SDT_PPCLXVRZX, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index 885bed670e319..d72201df5b002 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -87,31 +87,91 @@ def SDT_PPCxxperm : SDTypeProfile<1, 3, [ SDTCisVT<0, v2f64>, SDTCisVT<1, v2f64>, SDTCisVT<2, v2f64>, SDTCisVT<3, v4i32>]>; //--------------------------- Custom PPC nodes -------------------------------// + +// VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
+// Maps directly to an lxvd2x instruction that will be followed by +// an xxswapd. def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian. +// Maps directly to an stxvd2x instruction that will be preceded by +// an xxswapd. def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian. +// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x depending on +// the vector type to load vector in big-endian element order. def PPCld_vec_be : SDNode<"PPCISD::LOAD_VEC_BE", SDT_PPCld_vec_be, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +// CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian. +// Maps directly to one of stxvd2x/stxvw4x/stxvh8x/stxvb16x depending on +// the vector type to store vector in big-endian element order. def PPCst_vec_be : SDNode<"PPCISD::STORE_VEC_BE", SDT_PPCst_vec_be, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little +// endian. Maps to an xxswapd instruction that corrects an lxvd2x +// or stxvd2x instruction. The chain is necessary because the +// sequence replaces a load and needs to provide the same number +// of outputs. def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>; + +// Direct move from a VSX register to a GPR. def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>; + +// Direct move from a GPR to a VSX register (algebraic). def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>; + +// Direct move from a GPR to a VSX register (zero). def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>; + +// Extract a subvector from a signed integer vector and convert to FP. +// It is primarily used to convert a (widened) illegal integer vector +// type to a legal floating point vector type. +// For example v2i32 -> widened to v4i32 -> v2f64 def PPCsvec2fp : SDNode<"PPCISD::SINT_VEC_TO_FP", SDTVecConv, []>; + +// Extract a subvector from an unsigned integer vector and convert to FP. +// As with SINT_VEC_TO_FP, used for converting illegal types. def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>; + +// An SDNode for swaps that are not associated with any loads/stores +// and thereby have no chain. def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>; +// FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or +// lower (IDX=1) half of v4f32 to v2f64. def PPCfpexth : SDNode<"PPCISD::FP_EXTEND_HALF", SDT_PPCfpexth, []>; + +// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a +// v2f32 value into the lower half of a VSR register. def PPCldvsxlh : SDNode<"PPCISD::LD_VSX_LH", SDT_PPCldvsxlh, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +// VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory +// instruction such as LXVDSX or LXVWSX. def PPCldsplat : SDNode<"PPCISD::LD_SPLAT", SDT_PPCldsplat, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +// VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory +// instruction that zero-extends. def PPCzextldsplat : SDNode<"PPCISD::ZEXT_LD_SPLAT", SDT_PPCldsplat, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +// VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory +// instruction that sign-extends.
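The XXSWAPD fix-up described above just exchanges the two doublewords of a 16-byte register; a quick emulation (sketch only, not backend code):

#include <cstdint>
#include <cstdio>

// Swap the two 64-bit halves of a vector register image, which is what
// corrects element order after an lxvd2x on a little-endian target.
void xxswapd(uint64_t V[2]) {
  uint64_t Tmp = V[0];
  V[0] = V[1];
  V[1] = Tmp;
}

int main() {
  uint64_t V[2] = {1, 2};
  xxswapd(V);
  printf("%llu %llu\n", (unsigned long long)V[0],
         (unsigned long long)V[1]); // 2 1
}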
def PPCsextldsplat : SDNode<"PPCISD::SEXT_LD_SPLAT", SDT_PPCldsplat, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +// PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to +// place the value into the least significant element of the most +// significant doubleword in the vector. This is not element zero for +// anything smaller than a doubleword on either endianness. This node has +// the same semantics as SCALAR_TO_VECTOR except that the value remains in +// the aforementioned location in the vector register. def PPCSToV : SDNode<"PPCISD::SCALAR_TO_VECTOR_PERMUTED", SDTypeProfile<1, 1, []>, []>; diff --git a/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp index 93a4693c50168..80aa1122167df 100644 --- a/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp @@ -7,20 +7,72 @@ //===----------------------------------------------------------------------===// #include "PPCSelectionDAGInfo.h" -#include "PPCISelLowering.h" +#include "llvm/CodeGen/SelectionDAG.h" + +#define GET_SDNODE_DESC +#include "PPCGenSDNodeInfo.inc" using namespace llvm; +PPCSelectionDAGInfo::PPCSelectionDAGInfo() + : SelectionDAGGenTargetInfo(PPCGenSDNodeInfo) {} + PPCSelectionDAGInfo::~PPCSelectionDAGInfo() = default; -bool PPCSelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const { - return Opcode >= PPCISD::FIRST_MEMORY_OPCODE && - Opcode <= PPCISD::LAST_MEMORY_OPCODE; +const char *PPCSelectionDAGInfo::getTargetNodeName(unsigned Opcode) const { + switch (static_cast<PPCISD::NodeType>(Opcode)) { + case PPCISD::GlobalBaseReg: + return "PPCISD::GlobalBaseReg"; + case PPCISD::SRA_ADDZE: + return "PPCISD::SRA_ADDZE"; + case PPCISD::READ_TIME_BASE: + return "PPCISD::READ_TIME_BASE"; + case PPCISD::MFOCRF: + return "PPCISD::MFOCRF"; + case PPCISD::ANDI_rec_1_EQ_BIT: + return "PPCISD::ANDI_rec_1_EQ_BIT"; + case PPCISD::ANDI_rec_1_GT_BIT: + return "PPCISD::ANDI_rec_1_GT_BIT"; + case PPCISD::BDNZ: + return "PPCISD::BDNZ"; + case PPCISD::BDZ: + return "PPCISD::BDZ"; + case PPCISD::PPC32_PICGOT: + return "PPCISD::PPC32_PICGOT"; + case PPCISD::VADD_SPLAT: + return "PPCISD::VADD_SPLAT"; + } + + return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode); } -bool PPCSelectionDAGInfo::isTargetStrictFPOpcode(unsigned Opcode) const { - return Opcode >= PPCISD::FIRST_STRICTFP_OPCODE && - Opcode <= PPCISD::LAST_STRICTFP_OPCODE; +void PPCSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG, + const SDNode *N) const { + switch (N->getOpcode()) { + default: + break; + case PPCISD::DYNAREAOFFSET: + // invalid number of results; expected 2, got 1 + case PPCISD::TOC_ENTRY: + // invalid number of results; expected 1, got 2 + case PPCISD::STORE_COND: + // invalid number of results; expected 2, got 3 + case PPCISD::LD_SPLAT: + case PPCISD::SEXT_LD_SPLAT: + case PPCISD::ZEXT_LD_SPLAT: + // invalid number of operands; expected 2, got 3 + case PPCISD::ST_VSR_SCAL_INT: + // invalid number of operands; expected 4, got 5 + case PPCISD::XXPERM: + // operand #1 must have type v2f64, but has type v16i8 + case PPCISD::ACC_BUILD: + // operand #3 must have type v4i32, but has type v16i8 + case PPCISD::PAIR_BUILD: + // operand #1 must have type v4i32, but has type v16i8 + return; + } + + SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N); } std::pair<SDValue, SDValue> PPCSelectionDAGInfo::EmitTargetCodeForMemcmp( diff --git a/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h index f962a7a5321aa..ffe8982ce1af4 100644 ---
a/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h +++ b/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h @@ -11,15 +11,66 @@ #include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#define GET_SDNODE_ENUM +#include "PPCGenSDNodeInfo.inc" + namespace llvm { +namespace PPCISD { + +enum NodeType : unsigned { + /// The result of the mflr at function entry, used for PIC code. + GlobalBaseReg = GENERATED_OPCODE_END, + + /// The combination of sra[wd]i and addze used to implement signed + /// integer division by a power of 2. The first operand is the dividend, + /// and the second is the constant shift amount (representing the + /// divisor). + SRA_ADDZE, + + /// R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction. + /// This copies the bits corresponding to the specified CRREG into the + /// resultant GPR. Bits corresponding to other CR regs are undefined. + MFOCRF, + + // FIXME: Remove these once the ANDI glue bug is fixed: + /// i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the + /// eq or gt bit of CR0 after executing andi. x, 1. This is used to + /// implement truncation of i32 or i64 to i1. + ANDI_rec_1_EQ_BIT, + ANDI_rec_1_GT_BIT, + + // READ_TIME_BASE - A read of the 64-bit time-base register on a 32-bit + // target (returns (Lo, Hi)). It takes a chain operand. + READ_TIME_BASE, -class PPCSelectionDAGInfo : public SelectionDAGTargetInfo { + /// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based + /// loops. + BDNZ, + BDZ, + + /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and + /// local dynamic TLS and position independent code on PPC32. + PPC32_PICGOT, + + /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded + /// during instruction selection to optimize a BUILD_VECTOR into + /// operations on splats. This is necessary to avoid losing these + /// optimizations due to constant folding. + VADD_SPLAT, +}; + +} // namespace PPCISD + +class PPCSelectionDAGInfo : public SelectionDAGGenTargetInfo { public: + PPCSelectionDAGInfo(); + ~PPCSelectionDAGInfo() override; - bool isTargetMemoryOpcode(unsigned Opcode) const override; + const char *getTargetNodeName(unsigned Opcode) const override; - bool isTargetStrictFPOpcode(unsigned Opcode) const override; + void verifyTargetNode(const SelectionDAG &DAG, + const SDNode *N) const override; std::pair<SDValue, SDValue> EmitTargetCodeForMemcmp(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index f313d3f1347d4..fb298ee35d6c2 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -16798,9 +16798,7 @@ static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG, // because X is exact (Y >> M + 2). uint64_t ShAmt = Log2_64(MulAmtLowBit) + 2; using namespace SDPatternMatch; - return sd_match(X, m_AnyOf(m_Sra(m_Value(), m_SpecificInt(ShAmt)), - m_Srl(m_Value(), m_SpecificInt(ShAmt)))) && - X->getFlags().hasExact(); + return sd_match(X, m_ExactSr(m_Value(), m_SpecificInt(ShAmt))); }; if (isPowerOf2_64(MulAmt - MulAmtLowBit) && !(CanSub && PreferSub())) { Op = ISD::ADD; @@ -16825,10 +16823,13 @@ static SDValue getShlAddShlAdd(SDNode *N, SelectionDAG &DAG, unsigned ShX, SDLoc DL(N); EVT VT = N->getValueType(0); SDValue X = N->getOperand(0); - // Put the shift first if we can fold a zext into the shift forming a slli.uw. + // Put the shift first if we can fold: + // a. a zext into the shift forming a slli.uw + // b.
an exact shift right forming one shorter shift or no shift at all using namespace SDPatternMatch; if (Shift != 0 && - sd_match(X, m_And(m_Value(), m_SpecificInt(UINT64_C(0xffffffff))))) { + sd_match(X, m_AnyOf(m_And(m_Value(), m_SpecificInt(UINT64_C(0xffffffff))), + m_ExactSr(m_Value(), m_ConstInt())))) { X = DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(Shift, DL, VT)); Shift = 0; } diff --git a/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp index d17528dd882bf..751ae0fe34d33 100644 --- a/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp @@ -17,7 +17,8 @@ #include "SPIRV.h" #include "SPIRVSubtarget.h" #include "SPIRVUtils.h" -#include "llvm/IR/Attributes.h" +#include "llvm/IR/IntrinsicsSPIRV.h" +#include "llvm/Support/Debug.h" #include #define DEBUG_TYPE "spirv-postlegalizer" @@ -43,79 +44,314 @@ extern void processInstr(MachineInstr &MI, MachineIRBuilder &MIB, SPIRVType *KnownResType); } // namespace llvm -static bool mayBeInserted(unsigned Opcode) { - switch (Opcode) { - case TargetOpcode::G_SMAX: - case TargetOpcode::G_UMAX: - case TargetOpcode::G_SMIN: - case TargetOpcode::G_UMIN: - case TargetOpcode::G_FMINNUM: - case TargetOpcode::G_FMINIMUM: - case TargetOpcode::G_FMAXNUM: - case TargetOpcode::G_FMAXIMUM: - return true; +static SPIRVType *deduceIntTypeFromResult(Register ResVReg, + MachineIRBuilder &MIB, + SPIRVGlobalRegistry *GR) { + const LLT &Ty = MIB.getMRI()->getType(ResVReg); + return GR->getOrCreateSPIRVIntegerType(Ty.getScalarSizeInBits(), MIB); +} + +static bool deduceAndAssignTypeForGUnmerge(MachineInstr *I, MachineFunction &MF, + SPIRVGlobalRegistry *GR) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + Register SrcReg = I->getOperand(I->getNumOperands() - 1).getReg(); + SPIRVType *ScalarType = nullptr; + if (SPIRVType *DefType = GR->getSPIRVTypeForVReg(SrcReg)) { + assert(DefType->getOpcode() == SPIRV::OpTypeVector); + ScalarType = GR->getSPIRVTypeForVReg(DefType->getOperand(1).getReg()); + } + + if (!ScalarType) { + // If we could not deduce the type from the source, try to deduce it from + // the uses of the results. + for (unsigned i = 0; i < I->getNumDefs() && !ScalarType; ++i) { + for (const auto &Use : + MRI.use_nodbg_instructions(I->getOperand(i).getReg())) { + assert(Use.getOpcode() == TargetOpcode::G_BUILD_VECTOR && + "Expected use of G_UNMERGE_VALUES to be a G_BUILD_VECTOR"); + if (auto *VecType = + GR->getSPIRVTypeForVReg(Use.getOperand(0).getReg())) { + ScalarType = GR->getScalarOrVectorComponentType(VecType); + break; + } + } + } + } + + if (!ScalarType) + return false; + + for (unsigned i = 0; i < I->getNumDefs(); ++i) { + Register DefReg = I->getOperand(i).getReg(); + if (GR->getSPIRVTypeForVReg(DefReg)) + continue; + + LLT DefLLT = MRI.getType(DefReg); + SPIRVType *ResType = + DefLLT.isVector() + ? 
GR->getOrCreateSPIRVVectorType( + ScalarType, DefLLT.getNumElements(), *I, + *MF.getSubtarget().getInstrInfo()) + : ScalarType; + setRegClassType(DefReg, ResType, GR, &MRI, MF); + } + return true; +} + +static SPIRVType *deduceTypeFromSingleOperand(MachineInstr *I, + MachineIRBuilder &MIB, + SPIRVGlobalRegistry *GR, + unsigned OpIdx) { + Register OpReg = I->getOperand(OpIdx).getReg(); + if (SPIRVType *OpType = GR->getSPIRVTypeForVReg(OpReg)) { + if (SPIRVType *CompType = GR->getScalarOrVectorComponentType(OpType)) { + Register ResVReg = I->getOperand(0).getReg(); + const LLT &ResLLT = MIB.getMRI()->getType(ResVReg); + if (ResLLT.isVector()) + return GR->getOrCreateSPIRVVectorType(CompType, ResLLT.getNumElements(), + MIB, false); + return CompType; + } + } + return nullptr; +} + +static SPIRVType *deduceTypeFromOperandRange(MachineInstr *I, + MachineIRBuilder &MIB, + SPIRVGlobalRegistry *GR, + unsigned StartOp, unsigned EndOp) { + SPIRVType *ResType = nullptr; + for (unsigned i = StartOp; i < EndOp; ++i) { + if (SPIRVType *Type = deduceTypeFromSingleOperand(I, MIB, GR, i)) { +#ifdef EXPENSIVE_CHECKS + assert((!ResType || Type == ResType) && "Conflicting type from operands."); + ResType = Type; +#else + return Type; +#endif + } + } + return ResType; +} + +static SPIRVType *deduceTypeForResultRegister(MachineInstr *Use, + Register UseRegister, + SPIRVGlobalRegistry *GR, + MachineIRBuilder &MIB) { + for (const MachineOperand &MO : Use->defs()) { + if (!MO.isReg()) + continue; + if (SPIRVType *OpType = GR->getSPIRVTypeForVReg(MO.getReg())) { + if (SPIRVType *CompType = GR->getScalarOrVectorComponentType(OpType)) { + const LLT &ResLLT = MIB.getMRI()->getType(UseRegister); + if (ResLLT.isVector()) + return GR->getOrCreateSPIRVVectorType( + CompType, ResLLT.getNumElements(), MIB, false); + return CompType; + } + } + } + return nullptr; +} + +static SPIRVType *deduceTypeFromUses(Register Reg, MachineFunction &MF, + SPIRVGlobalRegistry *GR, + MachineIRBuilder &MIB) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + for (MachineInstr &Use : MRI.use_nodbg_instructions(Reg)) { + SPIRVType *ResType = nullptr; + switch (Use.getOpcode()) { + case TargetOpcode::G_BUILD_VECTOR: + case TargetOpcode::G_EXTRACT_VECTOR_ELT: + case TargetOpcode::G_UNMERGE_VALUES: + LLVM_DEBUG(dbgs() << "Looking at use " << Use << "\n"); + ResType = deduceTypeForResultRegister(&Use, Reg, GR, MIB); + break; + } + if (ResType) + return ResType; + } + return nullptr; +} + +static SPIRVType *deduceResultTypeFromOperands(MachineInstr *I, + SPIRVGlobalRegistry *GR, + MachineIRBuilder &MIB) { + Register ResVReg = I->getOperand(0).getReg(); + switch (I->getOpcode()) { + case TargetOpcode::G_CONSTANT: + case TargetOpcode::G_ANYEXT: + return deduceIntTypeFromResult(ResVReg, MIB, GR); + case TargetOpcode::G_BUILD_VECTOR: + return deduceTypeFromOperandRange(I, MIB, GR, 1, I->getNumOperands()); + case TargetOpcode::G_SHUFFLE_VECTOR: + return deduceTypeFromOperandRange(I, MIB, GR, 1, 3); default: - return isTypeFoldingSupported(Opcode); + if (I->getNumDefs() == 1 && I->getNumOperands() > 1 && + I->getOperand(1).isReg()) + return deduceTypeFromSingleOperand(I, MIB, GR, 1); + return nullptr; } } -static void processNewInstrs(MachineFunction &MF, SPIRVGlobalRegistry *GR, - MachineIRBuilder MIB) { +static bool deduceAndAssignSpirvType(MachineInstr *I, MachineFunction &MF, + SPIRVGlobalRegistry *GR, + MachineIRBuilder &MIB) { + LLVM_DEBUG(dbgs() << "\nProcessing instruction: " << *I); MachineRegisterInfo &MRI = MF.getRegInfo(); + Register ResVReg = 
I->getOperand(0).getReg(); + + // G_UNMERGE_VALUES is handled separately because it has multiple definitions, + // unlike the other instructions which have a single result register. The main + // deduction logic is designed for the single-definition case. + if (I->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) + return deduceAndAssignTypeForGUnmerge(I, MF, GR); + + LLVM_DEBUG(dbgs() << "Inferring type from operands\n"); + SPIRVType *ResType = deduceResultTypeFromOperands(I, GR, MIB); + if (!ResType) { + LLVM_DEBUG(dbgs() << "Inferring type from uses\n"); + ResType = deduceTypeFromUses(ResVReg, MF, GR, MIB); + } + + if (!ResType) + return false; + + LLVM_DEBUG(dbgs() << "Assigned type to " << *I << ": " << *ResType); + GR->assignSPIRVTypeToVReg(ResType, ResVReg, MF); + if (!MRI.getRegClassOrNull(ResVReg)) { + LLVM_DEBUG(dbgs() << "Updating the register class.\n"); + setRegClassType(ResVReg, ResType, GR, &MRI, *GR->CurMF, true); + } + return true; +} + +static bool requiresSpirvType(MachineInstr &I, SPIRVGlobalRegistry *GR, + MachineRegisterInfo &MRI) { + LLVM_DEBUG(dbgs() << "Checking if instruction requires a SPIR-V type: " + << I;); + if (I.getNumDefs() == 0) { + LLVM_DEBUG(dbgs() << "Instruction does not have a definition.\n"); + return false; + } + + if (!I.isPreISelOpcode()) { + LLVM_DEBUG(dbgs() << "Instruction is not a generic instruction.\n"); + return false; + } + + Register ResultRegister = I.defs().begin()->getReg(); + if (GR->getSPIRVTypeForVReg(ResultRegister)) { + LLVM_DEBUG(dbgs() << "Instruction already has a SPIR-V type.\n"); + if (!MRI.getRegClassOrNull(ResultRegister)) { + LLVM_DEBUG(dbgs() << "Updating the register class.\n"); + setRegClassType(ResultRegister, GR->getSPIRVTypeForVReg(ResultRegister), + GR, &MRI, *GR->CurMF, true); + } + return false; + } + + return true; +} + +static void registerSpirvTypeForNewInstructions(MachineFunction &MF, + SPIRVGlobalRegistry *GR) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + SmallVector<MachineInstr *> Worklist; for (MachineBasicBlock &MBB : MF) { for (MachineInstr &I : MBB) { - const unsigned Opcode = I.getOpcode(); - if (Opcode == TargetOpcode::G_UNMERGE_VALUES) { - unsigned ArgI = I.getNumOperands() - 1; - Register SrcReg = I.getOperand(ArgI).isReg() - ? I.getOperand(ArgI).getReg() - : Register(0); - SPIRVType *DefType = - SrcReg.isValid() ? GR->getSPIRVTypeForVReg(SrcReg) : nullptr; - if (!DefType || DefType->getOpcode() != SPIRV::OpTypeVector) - report_fatal_error( - "cannot select G_UNMERGE_VALUES with a non-vector argument"); - SPIRVType *ScalarType = - GR->getSPIRVTypeForVReg(DefType->getOperand(1).getReg()); - for (unsigned i = 0; i < I.getNumDefs(); ++i) { - Register ResVReg = I.getOperand(i).getReg(); - SPIRVType *ResType = GR->getSPIRVTypeForVReg(ResVReg); - if (!ResType) { - // There was no "assign type" actions, let's fix this now - ResType = ScalarType; - setRegClassType(ResVReg, ResType, GR, &MRI, *GR->CurMF, true); - } - } - } else if (mayBeInserted(Opcode) && I.getNumDefs() == 1 && - I.getNumOperands() > 1 && I.getOperand(1).isReg()) { - // Legalizer may have added a new instructions and introduced new - // registers, we must decorate them as if they were introduced in a - // non-automatic way - Register ResVReg = I.getOperand(0).getReg(); - // Check if the register defined by the instruction is newly generated - // or already processed - // Check if we have type defined for operands of the new instruction - bool IsKnownReg = MRI.getRegClassOrNull(ResVReg); - SPIRVType *ResVType = GR->getSPIRVTypeForVReg( - IsKnownReg ? 
ResVReg : I.getOperand(1).getReg()); - if (!ResVType) - continue; - // Set type & class - if (!IsKnownReg) - setRegClassType(ResVReg, ResVType, GR, &MRI, *GR->CurMF, true); - // If this is a simple operation that is to be reduced by TableGen - // definition we must apply some of pre-legalizer rules here - if (isTypeFoldingSupported(Opcode)) { - processInstr(I, MIB, MRI, GR, GR->getSPIRVTypeForVReg(ResVReg)); - if (IsKnownReg && MRI.hasOneUse(ResVReg)) { - MachineInstr &UseMI = *MRI.use_instr_begin(ResVReg); - if (UseMI.getOpcode() == SPIRV::ASSIGN_TYPE) - continue; - } - insertAssignInstr(ResVReg, nullptr, ResVType, GR, MIB, MRI); + if (requiresSpirvType(I, GR, MRI)) { + Worklist.push_back(&I); + } + } + } + + if (Worklist.empty()) { + LLVM_DEBUG(dbgs() << "Initial worklist is empty.\n"); + return; + } + + LLVM_DEBUG(dbgs() << "Initial worklist:\n"; + for (auto *I : Worklist) { I->dump(); }); + + bool Changed; + do { + Changed = false; + SmallVector<MachineInstr *> NextWorklist; + + for (MachineInstr *I : Worklist) { + MachineIRBuilder MIB(*I); + if (deduceAndAssignSpirvType(I, MF, GR, MIB)) { + Changed = true; + } else { + NextWorklist.push_back(I); + } + } + Worklist = std::move(NextWorklist); + LLVM_DEBUG(dbgs() << "Worklist size: " << Worklist.size() << "\n"); + } while (Changed); + + if (Worklist.empty()) + return; + + for (auto *I : Worklist) { + MachineIRBuilder MIB(*I); + Register ResVReg = I->getOperand(0).getReg(); + const LLT &ResLLT = MRI.getType(ResVReg); + SPIRVType *ResType = nullptr; + if (ResLLT.isVector()) { + SPIRVType *CompType = GR->getOrCreateSPIRVIntegerType( + ResLLT.getElementType().getSizeInBits(), MIB); + ResType = GR->getOrCreateSPIRVVectorType( + CompType, ResLLT.getNumElements(), MIB, false); + } else { + ResType = GR->getOrCreateSPIRVIntegerType(ResLLT.getSizeInBits(), MIB); + } + LLVM_DEBUG(dbgs() << "Could not determine type for " << *I + << ", defaulting to " << *ResType << "\n"); + setRegClassType(ResVReg, ResType, GR, &MRI, MF, true); + } +} + +static void ensureAssignTypeForTypeFolding(MachineFunction &MF, + SPIRVGlobalRegistry *GR) { + LLVM_DEBUG(dbgs() << "Entering ensureAssignTypeForTypeFolding for function " + << MF.getName() << "\n"); + MachineRegisterInfo &MRI = MF.getRegInfo(); + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + if (!isTypeFoldingSupported(MI.getOpcode())) + continue; + if (MI.getNumOperands() == 1 || !MI.getOperand(1).isReg()) + continue; + + LLVM_DEBUG(dbgs() << "Processing instruction: " << MI); + + // Check uses of MI to see if it already has a use in SPIRV::ASSIGN_TYPE + bool HasAssignType = false; + Register ResultRegister = MI.defs().begin()->getReg(); + // Walk all uses of the result register. + for (MachineInstr &UseInstr : + MRI.use_nodbg_instructions(ResultRegister)) { + if (UseInstr.getOpcode() == SPIRV::ASSIGN_TYPE) { + HasAssignType = true; + LLVM_DEBUG(dbgs() << " Instruction already has an ASSIGN_TYPE use: " + << UseInstr); + break; } } + + if (!HasAssignType) { + SPIRVType *ResultType = GR->getSPIRVTypeForVReg(ResultRegister); + LLVM_DEBUG( + dbgs() << " Adding ASSIGN_TYPE for ResultRegister: " + << printReg(ResultRegister, MRI.getTargetRegisterInfo()) + << " with type: " << *ResultType); + MachineIRBuilder MIB(MI); + insertAssignInstr(ResultRegister, nullptr, ResultType, GR, MIB, MRI); + } } } } @@ -155,10 +391,8 @@ bool SPIRVPostLegalizer::runOnMachineFunction(MachineFunction &MF) { const SPIRVSubtarget &ST = MF.getSubtarget(); SPIRVGlobalRegistry *GR = 
ST.getSPIRVGlobalRegistry(); GR->setCurrentFunc(MF); - MachineIRBuilder MIB(MF); - - processNewInstrs(MF, GR, MIB); - + registerSpirvTypeForNewInstructions(MF, GR); + ensureAssignTypeForTypeFolding(MF, GR); return true; } diff --git a/llvm/lib/Target/SystemZ/CMakeLists.txt b/llvm/lib/Target/SystemZ/CMakeLists.txt index 0d8f3eac6ee4f..6d94a755322df 100644 --- a/llvm/lib/Target/SystemZ/CMakeLists.txt +++ b/llvm/lib/Target/SystemZ/CMakeLists.txt @@ -11,6 +11,7 @@ tablegen(LLVM SystemZGenDisassemblerTables.inc -gen-disassembler) tablegen(LLVM SystemZGenInstrInfo.inc -gen-instr-info) tablegen(LLVM SystemZGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM SystemZGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM SystemZGenSDNodeInfo.inc -gen-sd-node-info) tablegen(LLVM SystemZGenSubtargetInfo.inc -gen-subtarget) add_public_tablegen_target(SystemZCommonTableGen) diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 58109acc92015..dfd76f9b0427f 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -7423,153 +7423,6 @@ SystemZTargetLowering::ReplaceNodeResults(SDNode *N, return LowerOperationWrapper(N, Results, DAG); } -const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { -#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME - switch ((SystemZISD::NodeType)Opcode) { - case SystemZISD::FIRST_NUMBER: break; - OPCODE(RET_GLUE); - OPCODE(CALL); - OPCODE(SIBCALL); - OPCODE(TLS_GDCALL); - OPCODE(TLS_LDCALL); - OPCODE(PCREL_WRAPPER); - OPCODE(PCREL_OFFSET); - OPCODE(ICMP); - OPCODE(FCMP); - OPCODE(STRICT_FCMP); - OPCODE(STRICT_FCMPS); - OPCODE(TM); - OPCODE(BR_CCMASK); - OPCODE(SELECT_CCMASK); - OPCODE(ADJDYNALLOC); - OPCODE(PROBED_ALLOCA); - OPCODE(POPCNT); - OPCODE(SMUL_LOHI); - OPCODE(UMUL_LOHI); - OPCODE(SDIVREM); - OPCODE(UDIVREM); - OPCODE(SADDO); - OPCODE(SSUBO); - OPCODE(UADDO); - OPCODE(USUBO); - OPCODE(ADDCARRY); - OPCODE(SUBCARRY); - OPCODE(GET_CCMASK); - OPCODE(MVC); - OPCODE(NC); - OPCODE(OC); - OPCODE(XC); - OPCODE(CLC); - OPCODE(MEMSET_MVC); - OPCODE(STPCPY); - OPCODE(STRCMP); - OPCODE(SEARCH_STRING); - OPCODE(IPM); - OPCODE(TBEGIN); - OPCODE(TBEGIN_NOFLOAT); - OPCODE(TEND); - OPCODE(BYTE_MASK); - OPCODE(ROTATE_MASK); - OPCODE(REPLICATE); - OPCODE(JOIN_DWORDS); - OPCODE(SPLAT); - OPCODE(MERGE_HIGH); - OPCODE(MERGE_LOW); - OPCODE(SHL_DOUBLE); - OPCODE(PERMUTE_DWORDS); - OPCODE(PERMUTE); - OPCODE(PACK); - OPCODE(PACKS_CC); - OPCODE(PACKLS_CC); - OPCODE(UNPACK_HIGH); - OPCODE(UNPACKL_HIGH); - OPCODE(UNPACK_LOW); - OPCODE(UNPACKL_LOW); - OPCODE(VSHL_BY_SCALAR); - OPCODE(VSRL_BY_SCALAR); - OPCODE(VSRA_BY_SCALAR); - OPCODE(VROTL_BY_SCALAR); - OPCODE(SHL_DOUBLE_BIT); - OPCODE(SHR_DOUBLE_BIT); - OPCODE(VSUM); - OPCODE(VACC); - OPCODE(VSCBI); - OPCODE(VAC); - OPCODE(VSBI); - OPCODE(VACCC); - OPCODE(VSBCBI); - OPCODE(VMAH); - OPCODE(VMALH); - OPCODE(VME); - OPCODE(VMLE); - OPCODE(VMO); - OPCODE(VMLO); - OPCODE(VICMPE); - OPCODE(VICMPH); - OPCODE(VICMPHL); - OPCODE(VICMPES); - OPCODE(VICMPHS); - OPCODE(VICMPHLS); - OPCODE(VFCMPE); - OPCODE(STRICT_VFCMPE); - OPCODE(STRICT_VFCMPES); - OPCODE(VFCMPH); - OPCODE(STRICT_VFCMPH); - OPCODE(STRICT_VFCMPHS); - OPCODE(VFCMPHE); - OPCODE(STRICT_VFCMPHE); - OPCODE(STRICT_VFCMPHES); - OPCODE(VFCMPES); - OPCODE(VFCMPHS); - OPCODE(VFCMPHES); - OPCODE(VFTCI); - OPCODE(VEXTEND); - OPCODE(STRICT_VEXTEND); - OPCODE(VROUND); - OPCODE(STRICT_VROUND); - OPCODE(VTM); - OPCODE(SCMP128HI); - 
OPCODE(UCMP128HI); - OPCODE(VFAE_CC); - OPCODE(VFAEZ_CC); - OPCODE(VFEE_CC); - OPCODE(VFEEZ_CC); - OPCODE(VFENE_CC); - OPCODE(VFENEZ_CC); - OPCODE(VISTR_CC); - OPCODE(VSTRC_CC); - OPCODE(VSTRCZ_CC); - OPCODE(VSTRS_CC); - OPCODE(VSTRSZ_CC); - OPCODE(TDC); - OPCODE(ATOMIC_SWAPW); - OPCODE(ATOMIC_LOADW_ADD); - OPCODE(ATOMIC_LOADW_SUB); - OPCODE(ATOMIC_LOADW_AND); - OPCODE(ATOMIC_LOADW_OR); - OPCODE(ATOMIC_LOADW_XOR); - OPCODE(ATOMIC_LOADW_NAND); - OPCODE(ATOMIC_LOADW_MIN); - OPCODE(ATOMIC_LOADW_MAX); - OPCODE(ATOMIC_LOADW_UMIN); - OPCODE(ATOMIC_LOADW_UMAX); - OPCODE(ATOMIC_CMP_SWAPW); - OPCODE(ATOMIC_CMP_SWAP); - OPCODE(ATOMIC_LOAD_128); - OPCODE(ATOMIC_STORE_128); - OPCODE(ATOMIC_CMP_SWAP_128); - OPCODE(LRV); - OPCODE(STRV); - OPCODE(VLER); - OPCODE(VSTER); - OPCODE(STCKF); - OPCODE(PREFETCH); - OPCODE(ADA_ENTRY); - } - return nullptr; -#undef OPCODE -} - // Return true if VT is a vector whose elements are a whole number of bytes // in width. Also check for presence of vector support. bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const { diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index d5b76031766dd..13a1cd1614a53 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -22,390 +22,6 @@ #include namespace llvm { -namespace SystemZISD { -enum NodeType : unsigned { - FIRST_NUMBER = ISD::BUILTIN_OP_END, - - // Return with a glue operand. Operand 0 is the chain operand. - RET_GLUE, - - // Calls a function. Operand 0 is the chain operand and operand 1 - // is the target address. The arguments start at operand 2. - // There is an optional glue operand at the end. - CALL, - SIBCALL, - - // TLS calls. Like regular calls, except operand 1 is the TLS symbol. - // (The call target is implicitly __tls_get_offset.) - TLS_GDCALL, - TLS_LDCALL, - - // Wraps a TargetGlobalAddress that should be loaded using PC-relative - // accesses (LARL). Operand 0 is the address. - PCREL_WRAPPER, - - // Used in cases where an offset is applied to a TargetGlobalAddress. - // Operand 0 is the full TargetGlobalAddress and operand 1 is a - // PCREL_WRAPPER for an anchor point. This is used so that we can - // cheaply refer to either the full address or the anchor point - // as a register base. - PCREL_OFFSET, - - // Integer comparisons. There are three operands: the two values - // to compare, and an integer of type SystemZICMP. - ICMP, - - // Floating-point comparisons. The two operands are the values to compare. - FCMP, - - // Test under mask. The first operand is ANDed with the second operand - // and the condition codes are set on the result. The third operand is - // a boolean that is true if the condition codes need to distinguish - // between CCMASK_TM_MIXED_MSB_0 and CCMASK_TM_MIXED_MSB_1 (which the - // register forms do but the memory forms don't). - TM, - - // Branches if a condition is true. Operand 0 is the chain operand; - // operand 1 is the 4-bit condition-code mask, with bit N in - // big-endian order meaning "branch if CC=N"; operand 2 is the - // target block and operand 3 is the flag operand. - BR_CCMASK, - - // Selects between operand 0 and operand 1. Operand 2 is the - // mask of condition-code values for which operand 0 should be - // chosen over operand 1; it has the same form as BR_CCMASK. - // Operand 3 is the flag operand. - SELECT_CCMASK, - - // Evaluates to the gap between the stack pointer and the - // base of the dynamically-allocatable area. 
- ADJDYNALLOC, - - // For allocating stack space when using stack clash protector. - // Allocation is performed by block, and each block is probed. - PROBED_ALLOCA, - - // Count number of bits set in operand 0 per byte. - POPCNT, - - // Wrappers around the ISD opcodes of the same name. The output is GR128. - // Input operands may be GR64 or GR32, depending on the instruction. - SMUL_LOHI, - UMUL_LOHI, - SDIVREM, - UDIVREM, - - // Add/subtract with overflow/carry. These have the same operands as - // the corresponding standard operations, except with the carry flag - // replaced by a condition code value. - SADDO, SSUBO, UADDO, USUBO, ADDCARRY, SUBCARRY, - - // Set the condition code from a boolean value in operand 0. - // Operand 1 is a mask of all condition-code values that may result of this - // operation, operand 2 is a mask of condition-code values that may result - // if the boolean is true. - // Note that this operation is always optimized away, we will never - // generate any code for it. - GET_CCMASK, - - // Use a series of MVCs to copy bytes from one memory location to another. - // The operands are: - // - the target address - // - the source address - // - the constant length - // - // This isn't a memory opcode because we'd need to attach two - // MachineMemOperands rather than one. - MVC, - - // Similar to MVC, but for logic operations (AND, OR, XOR). - NC, - OC, - XC, - - // Use CLC to compare two blocks of memory, with the same comments - // as for MVC. - CLC, - - // Use MVC to set a block of memory after storing the first byte. - MEMSET_MVC, - - // Use an MVST-based sequence to implement stpcpy(). - STPCPY, - - // Use a CLST-based sequence to implement strcmp(). The two input operands - // are the addresses of the strings to compare. - STRCMP, - - // Use an SRST-based sequence to search a block of memory. The first - // operand is the end address, the second is the start, and the third - // is the character to search for. CC is set to 1 on success and 2 - // on failure. - SEARCH_STRING, - - // Store the CC value in bits 29 and 28 of an integer. - IPM, - - // Transaction begin. The first operand is the chain, the second - // the TDB pointer, and the third the immediate control field. - // Returns CC value and chain. - TBEGIN, - TBEGIN_NOFLOAT, - - // Transaction end. Just the chain operand. Returns CC value and chain. - TEND, - - // Create a vector constant by filling byte N of the result with bit - // 15-N of the single operand. - BYTE_MASK, - - // Create a vector constant by replicating an element-sized RISBG-style mask. - // The first operand specifies the starting set bit and the second operand - // specifies the ending set bit. Both operands count from the MSB of the - // element. - ROTATE_MASK, - - // Replicate a GPR scalar value into all elements of a vector. - REPLICATE, - - // Create a vector from two i64 GPRs. - JOIN_DWORDS, - - // Replicate one element of a vector into all elements. The first operand - // is the vector and the second is the index of the element to replicate. - SPLAT, - - // Interleave elements from the high half of operand 0 and the high half - // of operand 1. - MERGE_HIGH, - - // Likewise for the low halves. - MERGE_LOW, - - // Concatenate the vectors in the first two operands, shift them left - // by the third operand, and take the first half of the result. - SHL_DOUBLE, - - // Take one element of the first v2i64 operand and the one element of - // the second v2i64 operand and concatenate them to form a v2i64 result. 
- // The third operand is a 4-bit value of the form 0A0B, where A and B - // are the element selectors for the first operand and second operands - // respectively. - PERMUTE_DWORDS, - - // Perform a general vector permute on vector operands 0 and 1. - // Each byte of operand 2 controls the corresponding byte of the result, - // in the same way as a byte-level VECTOR_SHUFFLE mask. - PERMUTE, - - // Pack vector operands 0 and 1 into a single vector with half-sized elements. - PACK, - - // Likewise, but saturate the result and set CC. PACKS_CC does signed - // saturation and PACKLS_CC does unsigned saturation. - PACKS_CC, - PACKLS_CC, - - // Unpack the first half of vector operand 0 into double-sized elements. - // UNPACK_HIGH sign-extends and UNPACKL_HIGH zero-extends. - UNPACK_HIGH, - UNPACKL_HIGH, - - // Likewise for the second half. - UNPACK_LOW, - UNPACKL_LOW, - - // Shift/rotate each element of vector operand 0 by the number of bits - // specified by scalar operand 1. - VSHL_BY_SCALAR, - VSRL_BY_SCALAR, - VSRA_BY_SCALAR, - VROTL_BY_SCALAR, - - // Concatenate the vectors in the first two operands, shift them left/right - // bitwise by the third operand, and take the first/last half of the result. - SHL_DOUBLE_BIT, - SHR_DOUBLE_BIT, - - // For each element of the output type, sum across all sub-elements of - // operand 0 belonging to the corresponding element, and add in the - // rightmost sub-element of the corresponding element of operand 1. - VSUM, - - // Compute carry/borrow indication for add/subtract. - VACC, VSCBI, - // Add/subtract with carry/borrow. - VAC, VSBI, - // Compute carry/borrow indication for add/subtract with carry/borrow. - VACCC, VSBCBI, - - // High-word multiply-and-add. - VMAH, VMALH, - // Widen and multiply even/odd vector elements. - VME, VMLE, VMO, VMLO, - - // Compare integer vector operands 0 and 1 to produce the usual 0/-1 - // vector result. VICMPE is for equality, VICMPH for "signed greater than" - // and VICMPHL for "unsigned greater than". - VICMPE, - VICMPH, - VICMPHL, - - // Likewise, but also set the condition codes on the result. - VICMPES, - VICMPHS, - VICMPHLS, - - // Compare floating-point vector operands 0 and 1 to produce the usual 0/-1 - // vector result. VFCMPE is for "ordered and equal", VFCMPH for "ordered and - // greater than" and VFCMPHE for "ordered and greater than or equal to". - VFCMPE, - VFCMPH, - VFCMPHE, - - // Likewise, but also set the condition codes on the result. - VFCMPES, - VFCMPHS, - VFCMPHES, - - // Test floating-point data class for vectors. - VFTCI, - - // Extend the even f32 elements of vector operand 0 to produce a vector - // of f64 elements. - VEXTEND, - - // Round the f64 elements of vector operand 0 to f32s and store them in the - // even elements of the result. - VROUND, - - // AND the two vector operands together and set CC based on the result. - VTM, - - // i128 high integer comparisons. - SCMP128HI, - UCMP128HI, - - // String operations that set CC as a side-effect. - VFAE_CC, - VFAEZ_CC, - VFEE_CC, - VFEEZ_CC, - VFENE_CC, - VFENEZ_CC, - VISTR_CC, - VSTRC_CC, - VSTRCZ_CC, - VSTRS_CC, - VSTRSZ_CC, - - // Test Data Class. - // - // Operand 0: the value to test - // Operand 1: the bit mask - TDC, - - // z/OS XPLINK ADA Entry - // Wraps a TargetGlobalAddress that should be loaded from a function's - // AssociatedData Area (ADA). Tha ADA is passed to the function by the - // caller in the XPLink ABI defined register R5. 
- // Operand 0: the GlobalValue/External Symbol - // Operand 1: the ADA register - // Operand 2: the offset (0 for the first and 8 for the second element in the - // function descriptor) - ADA_ENTRY, - - // Strict variants of scalar floating-point comparisons. - // Quiet and signaling versions. - FIRST_STRICTFP_OPCODE, - STRICT_FCMP = FIRST_STRICTFP_OPCODE, - STRICT_FCMPS, - - // Strict variants of vector floating-point comparisons. - // Quiet and signaling versions. - STRICT_VFCMPE, - STRICT_VFCMPH, - STRICT_VFCMPHE, - STRICT_VFCMPES, - STRICT_VFCMPHS, - STRICT_VFCMPHES, - - // Strict variants of VEXTEND and VROUND. - STRICT_VEXTEND, - STRICT_VROUND, - LAST_STRICTFP_OPCODE = STRICT_VROUND, - - // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or - // ATOMIC_LOAD_. - // - // Operand 0: the address of the containing 32-bit-aligned field - // Operand 1: the second operand of , in the high bits of an i32 - // for everything except ATOMIC_SWAPW - // Operand 2: how many bits to rotate the i32 left to bring the first - // operand into the high bits - // Operand 3: the negative of operand 2, for rotating the other way - // Operand 4: the width of the field in bits (8 or 16) - FIRST_MEMORY_OPCODE, - ATOMIC_SWAPW = FIRST_MEMORY_OPCODE, - ATOMIC_LOADW_ADD, - ATOMIC_LOADW_SUB, - ATOMIC_LOADW_AND, - ATOMIC_LOADW_OR, - ATOMIC_LOADW_XOR, - ATOMIC_LOADW_NAND, - ATOMIC_LOADW_MIN, - ATOMIC_LOADW_MAX, - ATOMIC_LOADW_UMIN, - ATOMIC_LOADW_UMAX, - - // A wrapper around the inner loop of an ATOMIC_CMP_SWAP. - // - // Operand 0: the address of the containing 32-bit-aligned field - // Operand 1: the compare value, in the low bits of an i32 - // Operand 2: the swap value, in the low bits of an i32 - // Operand 3: how many bits to rotate the i32 left to bring the first - // operand into the high bits - // Operand 4: the negative of operand 2, for rotating the other way - // Operand 5: the width of the field in bits (8 or 16) - ATOMIC_CMP_SWAPW, - - // Atomic compare-and-swap returning CC value. - // Val, CC, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) - ATOMIC_CMP_SWAP, - - // 128-bit atomic load. - // Val, OUTCHAIN = ATOMIC_LOAD_128(INCHAIN, ptr) - ATOMIC_LOAD_128, - - // 128-bit atomic store. - // OUTCHAIN = ATOMIC_STORE_128(INCHAIN, val, ptr) - ATOMIC_STORE_128, - - // 128-bit atomic compare-and-swap. - // Val, CC, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) - ATOMIC_CMP_SWAP_128, - - // Byte swapping load/store. Same operands as regular load/store. - LRV, STRV, - - // Element swapping load/store. Same operands as regular load/store. - VLER, VSTER, - - // Use STORE CLOCK FAST to store current TOD clock value. - STCKF, - - // Prefetch from the second operand using the 4-bit control code in - // the first operand. The code is 1 for a load prefetch and 2 for - // a store prefetch. - PREFETCH, - LAST_MEMORY_OPCODE = PREFETCH, -}; - -// Return true if OPCODE is some kind of PC-relative address. -inline bool isPCREL(unsigned Opcode) { - return Opcode == PCREL_WRAPPER || Opcode == PCREL_OFFSET; -} -} // end namespace SystemZISD namespace SystemZICMP { // Describes whether an integer comparison needs to be signed or unsigned, @@ -532,8 +148,6 @@ class SystemZTargetLowering : public TargetLowering { return true; } - const char *getTargetNodeName(unsigned Opcode) const override; - // This function currently returns cost for srl/ipm/cc sequence for merging. 
CondMergingParams getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs, diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td index 547d3dcf92804..a02cafaaafcdf 100644 --- a/llvm/lib/Target/SystemZ/SystemZOperators.td +++ b/llvm/lib/Target/SystemZ/SystemZOperators.td @@ -265,74 +265,151 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_CallSeqEnd, SDNPOutGlue]>; def global_offset_table : SDNode<"ISD::GLOBAL_OFFSET_TABLE", SDTPtrLeaf>; -// Nodes for SystemZISD::*. See SystemZISelLowering.h for more details. +// Return with a glue operand. Operand 0 is the chain operand. def z_retglue : SDNode<"SystemZISD::RET_GLUE", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +// Calls a function. Operand 0 is the chain operand and operand 1 +// is the target address. The arguments start at operand 2. +// There is an optional glue operand at the end. def z_call : SDNode<"SystemZISD::CALL", SDT_ZCall, [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>; def z_sibcall : SDNode<"SystemZISD::SIBCALL", SDT_ZCall, [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>; +// TLS calls. Like regular calls, except operand 1 is the TLS symbol. +// (The call target is implicitly __tls_get_offset.) def z_tls_gdcall : SDNode<"SystemZISD::TLS_GDCALL", SDT_ZCall, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPVariadic]>; def z_tls_ldcall : SDNode<"SystemZISD::TLS_LDCALL", SDT_ZCall, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPVariadic]>; + +// Wraps a TargetGlobalAddress that should be loaded using PC-relative +// accesses (LARL). Operand 0 is the address. def z_pcrel_wrapper : SDNode<"SystemZISD::PCREL_WRAPPER", SDT_ZWrapPtr, []>; + +// Used in cases where an offset is applied to a TargetGlobalAddress. +// Operand 0 is the full TargetGlobalAddress and operand 1 is a +// PCREL_WRAPPER for an anchor point. This is used so that we can +// cheaply refer to either the full address or the anchor point +// as a register base. def z_pcrel_offset : SDNode<"SystemZISD::PCREL_OFFSET", SDT_ZWrapOffset, []>; + +// Integer comparisons. There are three operands: the two values +// to compare, and an integer of type SystemZICMP. def z_icmp : SDNode<"SystemZISD::ICMP", SDT_ZICmp>; + +// Floating-point comparisons. The two operands are the values to compare. def z_fcmp : SDNode<"SystemZISD::FCMP", SDT_ZCmp>; -def z_strict_fcmp : SDNode<"SystemZISD::STRICT_FCMP", SDT_ZCmp, - [SDNPHasChain]>; -def z_strict_fcmps : SDNode<"SystemZISD::STRICT_FCMPS", SDT_ZCmp, - [SDNPHasChain]>; + +let IsStrictFP = true in { + // Strict variants of scalar floating-point comparisons. + // Quiet and signaling versions. + def z_strict_fcmp : SDNode<"SystemZISD::STRICT_FCMP", SDT_ZCmp, + [SDNPHasChain]>; + def z_strict_fcmps : SDNode<"SystemZISD::STRICT_FCMPS", SDT_ZCmp, + [SDNPHasChain]>; +} + +// Test under mask. The first operand is ANDed with the second operand +// and the condition codes are set on the result. The third operand is +// a boolean that is true if the condition codes need to distinguish +// between CCMASK_TM_MIXED_MSB_0 and CCMASK_TM_MIXED_MSB_1 (which the +// register forms do but the memory forms don't). def z_tm : SDNode<"SystemZISD::TM", SDT_ZICmp>; + +// Branches if a condition is true. Operand 0 is the chain operand; +// operand 1 is the 4-bit condition-code mask, with bit N in +// big-endian order meaning "branch if CC=N"; operand 2 is the +// target block and operand 3 is the flag operand. 
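As an aside on the CC-mask convention just described (bit N of the 4-bit mask, counted from the most significant bit, means "branch if CC=N"), the check can be written out in a few lines of C++. This is an illustrative sketch only, not part of the patch, and the helper name is hypothetical:

  #include <cassert>

  // True if the 4-bit CCMask selects condition code CC (0-3).
  // Bit N, counted from the MSB of the mask, corresponds to CC=N.
  bool ccMaskMatches(unsigned CCMask, unsigned CC) {
    assert(CC < 4 && CCMask < 16 && "SystemZ CC is 2 bits, mask is 4 bits");
    return (CCMask >> (3 - CC)) & 1;
  }

For example, a mask of 0b1000 matches only CC=0, while 0b0011 matches CC=2 or CC=3.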
def z_br_ccmask_1 : SDNode<"SystemZISD::BR_CCMASK", SDT_ZBRCCMask, [SDNPHasChain]>; + +// Selects between operand 0 and operand 1. Operand 2 is the +// mask of condition-code values for which operand 0 should be +// chosen over operand 1; it has the same form as BR_CCMASK. +// Operand 3 is the flag operand. def z_select_ccmask_1 : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask>; + +// Store the CC value in bits 29 and 28 of an integer. def z_ipm_1 : SDNode<"SystemZISD::IPM", SDT_ZIPM>; + +// Evaluates to the gap between the stack pointer and the +// base of the dynamically-allocatable area. def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>; + +// For allocating stack space when using stack clash protector. +// Allocation is performed by block, and each block is probed. def z_probed_alloca : SDNode<"SystemZISD::PROBED_ALLOCA", SDT_ZProbedAlloca, [SDNPHasChain]>; + +// Count number of bits set in operand 0 per byte. def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>; + +// Wrappers around the ISD opcodes of the same name. The output is GR128. +// Input operands may be GR64 or GR32, depending on the instruction. def z_smul_lohi : SDNode<"SystemZISD::SMUL_LOHI", SDT_ZGR128Binary>; def z_umul_lohi : SDNode<"SystemZISD::UMUL_LOHI", SDT_ZGR128Binary>; def z_sdivrem : SDNode<"SystemZISD::SDIVREM", SDT_ZGR128Binary>; def z_udivrem : SDNode<"SystemZISD::UDIVREM", SDT_ZGR128Binary>; + +// Add/subtract with overflow/carry. These have the same operands as +// the corresponding standard operations, except with the carry flag +// replaced by a condition code value. def z_saddo : SDNode<"SystemZISD::SADDO", SDT_ZBinaryWithFlags>; def z_ssubo : SDNode<"SystemZISD::SSUBO", SDT_ZBinaryWithFlags>; def z_uaddo : SDNode<"SystemZISD::UADDO", SDT_ZBinaryWithFlags>; def z_usubo : SDNode<"SystemZISD::USUBO", SDT_ZBinaryWithFlags>; def z_addcarry_1 : SDNode<"SystemZISD::ADDCARRY", SDT_ZBinaryWithCarry>; def z_subcarry_1 : SDNode<"SystemZISD::SUBCARRY", SDT_ZBinaryWithCarry>; + +// Compute carry/borrow indication for add/subtract. def z_vacc : SDNode<"SystemZISD::VACC", SDTIntBinOp>; -def z_vac : SDNode<"SystemZISD::VAC", SDT_ZTernary>; -def z_vaccc : SDNode<"SystemZISD::VACCC", SDT_ZTernary>; def z_vscbi : SDNode<"SystemZISD::VSCBI", SDTIntBinOp>; + +// Add/subtract with carry/borrow. +def z_vac : SDNode<"SystemZISD::VAC", SDT_ZTernary>; def z_vsbi : SDNode<"SystemZISD::VSBI", SDT_ZTernary>; + +// Compute carry/borrow indication for add/subtract with carry/borrow. +def z_vaccc : SDNode<"SystemZISD::VACCC", SDT_ZTernary>; def z_vsbcbi : SDNode<"SystemZISD::VSBCBI", SDT_ZTernary>; + +// High-word multiply-and-add. def z_vmah : SDNode<"SystemZISD::VMAH", SDT_ZTernary>; def z_vmalh : SDNode<"SystemZISD::VMALH", SDT_ZTernary>; + +// Widen and multiply even/odd vector elements. def z_vme : SDNode<"SystemZISD::VME", SDT_ZBinaryConv>; def z_vmle : SDNode<"SystemZISD::VMLE", SDT_ZBinaryConv>; def z_vmo : SDNode<"SystemZISD::VMO", SDT_ZBinaryConv>; def z_vmlo : SDNode<"SystemZISD::VMLO", SDT_ZBinaryConv>; +// Byte swapping load/store. Same operands as regular load/store. def z_loadbswap : SDNode<"SystemZISD::LRV", SDTLoad, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def z_storebswap : SDNode<"SystemZISD::STRV", SDTStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +// Element swapping load/store. Same operands as regular load/store. 
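For intuition about the byte-swapping accesses above (LRV/STRV, and the element-swapping VLER/VSTER defined next), the semantics are simply an ordinary access combined with a byte reversal. A minimal sketch, assuming a GCC/Clang-style builtin; the function name is made up for illustration:

  #include <cstdint>
  #include <cstring>

  // What LRV does in one instruction: a 32-bit load plus a byte swap.
  uint32_t loadByteSwapped32(const void *P) {
    uint32_t V;
    std::memcpy(&V, P, sizeof(V)); // ordinary endian-native load
    return __builtin_bswap32(V);   // reverse the four bytes
  }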
def z_loadeswap : SDNode<"SystemZISD::VLER", SDTLoad, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def z_storeeswap : SDNode<"SystemZISD::VSTER", SDTStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +// Use STORE CLOCK FAST to store current TOD clock value. def z_stckf : SDNode<"SystemZISD::STCKF", SDT_ZStoreInherent, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +// Test Data Class. +// +// Operand 0: the value to test +// Operand 1: the bit mask def z_tdc : SDNode<"SystemZISD::TDC", SDT_ZTest>; def z_eh_sjlj_setjmp : SDNode<"ISD::EH_SJLJ_SETJMP", SDT_ZSetJmp, @@ -346,26 +423,75 @@ def z_vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT", SDT_ZInsertVectorElt>; def z_vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT", SDT_ZExtractVectorElt>; + +// Create a vector constant by filling byte N of the result with bit +// 15-N of the single operand. def z_byte_mask : SDNode<"SystemZISD::BYTE_MASK", SDT_ZReplicate>; + +// Create a vector constant by replicating an element-sized RISBG-style mask. +// The first operand specifies the starting set bit and the second operand +// specifies the ending set bit. Both operands count from the MSB of the +// element. def z_rotate_mask : SDNode<"SystemZISD::ROTATE_MASK", SDT_ZRotateMask>; + +// Replicate a GPR scalar value into all elements of a vector. def z_replicate : SDNode<"SystemZISD::REPLICATE", SDT_ZReplicate>; + +// Create a vector from two i64 GPRs. def z_join_dwords : SDNode<"SystemZISD::JOIN_DWORDS", SDT_ZJoinDwords>; + +// Replicate one element of a vector into all elements. The first operand +// is the vector and the second is the index of the element to replicate. def z_splat : SDNode<"SystemZISD::SPLAT", SDT_ZVecBinaryInt>; + +// Interleave elements from the high half of operand 0 and the high half +// of operand 1. def z_merge_high : SDNode<"SystemZISD::MERGE_HIGH", SDT_ZVecBinary>; + +// Likewise for the low halves. def z_merge_low : SDNode<"SystemZISD::MERGE_LOW", SDT_ZVecBinary>; + +// Concatenate the vectors in the first two operands, shift them left +// by the third operand, and take the first half of the result. def z_shl_double : SDNode<"SystemZISD::SHL_DOUBLE", SDT_ZVecTernaryInt>; + +// Concatenate the vectors in the first two operands, shift them left/right +// bitwise by the third operand, and take the first/last half of the result. def z_shl_double_bit : SDNode<"SystemZISD::SHL_DOUBLE_BIT", SDT_ZVecTernaryInt>; def z_shr_double_bit : SDNode<"SystemZISD::SHR_DOUBLE_BIT", SDT_ZVecTernaryInt>; + +// Take one element of the first v2i64 operand and the one element of +// the second v2i64 operand and concatenate them to form a v2i64 result. +// The third operand is a 4-bit value of the form 0A0B, where A and B +// are the element selectors for the first operand and second operands +// respectively. def z_permute_dwords : SDNode<"SystemZISD::PERMUTE_DWORDS", SDT_ZVecTernaryInt>; + +// Perform a general vector permute on vector operands 0 and 1. +// Each byte of operand 2 controls the corresponding byte of the result, +// in the same way as a byte-level VECTOR_SHUFFLE mask. def z_permute : SDNode<"SystemZISD::PERMUTE", SDT_ZVecTernary>; + +// Pack vector operands 0 and 1 into a single vector with half-sized elements. def z_pack : SDNode<"SystemZISD::PACK", SDT_ZVecBinaryConv>; + +// Likewise, but saturate the result and set CC. PACKS_CC does signed +// saturation and PACKLS_CC does unsigned saturation. 
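A scalar model of the saturating packs may help: each wide element is clamped to the narrow type's range before truncation, and CC records whether saturation occurred. Sketch only (i32 to i16 shown; the real nodes operate lane-wise on vectors):

  #include <algorithm>
  #include <cstdint>

  // PACKS_CC-style signed saturation of one element.
  int16_t packSigned(int32_t X) {
    return static_cast<int16_t>(std::clamp<int32_t>(X, INT16_MIN, INT16_MAX));
  }

  // PACKLS_CC-style unsigned saturation of one element.
  uint16_t packUnsigned(uint32_t X) {
    return static_cast<uint16_t>(std::min<uint32_t>(X, UINT16_MAX));
  }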
def z_packs_cc : SDNode<"SystemZISD::PACKS_CC", SDT_ZVecBinaryConvCC>; def z_packls_cc : SDNode<"SystemZISD::PACKLS_CC", SDT_ZVecBinaryConvCC>; + +// Unpack the first half of vector operand 0 into double-sized elements. +// UNPACK_HIGH sign-extends and UNPACKL_HIGH zero-extends. def z_unpack_high : SDNode<"SystemZISD::UNPACK_HIGH", SDT_ZVecUnpack>; def z_unpackl_high : SDNode<"SystemZISD::UNPACKL_HIGH", SDT_ZVecUnpack>; + +// Likewise for the second half. def z_unpack_low : SDNode<"SystemZISD::UNPACK_LOW", SDT_ZVecUnpack>; def z_unpackl_low : SDNode<"SystemZISD::UNPACKL_LOW", SDT_ZVecUnpack>; + +// Shift/rotate each element of vector operand 0 by the number of bits +// specified by scalar operand 1. def z_vshl_by_scalar : SDNode<"SystemZISD::VSHL_BY_SCALAR", SDT_ZVecBinaryInt>; def z_vsrl_by_scalar : SDNode<"SystemZISD::VSRL_BY_SCALAR", @@ -374,40 +500,75 @@ def z_vsra_by_scalar : SDNode<"SystemZISD::VSRA_BY_SCALAR", SDT_ZVecBinaryInt>; def z_vrotl_by_scalar : SDNode<"SystemZISD::VROTL_BY_SCALAR", SDT_ZVecBinaryInt>; + +// For each element of the output type, sum across all sub-elements of +// operand 0 belonging to the corresponding element, and add in the +// rightmost sub-element of the corresponding element of operand 1. def z_vsum : SDNode<"SystemZISD::VSUM", SDT_ZBinaryConv>; + +// Compare integer vector operands 0 and 1 to produce the usual 0/-1 +// vector result. VICMPE is for equality, VICMPH for "signed greater than" +// and VICMPHL for "unsigned greater than". def z_vicmpe : SDNode<"SystemZISD::VICMPE", SDT_ZVecCompare>; def z_vicmph : SDNode<"SystemZISD::VICMPH", SDT_ZVecCompare>; def z_vicmphl : SDNode<"SystemZISD::VICMPHL", SDT_ZVecCompare>; + +// Likewise, but also set the condition codes on the result. def z_vicmpes : SDNode<"SystemZISD::VICMPES", SDT_ZVecCompareCC>; def z_vicmphs : SDNode<"SystemZISD::VICMPHS", SDT_ZVecCompareCC>; def z_vicmphls : SDNode<"SystemZISD::VICMPHLS", SDT_ZVecCompareCC>; + +// Compare floating-point vector operands 0 and 1 to produce the usual 0/-1 +// vector result. VFCMPE is for "ordered and equal", VFCMPH for "ordered and +// greater than" and VFCMPHE for "ordered and greater than or equal to". def z_vfcmpe : SDNode<"SystemZISD::VFCMPE", SDT_ZVecBinaryConv>; -def z_strict_vfcmpe : SDNode<"SystemZISD::STRICT_VFCMPE", - SDT_ZVecBinaryConv, [SDNPHasChain]>; -def z_strict_vfcmpes : SDNode<"SystemZISD::STRICT_VFCMPES", - SDT_ZVecBinaryConv, [SDNPHasChain]>; def z_vfcmph : SDNode<"SystemZISD::VFCMPH", SDT_ZVecBinaryConv>; -def z_strict_vfcmph : SDNode<"SystemZISD::STRICT_VFCMPH", - SDT_ZVecBinaryConv, [SDNPHasChain]>; -def z_strict_vfcmphs : SDNode<"SystemZISD::STRICT_VFCMPHS", - SDT_ZVecBinaryConv, [SDNPHasChain]>; def z_vfcmphe : SDNode<"SystemZISD::VFCMPHE", SDT_ZVecBinaryConv>; -def z_strict_vfcmphe : SDNode<"SystemZISD::STRICT_VFCMPHE", - SDT_ZVecBinaryConv, [SDNPHasChain]>; -def z_strict_vfcmphes : SDNode<"SystemZISD::STRICT_VFCMPHES", - SDT_ZVecBinaryConv, [SDNPHasChain]>; + +// Likewise, but also set the condition codes on the result. def z_vfcmpes : SDNode<"SystemZISD::VFCMPES", SDT_ZVecBinaryConvCC>; def z_vfcmphs : SDNode<"SystemZISD::VFCMPHS", SDT_ZVecBinaryConvCC>; def z_vfcmphes : SDNode<"SystemZISD::VFCMPHES", SDT_ZVecBinaryConvCC>; + +// Extend the even f32 elements of vector operand 0 to produce a vector +// of f64 elements. 
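Concretely, for a v4f32 input the even elements are lanes 0 and 2, so VEXTEND and VROUND pair up as widening/narrowing conversions on those lanes. An illustrative scalar sketch (odd result lanes of the narrowing case are zeroed here only to keep the sketch deterministic):

  // VEXTEND: widen the even f32 lanes of a v4f32 into a v2f64.
  void vextend(const float In[4], double Out[2]) {
    Out[0] = In[0];
    Out[1] = In[2];
  }

  // VROUND: narrow a v2f64 into the even lanes of a v4f32.
  void vround(const double In[2], float Out[4]) {
    Out[0] = static_cast<float>(In[0]);
    Out[2] = static_cast<float>(In[1]);
    Out[1] = Out[3] = 0.0f;
  }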
 def z_vextend            : SDNode<"SystemZISD::VEXTEND", SDT_ZVecUnaryConv>;
-def z_strict_vextend     : SDNode<"SystemZISD::STRICT_VEXTEND",
-                                  SDT_ZVecUnaryConv, [SDNPHasChain]>;
+
+// Round the f64 elements of vector operand 0 to f32s and store them in the
+// even elements of the result.
 def z_vround             : SDNode<"SystemZISD::VROUND", SDT_ZVecUnaryConv>;
-def z_strict_vround      : SDNode<"SystemZISD::STRICT_VROUND",
+
+let IsStrictFP = true in {
+  // Strict variants of vector floating-point comparisons.
+  // Quiet and signaling versions.
+  def z_strict_vfcmpe   : SDNode<"SystemZISD::STRICT_VFCMPE",
+                                 SDT_ZVecBinaryConv, [SDNPHasChain]>;
+  def z_strict_vfcmph   : SDNode<"SystemZISD::STRICT_VFCMPH",
+                                 SDT_ZVecBinaryConv, [SDNPHasChain]>;
+  def z_strict_vfcmphe  : SDNode<"SystemZISD::STRICT_VFCMPHE",
+                                 SDT_ZVecBinaryConv, [SDNPHasChain]>;
+  def z_strict_vfcmpes  : SDNode<"SystemZISD::STRICT_VFCMPES",
+                                 SDT_ZVecBinaryConv, [SDNPHasChain]>;
+  def z_strict_vfcmphs  : SDNode<"SystemZISD::STRICT_VFCMPHS",
+                                 SDT_ZVecBinaryConv, [SDNPHasChain]>;
+  def z_strict_vfcmphes : SDNode<"SystemZISD::STRICT_VFCMPHES",
+                                 SDT_ZVecBinaryConv, [SDNPHasChain]>;
+
+  // Strict variants of VEXTEND and VROUND.
+  def z_strict_vextend  : SDNode<"SystemZISD::STRICT_VEXTEND",
+                                 SDT_ZVecUnaryConv, [SDNPHasChain]>;
+  def z_strict_vround   : SDNode<"SystemZISD::STRICT_VROUND",
                                   SDT_ZVecUnaryConv, [SDNPHasChain]>;
+}
+
+// AND the two vector operands together and set CC based on the result.
 def z_vtm                : SDNode<"SystemZISD::VTM", SDT_ZCmp>;
+
+// i128 high integer comparisons.
 def z_scmp128hi          : SDNode<"SystemZISD::SCMP128HI", SDT_ZCmp>;
 def z_ucmp128hi          : SDNode<"SystemZISD::UCMP128HI", SDT_ZCmp>;
+
+// String operations that set CC as a side-effect.
 def z_vfae_cc            : SDNode<"SystemZISD::VFAE_CC", SDT_ZVecTernaryIntCC>;
 def z_vfaez_cc           : SDNode<"SystemZISD::VFAEZ_CC", SDT_ZVecTernaryIntCC>;
 def z_vfee_cc            : SDNode<"SystemZISD::VFEE_CC", SDT_ZVecBinaryCC>;
@@ -423,12 +584,24 @@ def z_vstrs_cc           : SDNode<"SystemZISD::VSTRS_CC",
                                   SDT_ZVecTernaryConvCC>;
 def z_vstrsz_cc          : SDNode<"SystemZISD::VSTRSZ_CC",
                                   SDT_ZVecTernaryConvCC>;
+
+// Test floating-point data class for vectors.
 def z_vftci              : SDNode<"SystemZISD::VFTCI", SDT_ZVecBinaryConvIntCC>;
 
 class AtomicWOp<string name, SDTypeProfile profile = SDT_ZAtomicLoadBinaryW>
   : SDNode<"SystemZISD::"#name, profile,
            [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
 
+// Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
+// ATOMIC_LOAD_<op>.
+//
+// Operand 0: the address of the containing 32-bit-aligned field
+// Operand 1: the second operand of <op>, in the high bits of an i32
+//            for everything except ATOMIC_SWAPW
+// Operand 2: how many bits to rotate the i32 left to bring the first
+//            operand into the high bits
+// Operand 3: the negative of operand 2, for rotating the other way
+// Operand 4: the width of the field in bits (8 or 16)
 def z_atomic_swapw      : AtomicWOp<"ATOMIC_SWAPW">;
 def z_atomic_loadw_add  : AtomicWOp<"ATOMIC_LOADW_ADD">;
 def z_atomic_loadw_sub  : AtomicWOp<"ATOMIC_LOADW_SUB">;
@@ -441,55 +614,117 @@ def z_atomic_loadw_max  : AtomicWOp<"ATOMIC_LOADW_MAX">;
 def z_atomic_loadw_umin : AtomicWOp<"ATOMIC_LOADW_UMIN">;
 def z_atomic_loadw_umax : AtomicWOp<"ATOMIC_LOADW_UMAX">;
 
+// Atomic compare-and-swap returning CC value.
+// Val, CC, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap)
 def z_atomic_cmp_swap   : SDNode<"SystemZISD::ATOMIC_CMP_SWAP",
                                  SDT_ZAtomicCmpSwap,
                                  [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
                                   SDNPMemOperand]>;
+
+// A wrapper around the inner loop of an ATOMIC_CMP_SWAP.
+//
+// Operand 0: the address of the containing 32-bit-aligned field
+// Operand 1: the compare value, in the low bits of an i32
+// Operand 2: the swap value, in the low bits of an i32
+// Operand 3: how many bits to rotate the i32 left to bring the first
+//            operand into the high bits
+// Operand 4: the negative of operand 3, for rotating the other way
+// Operand 5: the width of the field in bits (8 or 16)
 def z_atomic_cmp_swapw  : SDNode<"SystemZISD::ATOMIC_CMP_SWAPW",
                                  SDT_ZAtomicCmpSwapW,
                                  [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
                                   SDNPMemOperand]>;
 
+// 128-bit atomic load.
+// Val, OUTCHAIN = ATOMIC_LOAD_128(INCHAIN, ptr)
 def z_atomic_load_128   : SDNode<"SystemZISD::ATOMIC_LOAD_128",
                                  SDT_ZAtomicLoad128,
                                  [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
+// 128-bit atomic store.
+// OUTCHAIN = ATOMIC_STORE_128(INCHAIN, val, ptr)
 def z_atomic_store_128  : SDNode<"SystemZISD::ATOMIC_STORE_128",
                                  SDT_ZAtomicStore128,
                                  [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+// 128-bit atomic compare-and-swap.
+// Val, CC, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap)
 def z_atomic_cmp_swap_128 : SDNode<"SystemZISD::ATOMIC_CMP_SWAP_128",
                                    SDT_ZAtomicCmpSwap128,
                                    [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
                                     SDNPMemOperand]>;
 
+// Use a series of MVCs to copy bytes from one memory location to another.
+// The operands are:
+// - the target address
+// - the source address
+// - the constant length
+//
+// This isn't a memory opcode because we'd need to attach two
+// MachineMemOperands rather than one.
 def z_mvc               : SDNode<"SystemZISD::MVC", SDT_ZMemMemLength,
                                  [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+
+// Similar to MVC, but for logic operations (AND, OR, XOR).
 def z_nc                : SDNode<"SystemZISD::NC", SDT_ZMemMemLength,
                                  [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
 def z_oc                : SDNode<"SystemZISD::OC", SDT_ZMemMemLength,
                                  [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
 def z_xc                : SDNode<"SystemZISD::XC", SDT_ZMemMemLength,
                                  [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+
+// Use CLC to compare two blocks of memory, with the same comments
+// as for MVC.
 def z_clc               : SDNode<"SystemZISD::CLC", SDT_ZMemMemLengthCC,
                                  [SDNPHasChain, SDNPMayLoad]>;
+
+// Use MVC to set a block of memory after storing the first byte.
 def z_memset_mvc        : SDNode<"SystemZISD::MEMSET_MVC", SDT_ZMemsetMVC,
                                  [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+
+// Use a CLST-based sequence to implement strcmp(). The two input operands
+// are the addresses of the strings to compare.
 def z_strcmp            : SDNode<"SystemZISD::STRCMP", SDT_ZStringCC,
                                  [SDNPHasChain, SDNPMayLoad]>;
+
+// Use an MVST-based sequence to implement stpcpy().
 def z_stpcpy            : SDNode<"SystemZISD::STPCPY", SDT_ZString,
                                  [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+
+// Use an SRST-based sequence to search a block of memory. The first
+// operand is the end address, the second is the start, and the third
+// is the character to search for. CC is set to 1 on success and 2
+// on failure.
 def z_search_string     : SDNode<"SystemZISD::SEARCH_STRING", SDT_ZStringCC,
                                  [SDNPHasChain, SDNPMayLoad]>;
+
+// Prefetch from the second operand using the 4-bit control code in
+// the first operand. The code is 1 for a load prefetch and 2 for
+// a store prefetch.
 def z_prefetch          : SDNode<"SystemZISD::PREFETCH", SDT_ZPrefetch,
                                  [SDNPHasChain, SDNPMayLoad, SDNPMayStore,
                                   SDNPMemOperand]>;
 
+// Transaction begin. The first operand is the chain, the second
+// the TDB pointer, and the third the immediate control field.
+// Returns CC value and chain.
 def z_tbegin            : SDNode<"SystemZISD::TBEGIN", SDT_ZTBegin,
                                  [SDNPHasChain, SDNPMayStore, SDNPSideEffect]>;
 def z_tbegin_nofloat    : SDNode<"SystemZISD::TBEGIN_NOFLOAT", SDT_ZTBegin,
                                  [SDNPHasChain, SDNPMayStore, SDNPSideEffect]>;
+
+// Transaction end. Just the chain operand. Returns CC value and chain.
 def z_tend              : SDNode<"SystemZISD::TEND", SDT_ZTEnd,
                                  [SDNPHasChain, SDNPSideEffect]>;
 
+// z/OS XPLINK ADA Entry
+// Wraps a TargetGlobalAddress that should be loaded from a function's
+// AssociatedData Area (ADA). The ADA is passed to the function by the
+// caller in the XPLink ABI-defined register R5.
+// Operand 0: the GlobalValue/External Symbol
+// Operand 1: the ADA register
+// Operand 2: the offset (0 for the first and 8 for the second element in the
+//            function descriptor)
 def z_ada_entry         : SDNode<"SystemZISD::ADA_ENTRY", SDT_ZADAENTRY>;
diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
index eb00d484af693..88feba8adce0e 100644
--- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -10,21 +10,27 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "SystemZSelectionDAGInfo.h"
 #include "SystemZTargetMachine.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 
+#define GET_SDNODE_DESC
+#include "SystemZGenSDNodeInfo.inc"
+
 using namespace llvm;
 
 #define DEBUG_TYPE "systemz-selectiondag-info"
 
-bool SystemZSelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const {
-  return Opcode >= SystemZISD::FIRST_MEMORY_OPCODE &&
-         Opcode <= SystemZISD::LAST_MEMORY_OPCODE;
-}
+SystemZSelectionDAGInfo::SystemZSelectionDAGInfo()
+    : SelectionDAGGenTargetInfo(SystemZGenSDNodeInfo) {}
+
+const char *SystemZSelectionDAGInfo::getTargetNodeName(unsigned Opcode) const {
+  switch (static_cast<SystemZISD::NodeType>(Opcode)) {
+  case SystemZISD::GET_CCMASK:
+    return "SystemZISD::GET_CCMASK";
+  }
 
-bool SystemZSelectionDAGInfo::isTargetStrictFPOpcode(unsigned Opcode) const {
-  return Opcode >= SystemZISD::FIRST_STRICTFP_OPCODE &&
-         Opcode <= SystemZISD::LAST_STRICTFP_OPCODE;
+  return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode);
 }
 
 static unsigned getMemMemLenAdj(unsigned Op) {
diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
index 200566f9646c1..d25fddab65161 100644
--- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
@@ -15,15 +15,34 @@
 
 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
 
+#define GET_SDNODE_ENUM
+#include "SystemZGenSDNodeInfo.inc"
+
 namespace llvm {
 
+namespace SystemZISD {
+
+enum NodeType : unsigned {
+  // Set the condition code from a boolean value in operand 0.
+  // Operand 1 is a mask of all condition-code values that may result from
+  // this operation, operand 2 is a mask of condition-code values that may
+  // result if the boolean is true.
+  // Note that this operation is always optimized away; we will never
+  // generate any code for it.
+  GET_CCMASK = GENERATED_OPCODE_END,
+};
 
-class SystemZSelectionDAGInfo : public SelectionDAGTargetInfo {
-public:
-  explicit SystemZSelectionDAGInfo() = default;
+// Return true if OPCODE is some kind of PC-relative address.
+inline bool isPCREL(unsigned Opcode) { + return Opcode == PCREL_WRAPPER || Opcode == PCREL_OFFSET; +} - bool isTargetMemoryOpcode(unsigned Opcode) const override; +} // namespace SystemZISD + +class SystemZSelectionDAGInfo : public SelectionDAGGenTargetInfo { +public: + SystemZSelectionDAGInfo(); - bool isTargetStrictFPOpcode(unsigned Opcode) const override; + const char *getTargetNodeName(unsigned Opcode) const override; SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src, diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt index f9bd233cf8ecf..434a6d2c3553f 100644 --- a/llvm/lib/Target/X86/CMakeLists.txt +++ b/llvm/lib/Target/X86/CMakeLists.txt @@ -31,7 +31,6 @@ set(sources X86CmovConversion.cpp X86CodeGenPassBuilder.cpp X86DomainReassignment.cpp - X86DiscriminateMemOps.cpp X86LowerTileCopy.cpp X86LowerAMXType.cpp X86LowerAMXIntrinsics.cpp @@ -57,7 +56,6 @@ set(sources X86IndirectBranchTracking.cpp X86IndirectThunks.cpp X86InterleavedAccess.cpp - X86InsertPrefetch.cpp X86InstCombineIntrinsic.cpp X86InstrFMA3Info.cpp X86InstrFoldTables.cpp diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h index 03706aaaab237..97848bec7127e 100644 --- a/llvm/lib/Target/X86/X86.h +++ b/llvm/lib/Target/X86/X86.h @@ -166,13 +166,6 @@ FunctionPass *createX86IndirectThunksPass(); /// This pass replaces ret instructions with jmp's to __x86_return thunk. FunctionPass *createX86ReturnThunksPass(); -/// This pass ensures instructions featuring a memory operand -/// have distinctive (with respect to each other) -FunctionPass *createX86DiscriminateMemOpsPass(); - -/// This pass applies profiling information to insert cache prefetches. -FunctionPass *createX86InsertPrefetchPass(); - /// This pass insert wait instruction after X87 instructions which could raise /// fp exceptions when strict-fp enabled. FunctionPass *createX86InsertX87waitPass(); diff --git a/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp b/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp deleted file mode 100644 index bd151a450394a..0000000000000 --- a/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp +++ /dev/null @@ -1,184 +0,0 @@ -//===- X86DiscriminateMemOps.cpp - Unique IDs for Mem Ops -----------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// This pass aids profile-driven cache prefetch insertion by ensuring all -/// instructions that have a memory operand are distinguishible from each other. -/// -//===----------------------------------------------------------------------===// - -#include "X86.h" -#include "X86Subtarget.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/ProfileData/SampleProf.h" -#include "llvm/ProfileData/SampleProfReader.h" -#include "llvm/Support/Debug.h" -#include -using namespace llvm; - -#define DEBUG_TYPE "x86-discriminate-memops" - -static cl::opt EnableDiscriminateMemops( - DEBUG_TYPE, cl::init(false), - cl::desc("Generate unique debug info for each instruction with a memory " - "operand. 
Should be enabled for profile-driven cache prefetching, " - "both in the build of the binary being profiled, as well as in " - "the build of the binary consuming the profile."), - cl::Hidden); - -static cl::opt BypassPrefetchInstructions( - "x86-bypass-prefetch-instructions", cl::init(true), - cl::desc("When discriminating instructions with memory operands, ignore " - "prefetch instructions. This ensures the other memory operand " - "instructions have the same identifiers after inserting " - "prefetches, allowing for successive insertions."), - cl::Hidden); - -namespace { - -using Location = std::pair; - -Location diToLocation(const DILocation *Loc) { - return std::make_pair(Loc->getFilename(), Loc->getLine()); -} - -/// Ensure each instruction having a memory operand has a distinct pair. -void updateDebugInfo(MachineInstr *MI, const DILocation *Loc) { - DebugLoc DL(Loc); - MI->setDebugLoc(DL); -} - -class X86DiscriminateMemOps : public MachineFunctionPass { - bool runOnMachineFunction(MachineFunction &MF) override; - StringRef getPassName() const override { - return "X86 Discriminate Memory Operands"; - } - -public: - static char ID; - - /// Default construct and initialize the pass. - X86DiscriminateMemOps(); -}; - -bool IsPrefetchOpcode(unsigned Opcode) { - return Opcode == X86::PREFETCHNTA || Opcode == X86::PREFETCHT0 || - Opcode == X86::PREFETCHT1 || Opcode == X86::PREFETCHT2 || - Opcode == X86::PREFETCHIT0 || Opcode == X86::PREFETCHIT1 || - Opcode == X86::PREFETCHRST2; -} -} // end anonymous namespace - -//===----------------------------------------------------------------------===// -// Implementation -//===----------------------------------------------------------------------===// - -char X86DiscriminateMemOps::ID = 0; - -/// Default construct and initialize the pass. -X86DiscriminateMemOps::X86DiscriminateMemOps() : MachineFunctionPass(ID) {} - -bool X86DiscriminateMemOps::runOnMachineFunction(MachineFunction &MF) { - if (!EnableDiscriminateMemops) - return false; - - DISubprogram *FDI = MF.getFunction().getSubprogram(); - if (!FDI || !FDI->getUnit()->getDebugInfoForProfiling()) - return false; - - // Have a default DILocation, if we find instructions with memops that don't - // have any debug info. - const DILocation *ReferenceDI = - DILocation::get(FDI->getContext(), FDI->getLine(), 0, FDI); - assert(ReferenceDI && "ReferenceDI should not be nullptr"); - DenseMap MemOpDiscriminators; - MemOpDiscriminators[diToLocation(ReferenceDI)] = 0; - - // Figure out the largest discriminator issued for each Location. When we - // issue new discriminators, we can thus avoid issuing discriminators - // belonging to instructions that don't have memops. This isn't a requirement - // for the goals of this pass, however, it avoids unnecessary ambiguity. - for (auto &MBB : MF) { - for (auto &MI : MBB) { - const auto &DI = MI.getDebugLoc(); - if (!DI) - continue; - if (BypassPrefetchInstructions && IsPrefetchOpcode(MI.getDesc().Opcode)) - continue; - Location Loc = diToLocation(DI); - unsigned &Disc = MemOpDiscriminators[Loc]; - Disc = std::max(Disc, DI->getBaseDiscriminator()); - } - } - - // Keep track of the discriminators seen at each Location. If an instruction's - // DebugInfo has a Location and discriminator we've already seen, replace its - // discriminator with a new one, to guarantee uniqueness. 
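The uniquing scheme in the pass being deleted reduces to a per-location set of already-used discriminators, with a bump on collision. A simplified standalone sketch of the idea (types and names simplified for illustration; this is not the pass's actual API):

  #include <map>
  #include <set>
  #include <string>
  #include <utility>

  using Location = std::pair<std::string, unsigned>; // (filename, line)

  // Return a discriminator not yet used at L, and remember it as seen.
  unsigned uniquify(std::map<Location, std::set<unsigned>> &Seen,
                    const Location &L, unsigned Disc) {
    while (!Seen[L].insert(Disc).second)
      ++Disc; // collision at this location; try the next value
    return Disc;
  }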
- DenseMap> Seen; - - bool Changed = false; - for (auto &MBB : MF) { - for (auto &MI : MBB) { - if (X86II::getMemoryOperandNo(MI.getDesc().TSFlags) < 0) - continue; - if (BypassPrefetchInstructions && IsPrefetchOpcode(MI.getDesc().Opcode)) - continue; - const DILocation *DI = MI.getDebugLoc(); - bool HasDebug = DI; - if (!HasDebug) { - DI = ReferenceDI; - } - Location L = diToLocation(DI); - DenseSet &Set = Seen[L]; - const std::pair::iterator, bool> TryInsert = - Set.insert(DI->getBaseDiscriminator()); - if (!TryInsert.second || !HasDebug) { - unsigned BF, DF, CI = 0; - DILocation::decodeDiscriminator(DI->getDiscriminator(), BF, DF, CI); - std::optional EncodedDiscriminator = - DILocation::encodeDiscriminator(MemOpDiscriminators[L] + 1, DF, CI); - - if (!EncodedDiscriminator) { - // FIXME(mtrofin): The assumption is that this scenario is infrequent/OK - // not to support. If evidence points otherwise, we can explore synthesizeing - // unique DIs by adding fake line numbers, or by constructing 64 bit - // discriminators. - LLVM_DEBUG(dbgs() << "Unable to create a unique discriminator " - "for instruction with memory operand in: " - << DI->getFilename() << " Line: " << DI->getLine() - << " Column: " << DI->getColumn() - << ". This is likely due to a large macro expansion. \n"); - continue; - } - // Since we were able to encode, bump the MemOpDiscriminators. - ++MemOpDiscriminators[L]; - DI = DI->cloneWithDiscriminator(*EncodedDiscriminator); - assert(DI && "DI should not be nullptr"); - updateDebugInfo(&MI, DI); - Changed = true; - std::pair::iterator, bool> MustInsert = - Set.insert(DI->getBaseDiscriminator()); - (void)MustInsert; // Silence warning in release build. - assert(MustInsert.second && "New discriminator shouldn't be present in set"); - } - - // Bump the reference DI to avoid cramming discriminators on line 0. - // FIXME(mtrofin): pin ReferenceDI on blocks or first instruction with DI - // in a block. It's more consistent than just relying on the last memop - // instruction we happened to see. - ReferenceDI = DI; - } - } - return Changed; -} - -FunctionPass *llvm::createX86DiscriminateMemOpsPass() { - return new X86DiscriminateMemOps(); -} diff --git a/llvm/lib/Target/X86/X86InsertPrefetch.cpp b/llvm/lib/Target/X86/X86InsertPrefetch.cpp deleted file mode 100644 index 953b755a0ca4c..0000000000000 --- a/llvm/lib/Target/X86/X86InsertPrefetch.cpp +++ /dev/null @@ -1,259 +0,0 @@ -//===------- X86InsertPrefetch.cpp - Insert cache prefetch hints ----------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This pass applies cache prefetch instructions based on a profile. The pass -// assumes DiscriminateMemOps ran immediately before, to ensure debug info -// matches the one used at profile generation time. The profile is encoded in -// afdo format (text or binary). It contains prefetch hints recommendations. -// Each recommendation is made in terms of debug info locations, a type (i.e. -// nta, t{0|1|2}) and a delta. The debug info identifies an instruction with a -// memory operand (see X86DiscriminateMemOps). The prefetch will be made for -// a location at that memory operand + the delta specified in the -// recommendation. 
-// -//===----------------------------------------------------------------------===// - -#include "X86.h" -#include "X86Subtarget.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/Module.h" -#include "llvm/ProfileData/SampleProf.h" -#include "llvm/ProfileData/SampleProfReader.h" -#include "llvm/Support/VirtualFileSystem.h" -#include "llvm/Transforms/IPO/SampleProfile.h" -using namespace llvm; -using namespace sampleprof; - -static cl::opt - PrefetchHintsFile("prefetch-hints-file", - cl::desc("Path to the prefetch hints profile. See also " - "-x86-discriminate-memops"), - cl::Hidden); -namespace { - -class X86InsertPrefetch : public MachineFunctionPass { - void getAnalysisUsage(AnalysisUsage &AU) const override; - bool doInitialization(Module &) override; - - bool runOnMachineFunction(MachineFunction &MF) override; - struct PrefetchInfo { - unsigned InstructionID; - int64_t Delta; - }; - typedef SmallVectorImpl Prefetches; - bool findPrefetchInfo(const FunctionSamples *Samples, const MachineInstr &MI, - Prefetches &prefetches) const; - -public: - static char ID; - X86InsertPrefetch(const std::string &PrefetchHintsFilename); - StringRef getPassName() const override { - return "X86 Insert Cache Prefetches"; - } - -private: - std::string Filename; - std::unique_ptr Reader; -}; - -using PrefetchHints = SampleRecord::CallTargetMap; - -// Return any prefetching hints for the specified MachineInstruction. The hints -// are returned as pairs (name, delta). -ErrorOr -getPrefetchHints(const FunctionSamples *TopSamples, const MachineInstr &MI) { - if (const auto &Loc = MI.getDebugLoc()) - if (const auto *Samples = TopSamples->findFunctionSamples(Loc)) - return Samples->findCallTargetMapAt(FunctionSamples::getOffset(Loc), - Loc->getBaseDiscriminator()); - return std::error_code(); -} - -// The prefetch instruction can't take memory operands involving vector -// registers. -bool IsMemOpCompatibleWithPrefetch(const MachineInstr &MI, int Op) { - Register BaseReg = MI.getOperand(Op + X86::AddrBaseReg).getReg(); - Register IndexReg = MI.getOperand(Op + X86::AddrIndexReg).getReg(); - return (BaseReg == 0 || - X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) || - X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg)) && - (IndexReg == 0 || - X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) || - X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)); -} - -} // end anonymous namespace - -//===----------------------------------------------------------------------===// -// Implementation -//===----------------------------------------------------------------------===// - -char X86InsertPrefetch::ID = 0; - -X86InsertPrefetch::X86InsertPrefetch(const std::string &PrefetchHintsFilename) - : MachineFunctionPass(ID), Filename(PrefetchHintsFilename) {} - -/// Return true if the provided MachineInstruction has cache prefetch hints. In -/// that case, the prefetch hints are stored, in order, in the Prefetches -/// vector. -bool X86InsertPrefetch::findPrefetchInfo(const FunctionSamples *TopSamples, - const MachineInstr &MI, - Prefetches &Prefetches) const { - assert(Prefetches.empty() && - "Expected caller passed empty PrefetchInfo vector."); - - // There is no point to match prefetch hints if the profile is using MD5. 
- if (FunctionSamples::UseMD5) - return false; - - static constexpr std::pair HintTypes[] = { - {"_nta_", X86::PREFETCHNTA}, - {"_t0_", X86::PREFETCHT0}, - {"_t1_", X86::PREFETCHT1}, - {"_t2_", X86::PREFETCHT2}, - }; - static const char *SerializedPrefetchPrefix = "__prefetch"; - - auto T = getPrefetchHints(TopSamples, MI); - if (!T) - return false; - int16_t max_index = -1; - // Convert serialized prefetch hints into PrefetchInfo objects, and populate - // the Prefetches vector. - for (const auto &S_V : *T) { - StringRef Name = S_V.first.stringRef(); - if (Name.consume_front(SerializedPrefetchPrefix)) { - int64_t D = static_cast(S_V.second); - unsigned IID = 0; - for (const auto &HintType : HintTypes) { - if (Name.consume_front(HintType.first)) { - IID = HintType.second; - break; - } - } - if (IID == 0) - return false; - uint8_t index = 0; - Name.consumeInteger(10, index); - - if (index >= Prefetches.size()) - Prefetches.resize(index + 1); - Prefetches[index] = {IID, D}; - max_index = std::max(max_index, static_cast(index)); - } - } - assert(max_index + 1 >= 0 && - "Possible overflow: max_index + 1 should be positive."); - assert(static_cast(max_index + 1) == Prefetches.size() && - "The number of prefetch hints received should match the number of " - "PrefetchInfo objects returned"); - return !Prefetches.empty(); -} - -bool X86InsertPrefetch::doInitialization(Module &M) { - if (Filename.empty()) - return false; - - LLVMContext &Ctx = M.getContext(); - // TODO: Propagate virtual file system into LLVM targets. - auto FS = vfs::getRealFileSystem(); - ErrorOr> ReaderOrErr = - SampleProfileReader::create(Filename, Ctx, *FS); - if (std::error_code EC = ReaderOrErr.getError()) { - std::string Msg = "Could not open profile: " + EC.message(); - Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg, - DiagnosticSeverity::DS_Warning)); - return false; - } - Reader = std::move(ReaderOrErr.get()); - Reader->read(); - return true; -} - -void X86InsertPrefetch::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - MachineFunctionPass::getAnalysisUsage(AU); -} - -bool X86InsertPrefetch::runOnMachineFunction(MachineFunction &MF) { - if (!Reader) - return false; - const FunctionSamples *Samples = Reader->getSamplesFor(MF.getFunction()); - if (!Samples) - return false; - - bool Changed = false; - - const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); - SmallVector Prefetches; - for (auto &MBB : MF) { - for (auto MI = MBB.instr_begin(); MI != MBB.instr_end();) { - auto Current = MI; - ++MI; - - int Offset = X86II::getMemoryOperandNo(Current->getDesc().TSFlags); - if (Offset < 0) - continue; - unsigned Bias = X86II::getOperandBias(Current->getDesc()); - int MemOpOffset = Offset + Bias; - // FIXME(mtrofin): ORE message when the recommendation cannot be taken. 
- if (!IsMemOpCompatibleWithPrefetch(*Current, MemOpOffset)) - continue; - Prefetches.clear(); - if (!findPrefetchInfo(Samples, *Current, Prefetches)) - continue; - assert(!Prefetches.empty() && - "The Prefetches vector should contain at least a value if " - "findPrefetchInfo returned true."); - for (auto &PrefInfo : Prefetches) { - unsigned PFetchInstrID = PrefInfo.InstructionID; - int64_t Delta = PrefInfo.Delta; - const MCInstrDesc &Desc = TII->get(PFetchInstrID); - MachineInstr *PFetch = - MF.CreateMachineInstr(Desc, Current->getDebugLoc(), true); - MachineInstrBuilder MIB(MF, PFetch); - - static_assert(X86::AddrBaseReg == 0 && X86::AddrScaleAmt == 1 && - X86::AddrIndexReg == 2 && X86::AddrDisp == 3 && - X86::AddrSegmentReg == 4, - "Unexpected change in X86 operand offset order."); - - // This assumes X86::AddBaseReg = 0, {...}ScaleAmt = 1, etc. - // FIXME(mtrofin): consider adding a: - // MachineInstrBuilder::set(unsigned offset, op). - MIB.addReg(Current->getOperand(MemOpOffset + X86::AddrBaseReg).getReg()) - .addImm( - Current->getOperand(MemOpOffset + X86::AddrScaleAmt).getImm()) - .addReg( - Current->getOperand(MemOpOffset + X86::AddrIndexReg).getReg()) - .addImm(Current->getOperand(MemOpOffset + X86::AddrDisp).getImm() + - Delta) - .addReg(Current->getOperand(MemOpOffset + X86::AddrSegmentReg) - .getReg()); - - if (!Current->memoperands_empty()) { - MachineMemOperand *CurrentOp = *(Current->memoperands_begin()); - MIB.addMemOperand(MF.getMachineMemOperand( - CurrentOp, CurrentOp->getOffset() + Delta, CurrentOp->getSize())); - } - - // Insert before Current. This is because Current may clobber some of - // the registers used to describe the input memory operand. - MBB.insert(Current, PFetch); - Changed = true; - } - } - } - return Changed; -} - -FunctionPass *llvm::createX86InsertPrefetchPass() { - return new X86InsertPrefetch(PrefetchHintsFile); -} diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp index 543220b2fd3b9..713df63479987 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -563,8 +563,6 @@ void X86PassConfig::addPreEmitPass() { addPass(createX86FixupVectorConstants()); } addPass(createX86CompressEVEXPass()); - addPass(createX86DiscriminateMemOpsPass()); - addPass(createX86InsertPrefetchPass()); addPass(createX86InsertX87waitPass()); } diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index d35ae4730a9f3..0f4bc649df720 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -107,6 +107,10 @@ STATISTIC(MismatchedCloneAssignments, STATISTIC(TotalMergeInvokes, "Number of merge invocations for nodes"); STATISTIC(TotalMergeIters, "Number of merge iterations for nodes"); STATISTIC(MaxMergeIters, "Max merge iterations for nodes"); +STATISTIC(NumImportantContextIds, "Number of important context ids"); +STATISTIC(NumFixupEdgeIdsInserted, "Number of fixup edge ids inserted"); +STATISTIC(NumFixupEdgesAdded, "Number of fixup edges added"); +STATISTIC(NumFixedContexts, "Number of contexts with fixed edges"); static cl::opt DotFilePathPrefix( "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, @@ -223,9 +227,18 @@ static cl::opt MemProfRequireDefinitionForPromotion( extern cl::opt MemProfReportHintedSizes; extern cl::opt MinClonedColdBytePercent; +cl::opt MemProfTopNImportant( + "memprof-top-n-important", cl::init(10), 
cl::Hidden,
+    cl::desc("Number of largest cold contexts to consider important"));
+
+cl::opt<bool> MemProfFixupImportant(
+    "memprof-fixup-important", cl::init(true), cl::Hidden,
+    cl::desc("Enables edge fixup for important contexts"));
+
 } // namespace llvm
 
 namespace {
+
 /// CRTP base for graphs built from either IR or ThinLTO summary index.
 ///
 /// The graph represents the call contexts in all memprof metadata on allocation
@@ -581,17 +594,26 @@ class CallsiteContextGraph {
 
   /// Adds nodes for the given MIB stack ids.
   template <class NodeT, class IteratorT>
-  void addStackNodesForMIB(ContextNode *AllocNode,
-                           CallStack<NodeT, IteratorT> &StackContext,
-                           CallStack<NodeT, IteratorT> &CallsiteContext,
-                           AllocationType AllocType,
-                           ArrayRef<ContextTotalSize> ContextSizeInfo);
+  void addStackNodesForMIB(
+      ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
+      CallStack<NodeT, IteratorT> &CallsiteContext, AllocationType AllocType,
+      ArrayRef<ContextTotalSize> ContextSizeInfo,
+      std::map<uint64_t, uint32_t> &TotalSizeToContextIdTopNCold);
 
   /// Matches all callsite metadata (or summary) to the nodes created for
   /// allocation memprof MIB metadata, synthesizing new nodes to reflect any
   /// inlining performed on those callsite instructions.
   void updateStackNodes();
 
+  /// Optionally fix up edges for the N largest cold contexts to better enable
+  /// cloning. This is particularly helpful if the context includes recursion
+  /// as well as inlining, resulting in a single stack node for multiple stack
+  /// ids in the context. With recursion it is difficult to get the edge
+  /// updates correct, as in the general case we have lost the original stack
+  /// id ordering for the context. Do the more expensive fixup only for the
+  /// largest contexts, controlled by MemProfTopNImportant and
+  /// MemProfFixupImportant.
+  void fixupImportantContexts();
+
   /// Update graph to conservatively handle any callsite stack nodes that target
   /// multiple different callee target functions.
   void handleCallsitesWithMultipleTargets();
@@ -658,7 +680,8 @@ class CallsiteContextGraph {
   void assignStackNodesPostOrder(
       ContextNode *Node, DenseSet<const ContextNode *> &Visited,
       DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls,
-      DenseMap<CallInfo, CallInfo> &CallToMatchingCall);
+      DenseMap<CallInfo, CallInfo> &CallToMatchingCall,
+      const DenseSet<uint32_t> &ImportantContextIds);
 
   /// Duplicates the given set of context ids, updating the provided
   /// map from each original id with the newly generated context ids,
@@ -859,6 +882,50 @@ class CallsiteContextGraph {
   /// nodes.
   DenseMap<uint64_t, ContextNode *> StackEntryIdToContextNodeMap;
 
+  /// Saves information for the contexts identified as important (the largest
+  /// cold contexts up to MemProfTopNImportant).
+  struct ImportantContextInfo {
+    // The original list of leaf-first stack ids corresponding to this context.
+    std::vector<uint64_t> StackIds;
+    // Max length of stack ids corresponding to a single stack ContextNode for
+    // this context (i.e. the max length of a key in StackIdsToNode below).
+    unsigned MaxLength = 0;
+    // Mapping of slices of the stack ids to the corresponding ContextNode
+    // (there can be multiple stack ids due to inlining). Populated when
+    // updating stack nodes while matching them to the IR or summary.
+    std::map<std::vector<uint64_t>, ContextNode *> StackIdsToNode;
+  };
+
+  // Map of important full context ids to information about each.
+  DenseMap<uint32_t, ImportantContextInfo> ImportantContextIdInfo;
+
+  // For each important context id found in Node (if any), records the list of
+  // stack ids that corresponded to the given callsite Node. There can be more
+  // than one in the case of inlining.
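The TotalSizeToContextIdTopNCold map threaded through addStackNodesForMIB above implements a simple top-N selection keyed on size. A self-contained sketch of the pattern, using the same ascending-key std::map trick (the helper name is hypothetical):

  #include <cstdint>
  #include <map>

  // Keep the N largest (Size -> Id) entries. std::map orders keys ascending,
  // so begin() is always the smallest recorded entry, i.e. the eviction
  // candidate. Equal sizes collide and overwrite, as in the patch itself.
  void recordTopN(std::map<uint64_t, uint32_t> &TopN, uint64_t Size,
                  uint32_t Id, unsigned N) {
    if (TopN.size() == N && Size <= TopN.begin()->first)
      return;                   // not larger than the current smallest
    if (TopN.size() == N)
      TopN.erase(TopN.begin()); // evict the smallest of the current top N
    TopN[Size] = Id;
  }

The recordStackNode helper declared next then uses the resulting set of important ids to associate stack-id slices with their callsite nodes.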
+ void recordStackNode(std::vector &StackIds, ContextNode *Node, + // We pass in the Node's context ids to avoid the + // overhead of computing them as the caller already has + // them in some cases. + const DenseSet &NodeContextIds, + const DenseSet &ImportantContextIds) { + if (!MemProfTopNImportant) { + assert(ImportantContextIds.empty()); + return; + } + DenseSet Ids = + set_intersection(NodeContextIds, ImportantContextIds); + if (Ids.empty()) + return; + auto Size = StackIds.size(); + for (auto Id : Ids) { + auto &Entry = ImportantContextIdInfo[Id]; + Entry.StackIdsToNode[StackIds] = Node; + // Keep track of the max to simplify later analysis. + if (Size > Entry.MaxLength) + Entry.MaxLength = Size; + } + } + /// Maps to track the calls to their corresponding nodes in the graph. MapVector AllocationCallToContextNodeMap; MapVector NonAllocationCallToContextNodeMap; @@ -1353,7 +1420,8 @@ template void CallsiteContextGraph::addStackNodesForMIB( ContextNode *AllocNode, CallStack &StackContext, CallStack &CallsiteContext, AllocationType AllocType, - ArrayRef ContextSizeInfo) { + ArrayRef ContextSizeInfo, + std::map &TotalSizeToContextIdTopNCold) { // Treating the hot alloc type as NotCold before the disambiguation for "hot" // is done. if (AllocType == AllocationType::Hot) @@ -1361,8 +1429,33 @@ void CallsiteContextGraph::addStackNodesForMIB( ContextIdToAllocationType[++LastContextId] = AllocType; + bool IsImportant = false; if (!ContextSizeInfo.empty()) { auto &Entry = ContextIdToContextSizeInfos[LastContextId]; + // If this is a cold allocation, and we are collecting non-zero largest + // contexts, see if this is a candidate. + if (AllocType == AllocationType::Cold && MemProfTopNImportant > 0) { + uint64_t TotalCold = 0; + for (auto &CSI : ContextSizeInfo) + TotalCold += CSI.TotalSize; + // Record this context if either we haven't found the first top-n largest + // yet, or if it is larger than the smallest already recorded. + if (TotalSizeToContextIdTopNCold.size() < MemProfTopNImportant || + // Since TotalSizeToContextIdTopNCold is a std::map, it is implicitly + // sorted in ascending size of its key which is the size. + TotalCold > TotalSizeToContextIdTopNCold.begin()->first) { + if (TotalSizeToContextIdTopNCold.size() == MemProfTopNImportant) { + // Remove old one and its associated entries. 
+ auto IdToRemove = TotalSizeToContextIdTopNCold.begin()->second; + TotalSizeToContextIdTopNCold.erase( + TotalSizeToContextIdTopNCold.begin()); + assert(ImportantContextIdInfo.count(IdToRemove)); + ImportantContextIdInfo.erase(IdToRemove); + } + TotalSizeToContextIdTopNCold[TotalCold] = LastContextId; + IsImportant = true; + } + } Entry.insert(Entry.begin(), ContextSizeInfo.begin(), ContextSizeInfo.end()); } @@ -1381,6 +1474,8 @@ void CallsiteContextGraph::addStackNodesForMIB( for (auto ContextIter = StackContext.beginAfterSharedPrefix(CallsiteContext); ContextIter != StackContext.end(); ++ContextIter) { auto StackId = getStackId(*ContextIter); + if (IsImportant) + ImportantContextIdInfo[LastContextId].StackIds.push_back(StackId); ContextNode *StackNode = getNodeForStackId(StackId); if (!StackNode) { StackNode = createNewNode(/*IsAllocation=*/false); @@ -1600,11 +1695,12 @@ static void checkNode(const ContextNode *Node, template void CallsiteContextGraph:: - assignStackNodesPostOrder( - ContextNode *Node, DenseSet &Visited, - DenseMap> - &StackIdToMatchingCalls, - DenseMap &CallToMatchingCall) { + assignStackNodesPostOrder(ContextNode *Node, + DenseSet &Visited, + DenseMap> + &StackIdToMatchingCalls, + DenseMap &CallToMatchingCall, + const DenseSet &ImportantContextIds) { auto Inserted = Visited.insert(Node); if (!Inserted.second) return; @@ -1620,7 +1716,7 @@ void CallsiteContextGraph:: continue; } assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls, - CallToMatchingCall); + CallToMatchingCall, ImportantContextIds); } // If this node's stack id is in the map, update the graph to contain new @@ -1648,6 +1744,7 @@ void CallsiteContextGraph:: Node->setCall(Call); NonAllocationCallToContextNodeMap[Call] = Node; NodeToCallingFunc[Node] = Func; + recordStackNode(Ids, Node, Node->getContextIds(), ImportantContextIds); return; } } @@ -1786,6 +1883,9 @@ void CallsiteContextGraph:: : CurNode->computeAllocType(); PrevNode = CurNode; } + + recordStackNode(Ids, NewNode, SavedContextIds, ImportantContextIds); + if (VerifyNodes) { checkNode(NewNode, /*CheckEdges=*/true); for (auto Id : Ids) { @@ -1798,6 +1898,122 @@ void CallsiteContextGraph:: } } +template +void CallsiteContextGraph::fixupImportantContexts() { + if (ImportantContextIdInfo.empty()) + return; + + // Update statistics as we are done building this map at this point. + NumImportantContextIds = ImportantContextIdInfo.size(); + + if (!MemProfFixupImportant) + return; + + if (ExportToDot) + exportToDot("beforestackfixup"); + + // For each context we identified as important, walk through the saved context + // stack ids in order from leaf upwards, and make sure all edges are correct. + // These can be difficult to get right when updating the graph while mapping + // nodes onto summary or IR, especially when there is recursion. In + // particular, when we have created new nodes to reflect inlining, it is + // sometimes impossible to know exactly how to update the edges in the face of + // recursion, as we have lost the original ordering of the stack ids in the + // contexts. + // TODO: Consider only doing this if we detect the context has recursive + // cycles. + // + // I.e. assume we have a context with stack ids like: {A B A C A D E} + // and let's say A was inlined into B, C, and D. The original graph will have + // multiple recursive cycles through A. When we match the original context + // nodes onto the IR or summary, we will merge {A B} into one context node, + // {A C} onto another, and {A D} onto another. 
Looking at the stack sequence + // above, we should end up with a non-cyclic set of edges like: + // {AB} <- {AC} <- {AD} <- E. However, because we normally have lost the + // original ordering, we won't get the edges correct initially (it's + // impossible without the original ordering). Here we do the fixup (add and + // removing edges where necessary) for this context. In the + // ImportantContextInfo struct in this case we should have a MaxLength = 2, + // and map entries for {A B}, {A C}, {A D}, and {E}. + for (auto &[CurContextId, Info] : ImportantContextIdInfo) { + if (Info.StackIdsToNode.empty()) + continue; + bool Changed = false; + ContextNode *PrevNode = nullptr; + ContextNode *CurNode = nullptr; + DenseSet VisitedEdges; + ArrayRef AllStackIds(Info.StackIds); + // Try to identify what callsite ContextNode maps to which slice of the + // context's ordered stack ids. + for (unsigned I = 0; I < AllStackIds.size(); I++, PrevNode = CurNode) { + // We will do this greedily, trying up to MaxLength stack ids in a row, to + // see if we recorded a context node for that sequence. + auto Len = Info.MaxLength; + auto LenToEnd = AllStackIds.size() - I; + if (Len > LenToEnd) + Len = LenToEnd; + CurNode = nullptr; + // Try to find a recorded context node starting with the longest length + // recorded, and on down until we check for just a single stack node. + for (; Len > 0; Len--) { + // Get the slice of the original stack id sequence to check. + auto CheckStackIds = AllStackIds.slice(I, Len); + auto EntryIt = Info.StackIdsToNode.find(CheckStackIds); + if (EntryIt == Info.StackIdsToNode.end()) + continue; + CurNode = EntryIt->second; + // Skip forward so we don't try to look for the ones we just matched. + // We increment by Len - 1, because the outer for loop will increment I. + I += Len - 1; + break; + } + // Give up if we couldn't find a node. Since we need to clone from the + // leaf allocation upwards, no sense in doing anymore fixup further up + // the context if we couldn't match part of the original stack context + // onto a callsite node. + if (!CurNode) + break; + // No edges to fix up until we have a pair of nodes that should be + // adjacent in the graph. + if (!PrevNode) + continue; + // See if we already have a call edge from CurNode to PrevNode. + auto *CurEdge = PrevNode->findEdgeFromCaller(CurNode); + if (CurEdge) { + // We already have an edge. Make sure it contains this context id. + if (CurEdge->getContextIds().insert(CurContextId).second) { + NumFixupEdgeIdsInserted++; + Changed = true; + } + } else { + // No edge exists - add one. + NumFixupEdgesAdded++; + DenseSet ContextIds({CurContextId}); + auto AllocType = computeAllocType(ContextIds); + auto NewEdge = std::make_shared( + PrevNode, CurNode, AllocType, std::move(ContextIds)); + PrevNode->CallerEdges.push_back(NewEdge); + CurNode->CalleeEdges.push_back(NewEdge); + // Save the new edge for the below handling. + CurEdge = NewEdge.get(); + Changed = true; + } + VisitedEdges.insert(CurEdge); + // Now remove this context id from any other caller edges calling + // PrevNode. + for (auto &Edge : PrevNode->CallerEdges) { + // Skip the edge updating/created above and edges we have already + // visited (due to recursion). 
+        if (Edge.get() != CurEdge && !VisitedEdges.contains(Edge.get()))
+          Edge->getContextIds().erase(CurContextId);
+      }
+    }
+    if (Changed)
+      NumFixedContexts++;
+  }
+}
+
 template <typename DerivedCCG, typename FuncTy, typename CallTy>
 void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
   // Map of stack id to all calls with that as the last (outermost caller)
@@ -2043,9 +2259,14 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
   // nodes representing any inlining at interior callsites. Note we move the
   // associated context ids over to the new nodes.
   DenseSet<const ContextNode *> Visited;
+  DenseSet<uint32_t> ImportantContextIds(llvm::from_range,
+                                         ImportantContextIdInfo.keys());
   for (auto &Entry : AllocationCallToContextNodeMap)
     assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls,
-                              CallToMatchingCall);
+                              CallToMatchingCall, ImportantContextIds);
+
+  fixupImportantContexts();
+
   if (VerifyCCG)
     check();
 }
@@ -2155,6 +2376,10 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
     Module &M,
     llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter)
     : Mod(M), OREGetter(OREGetter) {
+  // Map for keeping track of the largest cold contexts up to the number given
+  // by MemProfTopNImportant. Must be a std::map (not DenseMap) because keys
+  // must be sorted.
+  std::map<uint64_t, uint32_t> TotalSizeToContextIdTopNCold;
   for (auto &F : M) {
     std::vector<CallInfo> CallsWithMetadata;
     for (auto &BB : F) {
@@ -2191,7 +2416,8 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
             CallStack<MDNode, MDNode::op_iterator> StackContext(StackNode);
             addStackNodesForMIB<MDNode, MDNode::op_iterator>(
                 AllocNode, StackContext, CallsiteContext,
-                getMIBAllocType(MIBMD), ContextSizeInfo);
+                getMIBAllocType(MIBMD), ContextSizeInfo,
+                TotalSizeToContextIdTopNCold);
           }
           // If exporting the graph to dot and an allocation id of interest was
           // specified, record all the context ids for this allocation node.
@@ -2241,6 +2467,10 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph(
     llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
         isPrevailing)
     : Index(Index), isPrevailing(isPrevailing) {
+  // Map for keeping track of the largest cold contexts up to the number given
+  // by MemProfTopNImportant. Must be a std::map (not DenseMap) because keys
+  // must be sorted.
+  std::map<uint64_t, uint32_t> TotalSizeToContextIdTopNCold;
   for (auto &I : Index) {
     auto VI = Index.getValueInfo(I);
     for (auto &S : VI.getSummaryList()) {
@@ -2288,7 +2518,7 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph(
         }
         addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
             AllocNode, StackContext, EmptyContext, MIB.AllocType,
-            ContextSizeInfo);
+            ContextSizeInfo, TotalSizeToContextIdTopNCold);
         I++;
       }
       // If exporting the graph to dot and an allocation id of interest was
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 5dc3175382254..f533a47150a7b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -63,9 +63,11 @@ class VPBuilder {
   }

   VPInstruction *createInstruction(unsigned Opcode,
-                                   ArrayRef<VPValue *> Operands, DebugLoc DL,
+                                   ArrayRef<VPValue *> Operands,
+                                   const VPIRMetadata &MD, DebugLoc DL,
                                    const Twine &Name = "") {
-    return tryInsertInstruction(new VPInstruction(Opcode, Operands, DL, Name));
+    return tryInsertInstruction(
+        new VPInstruction(Opcode, Operands, {}, MD, DL, Name));
   }

 public:
@@ -150,17 +152,17 @@ class VPBuilder {
   /// its underlying Instruction.
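+  /// Metadata \p MD and debug location \p DL are taken as given rather than
+  /// being rederived from \p Inst.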
VPInstruction *createNaryOp(unsigned Opcode, ArrayRef Operands, Instruction *Inst = nullptr, + const VPIRMetadata &MD = {}, + DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "") { - DebugLoc DL = DebugLoc::getUnknown(); - if (Inst) - DL = Inst->getDebugLoc(); - VPInstruction *NewVPInst = createInstruction(Opcode, Operands, DL, Name); + VPInstruction *NewVPInst = tryInsertInstruction( + new VPInstruction(Opcode, Operands, {}, MD, DL, Name)); NewVPInst->setUnderlyingValue(Inst); return NewVPInst; } VPInstruction *createNaryOp(unsigned Opcode, ArrayRef Operands, DebugLoc DL, const Twine &Name = "") { - return createInstruction(Opcode, Operands, DL, Name); + return createInstruction(Opcode, Operands, {}, DL, Name); } VPInstruction *createNaryOp(unsigned Opcode, ArrayRef Operands, const VPIRFlags &Flags, @@ -174,8 +176,8 @@ class VPBuilder { Type *ResultTy, const VPIRFlags &Flags = {}, DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "") { - return tryInsertInstruction( - new VPInstructionWithType(Opcode, Operands, ResultTy, Flags, DL, Name)); + return tryInsertInstruction(new VPInstructionWithType( + Opcode, Operands, ResultTy, Flags, {}, DL, Name)); } VPInstruction *createOverflowingOp( @@ -189,13 +191,14 @@ class VPBuilder { VPInstruction *createNot(VPValue *Operand, DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "") { - return createInstruction(VPInstruction::Not, {Operand}, DL, Name); + return createInstruction(VPInstruction::Not, {Operand}, {}, DL, Name); } VPInstruction *createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "") { - return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, DL, Name); + return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, {}, DL, + Name); } VPInstruction *createOr(VPValue *LHS, VPValue *RHS, @@ -210,20 +213,18 @@ class VPBuilder { VPInstruction *createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "") { - return tryInsertInstruction( - new VPInstruction(VPInstruction::LogicalAnd, {LHS, RHS}, DL, Name)); + return createNaryOp(VPInstruction::LogicalAnd, {LHS, RHS}, DL, Name); } VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "", std::optional FMFs = std::nullopt) { - auto *Select = - FMFs ? 
new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal}, - *FMFs, {}, DL, Name) - : new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal}, - DL, Name); - return tryInsertInstruction(Select); + if (!FMFs) + return createNaryOp(Instruction::Select, {Cond, TrueVal, FalseVal}, DL, + Name); + return tryInsertInstruction(new VPInstruction( + Instruction::Select, {Cond, TrueVal, FalseVal}, *FMFs, {}, DL, Name)); } /// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A @@ -306,7 +307,7 @@ class VPBuilder { const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {}) { return tryInsertInstruction( - new VPInstructionWithType(Opcode, Op, ResultTy, DL, Flags, Metadata)); + new VPInstructionWithType(Opcode, Op, ResultTy, Flags, Metadata, DL)); } VPValue *createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 10bd6cd471152..356d759b94799 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7616,14 +7616,13 @@ VPWidenMemoryRecipe *VPRecipeBuilder::tryToWidenMemory(VPInstruction *VPI, } if (VPI->getOpcode() == Instruction::Load) { auto *Load = cast(I); - return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse, - VPIRMetadata(*Load, LVer), I->getDebugLoc()); + return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse, *VPI, + VPI->getDebugLoc()); } StoreInst *Store = cast(I); return new VPWidenStoreRecipe(*Store, Ptr, VPI->getOperand(0), Mask, - Consecutive, Reverse, - VPIRMetadata(*Store, LVer), VPI->getDebugLoc()); + Consecutive, Reverse, *VPI, VPI->getDebugLoc()); } /// Creates a VPWidenIntOrFpInductionRecipe for \p PhiR. If needed, it will @@ -7751,7 +7750,7 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(VPInstruction *VPI, }, Range); if (ShouldUseVectorIntrinsic) - return new VPWidenIntrinsicRecipe(*CI, ID, Ops, CI->getType(), + return new VPWidenIntrinsicRecipe(*CI, ID, Ops, CI->getType(), *VPI, VPI->getDebugLoc()); Function *Variant = nullptr; @@ -7843,7 +7842,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(VPInstruction *VPI) { auto *SafeRHS = Builder.createSelect(Mask, Ops[1], One, VPI->getDebugLoc()); Ops[1] = SafeRHS; - return new VPWidenRecipe(*I, Ops); + return new VPWidenRecipe(*I, Ops, *VPI, VPI->getDebugLoc()); } [[fallthrough]]; } @@ -7889,7 +7888,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(VPInstruction *VPI) { // For other binops, the legacy cost model only checks the second operand. 
NewOps[1] = GetConstantViaSCEV(NewOps[1]); } - return new VPWidenRecipe(*I, NewOps); + return new VPWidenRecipe(*I, NewOps, *VPI, VPI->getDebugLoc()); } case Instruction::ExtractValue: { SmallVector NewOps(VPI->operands()); @@ -7897,7 +7896,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(VPInstruction *VPI) { assert(EVI->getNumIndices() == 1 && "Expected one extractvalue index"); unsigned Idx = EVI->getIndices()[0]; NewOps.push_back(Plan.getConstantInt(32, Idx)); - return new VPWidenRecipe(*I, NewOps); + return new VPWidenRecipe(*I, NewOps, *VPI, VPI->getDebugLoc()); } }; } @@ -7981,8 +7980,8 @@ VPReplicateRecipe *VPRecipeBuilder::handleReplication(VPInstruction *VPI, assert((Range.Start.isScalar() || !IsUniform || !IsPredicated || (Range.Start.isScalable() && isa(I))) && "Should not predicate a uniform recipe"); - auto *Recipe = new VPReplicateRecipe(I, VPI->operands(), IsUniform, - BlockInMask, VPIRMetadata(*I, LVer)); + auto *Recipe = + new VPReplicateRecipe(I, VPI->operands(), IsUniform, BlockInMask, *VPI); return Recipe; } @@ -8235,13 +8234,14 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R, return new VPWidenGEPRecipe(cast(Instr), R->operands()); if (VPI->getOpcode() == Instruction::Select) - return new VPWidenSelectRecipe(*cast(Instr), R->operands()); + return new VPWidenSelectRecipe(*cast(Instr), R->operands(), + *VPI); if (Instruction::isCast(VPI->getOpcode())) { auto *CastR = cast(R); auto *CI = cast(Instr); return new VPWidenCastRecipe(CI->getOpcode(), VPI->getOperand(0), - CastR->getResultType(), *CI); + CastR->getResultType(), *CI, *VPI); } return tryToWiden(VPI); @@ -8269,7 +8269,8 @@ VPRecipeBuilder::tryToCreatePartialReduction(VPInstruction *Reduction, SmallVector Ops; Ops.push_back(Plan.getOrAddLiveIn(Zero)); Ops.push_back(BinOp); - BinOp = new VPWidenRecipe(*ReductionI, Ops); + BinOp = new VPWidenRecipe(*ReductionI, Ops, VPIRMetadata(), + ReductionI->getDebugLoc()); Builder.insert(BinOp->getDefiningRecipe()); ReductionOpcode = Instruction::Add; } @@ -8302,7 +8303,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, // candidates built later for specific VF ranges. auto VPlan0 = VPlanTransforms::buildVPlan0( OrigLoop, *LI, Legal->getWidestInductionType(), - getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), PSE); + getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), PSE, &LVer); auto MaxVFTimes2 = MaxVF * 2; for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) { @@ -8408,7 +8409,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( // VPInstructions in the loop. // --------------------------------------------------------------------------- VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE, - Builder, BlockMaskCache, LVer); + Builder, BlockMaskCache); // TODO: Handle partial reductions with EVL tail folding. if (!CM.foldTailWithEVL()) RecipeBuilder.collectScaledReductions(Range); @@ -8453,9 +8454,9 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( Legal->isInvariantAddressOfReduction(SI->getPointerOperand())) { // Only create recipe for the final invariant store of the reduction. 
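           // (Other stores to the invariant address are redundant in the
           // vector loop and are simply erased below.)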
if (Legal->isInvariantStoreOfReduction(SI)) { - auto *Recipe = - new VPReplicateRecipe(SI, R.operands(), true /* IsUniform */, - nullptr /*Mask*/, VPIRMetadata(*SI, LVer)); + auto *Recipe = new VPReplicateRecipe( + SI, R.operands(), true /* IsUniform */, nullptr /*Mask*/, + *cast(SingleDef)); Recipe->insertBefore(*MiddleVPBB, MBIP); } R.eraseFromParent(); @@ -8606,7 +8607,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) { // addScalarResumePhis. DenseMap BlockMaskCache; VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE, - Builder, BlockMaskCache, nullptr /*LVer*/); + Builder, BlockMaskCache); for (auto &R : Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) { if (isa(&R)) continue; diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h index a7000aff06379..87280b83fc0e5 100644 --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -84,10 +84,6 @@ class VPRecipeBuilder { /// A mapping of partial reduction exit instructions to their scaling factor. DenseMap ScaledReductionMap; - /// Loop versioning instance for getting noalias metadata guaranteed by - /// runtime checks. - LoopVersioning *LVer; - /// Check if \p I can be widened at the start of \p Range and possibly /// decrease the range such that the returned value holds for the entire \p /// Range. The function should not be called for memory instructions or calls. @@ -144,11 +140,9 @@ class VPRecipeBuilder { LoopVectorizationLegality *Legal, LoopVectorizationCostModel &CM, PredicatedScalarEvolution &PSE, VPBuilder &Builder, - DenseMap &BlockMaskCache, - LoopVersioning *LVer) + DenseMap &BlockMaskCache) : Plan(Plan), OrigLoop(OrigLoop), TLI(TLI), TTI(TTI), Legal(Legal), - CM(CM), PSE(PSE), Builder(Builder), BlockMaskCache(BlockMaskCache), - LVer(LVer) {} + CM(CM), PSE(PSE), Builder(Builder), BlockMaskCache(BlockMaskCache) {} std::optional getScalingForReduction(const Instruction *ExitInst) { auto It = ScaledReductionMap.find(ExitInst); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 0932922c07126..c81834e401726 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -65,7 +65,6 @@ class VPReplicateRecipe; class VPlanSlp; class Value; class LoopVectorizationCostModel; -class LoopVersioning; struct VPCostContext; @@ -958,10 +957,6 @@ class VPIRMetadata { /// \p I. VPIRMetadata(Instruction &I) { getMetadataToPropagate(&I, Metadata); } - /// Adds metatadata that can be preserved from the original instruction - /// \p I and noalias metadata guaranteed by runtime checks using \p LVer. - VPIRMetadata(Instruction &I, LoopVersioning *LVer); - /// Copy constructor for cloning. VPIRMetadata(const VPIRMetadata &Other) = default; @@ -970,14 +965,17 @@ class VPIRMetadata { /// Add all metadata to \p I. void applyMetadata(Instruction &I) const; - /// Add metadata with kind \p Kind and \p Node. - void addMetadata(unsigned Kind, MDNode *Node) { - assert(none_of(Metadata, - [Kind](const std::pair &P) { - return P.first == Kind; - }) && - "Kind must appear at most once in Metadata"); - Metadata.emplace_back(Kind, Node); + /// Set metadata with kind \p Kind to \p Node. If metadata with \p Kind + /// already exists, it will be replaced. Otherwise, it will be added. 
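+  /// For illustration, a hypothetical caller (the names here are examples,
+  /// not part of this interface) observes the add-or-replace behavior:
+  /// \code
+  ///   VPIRMetadata MD(*Inst);
+  ///   MD.setMetadata(LLVMContext::MD_noalias, N1); // no entry yet: added
+  ///   MD.setMetadata(LLVMContext::MD_noalias, N2); // entry exists: replaced
+  /// \endcode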
+ void setMetadata(unsigned Kind, MDNode *Node) { + auto It = + llvm::find_if(Metadata, [Kind](const std::pair &P) { + return P.first == Kind; + }); + if (It != Metadata.end()) + It->second = Node; + else + Metadata.emplace_back(Kind, Node); } /// Intersect this VPIRMetada object with \p MD, keeping only metadata @@ -1117,11 +1115,7 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags, public: VPInstruction(unsigned Opcode, ArrayRef Operands, - DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "") - : VPInstruction(Opcode, Operands, {}, {}, DL, Name) {} - - VPInstruction(unsigned Opcode, ArrayRef Operands, - const VPIRFlags &Flags, const VPIRMetadata &MD = {}, + const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {}, DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = ""); VP_CLASSOF_IMPL(VPDef::VPInstructionSC) @@ -1211,14 +1205,10 @@ class VPInstructionWithType : public VPInstruction { public: VPInstructionWithType(unsigned Opcode, ArrayRef Operands, - Type *ResultTy, const VPIRFlags &Flags, DebugLoc DL, + Type *ResultTy, const VPIRFlags &Flags = {}, + const VPIRMetadata &Metadata = {}, + DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "") - : VPInstruction(Opcode, Operands, Flags, {}, DL, Name), - ResultTy(ResultTy) {} - - VPInstructionWithType(unsigned Opcode, ArrayRef Operands, - Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags, - const VPIRMetadata &Metadata, const Twine &Name = "") : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name), ResultTy(ResultTy) {} @@ -1247,7 +1237,7 @@ class VPInstructionWithType : public VPInstruction { VPInstruction *clone() override { auto *New = new VPInstructionWithType(getOpcode(), operands(), getResultType(), - *this, getDebugLoc(), getName()); + *this, *this, getDebugLoc(), getName()); New->setUnderlyingValue(getUnderlyingValue()); return New; } @@ -1331,7 +1321,7 @@ class VPPhiAccessors { struct LLVM_ABI_FOR_TEST VPPhi : public VPInstruction, public VPPhiAccessors { VPPhi(ArrayRef Operands, DebugLoc DL, const Twine &Name = "") - : VPInstruction(Instruction::PHI, Operands, DL, Name) {} + : VPInstruction(Instruction::PHI, Operands, {}, {}, DL, Name) {} static inline bool classof(const VPUser *U) { auto *VPI = dyn_cast(U); @@ -1475,9 +1465,10 @@ class LLVM_ABI_FOR_TEST VPWidenRecipe : public VPRecipeWithIRFlags, : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, Flags, DL), VPIRMetadata(Metadata), Opcode(Opcode) {} - VPWidenRecipe(Instruction &I, ArrayRef Operands) - : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I), VPIRMetadata(I), - Opcode(I.getOpcode()) {} + VPWidenRecipe(Instruction &I, ArrayRef Operands, + const VPIRMetadata &Metadata, DebugLoc DL) + : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I), + VPIRMetadata(Metadata), Opcode(I.getOpcode()) {} ~VPWidenRecipe() override = default; @@ -1518,13 +1509,12 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { public: VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, - CastInst &UI) - : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), VPIRMetadata(UI), - Opcode(Opcode), ResultTy(ResultTy) { + CastInst &UI, const VPIRMetadata &Metadata) + : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), + VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) { assert(UI.getOpcode() == Opcode && "opcode of underlying cast doesn't match"); } - VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {}, @@ -1587,18 
+1577,23 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { public: VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef CallArguments, Type *Ty, + const VPIRMetadata &MD = {}, DebugLoc DL = DebugLoc::getUnknown()) : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, CI), - VPIRMetadata(CI), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty), + VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty), MayReadFromMemory(CI.mayReadFromMemory()), MayWriteToMemory(CI.mayWriteToMemory()), MayHaveSideEffects(CI.mayHaveSideEffects()) {} VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef CallArguments, Type *Ty, + const VPIRFlags &Flags = {}, + const VPIRMetadata &Metadata = {}, DebugLoc DL = DebugLoc::getUnknown()) - : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, DL), - VPIRMetadata(), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty) { + : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, Flags, + DL), + VPIRMetadata(Metadata), VectorIntrinsicID(VectorIntrinsicID), + ResultTy(Ty) { LLVMContext &Ctx = Ty->getContext(); AttributeSet Attrs = Intrinsic::getFnAttributes(Ctx, VectorIntrinsicID); MemoryEffects ME = Attrs.getMemoryEffects(); @@ -1614,9 +1609,10 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { VPWidenIntrinsicRecipe *clone() override { if (Value *CI = getUnderlyingValue()) return new VPWidenIntrinsicRecipe(*cast(CI), VectorIntrinsicID, - operands(), ResultTy, getDebugLoc()); + operands(), ResultTy, *this, + getDebugLoc()); return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy, - getDebugLoc()); + *this, *this, getDebugLoc()); } VP_CLASSOF_IMPL(VPDef::VPWidenIntrinsicSC) @@ -1757,15 +1753,16 @@ class VPHistogramRecipe : public VPRecipeBase { /// instruction. struct LLVM_ABI_FOR_TEST VPWidenSelectRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { - VPWidenSelectRecipe(SelectInst &I, ArrayRef Operands) + VPWidenSelectRecipe(SelectInst &I, ArrayRef Operands, + const VPIRMetadata &MD = {}) : VPRecipeWithIRFlags(VPDef::VPWidenSelectSC, Operands, I), - VPIRMetadata(I) {} + VPIRMetadata(MD) {} ~VPWidenSelectRecipe() override = default; VPWidenSelectRecipe *clone() override { return new VPWidenSelectRecipe(*cast(getUnderlyingInstr()), - operands()); + operands(), *this); } VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC) diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 4ffd5577d31a4..612202d049774 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -22,6 +22,7 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/Transforms/Utils/LoopVersioning.h" #define DEBUG_TYPE "vplan" @@ -37,6 +38,9 @@ class PlainCFGBuilder { // Loop Info analysis. LoopInfo *LI; + // Loop versioning for alias metadata. + LoopVersioning *LVer; + // Vectorization plan that we are working on. std::unique_ptr Plan; @@ -65,8 +69,8 @@ class PlainCFGBuilder { void createVPInstructionsForVPBB(VPBasicBlock *VPBB, BasicBlock *BB); public: - PlainCFGBuilder(Loop *Lp, LoopInfo *LI) - : TheLoop(Lp), LI(LI), Plan(std::make_unique(Lp)) {} + PlainCFGBuilder(Loop *Lp, LoopInfo *LI, LoopVersioning *LVer) + : TheLoop(Lp), LI(LI), LVer(LVer), Plan(std::make_unique(Lp)) {} /// Build plain CFG for TheLoop and connect it to Plan's entry. 
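+  /// Loads and stores are tagged with the alias-scope/noalias metadata
+  /// provided by LVer, when available.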
std::unique_ptr buildPlainCFG(); @@ -186,7 +190,8 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB, // recipes. if (Br->isConditional()) { VPValue *Cond = getOrCreateVPOperand(Br->getCondition()); - VPIRBuilder.createNaryOp(VPInstruction::BranchOnCond, {Cond}, Inst); + VPIRBuilder.createNaryOp(VPInstruction::BranchOnCond, {Cond}, Inst, + VPIRMetadata(*Inst), Inst->getDebugLoc()); } // Skip the rest of the Instruction processing for Branch instructions. @@ -200,7 +205,8 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB, SmallVector Ops = {getOrCreateVPOperand(SI->getCondition())}; for (auto Case : SI->cases()) Ops.push_back(getOrCreateVPOperand(Case.getCaseValue())); - VPIRBuilder.createNaryOp(Instruction::Switch, Ops, Inst); + VPIRBuilder.createNaryOp(Instruction::Switch, Ops, Inst, + VPIRMetadata(*Inst), Inst->getDebugLoc()); continue; } @@ -228,6 +234,18 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB, VPPredToIncomingValue.lookup(Pred->getExitingBasicBlock())); } } else { + // Build VPIRMetadata from the instruction and add loop versioning + // metadata for loads and stores. + VPIRMetadata MD(*Inst); + if (isa(Inst) && LVer) { + const auto &[AliasScopeMD, NoAliasMD] = + LVer->getNoAliasMetadataFor(Inst); + if (AliasScopeMD) + MD.setMetadata(LLVMContext::MD_alias_scope, AliasScopeMD); + if (NoAliasMD) + MD.setMetadata(LLVMContext::MD_noalias, NoAliasMD); + } + // Translate LLVM-IR operands into VPValue operands and set them in the // new VPInstruction. SmallVector VPOperands; @@ -236,12 +254,14 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB, if (auto *CI = dyn_cast(Inst)) { NewR = VPIRBuilder.createScalarCast(CI->getOpcode(), VPOperands[0], - CI->getType(), CI->getDebugLoc()); + CI->getType(), CI->getDebugLoc(), + {}, MD); NewR->setUnderlyingValue(CI); } else { // Build VPInstruction for any arbitrary Instruction without specific // representation in VPlan. 
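+        // Thread through the metadata collected above (including any noalias
+        // scopes from LVer) along with the instruction's debug location.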
- NewR = VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst); + NewR = VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst, MD, + Inst->getDebugLoc()); } } @@ -537,8 +557,9 @@ static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL, std::unique_ptr VPlanTransforms::buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, - DebugLoc IVDL, PredicatedScalarEvolution &PSE) { - PlainCFGBuilder Builder(TheLoop, &LI); + DebugLoc IVDL, PredicatedScalarEvolution &PSE, + LoopVersioning *LVer) { + PlainCFGBuilder Builder(TheLoop, &LI, LVer); std::unique_ptr VPlan0 = Builder.buildPlainCFG(); addInitialSkeleton(*VPlan0, InductionTy, IVDL, PSE, TheLoop); return VPlan0; @@ -672,7 +693,7 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond, MDBuilder MDB(Plan.getContext()); MDNode *BranchWeights = MDB.createBranchWeights(CheckBypassWeights, /*IsExpected=*/false); - Term->addMetadata(LLVMContext::MD_prof, BranchWeights); + Term->setMetadata(LLVMContext::MD_prof, BranchWeights); } } @@ -756,7 +777,7 @@ void VPlanTransforms::addMinimumIterationCheck( MDBuilder MDB(Plan.getContext()); MDNode *BranchWeights = MDB.createBranchWeights( ArrayRef(MinItersBypassWeights, 2), /*IsExpected=*/false); - Term->addMetadata(LLVMContext::MD_prof, BranchWeights); + Term->setMetadata(LLVMContext::MD_prof, BranchWeights); } } @@ -793,7 +814,7 @@ void VPlanTransforms::addMinimumVectorEpilogueIterationCheck( MDBuilder MDB(Plan.getContext()); MDNode *BranchWeights = MDB.createBranchWeights(Weights, /*IsExpected=*/false); - Branch->addMetadata(LLVMContext::MD_prof, BranchWeights); + Branch->setMetadata(LLVMContext::MD_prof, BranchWeights); } /// If \p RedPhiR is used by a ComputeReductionResult recipe, return it. diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index e2a8e495d5ed5..fca6554ad77c6 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -36,7 +36,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/LoopUtils.h" -#include "llvm/Transforms/Utils/LoopVersioning.h" #include using namespace llvm; @@ -1674,17 +1673,6 @@ void VPIRPhi::printRecipe(raw_ostream &O, const Twine &Indent, } #endif -VPIRMetadata::VPIRMetadata(Instruction &I, LoopVersioning *LVer) - : VPIRMetadata(I) { - if (!LVer || !isa(&I)) - return; - const auto &[AliasScopeMD, NoAliasMD] = LVer->getNoAliasMetadataFor(&I); - if (AliasScopeMD) - Metadata.emplace_back(LLVMContext::MD_alias_scope, AliasScopeMD); - if (NoAliasMD) - Metadata.emplace_back(LLVMContext::MD_noalias, NoAliasMD); -} - void VPIRMetadata::applyMetadata(Instruction &I) const { for (const auto &[Kind, Node] : Metadata) I.setMetadata(Kind, Node); diff --git a/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp b/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp index 1453c6623625b..3b5cc9fcb9820 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp @@ -517,7 +517,8 @@ VPInstruction *VPlanSlp::buildGraph(ArrayRef Values) { assert(CombinedOperands.size() > 0 && "Need more some operands"); auto *Inst = cast(Values[0])->getUnderlyingInstr(); - auto *VPI = new VPInstruction(Opcode, CombinedOperands, Inst->getDebugLoc()); + auto *VPI = + new VPInstruction(Opcode, CombinedOperands, {}, {}, Inst->getDebugLoc()); LLVM_DEBUG(dbgs() << "Create VPInstruction " << *VPI << " " << Values[0] << "\n"); diff --git 
a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index bbeb447de45cb..89118b49bed44 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -85,20 +85,19 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes( Ingredient.getDebugLoc()); } } else { - assert(isa(&Ingredient) && - "only VPInstructions expected here"); + auto *VPI = cast(&Ingredient); assert(!isa(Inst) && "phis should be handled above"); // Create VPWidenMemoryRecipe for loads and stores. if (LoadInst *Load = dyn_cast(Inst)) { NewRecipe = new VPWidenLoadRecipe( *Load, Ingredient.getOperand(0), nullptr /*Mask*/, - false /*Consecutive*/, false /*Reverse*/, VPIRMetadata(*Load), + false /*Consecutive*/, false /*Reverse*/, *VPI, Ingredient.getDebugLoc()); } else if (StoreInst *Store = dyn_cast(Inst)) { NewRecipe = new VPWidenStoreRecipe( *Store, Ingredient.getOperand(1), Ingredient.getOperand(0), - nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/, - VPIRMetadata(*Store), Ingredient.getDebugLoc()); + nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/, *VPI, + Ingredient.getDebugLoc()); } else if (GetElementPtrInst *GEP = dyn_cast(Inst)) { NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands()); } else if (CallInst *CI = dyn_cast(Inst)) { @@ -107,15 +106,17 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes( return false; NewRecipe = new VPWidenIntrinsicRecipe( *CI, getVectorIntrinsicIDForCall(CI, &TLI), - drop_end(Ingredient.operands()), CI->getType(), + drop_end(Ingredient.operands()), CI->getType(), *VPI, CI->getDebugLoc()); } else if (SelectInst *SI = dyn_cast(Inst)) { - NewRecipe = new VPWidenSelectRecipe(*SI, Ingredient.operands()); + NewRecipe = new VPWidenSelectRecipe(*SI, Ingredient.operands(), *VPI); } else if (auto *CI = dyn_cast(Inst)) { - NewRecipe = new VPWidenCastRecipe( - CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), *CI); + NewRecipe = + new VPWidenCastRecipe(CI->getOpcode(), Ingredient.getOperand(0), + CI->getType(), *CI, *VPI); } else { - NewRecipe = new VPWidenRecipe(*Inst, Ingredient.operands()); + NewRecipe = new VPWidenRecipe(*Inst, Ingredient.operands(), *VPI, + Ingredient.getDebugLoc()); } } @@ -1705,8 +1706,9 @@ static bool tryToReplaceALMWithWideALM(VPlan &Plan, ElementCount VF, Ops.append({ALM, Plan.getOrAddLiveIn( ConstantInt::get(IntegerType::getInt64Ty(Ctx), VF.getKnownMinValue() * Part))}); - auto *Ext = new VPWidenIntrinsicRecipe(Intrinsic::vector_extract, Ops, - IntegerType::getInt1Ty(Ctx), DL); + auto *Ext = + new VPWidenIntrinsicRecipe(Intrinsic::vector_extract, Ops, + IntegerType::getInt1Ty(Ctx), {}, {}, DL); Extracts[Part] = Ext; Ext->insertAfter(ALM); } @@ -1845,7 +1847,7 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF, // The vector region contains header phis for which we cannot remove the // loop region yet. 
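     // Instead, branch on a constant true condition so that the latch
     // unconditionally exits the region.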
auto *BOC = new VPInstruction(VPInstruction::BranchOnCond, {Plan.getTrue()}, - Term->getDebugLoc()); + {}, {}, Term->getDebugLoc()); ExitingVPBB->appendRecipe(BOC); } @@ -2679,13 +2681,13 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask, m_Select(m_Specific(HeaderMask), m_VPValue(LHS), m_VPValue(RHS)))) return new VPWidenIntrinsicRecipe( Intrinsic::vp_merge, {Plan->getTrue(), LHS, RHS, &EVL}, - TypeInfo.inferScalarType(LHS), CurRecipe.getDebugLoc()); + TypeInfo.inferScalarType(LHS), {}, {}, CurRecipe.getDebugLoc()); if (match(&CurRecipe, m_Select(m_RemoveMask(HeaderMask, Mask), m_VPValue(LHS), m_VPValue(RHS)))) return new VPWidenIntrinsicRecipe( Intrinsic::vp_merge, {Mask, LHS, RHS, &EVL}, - TypeInfo.inferScalarType(LHS), CurRecipe.getDebugLoc()); + TypeInfo.inferScalarType(LHS), {}, {}, CurRecipe.getDebugLoc()); return nullptr; } @@ -2753,7 +2755,8 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) { VPWidenIntrinsicRecipe *VPSplice = new VPWidenIntrinsicRecipe( Intrinsic::experimental_vp_splice, {V1, V2, Imm, Plan.getTrue(), PrevEVL, &EVL}, - TypeInfo.inferScalarType(R.getVPSingleValue()), R.getDebugLoc()); + TypeInfo.inferScalarType(R.getVPSingleValue()), {}, {}, + R.getDebugLoc()); VPSplice->insertBefore(&R); R.getVPSingleValue()->replaceAllUsesWith(VPSplice); ToErase.push_back(&R); @@ -4458,7 +4461,7 @@ void VPlanTransforms::addBranchWeightToMiddleTerminator( MDBuilder MDB(Plan.getContext()); MDNode *BranchWeights = MDB.createBranchWeights({1, VectorStep - 1}, /*IsExpected=*/false); - MiddleTerm->addMetadata(LLVMContext::MD_prof, BranchWeights); + MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights); } /// Create and return a ResumePhi for \p WideIV, unless it is truncated. If the diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index e3bde8a47dcbc..a44a4f69c917b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -23,6 +23,7 @@ namespace llvm { class InductionDescriptor; class Instruction; +class LoopVersioning; class PHINode; class ScalarEvolution; class PredicatedScalarEvolution; @@ -99,7 +100,7 @@ struct VPlanTransforms { /// >[ ] <-- original loop exit block(s), wrapped in VPIRBasicBlocks. LLVM_ABI_FOR_TEST static std::unique_ptr buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, DebugLoc IVDL, - PredicatedScalarEvolution &PSE); + PredicatedScalarEvolution &PSE, LoopVersioning *LVer = nullptr); /// Update \p Plan to account for all early exits. 
LLVM_ABI_FOR_TEST static void handleEarlyExits(VPlan &Plan, diff --git a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll index 317feb5ad9ad0..0ef2b31d00daa 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll @@ -22,17 +22,16 @@ define <2 x i16> @test0(ptr %i16_ptr, i64 %inc) { define <2 x i16> @test1(ptr %v2i16_ptr) { ; CHECK-LE-LABEL: test1: ; CHECK-LE: // %bb.0: -; CHECK-LE-NEXT: ld1 { v0.h }[0], [x0] -; CHECK-LE-NEXT: add x8, x0, #2 -; CHECK-LE-NEXT: ld1 { v0.h }[2], [x8] +; CHECK-LE-NEXT: ldr s0, [x0] +; CHECK-LE-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-LE-NEXT: ret ; ; CHECK-BE-LABEL: test1: ; CHECK-BE: // %bb.0: -; CHECK-BE-NEXT: ld1 { v0.h }[0], [x0] -; CHECK-BE-NEXT: add x8, x0, #2 -; CHECK-BE-NEXT: ld1 { v0.h }[2], [x8] +; CHECK-BE-NEXT: ldr s0, [x0] +; CHECK-BE-NEXT: rev32 v0.4h, v0.4h +; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-BE-NEXT: rev64 v0.2s, v0.2s ; CHECK-BE-NEXT: ret %v2i16 = load <2 x i16>, ptr %v2i16_ptr @@ -66,17 +65,18 @@ define <2 x i16> @test2(ptr %i16_ptr, i64 %inc) { define <2 x i8> @test3(ptr %v2i8_ptr) { ; CHECK-LE-LABEL: test3: ; CHECK-LE: // %bb.0: -; CHECK-LE-NEXT: ld1 { v0.b }[0], [x0] -; CHECK-LE-NEXT: add x8, x0, #1 -; CHECK-LE-NEXT: ld1 { v0.b }[4], [x8] +; CHECK-LE-NEXT: ldr h0, [x0] +; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-LE-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-LE-NEXT: ret ; ; CHECK-BE-LABEL: test3: ; CHECK-BE: // %bb.0: -; CHECK-BE-NEXT: ld1 { v0.b }[0], [x0] -; CHECK-BE-NEXT: add x8, x0, #1 -; CHECK-BE-NEXT: ld1 { v0.b }[4], [x8] +; CHECK-BE-NEXT: ldr h0, [x0] +; CHECK-BE-NEXT: rev16 v0.8b, v0.8b +; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-BE-NEXT: rev64 v0.2s, v0.2s ; CHECK-BE-NEXT: ret %v2i8 = load <2 x i8>, ptr %v2i8_ptr @@ -105,19 +105,18 @@ define <4 x i8> @test4(ptr %v4i8_ptr) { define <2 x i32> @fsext_v2i32(ptr %a) { ; CHECK-LE-LABEL: fsext_v2i32: ; CHECK-LE: // %bb.0: -; CHECK-LE-NEXT: ldrsb w8, [x0] -; CHECK-LE-NEXT: ldrsb w9, [x0, #1] -; CHECK-LE-NEXT: fmov s0, w8 -; CHECK-LE-NEXT: mov v0.s[1], w9 +; CHECK-LE-NEXT: ldr h0, [x0] +; CHECK-LE-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-LE-NEXT: sshll v0.4s, v0.4h, #0 ; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-LE-NEXT: ret ; ; CHECK-BE-LABEL: fsext_v2i32: ; CHECK-BE: // %bb.0: -; CHECK-BE-NEXT: ldrsb w8, [x0] -; CHECK-BE-NEXT: ldrsb w9, [x0, #1] -; CHECK-BE-NEXT: fmov s0, w8 -; CHECK-BE-NEXT: mov v0.s[1], w9 +; CHECK-BE-NEXT: ldr h0, [x0] +; CHECK-BE-NEXT: rev16 v0.8b, v0.8b +; CHECK-BE-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-BE-NEXT: sshll v0.4s, v0.4h, #0 ; CHECK-BE-NEXT: rev64 v0.2s, v0.2s ; CHECK-BE-NEXT: ret %x = load <2 x i8>, ptr %a @@ -249,19 +248,18 @@ define i32 @loadExti32(ptr %ref) { define <2 x i16> @fsext_v2i16(ptr %a) { ; CHECK-LE-LABEL: fsext_v2i16: ; CHECK-LE: // %bb.0: -; CHECK-LE-NEXT: ldrsb w8, [x0] -; CHECK-LE-NEXT: ldrsb w9, [x0, #1] -; CHECK-LE-NEXT: fmov s0, w8 -; CHECK-LE-NEXT: mov v0.s[1], w9 +; CHECK-LE-NEXT: ldr h0, [x0] +; CHECK-LE-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-LE-NEXT: sshll v0.4s, v0.4h, #0 ; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-LE-NEXT: ret ; ; CHECK-BE-LABEL: fsext_v2i16: ; CHECK-BE: // %bb.0: -; CHECK-BE-NEXT: ldrsb w8, [x0] -; CHECK-BE-NEXT: ldrsb w9, [x0, #1] -; CHECK-BE-NEXT: fmov s0, w8 -; CHECK-BE-NEXT: mov v0.s[1], w9 +; CHECK-BE-NEXT: ldr 
h0, [x0] +; CHECK-BE-NEXT: rev16 v0.8b, v0.8b +; CHECK-BE-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-BE-NEXT: sshll v0.4s, v0.4h, #0 ; CHECK-BE-NEXT: rev64 v0.2s, v0.2s ; CHECK-BE-NEXT: ret %x = load <2 x i8>, ptr %a @@ -497,3 +495,213 @@ define <4 x i8> @strict_align_unaligned(ptr %v4i8_ptr) "target-features"="+stric %v4i8 = load <4 x i8>, ptr %v4i8_ptr, align 1 ret <4 x i8> %v4i8 } + +define <2 x i16> @zext_v2i8_v2i16(ptr %a) { +; CHECK-LE-LABEL: zext_v2i8_v2i16: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: ldr h0, [x0] +; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-LE-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: zext_v2i8_v2i16: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: ldr h0, [x0] +; CHECK-BE-NEXT: rev16 v0.8b, v0.8b +; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-BE-NEXT: rev64 v0.2s, v0.2s +; CHECK-BE-NEXT: ret + %x = load <2 x i8>, ptr %a + %y = zext <2 x i8> %x to <2 x i16> + ret <2 x i16> %y +} + +define <2 x i32> @zext_v2i8_v2i32(ptr %a) { +; CHECK-LE-LABEL: zext_v2i8_v2i32: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: ldr h0, [x0] +; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-LE-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: zext_v2i8_v2i32: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: ldr h0, [x0] +; CHECK-BE-NEXT: rev16 v0.8b, v0.8b +; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-BE-NEXT: rev64 v0.2s, v0.2s +; CHECK-BE-NEXT: ret + %x = load <2 x i8>, ptr %a + %y = zext <2 x i8> %x to <2 x i32> + ret <2 x i32> %y +} + +define <2 x i64> @zext_v2i8_v2i64(ptr %a) { +; CHECK-LE-LABEL: zext_v2i8_v2i64: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: ldr h0, [x0] +; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-LE-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-LE-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: zext_v2i8_v2i64: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: ldr h0, [x0] +; CHECK-BE-NEXT: rev16 v0.8b, v0.8b +; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-BE-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-BE-NEXT: ret + %x = load <2 x i8>, ptr %a + %y = zext <2 x i8> %x to <2 x i64> + ret <2 x i64> %y +} + +define <2 x i32> @zext_v2i16_v2i32(ptr %a) { +; CHECK-LE-LABEL: zext_v2i16_v2i32: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: ldr s0, [x0] +; CHECK-LE-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: zext_v2i16_v2i32: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: ldr s0, [x0] +; CHECK-BE-NEXT: rev32 v0.4h, v0.4h +; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-BE-NEXT: rev64 v0.2s, v0.2s +; CHECK-BE-NEXT: ret + %x = load <2 x i16>, ptr %a + %y = zext <2 x i16> %x to <2 x i32> + ret <2 x i32> %y +} + +define <2 x i64> @zext_v2i16_v2i64(ptr %a) { +; CHECK-LE-LABEL: zext_v2i16_v2i64: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: ldr s0, [x0] +; CHECK-LE-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-LE-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: zext_v2i16_v2i64: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: ldr s0, [x0] +; CHECK-BE-NEXT: rev32 v0.4h, v0.4h +; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-BE-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-BE-NEXT: ret + %x = load <2 x i16>, ptr %a + %y = zext <2 x i16> %x to 
<2 x i64> + ret <2 x i64> %y +} + +define <4 x i32> @zext_v4i16_v4i32(ptr %a) { +; CHECK-LE-LABEL: zext_v4i16_v4i32: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: ldr d0, [x0] +; CHECK-LE-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: zext_v4i16_v4i32: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: ld1 { v0.4h }, [x0] +; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-BE-NEXT: rev64 v0.4s, v0.4s +; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-BE-NEXT: ret + %x = load <4 x i16>, ptr %a + %y = zext <4 x i16> %x to <4 x i32> + ret <4 x i32> %y +} + +define <2 x i64> @sext_v2i8_v2i64(ptr %a) { +; CHECK-LE-LABEL: sext_v2i8_v2i64: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: ldr h0, [x0] +; CHECK-LE-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-LE-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-LE-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: sext_v2i8_v2i64: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: ldr h0, [x0] +; CHECK-BE-NEXT: rev16 v0.8b, v0.8b +; CHECK-BE-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-BE-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-BE-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-BE-NEXT: ret + %x = load <2 x i8>, ptr %a + %y = sext <2 x i8> %x to <2 x i64> + ret <2 x i64> %y +} + +define <2 x i32> @sext_v2i16_v2i32(ptr %a) { +; CHECK-LE-LABEL: sext_v2i16_v2i32: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: ldr s0, [x0] +; CHECK-LE-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: sext_v2i16_v2i32: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: ldr s0, [x0] +; CHECK-BE-NEXT: rev32 v0.4h, v0.4h +; CHECK-BE-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-BE-NEXT: rev64 v0.2s, v0.2s +; CHECK-BE-NEXT: ret + %x = load <2 x i16>, ptr %a + %y = sext <2 x i16> %x to <2 x i32> + ret <2 x i32> %y +} + +define <2 x i64> @sext_v2i16_v2i64(ptr %a) { +; CHECK-LE-LABEL: sext_v2i16_v2i64: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: ldr s0, [x0] +; CHECK-LE-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-LE-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: sext_v2i16_v2i64: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: ldr s0, [x0] +; CHECK-BE-NEXT: rev32 v0.4h, v0.4h +; CHECK-BE-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-BE-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-BE-NEXT: ret + %x = load <2 x i16>, ptr %a + %y = sext <2 x i16> %x to <2 x i64> + ret <2 x i64> %y +} + +define <4 x i32> @sext_v4i16_v4i32(ptr %a) { +; CHECK-LE-LABEL: sext_v4i16_v4i32: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: ldr d0, [x0] +; CHECK-LE-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: sext_v4i16_v4i32: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: ld1 { v0.4h }, [x0] +; CHECK-BE-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-BE-NEXT: rev64 v0.4s, v0.4s +; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-BE-NEXT: ret + %x = load <4 x i16>, ptr %a + %y = sext <4 x i16> %x to <4 x i32> + ret <4 x i32> %y +} diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll index e85e808921c87..a302ddf483caa 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll @@ -219,21 +219,17 @@ define <4 x i32> @smull_zext_v4i16_v4i32(ptr %A, ptr %B) nounwind { define <2 x i64> @smull_zext_v2i32_v2i64(ptr %A, ptr %B) nounwind { ; CHECK-NEON-LABEL: smull_zext_v2i32_v2i64: ; CHECK-NEON: // %bb.0: -; CHECK-NEON-NEXT: ldrh w8, [x0, #2] -; CHECK-NEON-NEXT: ldr h0, [x0] +; 
CHECK-NEON-NEXT: ldr s0, [x0] ; CHECK-NEON-NEXT: ldr d1, [x1] -; CHECK-NEON-NEXT: mov v0.d[1], x8 -; CHECK-NEON-NEXT: xtn v0.2s, v0.2d +; CHECK-NEON-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEON-NEXT: smull v0.2d, v0.2s, v1.2s ; CHECK-NEON-NEXT: ret ; ; CHECK-SVE-LABEL: smull_zext_v2i32_v2i64: ; CHECK-SVE: // %bb.0: -; CHECK-SVE-NEXT: ldrh w8, [x0, #2] -; CHECK-SVE-NEXT: ldr h0, [x0] +; CHECK-SVE-NEXT: ldr s0, [x0] ; CHECK-SVE-NEXT: ldr d1, [x1] -; CHECK-SVE-NEXT: mov v0.d[1], x8 -; CHECK-SVE-NEXT: xtn v0.2s, v0.2d +; CHECK-SVE-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-SVE-NEXT: smull v0.2d, v0.2s, v1.2s ; CHECK-SVE-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/add.ll b/llvm/test/CodeGen/AArch64/add.ll index 96168cb80196f..7502db4c5aa93 100644 --- a/llvm/test/CodeGen/AArch64/add.ll +++ b/llvm/test/CodeGen/AArch64/add.ll @@ -56,13 +56,11 @@ entry: define void @v2i8(ptr %p1, ptr %p2) { ; CHECK-SD-LABEL: v2i8: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ld1 { v0.b }[0], [x0] -; CHECK-SD-NEXT: ld1 { v1.b }[0], [x1] -; CHECK-SD-NEXT: add x8, x0, #1 -; CHECK-SD-NEXT: add x9, x1, #1 -; CHECK-SD-NEXT: ld1 { v0.b }[4], [x8] -; CHECK-SD-NEXT: ld1 { v1.b }[4], [x9] -; CHECK-SD-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ldr h1, [x1] +; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-SD-NEXT: uaddl v0.4s, v0.4h, v1.4h ; CHECK-SD-NEXT: mov s1, v0.s[1] ; CHECK-SD-NEXT: str b0, [x0] ; CHECK-SD-NEXT: stur b1, [x0, #1] @@ -101,10 +99,9 @@ define void @v3i8(ptr %p1, ptr %p2) { ; CHECK-SD-NEXT: add v0.4h, v0.4h, v1.4h ; CHECK-SD-NEXT: uzp1 v1.8b, v0.8b, v0.8b ; CHECK-SD-NEXT: mov h0, v0.h[2] -; CHECK-SD-NEXT: str s1, [sp, #12] -; CHECK-SD-NEXT: ldrh w8, [sp, #12] +; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-SD-NEXT: stur b0, [x0, #2] -; CHECK-SD-NEXT: strh w8, [x0] +; CHECK-SD-NEXT: str h1, [x0] ; CHECK-SD-NEXT: add sp, sp, #16 ; CHECK-SD-NEXT: ret ; @@ -228,13 +225,9 @@ entry: define void @v2i16(ptr %p1, ptr %p2) { ; CHECK-SD-LABEL: v2i16: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ld1 { v0.h }[0], [x0] -; CHECK-SD-NEXT: ld1 { v1.h }[0], [x1] -; CHECK-SD-NEXT: add x8, x0, #2 -; CHECK-SD-NEXT: add x9, x1, #2 -; CHECK-SD-NEXT: ld1 { v0.h }[2], [x8] -; CHECK-SD-NEXT: ld1 { v1.h }[2], [x9] -; CHECK-SD-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ldr s1, [x1] +; CHECK-SD-NEXT: uaddl v0.4s, v0.4h, v1.4h ; CHECK-SD-NEXT: mov s1, v0.s[1] ; CHECK-SD-NEXT: str h0, [x0] ; CHECK-SD-NEXT: str h1, [x0, #2] diff --git a/llvm/test/CodeGen/AArch64/andorxor.ll b/llvm/test/CodeGen/AArch64/andorxor.ll index a7875dbebd0e6..d8d003c85eed6 100644 --- a/llvm/test/CodeGen/AArch64/andorxor.ll +++ b/llvm/test/CodeGen/AArch64/andorxor.ll @@ -176,12 +176,12 @@ entry: define void @and_v2i8(ptr %p1, ptr %p2) { ; CHECK-SD-LABEL: and_v2i8: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ld1 { v0.b }[0], [x0] -; CHECK-SD-NEXT: ld1 { v1.b }[0], [x1] -; CHECK-SD-NEXT: add x8, x0, #1 -; CHECK-SD-NEXT: add x9, x1, #1 -; CHECK-SD-NEXT: ld1 { v0.b }[4], [x8] -; CHECK-SD-NEXT: ld1 { v1.b }[4], [x9] +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ldr h1, [x1] +; CHECK-SD-NEXT: zip1 v0.8b, v0.8b, v0.8b +; CHECK-SD-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-SD-NEXT: zip1 v0.4h, v0.4h, v0.4h ; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b ; CHECK-SD-NEXT: mov s1, v0.s[1] ; CHECK-SD-NEXT: str b0, [x0] @@ -212,12 +212,12 @@ entry: define void @or_v2i8(ptr %p1, ptr %p2) { ; CHECK-SD-LABEL: or_v2i8: ; 
CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ld1 { v0.b }[0], [x0] -; CHECK-SD-NEXT: ld1 { v1.b }[0], [x1] -; CHECK-SD-NEXT: add x8, x0, #1 -; CHECK-SD-NEXT: add x9, x1, #1 -; CHECK-SD-NEXT: ld1 { v0.b }[4], [x8] -; CHECK-SD-NEXT: ld1 { v1.b }[4], [x9] +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ldr h1, [x1] +; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-SD-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-SD-NEXT: mov s1, v0.s[1] ; CHECK-SD-NEXT: str b0, [x0] @@ -248,12 +248,12 @@ entry: define void @xor_v2i8(ptr %p1, ptr %p2) { ; CHECK-SD-LABEL: xor_v2i8: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ld1 { v0.b }[0], [x0] -; CHECK-SD-NEXT: ld1 { v1.b }[0], [x1] -; CHECK-SD-NEXT: add x8, x0, #1 -; CHECK-SD-NEXT: add x9, x1, #1 -; CHECK-SD-NEXT: ld1 { v0.b }[4], [x8] -; CHECK-SD-NEXT: ld1 { v1.b }[4], [x9] +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ldr h1, [x1] +; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-SD-NEXT: eor v0.8b, v0.8b, v1.8b ; CHECK-SD-NEXT: mov s1, v0.s[1] ; CHECK-SD-NEXT: str b0, [x0] @@ -293,10 +293,9 @@ define void @and_v3i8(ptr %p1, ptr %p2) { ; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b ; CHECK-SD-NEXT: uzp1 v1.8b, v0.8b, v0.8b ; CHECK-SD-NEXT: mov h0, v0.h[2] -; CHECK-SD-NEXT: str s1, [sp, #12] -; CHECK-SD-NEXT: ldrh w8, [sp, #12] +; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-SD-NEXT: stur b0, [x0, #2] -; CHECK-SD-NEXT: strh w8, [x0] +; CHECK-SD-NEXT: str h1, [x0] ; CHECK-SD-NEXT: add sp, sp, #16 ; CHECK-SD-NEXT: ret ; @@ -345,10 +344,9 @@ define void @or_v3i8(ptr %p1, ptr %p2) { ; CHECK-SD-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-SD-NEXT: uzp1 v1.8b, v0.8b, v0.8b ; CHECK-SD-NEXT: mov h0, v0.h[2] -; CHECK-SD-NEXT: str s1, [sp, #12] -; CHECK-SD-NEXT: ldrh w8, [sp, #12] +; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-SD-NEXT: stur b0, [x0, #2] -; CHECK-SD-NEXT: strh w8, [x0] +; CHECK-SD-NEXT: str h1, [x0] ; CHECK-SD-NEXT: add sp, sp, #16 ; CHECK-SD-NEXT: ret ; @@ -397,10 +395,9 @@ define void @xor_v3i8(ptr %p1, ptr %p2) { ; CHECK-SD-NEXT: eor v0.8b, v0.8b, v1.8b ; CHECK-SD-NEXT: uzp1 v1.8b, v0.8b, v0.8b ; CHECK-SD-NEXT: mov h0, v0.h[2] -; CHECK-SD-NEXT: str s1, [sp, #12] -; CHECK-SD-NEXT: ldrh w8, [sp, #12] +; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-SD-NEXT: stur b0, [x0, #2] -; CHECK-SD-NEXT: strh w8, [x0] +; CHECK-SD-NEXT: str h1, [x0] ; CHECK-SD-NEXT: add sp, sp, #16 ; CHECK-SD-NEXT: ret ; @@ -698,12 +695,10 @@ entry: define void @and_v2i16(ptr %p1, ptr %p2) { ; CHECK-SD-LABEL: and_v2i16: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ld1 { v0.h }[0], [x0] -; CHECK-SD-NEXT: ld1 { v1.h }[0], [x1] -; CHECK-SD-NEXT: add x8, x0, #2 -; CHECK-SD-NEXT: add x9, x1, #2 -; CHECK-SD-NEXT: ld1 { v0.h }[2], [x8] -; CHECK-SD-NEXT: ld1 { v1.h }[2], [x9] +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ldr s1, [x1] +; CHECK-SD-NEXT: zip1 v0.4h, v0.4h, v0.4h +; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b ; CHECK-SD-NEXT: mov s1, v0.s[1] ; CHECK-SD-NEXT: str h0, [x0] @@ -734,12 +729,10 @@ entry: define void @or_v2i16(ptr %p1, ptr %p2) { ; CHECK-SD-LABEL: or_v2i16: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ld1 { v0.h }[0], [x0] -; CHECK-SD-NEXT: ld1 { v1.h }[0], [x1] -; CHECK-SD-NEXT: add x8, x0, #2 -; CHECK-SD-NEXT: add x9, x1, #2 -; CHECK-SD-NEXT: ld1 { v0.h }[2], [x8] -; 
CHECK-SD-NEXT: ld1 { v1.h }[2], [x9] +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ldr s1, [x1] +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-SD-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-SD-NEXT: mov s1, v0.s[1] ; CHECK-SD-NEXT: str h0, [x0] @@ -770,12 +763,10 @@ entry: define void @xor_v2i16(ptr %p1, ptr %p2) { ; CHECK-SD-LABEL: xor_v2i16: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ld1 { v0.h }[0], [x0] -; CHECK-SD-NEXT: ld1 { v1.h }[0], [x1] -; CHECK-SD-NEXT: add x8, x0, #2 -; CHECK-SD-NEXT: add x9, x1, #2 -; CHECK-SD-NEXT: ld1 { v0.h }[2], [x8] -; CHECK-SD-NEXT: ld1 { v1.h }[2], [x9] +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ldr s1, [x1] +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-SD-NEXT: eor v0.8b, v0.8b, v1.8b ; CHECK-SD-NEXT: mov s1, v0.s[1] ; CHECK-SD-NEXT: str h0, [x0] diff --git a/llvm/test/CodeGen/AArch64/bitcast.ll b/llvm/test/CodeGen/AArch64/bitcast.ll index 20f19fddf790a..002e6cd509bec 100644 --- a/llvm/test/CodeGen/AArch64/bitcast.ll +++ b/llvm/test/CodeGen/AArch64/bitcast.ll @@ -433,12 +433,8 @@ define <2 x i16> @bitcast_v4i8_v2i16(<4 x i8> %a, <4 x i8> %b){ ; CHECK-SD-NEXT: sub sp, sp, #16 ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 ; CHECK-SD-NEXT: add v0.4h, v0.4h, v1.4h -; CHECK-SD-NEXT: add x8, sp, #12 ; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b -; CHECK-SD-NEXT: str s0, [sp, #12] -; CHECK-SD-NEXT: ld1 { v0.h }[0], [x8] -; CHECK-SD-NEXT: orr x8, x8, #0x2 -; CHECK-SD-NEXT: ld1 { v0.h }[2], [x8] +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: add sp, sp, #16 ; CHECK-SD-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/ctlz.ll b/llvm/test/CodeGen/AArch64/ctlz.ll index 04124609eec74..b1b869ec9e1ff 100644 --- a/llvm/test/CodeGen/AArch64/ctlz.ll +++ b/llvm/test/CodeGen/AArch64/ctlz.ll @@ -6,11 +6,10 @@ define void @v2i8(ptr %p1) { ; CHECK-SD-LABEL: v2i8: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ldrb w8, [x0] -; CHECK-SD-NEXT: ldrb w9, [x0, #1] +; CHECK-SD-NEXT: ldr h1, [x0] ; CHECK-SD-NEXT: movi v0.2s, #24 -; CHECK-SD-NEXT: fmov s1, w8 -; CHECK-SD-NEXT: mov v1.s[1], w9 +; CHECK-SD-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-SD-NEXT: clz v1.2s, v1.2s ; CHECK-SD-NEXT: sub v0.2s, v1.2s, v0.2s ; CHECK-SD-NEXT: mov s1, v0.s[1] @@ -47,10 +46,9 @@ define void @v3i8(ptr %p1) { ; CHECK-SD-NEXT: sub v0.4h, v1.4h, v0.4h ; CHECK-SD-NEXT: uzp1 v1.8b, v0.8b, v0.8b ; CHECK-SD-NEXT: mov h0, v0.h[2] -; CHECK-SD-NEXT: str s1, [sp, #12] -; CHECK-SD-NEXT: ldrh w8, [sp, #12] +; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-SD-NEXT: stur b0, [x0, #2] -; CHECK-SD-NEXT: strh w8, [x0] +; CHECK-SD-NEXT: str h1, [x0] ; CHECK-SD-NEXT: add sp, sp, #16 ; CHECK-SD-NEXT: ret ; @@ -145,11 +143,9 @@ entry: define void @v2i16(ptr %p1) { ; CHECK-SD-LABEL: v2i16: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ldrh w8, [x0] -; CHECK-SD-NEXT: ldrh w9, [x0, #2] +; CHECK-SD-NEXT: ldr s1, [x0] ; CHECK-SD-NEXT: movi v0.2s, #16 -; CHECK-SD-NEXT: fmov s1, w8 -; CHECK-SD-NEXT: mov v1.s[1], w9 +; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-SD-NEXT: clz v1.2s, v1.2s ; CHECK-SD-NEXT: sub v0.2s, v1.2s, v0.2s ; CHECK-SD-NEXT: mov s1, v0.s[1] diff --git a/llvm/test/CodeGen/AArch64/ctpop.ll b/llvm/test/CodeGen/AArch64/ctpop.ll index d547b6bec5b83..9c59f1b233b5d 100644 --- a/llvm/test/CodeGen/AArch64/ctpop.ll +++ b/llvm/test/CodeGen/AArch64/ctpop.ll @@ -6,10 +6,9 @@ define void @v2i8(ptr %p1) { ; CHECK-SD-LABEL: v2i8: ; 
CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ldrb w8, [x0] -; CHECK-SD-NEXT: ldrb w9, [x0, #1] -; CHECK-SD-NEXT: fmov s0, w8 -; CHECK-SD-NEXT: mov v0.s[1], w9 +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-SD-NEXT: cnt v0.8b, v0.8b ; CHECK-SD-NEXT: uaddlp v0.4h, v0.8b ; CHECK-SD-NEXT: uaddlp v0.2s, v0.4h @@ -46,10 +45,9 @@ define void @v3i8(ptr %p1) { ; CHECK-SD-NEXT: uaddlp v0.4h, v0.8b ; CHECK-SD-NEXT: uzp1 v1.8b, v0.8b, v0.8b ; CHECK-SD-NEXT: mov h0, v0.h[2] -; CHECK-SD-NEXT: str s1, [sp, #12] -; CHECK-SD-NEXT: ldrh w8, [sp, #12] +; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-SD-NEXT: stur b0, [x0, #2] -; CHECK-SD-NEXT: strh w8, [x0] +; CHECK-SD-NEXT: str h1, [x0] ; CHECK-SD-NEXT: add sp, sp, #16 ; CHECK-SD-NEXT: ret ; @@ -143,10 +141,8 @@ entry: define void @v2i16(ptr %p1) { ; CHECK-SD-LABEL: v2i16: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ldrh w8, [x0] -; CHECK-SD-NEXT: ldrh w9, [x0, #2] -; CHECK-SD-NEXT: fmov s0, w8 -; CHECK-SD-NEXT: mov v0.s[1], w9 +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-SD-NEXT: cnt v0.8b, v0.8b ; CHECK-SD-NEXT: uaddlp v0.4h, v0.8b ; CHECK-SD-NEXT: uaddlp v0.2s, v0.4h diff --git a/llvm/test/CodeGen/AArch64/cttz.ll b/llvm/test/CodeGen/AArch64/cttz.ll index fc9bf2c0aca65..c9181b4c312d1 100644 --- a/llvm/test/CodeGen/AArch64/cttz.ll +++ b/llvm/test/CodeGen/AArch64/cttz.ll @@ -6,10 +6,10 @@ define void @v2i8(ptr %p1) { ; CHECK-SD-LABEL: v2i8: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ld1 { v0.b }[0], [x0] -; CHECK-SD-NEXT: add x8, x0, #1 +; CHECK-SD-NEXT: ldr h0, [x0] ; CHECK-SD-NEXT: movi v1.2s, #1 -; CHECK-SD-NEXT: ld1 { v0.b }[4], [x8] +; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-SD-NEXT: orr v0.2s, #1, lsl #8 ; CHECK-SD-NEXT: sub v1.2s, v0.2s, v1.2s ; CHECK-SD-NEXT: bic v0.8b, v1.8b, v0.8b @@ -59,10 +59,9 @@ define void @v3i8(ptr %p1) { ; CHECK-SD-NEXT: sub v0.4h, v1.4h, v0.4h ; CHECK-SD-NEXT: uzp1 v1.8b, v0.8b, v0.8b ; CHECK-SD-NEXT: mov h0, v0.h[2] -; CHECK-SD-NEXT: str s1, [sp, #12] -; CHECK-SD-NEXT: ldrh w8, [sp, #12] +; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-SD-NEXT: stur b0, [x0, #2] -; CHECK-SD-NEXT: strh w8, [x0] +; CHECK-SD-NEXT: str h1, [x0] ; CHECK-SD-NEXT: add sp, sp, #16 ; CHECK-SD-NEXT: ret ; @@ -219,10 +218,9 @@ entry: define void @v2i16(ptr %p1) { ; CHECK-SD-LABEL: v2i16: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ld1 { v0.h }[0], [x0] -; CHECK-SD-NEXT: add x8, x0, #2 +; CHECK-SD-NEXT: ldr s0, [x0] ; CHECK-SD-NEXT: movi v1.2s, #1 -; CHECK-SD-NEXT: ld1 { v0.h }[2], [x8] +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-SD-NEXT: orr v0.2s, #1, lsl #16 ; CHECK-SD-NEXT: sub v1.2s, v0.2s, v1.2s ; CHECK-SD-NEXT: bic v0.8b, v1.8b, v0.8b diff --git a/llvm/test/CodeGen/AArch64/extbinopload.ll b/llvm/test/CodeGen/AArch64/extbinopload.ll index cabb0e7278e40..d18cff51c6101 100644 --- a/llvm/test/CodeGen/AArch64/extbinopload.ll +++ b/llvm/test/CodeGen/AArch64/extbinopload.ll @@ -263,16 +263,14 @@ define <16 x i16> @load_v16i8(ptr %p) { define <2 x i16> @std_v2i8_v2i16(ptr %p) { ; CHECK-LABEL: std_v2i8_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0, #2] -; CHECK-NEXT: ldrb w9, [x0, #3] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: mov v0.s[1], w9 -; CHECK-NEXT: ldrb w9, [x0, #1] -; CHECK-NEXT: mov v1.s[1], w9 +; CHECK-NEXT: ldr h0, [x0, #2] +; CHECK-NEXT: ldr h1, [x0] +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; 
CHECK-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: shl v0.2s, v0.2s, #3 -; CHECK-NEXT: add v0.2s, v1.2s, v0.2s +; CHECK-NEXT: uaddw v0.4s, v0.4s, v1.4h +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %l1 = load <2 x i8>, ptr %p %q = getelementptr i8, ptr %p, i32 2 @@ -1394,12 +1392,12 @@ define <4 x i32> @volatile(ptr %p) { ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: ldr s1, [x0, #4] -; CHECK-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-NEXT: ldr s0, [x0, #4] +; CHECK-NEXT: ldr s1, [x0] ; CHECK-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-NEXT: ushll v1.4s, v1.4h, #3 -; CHECK-NEXT: uaddw v0.4s, v1.4s, v0.4h +; CHECK-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-NEXT: ushll v0.4s, v0.4h, #3 +; CHECK-NEXT: uaddw v0.4s, v0.4s, v1.4h ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %l1b = load volatile float, ptr %p diff --git a/llvm/test/CodeGen/AArch64/licm-regclass-copy.mir b/llvm/test/CodeGen/AArch64/licm-regclass-copy.mir new file mode 100644 index 0000000000000..6a10df68ddc71 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/licm-regclass-copy.mir @@ -0,0 +1,197 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64 -run-pass=early-machinelicm -o - %s | FileCheck %s + +# This test verifies that cross-bank copies (e.g., GPR to FPR, FPR to GPR) +# are hoisted out of loops by MachineLICM, as they are expensive on AArch64. + +--- | + declare void @use_float(float) + declare void @use_int(i32) + + define void @gpr_to_fpr_virtual_copy_hoisted() { + ret void + } + + define void @gpr_to_fpr_physical_copy_hoisted() { + ret void + } + + define void @fpr_to_gpr_virtual_copy_hoisted() { + ret void + } +... 
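+# The GPR-to-FPR copy of the loop-invariant virtual register %0 is the only cross-bank instruction in the loop body; the CHECK lines expect it to be hoisted into the preheader bb.0.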
+--- +name: gpr_to_fpr_virtual_copy_hoisted +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: gpr_to_fpr_virtual_copy_hoisted + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32all = COPY $wzr + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32all = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:fpr32 = COPY [[COPY1]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr32common = PHI [[COPY3]], %bb.0, %5, %bb.2 + ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[PHI]], [[COPY]], implicit-def $nzcv + ; CHECK-NEXT: Bcc 1, %bb.3, implicit $nzcv + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $s0 = COPY [[COPY4]] + ; CHECK-NEXT: BL @use_float, implicit-def dead $lr, implicit $sp, implicit $s0, implicit-def $sp + ; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[PHI]], 1, 0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr32all = COPY [[ADDWri]] + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: RET_ReallyLR + bb.0: + liveins: $w0, $w1 + %1:gpr32 = COPY $w0 + %0:gpr32 = COPY $w1 + %3:gpr32all = COPY $wzr + %2:gpr32all = COPY %3:gpr32all + + bb.1: + %4:gpr32common = PHI %2:gpr32all, %bb.0, %5:gpr32all, %bb.2 + %6:gpr32 = SUBSWrr %4:gpr32common, %1:gpr32, implicit-def $nzcv + Bcc 1, %bb.3, implicit $nzcv + B %bb.2 + + bb.2: + %7:fpr32 = COPY %0:gpr32 + $s0 = COPY %7:fpr32 + BL @use_float, implicit-def dead $lr, implicit $sp, implicit $s0, implicit-def $sp + %8:gpr32sp = ADDWri %4:gpr32common, 1, 0 + %5:gpr32all = COPY %8:gpr32sp + B %bb.1 + + bb.3: + RET_ReallyLR + +... 
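+# Same loop structure, but here the hoisted cross-bank copy reads the physical register $wzr instead of a virtual register.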
+--- +name: gpr_to_fpr_physical_copy_hoisted +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: gpr_to_fpr_physical_copy_hoisted + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32all = COPY $wzr + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32all = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY $wzr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr32common = PHI [[COPY2]], %bb.0, %4, %bb.2 + ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[PHI]], [[COPY]], implicit-def $nzcv + ; CHECK-NEXT: Bcc 1, %bb.3, implicit $nzcv + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $s0 = COPY [[COPY3]] + ; CHECK-NEXT: BL @use_float, implicit-def dead $lr, implicit $sp, implicit $s0, implicit-def $sp + ; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[PHI]], 1, 0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32all = COPY [[ADDWri]] + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: RET_ReallyLR + bb.0: + liveins: $w0 + %1:gpr32 = COPY $w0 + %3:gpr32all = COPY $wzr + %2:gpr32all = COPY %3:gpr32all + + bb.1: + %4:gpr32common = PHI %2:gpr32all, %bb.0, %5:gpr32all, %bb.2 + %6:gpr32 = SUBSWrr %4:gpr32common, %1:gpr32, implicit-def $nzcv + Bcc 1, %bb.3, implicit $nzcv + B %bb.2 + + bb.2: + %7:fpr32 = COPY $wzr + $s0 = COPY %7:fpr32 + BL @use_float, implicit-def dead $lr, implicit $sp, implicit $s0, implicit-def $sp + %8:gpr32sp = ADDWri %4:gpr32common, 1, 0 + %5:gpr32all = COPY %8:gpr32sp + B %bb.1 + + bb.3: + RET_ReallyLR + +... 
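+# The opposite direction: an FPR-to-GPR copy of the loop-invariant argument %0 (taken from $s0) should likewise be hoisted out of the loop.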
+--- +name: fpr_to_gpr_virtual_copy_hoisted +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: fpr_to_gpr_virtual_copy_hoisted + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $w0, $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr32 = COPY $s0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32all = COPY $wzr + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32all = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY [[COPY1]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr32common = PHI [[COPY3]], %bb.0, %5, %bb.2 + ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[PHI]], [[COPY]], implicit-def $nzcv + ; CHECK-NEXT: Bcc 1, %bb.3, implicit $nzcv + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $w0 = COPY [[COPY4]] + ; CHECK-NEXT: BL @use_int, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp + ; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[PHI]], 1, 0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr32all = COPY [[ADDWri]] + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: RET_ReallyLR + bb.0: + liveins: $w0, $s0 + %1:gpr32 = COPY $w0 + %0:fpr32 = COPY $s0 + %3:gpr32all = COPY $wzr + %2:gpr32all = COPY %3:gpr32all + + bb.1: + %4:gpr32common = PHI %2:gpr32all, %bb.0, %5:gpr32all, %bb.2 + %6:gpr32 = SUBSWrr %4:gpr32common, %1:gpr32, implicit-def $nzcv + Bcc 1, %bb.3, implicit $nzcv + B %bb.2 + + bb.2: + %7:gpr32 = COPY %0:fpr32 + $w0 = COPY %7:gpr32 + BL @use_int, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp + %8:gpr32sp = ADDWri %4:gpr32common, 1, 0 + %5:gpr32all = COPY %8:gpr32sp + B %bb.1 + + bb.3: + RET_ReallyLR + +... 
diff --git a/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll b/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll index 4bf65e7d6fd08..cb042757a4a42 100644 --- a/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll +++ b/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll @@ -1,6 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK-NOFP16 ; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK-FP16 +; RUN: llc < %s -mtriple=aarch64 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK-NOFP16,CHECK-GI +; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK-FP16,CHECK-GI + +; CHECK-GI: warning: Instruction selection used fallback path for testmhhs +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for testmhws +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for testmhxs define i16 @testmhhs(half %x) { ; CHECK-NOFP16-LABEL: testmhhs: @@ -55,5 +61,3 @@ entry: %0 = tail call i64 @llvm.llround.i64.f16(half %x) ret i64 %0 } - -declare i64 @llvm.llround.i64.f16(half) nounwind readnone diff --git a/llvm/test/CodeGen/AArch64/llround-conv.ll b/llvm/test/CodeGen/AArch64/llround-conv.ll index 797136037f0e9..4cc089804ce97 100644 --- a/llvm/test/CodeGen/AArch64/llround-conv.ll +++ b/llvm/test/CodeGen/AArch64/llround-conv.ll @@ -1,60 +1,75 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 ; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s +; RUN: llc < %s -mtriple=aarch64 -mattr=+neon -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI + +; CHECK-GI: warning: Instruction selection used fallback path for testmswl +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for testmsll -; CHECK-LABEL: testmsws: -; CHECK: fcvtas x0, s0 -; CHECK: ret define i32 @testmsws(float %x) { +; CHECK-LABEL: testmsws: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtas x0, s0 +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ret entry: - %0 = tail call i64 @llvm.llround.f32(float %x) + %0 = tail call i64 @llvm.llround.i64.f32(float %x) %conv = trunc i64 %0 to i32 ret i32 %conv } -; CHECK-LABEL: testmsxs: -; CHECK: fcvtas x0, s0 -; CHECK-NEXT: ret define i64 @testmsxs(float %x) { +; CHECK-LABEL: testmsxs: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtas x0, s0 +; CHECK-NEXT: ret entry: - %0 = tail call i64 @llvm.llround.f32(float %x) + %0 = tail call i64 @llvm.llround.i64.f32(float %x) ret i64 %0 } -; CHECK-LABEL: testmswd: -; CHECK: fcvtas x0, d0 -; CHECK: ret define i32 @testmswd(double %x) { +; CHECK-LABEL: testmswd: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtas x0, d0 +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ret entry: - %0 = tail call i64 @llvm.llround.f64(double %x) + %0 = tail call i64 @llvm.llround.i64.f64(double %x) %conv = trunc i64 %0 to i32 ret i32 %conv } -; CHECK-LABEL: testmsxd: -; CHECK: fcvtas x0, d0 -; CHECK-NEXT: ret define i64 @testmsxd(double %x) { +; CHECK-LABEL: testmsxd: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtas x0, d0 +; CHECK-NEXT: ret entry: - %0 = tail call i64 @llvm.llround.f64(double %x) + %0 = tail call i64 @llvm.llround.i64.f64(double %x) ret i64 %0 } -; CHECK-LABEL: testmswl: -; CHECK: bl llroundl define i32 @testmswl(fp128 %x) { +; CHECK-LABEL: 
testmswl: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl llroundl +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret entry: - %0 = tail call i64 @llvm.llround.f128(fp128 %x) + %0 = tail call i64 @llvm.llround.i64.f128(fp128 %x) %conv = trunc i64 %0 to i32 ret i32 %conv } -; CHECK-LABEL: testmsll: -; CHECK: b llroundl define i64 @testmsll(fp128 %x) { +; CHECK-LABEL: testmsll: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: b llroundl entry: - %0 = tail call i64 @llvm.llround.f128(fp128 %x) + %0 = tail call i64 @llvm.llround.i64.f128(fp128 %x) ret i64 %0 } - -declare i64 @llvm.llround.f32(float) nounwind readnone -declare i64 @llvm.llround.f64(double) nounwind readnone -declare i64 @llvm.llround.f128(fp128) nounwind readnone diff --git a/llvm/test/CodeGen/AArch64/load.ll b/llvm/test/CodeGen/AArch64/load.ll index c4bb6e37d6eaf..b138fa4085427 100644 --- a/llvm/test/CodeGen/AArch64/load.ll +++ b/llvm/test/CodeGen/AArch64/load.ll @@ -230,9 +230,9 @@ define <2 x i64> @load_v2i64(ptr %ptr) { define <2 x i8> @load_v2i8(ptr %ptr, <2 x i8> %b) { ; CHECK-SD-LABEL: load_v2i8: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ld1 { v0.b }[0], [x0] -; CHECK-SD-NEXT: add x8, x0, #1 -; CHECK-SD-NEXT: ld1 { v0.b }[4], [x8] +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: ret ; @@ -269,9 +269,8 @@ define <32 x i8> @load_v32i8(ptr %ptr) { define <2 x i16> @load_v2i16(ptr %ptr) { ; CHECK-SD-LABEL: load_v2i16: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ld1 { v0.h }[0], [x0] -; CHECK-SD-NEXT: add x8, x0, #2 -; CHECK-SD-NEXT: ld1 { v0.h }[2], [x8] +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll b/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll index bf78fd456eac0..a29dea0eb9f9f 100644 --- a/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll +++ b/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll @@ -1,6 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK-NOFP16 ; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK-FP16 +; RUN: llc < %s -mtriple=aarch64 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK-NOFP16,CHECK-GI +; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK-FP16,CHECK-GI + +; CHECK-GI: warning: Instruction selection used fallback path for testmhhs +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for testmhws +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for testmhxs define i16 @testmhhs(half %x) { ; CHECK-NOFP16-LABEL: testmhhs: @@ -55,5 +61,3 @@ entry: %0 = tail call i64 @llvm.lround.i64.f16(half %x) ret i64 %0 } - -declare i64 @llvm.lround.i64.f16(half) nounwind readnone diff --git a/llvm/test/CodeGen/AArch64/lround-conv.ll b/llvm/test/CodeGen/AArch64/lround-conv.ll index 678d3149f20cc..0bf82b538e70c 100644 --- a/llvm/test/CodeGen/AArch64/lround-conv.ll +++ b/llvm/test/CodeGen/AArch64/lround-conv.ll @@ -1,60 +1,75 @@ +; 
NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 ; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s +; RUN: llc < %s -mtriple=aarch64 -mattr=+neon -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI + +; CHECK-GI: warning: Instruction selection used fallback path for testmswl +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for testmsll -; CHECK-LABEL: testmsws: -; CHECK: fcvtas x0, s0 -; CHECK: ret define i32 @testmsws(float %x) { +; CHECK-LABEL: testmsws: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtas x0, s0 +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ret entry: %0 = tail call i64 @llvm.lround.i64.f32(float %x) %conv = trunc i64 %0 to i32 ret i32 %conv } -; CHECK-LABEL: testmsxs: -; CHECK: fcvtas x0, s0 -; CHECK-NEXT: ret define i64 @testmsxs(float %x) { +; CHECK-LABEL: testmsxs: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtas x0, s0 +; CHECK-NEXT: ret entry: %0 = tail call i64 @llvm.lround.i64.f32(float %x) ret i64 %0 } -; CHECK-LABEL: testmswd: -; CHECK: fcvtas x0, d0 -; CHECK: ret define i32 @testmswd(double %x) { +; CHECK-LABEL: testmswd: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtas x0, d0 +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ret entry: %0 = tail call i64 @llvm.lround.i64.f64(double %x) %conv = trunc i64 %0 to i32 ret i32 %conv } -; CHECK-LABEL: testmsxd: -; CHECK: fcvtas x0, d0 -; CHECK-NEXT: ret define i64 @testmsxd(double %x) { +; CHECK-LABEL: testmsxd: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtas x0, d0 +; CHECK-NEXT: ret entry: %0 = tail call i64 @llvm.lround.i64.f64(double %x) ret i64 %0 } -; CHECK-LABEL: testmswl: -; CHECK: bl lroundl define i32 @testmswl(fp128 %x) { +; CHECK-LABEL: testmswl: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl lroundl +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret entry: %0 = tail call i64 @llvm.lround.i64.f128(fp128 %x) %conv = trunc i64 %0 to i32 ret i32 %conv } -; CHECK-LABEL: testmsll: -; CHECK: b lroundl define i64 @testmsll(fp128 %x) { +; CHECK-LABEL: testmsll: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: b lroundl entry: %0 = tail call i64 @llvm.lround.i64.f128(fp128 %x) ret i64 %0 } - -declare i64 @llvm.lround.i64.f32(float) nounwind readnone -declare i64 @llvm.lround.i64.f64(double) nounwind readnone -declare i64 @llvm.lround.i64.f128(fp128) nounwind readnone diff --git a/llvm/test/CodeGen/AArch64/mul.ll b/llvm/test/CodeGen/AArch64/mul.ll index 9c69a6f03b858..475bd22c6ebcb 100644 --- a/llvm/test/CodeGen/AArch64/mul.ll +++ b/llvm/test/CodeGen/AArch64/mul.ll @@ -68,13 +68,11 @@ entry: define void @v2i8(ptr %p1, ptr %p2) { ; CHECK-SD-LABEL: v2i8: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ld1 { v0.b }[0], [x0] -; CHECK-SD-NEXT: ld1 { v1.b }[0], [x1] -; CHECK-SD-NEXT: add x8, x0, #1 -; CHECK-SD-NEXT: add x9, x1, #1 -; CHECK-SD-NEXT: ld1 { v0.b }[4], [x8] -; CHECK-SD-NEXT: ld1 { v1.b }[4], [x9] -; CHECK-SD-NEXT: mul v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ldr h1, [x1] +; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-SD-NEXT: umull v0.4s, v0.4h, v1.4h ; CHECK-SD-NEXT: mov s1, v0.s[1] ; CHECK-SD-NEXT: str b0, [x0] ; CHECK-SD-NEXT: stur b1, [x0, #1] @@ -113,10 +111,9 @@ define void @v3i8(ptr %p1, ptr %p2) { ; CHECK-SD-NEXT: mul v0.4h, v0.4h, v1.4h ; CHECK-SD-NEXT: uzp1 v1.8b, v0.8b, v0.8b ; CHECK-SD-NEXT: mov h0, v0.h[2] -; CHECK-SD-NEXT: str s1, [sp, #12] -; CHECK-SD-NEXT: ldrh w8, [sp, #12] +; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-SD-NEXT: stur b0, [x0, #2] -; CHECK-SD-NEXT: strh w8, [x0] +; CHECK-SD-NEXT: str h1, [x0] ; CHECK-SD-NEXT: add sp, sp, #16 ; CHECK-SD-NEXT: ret ; @@ -240,13 +237,9 @@ entry: define void @v2i16(ptr %p1, ptr %p2) { ; CHECK-SD-LABEL: v2i16: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ld1 { v0.h }[0], [x0] -; CHECK-SD-NEXT: ld1 { v1.h }[0], [x1] -; CHECK-SD-NEXT: add x8, x0, #2 -; CHECK-SD-NEXT: add x9, x1, #2 -; CHECK-SD-NEXT: ld1 { v0.h }[2], [x8] -; CHECK-SD-NEXT: ld1 { v1.h }[2], [x9] -; CHECK-SD-NEXT: mul v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ldr s1, [x1] +; CHECK-SD-NEXT: umull v0.4s, v0.4h, v1.4h ; CHECK-SD-NEXT: mov s1, v0.s[1] ; CHECK-SD-NEXT: str h0, [x0] ; CHECK-SD-NEXT: str h1, [x0, #2] diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll index 1c4a504d0ab70..b31a5ea0b5d79 100644 --- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll @@ -159,12 +159,12 @@ define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { define void @v2i8(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-SD-LABEL: v2i8: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ld1 { v0.b }[0], [x0] -; CHECK-SD-NEXT: ld1 { v1.b }[0], [x1] -; CHECK-SD-NEXT: add x8, x0, #1 -; CHECK-SD-NEXT: add x9, x1, #1 -; CHECK-SD-NEXT: ld1 { v0.b }[4], [x8] -; CHECK-SD-NEXT: ld1 { v1.b }[4], [x9] +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ldr h1, [x1] +; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0 ; 
CHECK-SD-NEXT: shl v1.2s, v1.2s, #24 ; CHECK-SD-NEXT: shl v0.2s, v0.2s, #24 ; CHECK-SD-NEXT: sqadd v0.2s, v0.2s, v1.2s @@ -212,12 +212,10 @@ define void @v4i16(ptr %px, ptr %py, ptr %pz) nounwind { define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-SD-LABEL: v2i16: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ld1 { v0.h }[0], [x0] -; CHECK-SD-NEXT: ld1 { v1.h }[0], [x1] -; CHECK-SD-NEXT: add x8, x0, #2 -; CHECK-SD-NEXT: add x9, x1, #2 -; CHECK-SD-NEXT: ld1 { v0.h }[2], [x8] -; CHECK-SD-NEXT: ld1 { v1.h }[2], [x9] +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ldr s1, [x1] +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-SD-NEXT: shl v1.2s, v1.2s, #16 ; CHECK-SD-NEXT: shl v0.2s, v0.2s, #16 ; CHECK-SD-NEXT: sqadd v0.2s, v0.2s, v1.2s diff --git a/llvm/test/CodeGen/AArch64/sitofp-to-tbl.ll b/llvm/test/CodeGen/AArch64/sitofp-to-tbl.ll index 3e708b0678fbc..297b25ed075e4 100644 --- a/llvm/test/CodeGen/AArch64/sitofp-to-tbl.ll +++ b/llvm/test/CodeGen/AArch64/sitofp-to-tbl.ll @@ -244,11 +244,9 @@ define void @sitofp_v2i8_to_v2f64(ptr %src, ptr %dst) { ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: .LBB3_1: // %loop ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add x9, x0, x8, lsl #1 -; CHECK-NEXT: ldrsb w10, [x9] -; CHECK-NEXT: ldrsb w9, [x9, #1] -; CHECK-NEXT: fmov s0, w10 -; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: ldr h0, [x0, x8, lsl #1] +; CHECK-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: sshll v0.4s, v0.4h, #0 ; CHECK-NEXT: sshll v0.2d, v0.2s, #0 ; CHECK-NEXT: scvtf v0.2d, v0.2d ; CHECK-NEXT: str q0, [x1, x8, lsl #4] diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll index 3af858713525b..02eb40b412efd 100644 --- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll @@ -159,12 +159,12 @@ define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { define void @v2i8(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-SD-LABEL: v2i8: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ld1 { v0.b }[0], [x0] -; CHECK-SD-NEXT: ld1 { v1.b }[0], [x1] -; CHECK-SD-NEXT: add x8, x0, #1 -; CHECK-SD-NEXT: add x9, x1, #1 -; CHECK-SD-NEXT: ld1 { v0.b }[4], [x8] -; CHECK-SD-NEXT: ld1 { v1.b }[4], [x9] +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ldr h1, [x1] +; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-SD-NEXT: shl v1.2s, v1.2s, #24 ; CHECK-SD-NEXT: shl v0.2s, v0.2s, #24 ; CHECK-SD-NEXT: sqsub v0.2s, v0.2s, v1.2s @@ -212,12 +212,10 @@ define void @v4i16(ptr %px, ptr %py, ptr %pz) nounwind { define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-SD-LABEL: v2i16: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ld1 { v0.h }[0], [x0] -; CHECK-SD-NEXT: ld1 { v1.h }[0], [x1] -; CHECK-SD-NEXT: add x8, x0, #2 -; CHECK-SD-NEXT: add x9, x1, #2 -; CHECK-SD-NEXT: ld1 { v0.h }[2], [x8] -; CHECK-SD-NEXT: ld1 { v1.h }[2], [x9] +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ldr s1, [x1] +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-SD-NEXT: shl v1.2s, v1.2s, #16 ; CHECK-SD-NEXT: shl v0.2s, v0.2s, #16 ; CHECK-SD-NEXT: sqsub v0.2s, v0.2s, v1.2s diff --git a/llvm/test/CodeGen/AArch64/store.ll b/llvm/test/CodeGen/AArch64/store.ll index 3a9f12b838702..1dc55fccc3dac 100644 --- a/llvm/test/CodeGen/AArch64/store.ll +++ b/llvm/test/CodeGen/AArch64/store.ll @@ -207,13 +207,12 @@ define void @store_v3i8(<3 x i8> %a, ptr %ptr){ ; 
CHECK-SD-NEXT: sub sp, sp, #16 ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 ; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: strb w2, [x3, #2] ; CHECK-SD-NEXT: mov v0.h[1], w1 ; CHECK-SD-NEXT: mov v0.h[2], w2 ; CHECK-SD-NEXT: xtn v0.8b, v0.8h -; CHECK-SD-NEXT: str s0, [sp, #12] -; CHECK-SD-NEXT: ldrh w8, [sp, #12] -; CHECK-SD-NEXT: strb w2, [x3, #2] -; CHECK-SD-NEXT: strh w8, [x3] +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: str h0, [x3] ; CHECK-SD-NEXT: add sp, sp, #16 ; CHECK-SD-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/sub.ll b/llvm/test/CodeGen/AArch64/sub.ll index 5e278d59b6591..dd920b98e18eb 100644 --- a/llvm/test/CodeGen/AArch64/sub.ll +++ b/llvm/test/CodeGen/AArch64/sub.ll @@ -56,13 +56,11 @@ entry: define void @v2i8(ptr %p1, ptr %p2) { ; CHECK-SD-LABEL: v2i8: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ld1 { v0.b }[0], [x0] -; CHECK-SD-NEXT: ld1 { v1.b }[0], [x1] -; CHECK-SD-NEXT: add x8, x0, #1 -; CHECK-SD-NEXT: add x9, x1, #1 -; CHECK-SD-NEXT: ld1 { v0.b }[4], [x8] -; CHECK-SD-NEXT: ld1 { v1.b }[4], [x9] -; CHECK-SD-NEXT: sub v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ldr h1, [x1] +; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-SD-NEXT: usubl v0.4s, v0.4h, v1.4h ; CHECK-SD-NEXT: mov s1, v0.s[1] ; CHECK-SD-NEXT: str b0, [x0] ; CHECK-SD-NEXT: stur b1, [x0, #1] @@ -101,10 +99,9 @@ define void @v3i8(ptr %p1, ptr %p2) { ; CHECK-SD-NEXT: sub v0.4h, v0.4h, v1.4h ; CHECK-SD-NEXT: uzp1 v1.8b, v0.8b, v0.8b ; CHECK-SD-NEXT: mov h0, v0.h[2] -; CHECK-SD-NEXT: str s1, [sp, #12] -; CHECK-SD-NEXT: ldrh w8, [sp, #12] +; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-SD-NEXT: stur b0, [x0, #2] -; CHECK-SD-NEXT: strh w8, [x0] +; CHECK-SD-NEXT: str h1, [x0] ; CHECK-SD-NEXT: add sp, sp, #16 ; CHECK-SD-NEXT: ret ; @@ -228,13 +225,9 @@ entry: define void @v2i16(ptr %p1, ptr %p2) { ; CHECK-SD-LABEL: v2i16: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ld1 { v0.h }[0], [x0] -; CHECK-SD-NEXT: ld1 { v1.h }[0], [x1] -; CHECK-SD-NEXT: add x8, x0, #2 -; CHECK-SD-NEXT: add x9, x1, #2 -; CHECK-SD-NEXT: ld1 { v0.h }[2], [x8] -; CHECK-SD-NEXT: ld1 { v1.h }[2], [x9] -; CHECK-SD-NEXT: sub v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ldr s1, [x1] +; CHECK-SD-NEXT: usubl v0.4s, v0.4h, v1.4h ; CHECK-SD-NEXT: mov s1, v0.s[1] ; CHECK-SD-NEXT: str h0, [x0] ; CHECK-SD-NEXT: str h1, [x0, #2] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-ext-loads.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-ext-loads.ll index ba7bee9a94bac..a77c74ab67b80 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-ext-loads.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-ext-loads.ll @@ -7,8 +7,10 @@ target triple = "aarch64-unknown-linux-gnu" define <4 x i32> @load_zext_v4i16i32(ptr %ap) vscale_range(2,0) #0 { ; CHECK-LABEL: load_zext_v4i16i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldp s0, s1, [x0] ; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: ret %a = load <4 x i16>, ptr %ap %val = zext <4 x i16> %a to <4 x i32> @@ -97,8 +99,10 @@ define void @load_zext_v64i16i32(ptr %ap, ptr %b) #0 { define <4 x i32> @load_sext_v4i16i32(ptr %ap) vscale_range(2,0) #0 { ; CHECK-LABEL: load_sext_v4i16i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldp s0, s1, [x0] ; CHECK-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-NEXT: sshll v1.4s, v1.4h, #0 +; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: ret %a = load <4 x 
i16>, ptr %ap %val = sext <4 x i16> %a to <4 x i32> diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll index 6fd5b820a2242..b457e0307fbe1 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll @@ -12,11 +12,10 @@ target triple = "aarch64-unknown-linux-gnu" define void @masked_gather_v2i8(ptr %a, ptr %b) vscale_range(2,0) #0 { ; CHECK-LABEL: masked_gather_v2i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: ldrb w9, [x0, #1] +; CHECK-NEXT: ldr h0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: cmeq v0.2s, v0.2s, #0 ; CHECK-NEXT: sshll v0.2d, v0.2s, #0 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 @@ -165,11 +164,9 @@ define void @masked_gather_v32i8(ptr %a, ptr %b) vscale_range(16,0) #0 { define void @masked_gather_v2i16(ptr %a, ptr %b) vscale_range(2,0) #0 { ; CHECK-LABEL: masked_gather_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: ldrh w9, [x0, #2] +; CHECK-NEXT: ldr s0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: cmeq v0.2s, v0.2s, #0 ; CHECK-NEXT: sshll v0.2d, v0.2s, #0 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll index ed03f9b322432..4fb3bf7392d4e 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll @@ -12,11 +12,10 @@ target triple = "aarch64-unknown-linux-gnu" define void @masked_scatter_v2i8(ptr %a, ptr %b) vscale_range(2,0) #0 { ; CHECK-LABEL: masked_scatter_v2i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: ldrb w9, [x0, #1] +; CHECK-NEXT: ldr h0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: cmeq v1.2s, v0.2s, #0 ; CHECK-NEXT: ushll v0.2d, v0.2s, #0 ; CHECK-NEXT: sshll v1.2d, v1.2s, #0 @@ -159,11 +158,9 @@ define void @masked_scatter_v32i8(ptr %a, ptr %b) vscale_range(16,0) #0 { define void @masked_scatter_v2i16(ptr %a, ptr %b) vscale_range(2,0) #0 { ; CHECK-LABEL: masked_scatter_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: ldrh w9, [x0, #2] +; CHECK-NEXT: ldr s0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: cmeq v1.2s, v0.2s, #0 ; CHECK-NEXT: ushll v0.2d, v0.2s, #0 ; CHECK-NEXT: sshll v1.2d, v1.2s, #0 diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll index 3cfb24aaccb11..cd02d18e61643 100644 --- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll @@ -156,16 +156,12 @@ define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { define void @v2i8(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-SD-LABEL: v2i8: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldrb w8, [x0] -; CHECK-SD-NEXT: ldrb w9, [x1] +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ldr h1, [x1] ; CHECK-SD-NEXT: movi d2, #0x0000ff000000ff -; CHECK-SD-NEXT: ldrb w10, [x0, #1] -; CHECK-SD-NEXT: ldrb w11, [x1, #1] -; 
CHECK-SD-NEXT: fmov s0, w8 -; CHECK-SD-NEXT: fmov s1, w9 -; CHECK-SD-NEXT: mov v0.s[1], w10 -; CHECK-SD-NEXT: mov v1.s[1], w11 -; CHECK-SD-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-SD-NEXT: uaddl v0.4s, v0.4h, v1.4h ; CHECK-SD-NEXT: umin v0.2s, v0.2s, v2.2s ; CHECK-SD-NEXT: mov s1, v0.s[1] ; CHECK-SD-NEXT: str b0, [x2] @@ -210,16 +206,10 @@ define void @v4i16(ptr %px, ptr %py, ptr %pz) nounwind { define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-SD-LABEL: v2i16: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldrh w8, [x0] -; CHECK-SD-NEXT: ldrh w9, [x1] +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ldr s1, [x1] ; CHECK-SD-NEXT: movi d2, #0x00ffff0000ffff -; CHECK-SD-NEXT: ldrh w10, [x0, #2] -; CHECK-SD-NEXT: ldrh w11, [x1, #2] -; CHECK-SD-NEXT: fmov s0, w8 -; CHECK-SD-NEXT: fmov s1, w9 -; CHECK-SD-NEXT: mov v0.s[1], w10 -; CHECK-SD-NEXT: mov v1.s[1], w11 -; CHECK-SD-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: uaddl v0.4s, v0.4h, v1.4h ; CHECK-SD-NEXT: umin v0.2s, v0.2s, v2.2s ; CHECK-SD-NEXT: mov s1, v0.s[1] ; CHECK-SD-NEXT: str h0, [x2] diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll index a71cf95a728db..ef70137e6deee 100644 --- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll @@ -156,14 +156,12 @@ define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { define void @v2i8(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-SD-LABEL: v2i8: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldrb w8, [x0] -; CHECK-SD-NEXT: ldrb w9, [x1] -; CHECK-SD-NEXT: ldrb w10, [x0, #1] -; CHECK-SD-NEXT: ldrb w11, [x1, #1] -; CHECK-SD-NEXT: fmov s0, w8 -; CHECK-SD-NEXT: fmov s1, w9 -; CHECK-SD-NEXT: mov v0.s[1], w10 -; CHECK-SD-NEXT: mov v1.s[1], w11 +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ldr h1, [x1] +; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-SD-NEXT: uqsub v0.2s, v0.2s, v1.2s ; CHECK-SD-NEXT: mov s1, v0.s[1] ; CHECK-SD-NEXT: str b0, [x2] @@ -208,14 +206,10 @@ define void @v4i16(ptr %px, ptr %py, ptr %pz) nounwind { define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-SD-LABEL: v2i16: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldrh w8, [x0] -; CHECK-SD-NEXT: ldrh w9, [x1] -; CHECK-SD-NEXT: ldrh w10, [x0, #2] -; CHECK-SD-NEXT: ldrh w11, [x1, #2] -; CHECK-SD-NEXT: fmov s0, w8 -; CHECK-SD-NEXT: fmov s1, w9 -; CHECK-SD-NEXT: mov v0.s[1], w10 -; CHECK-SD-NEXT: mov v1.s[1], w11 +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ldr s1, [x1] +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-SD-NEXT: uqsub v0.2s, v0.2s, v1.2s ; CHECK-SD-NEXT: mov s1, v0.s[1] ; CHECK-SD-NEXT: str h0, [x2] diff --git a/llvm/test/CodeGen/AArch64/v3f-to-int.ll b/llvm/test/CodeGen/AArch64/v3f-to-int.ll index f6553b6acec9d..6d4061fb02cff 100644 --- a/llvm/test/CodeGen/AArch64/v3f-to-int.ll +++ b/llvm/test/CodeGen/AArch64/v3f-to-int.ll @@ -1,9 +1,18 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s -; CHECK-LABEL: convert_v3f32 -; CHECK: strb -; CHECK: strh define void @convert_v3f32() { +; CHECK-LABEL: convert_v3f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: str wzr, [sp, #12] +; CHECK-NEXT: ldr s0, [sp, #12] +; CHECK-NEXT: strb wzr, 
[x8] +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: str h0, [x8] +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret entry: br label %bb diff --git a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll index 80029fb717575..ee74984125f77 100644 --- a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll +++ b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll @@ -896,16 +896,13 @@ define <2 x i8> @vector_to_vector_cast(<16 x i1> %arg) nounwind { ; CHECK-SD-NEXT: shl.16b v0, v0, #7 ; CHECK-SD-NEXT: adrp x8, lCPI20_0@PAGE ; CHECK-SD-NEXT: ldr q1, [x8, lCPI20_0@PAGEOFF] -; CHECK-SD-NEXT: add x8, sp, #14 ; CHECK-SD-NEXT: cmlt.16b v0, v0, #0 ; CHECK-SD-NEXT: and.16b v0, v0, v1 ; CHECK-SD-NEXT: ext.16b v1, v0, v0, #8 ; CHECK-SD-NEXT: zip1.16b v0, v0, v1 ; CHECK-SD-NEXT: addv.8h h0, v0 -; CHECK-SD-NEXT: str h0, [sp, #14] -; CHECK-SD-NEXT: ld1.b { v0 }[0], [x8] -; CHECK-SD-NEXT: orr x8, x8, #0x1 -; CHECK-SD-NEXT: ld1.b { v0 }[4], [x8] +; CHECK-SD-NEXT: ushll.8h v0, v0, #0 +; CHECK-SD-NEXT: ushll.4s v0, v0, #0 ; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: add sp, sp, #16 ; CHECK-SD-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/vec3-loads-ext-trunc-stores.ll b/llvm/test/CodeGen/AArch64/vec3-loads-ext-trunc-stores.ll index 7d3f5bc270d6b..60414adba75fc 100644 --- a/llvm/test/CodeGen/AArch64/vec3-loads-ext-trunc-stores.ll +++ b/llvm/test/CodeGen/AArch64/vec3-loads-ext-trunc-stores.ll @@ -372,13 +372,13 @@ define void @store_trunc_from_64bits(ptr %src, ptr %dst) { ; BE-NEXT: ldr s0, [x0] ; BE-NEXT: ldrh w8, [x0, #4] ; BE-NEXT: rev32 v0.4h, v0.4h +; BE-NEXT: strb w8, [x1, #2] ; BE-NEXT: mov v0.h[2], w8 ; BE-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; BE-NEXT: rev32 v0.16b, v0.16b -; BE-NEXT: str s0, [sp, #12] -; BE-NEXT: ldrh w9, [sp, #12] -; BE-NEXT: strb w8, [x1, #2] -; BE-NEXT: strh w9, [x1] +; BE-NEXT: rev32 v0.4h, v0.4h +; BE-NEXT: ushll v0.4s, v0.4h, #0 +; BE-NEXT: str h0, [x1] ; BE-NEXT: add sp, sp, #16 ; BE-NEXT: ret entry: @@ -422,10 +422,10 @@ define void @store_trunc_add_from_64bits(ptr %src, ptr %dst) { ; BE-NEXT: uzp1 v1.8b, v0.8b, v0.8b ; BE-NEXT: mov h0, v0.h[2] ; BE-NEXT: rev32 v1.16b, v1.16b -; BE-NEXT: str s1, [sp, #12] -; BE-NEXT: ldrh w8, [sp, #12] ; BE-NEXT: stur b0, [x1, #2] -; BE-NEXT: strh w8, [x1] +; BE-NEXT: rev32 v1.4h, v1.4h +; BE-NEXT: ushll v1.4s, v1.4h, #0 +; BE-NEXT: str h1, [x1] ; BE-NEXT: add sp, sp, #16 ; BE-NEXT: ret entry: @@ -604,10 +604,10 @@ define void @shift_trunc_store(ptr %src, ptr %dst) { ; BE-NEXT: uzp1 v1.8b, v0.8b, v0.8b ; BE-NEXT: mov h0, v0.h[2] ; BE-NEXT: rev32 v1.16b, v1.16b -; BE-NEXT: str s1, [sp, #12] -; BE-NEXT: ldrh w8, [sp, #12] ; BE-NEXT: stur b0, [x1, #2] -; BE-NEXT: strh w8, [x1] +; BE-NEXT: rev32 v1.4h, v1.4h +; BE-NEXT: ushll v1.4s, v1.4h, #0 +; BE-NEXT: str h1, [x1] ; BE-NEXT: add sp, sp, #16 ; BE-NEXT: ret %l = load <3 x i32>, ptr %src @@ -638,10 +638,10 @@ define void @shift_trunc_store_default_align(ptr %src, ptr %dst) { ; BE-NEXT: uzp1 v1.8b, v0.8b, v0.8b ; BE-NEXT: mov h0, v0.h[2] ; BE-NEXT: rev32 v1.16b, v1.16b -; BE-NEXT: str s1, [sp, #12] -; BE-NEXT: ldrh w8, [sp, #12] ; BE-NEXT: stur b0, [x1, #2] -; BE-NEXT: strh w8, [x1] +; BE-NEXT: rev32 v1.4h, v1.4h +; BE-NEXT: ushll v1.4s, v1.4h, #0 +; BE-NEXT: str h1, [x1] ; BE-NEXT: add sp, sp, #16 ; BE-NEXT: ret %l = load <3 x i32>, ptr %src @@ -672,10 +672,10 @@ define void @shift_trunc_store_align_4(ptr %src, ptr %dst) { ; BE-NEXT: uzp1 v1.8b, v0.8b, v0.8b ; BE-NEXT: mov h0, v0.h[2] ; 
BE-NEXT: rev32 v1.16b, v1.16b -; BE-NEXT: str s1, [sp, #12] -; BE-NEXT: ldrh w8, [sp, #12] ; BE-NEXT: stur b0, [x1, #2] -; BE-NEXT: strh w8, [x1] +; BE-NEXT: rev32 v1.4h, v1.4h +; BE-NEXT: ushll v1.4s, v1.4h, #0 +; BE-NEXT: str h1, [x1] ; BE-NEXT: add sp, sp, #16 ; BE-NEXT: ret %l = load <3 x i32>, ptr %src @@ -706,10 +706,10 @@ define void @shift_trunc_store_const_offset_1(ptr %src, ptr %dst) { ; BE-NEXT: uzp1 v1.8b, v0.8b, v0.8b ; BE-NEXT: mov h0, v0.h[2] ; BE-NEXT: rev32 v1.16b, v1.16b -; BE-NEXT: str s1, [sp, #12] -; BE-NEXT: ldrh w8, [sp, #12] ; BE-NEXT: stur b0, [x1, #3] -; BE-NEXT: sturh w8, [x1, #1] +; BE-NEXT: rev32 v1.4h, v1.4h +; BE-NEXT: ushll v1.4s, v1.4h, #0 +; BE-NEXT: stur h1, [x1, #1] ; BE-NEXT: add sp, sp, #16 ; BE-NEXT: ret %l = load <3 x i32>, ptr %src @@ -741,10 +741,10 @@ define void @shift_trunc_store_const_offset_3(ptr %src, ptr %dst) { ; BE-NEXT: uzp1 v1.8b, v0.8b, v0.8b ; BE-NEXT: mov h0, v0.h[2] ; BE-NEXT: rev32 v1.16b, v1.16b -; BE-NEXT: str s1, [sp, #12] -; BE-NEXT: ldrh w8, [sp, #12] ; BE-NEXT: stur b0, [x1, #5] -; BE-NEXT: sturh w8, [x1, #3] +; BE-NEXT: rev32 v1.4h, v1.4h +; BE-NEXT: ushll v1.4s, v1.4h, #0 +; BE-NEXT: stur h1, [x1, #3] ; BE-NEXT: add sp, sp, #16 ; BE-NEXT: ret %l = load <3 x i32>, ptr %src @@ -764,10 +764,9 @@ define void @shift_trunc_volatile_store(ptr %src, ptr %dst) { ; CHECK-NEXT: shrn.4h v0, v0, #16 ; CHECK-NEXT: uzp1.8b v1, v0, v0 ; CHECK-NEXT: mov h0, v0[2] -; CHECK-NEXT: str s1, [sp, #12] -; CHECK-NEXT: ldrh w8, [sp, #12] +; CHECK-NEXT: ushll.4s v1, v1, #0 ; CHECK-NEXT: stur b0, [x1, #2] -; CHECK-NEXT: strh w8, [x1] +; CHECK-NEXT: str h1, [x1] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret ; @@ -780,10 +779,10 @@ define void @shift_trunc_volatile_store(ptr %src, ptr %dst) { ; BE-NEXT: uzp1 v1.8b, v0.8b, v0.8b ; BE-NEXT: mov h0, v0.h[2] ; BE-NEXT: rev32 v1.16b, v1.16b -; BE-NEXT: str s1, [sp, #12] -; BE-NEXT: ldrh w8, [sp, #12] ; BE-NEXT: stur b0, [x1, #2] -; BE-NEXT: strh w8, [x1] +; BE-NEXT: rev32 v1.4h, v1.4h +; BE-NEXT: ushll v1.4s, v1.4h, #0 +; BE-NEXT: str h1, [x1] ; BE-NEXT: add sp, sp, #16 ; BE-NEXT: ret %l = load <3 x i32>, ptr %src @@ -832,10 +831,10 @@ define void @load_v3i8_zext_to_3xi32_add_trunc_store(ptr %src) { ; BE-NEXT: uzp1 v1.8b, v0.8b, v0.8b ; BE-NEXT: mov h0, v0.h[2] ; BE-NEXT: rev32 v1.16b, v1.16b -; BE-NEXT: str s1, [sp, #8] -; BE-NEXT: ldrh w8, [sp, #8] ; BE-NEXT: stur b0, [x0, #2] -; BE-NEXT: strh w8, [x0] +; BE-NEXT: rev32 v1.4h, v1.4h +; BE-NEXT: ushll v1.4s, v1.4h, #0 +; BE-NEXT: str h1, [x0] ; BE-NEXT: add sp, sp, #16 ; BE-NEXT: ret %l = load <3 x i8>, ptr %src, align 1 @@ -885,10 +884,10 @@ define void @load_v3i8_sext_to_3xi32_add_trunc_store(ptr %src) { ; BE-NEXT: uzp1 v1.8b, v0.8b, v0.8b ; BE-NEXT: mov h0, v0.h[2] ; BE-NEXT: rev32 v1.16b, v1.16b -; BE-NEXT: str s1, [sp, #8] -; BE-NEXT: ldrh w8, [sp, #8] ; BE-NEXT: stur b0, [x0, #2] -; BE-NEXT: strh w8, [x0] +; BE-NEXT: rev32 v1.4h, v1.4h +; BE-NEXT: ushll v1.4s, v1.4h, #0 +; BE-NEXT: str h1, [x0] ; BE-NEXT: add sp, sp, #16 ; BE-NEXT: ret %l = load <3 x i8>, ptr %src, align 1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.ll new file mode 100644 index 0000000000000..84ac58f899717 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.ll @@ -0,0 +1,165 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-FAKE16 
%s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-TRUE16 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-FAKE16 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-TRUE16 %s + +define amdgpu_ps half @fmul_s16_uniform(half inreg %a, half inreg %b) { +; GFX11-FAKE16-LABEL: fmul_s16_uniform: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: v_mul_f16_e64 v0, s0, s1 +; GFX11-FAKE16-NEXT: ; return to shader part epilog +; +; GFX11-TRUE16-LABEL: fmul_s16_uniform: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: v_mul_f16_e64 v0.l, s0, s1 +; GFX11-TRUE16-NEXT: ; return to shader part epilog +; +; GFX12-LABEL: fmul_s16_uniform: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_mul_f16 s0, s0, s1 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) +; GFX12-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-NEXT: ; return to shader part epilog + %result = fmul half %a, %b + ret half %result +} + +define amdgpu_ps half @fmul_s16_div(half %a, half %b) { +; GFX11-FAKE16-LABEL: fmul_s16_div: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX11-FAKE16-NEXT: ; return to shader part epilog +; +; GFX11-TRUE16-LABEL: fmul_s16_div: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l +; GFX11-TRUE16-NEXT: ; return to shader part epilog +; +; GFX12-FAKE16-LABEL: fmul_s16_div: +; GFX12-FAKE16: ; %bb.0: +; GFX12-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX12-FAKE16-NEXT: ; return to shader part epilog +; +; GFX12-TRUE16-LABEL: fmul_s16_div: +; GFX12-TRUE16: ; %bb.0: +; GFX12-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l +; GFX12-TRUE16-NEXT: ; return to shader part epilog + %result = fmul half %a, %b + ret half %result +} + +define amdgpu_ps float @fmul_s32_uniform(float inreg %a, float inreg %b) { +; GFX11-LABEL: fmul_s32_uniform: +; GFX11: ; %bb.0: +; GFX11-NEXT: v_mul_f32_e64 v0, s0, s1 +; GFX11-NEXT: ; return to shader part epilog +; +; GFX12-LABEL: fmul_s32_uniform: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_mul_f32 s0, s0, s1 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) +; GFX12-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-NEXT: ; return to shader part epilog + %result = fmul float %a, %b + ret float %result +} + +define amdgpu_ps float @fmul_s32_div(float %a, float %b) { +; GCN-LABEL: fmul_s32_div: +; GCN: ; %bb.0: +; GCN-NEXT: v_mul_f32_e32 v0, v0, v1 +; GCN-NEXT: ; return to shader part epilog + %result = fmul float %a, %b + ret float %result +} + +define amdgpu_ps void @fmul_s64_uniform(double inreg %a, double inreg %b, ptr addrspace(1) %ptr) { +; GFX11-LABEL: fmul_s64_uniform: +; GFX11: ; %bb.0: +; GFX11-NEXT: v_mul_f64 v[2:3], s[0:1], s[2:3] +; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: fmul_s64_uniform: +; GFX12: ; %bb.0: +; GFX12-NEXT: v_mul_f64_e64 v[2:3], s[0:1], s[2:3] +; GFX12-NEXT: global_store_b64 v[0:1], v[2:3], off +; GFX12-NEXT: s_endpgm + %result = fmul double %a, %b + store double %result, ptr addrspace(1) %ptr + ret void +} + +define amdgpu_ps void @fmul_s64_div(double %a, double %b, ptr addrspace(1) %ptr) { +; GFX11-LABEL: fmul_s64_div: +; GFX11: ; %bb.0: +; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX11-NEXT: global_store_b64 v[4:5], v[0:1], off +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: 
fmul_s64_div: +; GFX12: ; %bb.0: +; GFX12-NEXT: v_mul_f64_e32 v[0:1], v[0:1], v[2:3] +; GFX12-NEXT: global_store_b64 v[4:5], v[0:1], off +; GFX12-NEXT: s_endpgm + %result = fmul double %a, %b + store double %result, ptr addrspace(1) %ptr + ret void +} + +define amdgpu_ps <2 x half> @fmul_v2s16_uniform(<2 x half> inreg %a, <2 x half> inreg %b) { +; GFX11-LABEL: fmul_v2s16_uniform: +; GFX11: ; %bb.0: +; GFX11-NEXT: v_pk_mul_f16 v0, s0, s1 +; GFX11-NEXT: ; return to shader part epilog +; +; GFX12-LABEL: fmul_v2s16_uniform: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_lshr_b32 s2, s0, 16 +; GFX12-NEXT: s_lshr_b32 s3, s1, 16 +; GFX12-NEXT: s_mul_f16 s0, s0, s1 +; GFX12-NEXT: s_mul_f16 s1, s2, s3 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX12-NEXT: s_pack_ll_b32_b16 s0, s0, s1 +; GFX12-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-NEXT: ; return to shader part epilog + %result = fmul <2 x half> %a, %b + ret <2 x half> %result +} + +define amdgpu_ps <2 x half> @fmul_v2s16_div(<2 x half> %a, <2 x half> %b) { +; GCN-LABEL: fmul_v2s16_div: +; GCN: ; %bb.0: +; GCN-NEXT: v_pk_mul_f16 v0, v0, v1 +; GCN-NEXT: ; return to shader part epilog + %result = fmul <2 x half> %a, %b + ret <2 x half> %result +} + +define amdgpu_ps <2 x float> @fmul_v2s32_uniform(<2 x float> inreg %a, <2 x float> inreg %b) { +; GFX11-LABEL: fmul_v2s32_uniform: +; GFX11: ; %bb.0: +; GFX11-NEXT: v_mul_f32_e64 v0, s0, s2 +; GFX11-NEXT: v_mul_f32_e64 v1, s1, s3 +; GFX11-NEXT: ; return to shader part epilog +; +; GFX12-LABEL: fmul_v2s32_uniform: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_mul_f32 s0, s0, s2 +; GFX12-NEXT: s_mul_f32 s1, s1, s3 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) +; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX12-NEXT: ; return to shader part epilog + %result = fmul <2 x float> %a, %b + ret <2 x float> %result +} + +define amdgpu_ps <2 x float> @fmul_v2s32_div(<2 x float> %a, <2 x float> %b) { +; GCN-LABEL: fmul_v2s32_div: +; GCN: ; %bb.0: +; GCN-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3 +; GCN-NEXT: ; return to shader part epilog + %result = fmul <2 x float> %a, %b + ret <2 x float> %result +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll index e03aa18d3147f..1220c0e3b1ead 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll @@ -4,6 +4,8 @@ ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s +; TODO: Switch test to use -new-reg-bank-select after adding G_FNEG support. 
+ define <2 x half> @v_fmul_v2f16(<2 x half> %a, <2 x half> %b) { ; GFX9-LABEL: v_fmul_v2f16: ; GFX9: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fmul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fmul.mir index 5766c05426b2d..f289566a27c12 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fmul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fmul.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s --- name: fmul_ss @@ -17,6 +17,7 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY2]], [[COPY3]] + ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[FMUL]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_FMUL %0, %1 diff --git a/llvm/test/CodeGen/NVPTX/tcgen05-mma-tensor-formatted.ll b/llvm/test/CodeGen/NVPTX/tcgen05-mma-tensor-formatted.ll new file mode 100644 index 0000000000000..479de53dd90f2 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/tcgen05-mma-tensor-formatted.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; NOTE: This sample test demonstrates the pretty print feature for NVPTX intrinsics +; RUN: llvm-as < %s | llvm-dis | FileCheck %s + +target triple = "nvptx64-nvidia-cuda" + +define void @tcgen05_mma_fp16_cta1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d) { + ; CHECK-LABEL: define void @tcgen05_mma_fp16_cta1( + ; CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, /* kind=f16 */ i32 0, /* cta_group= */ i32 1, /* collector=discard */ i32 0) + call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 0, i32 1, i32 0) + + ; CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, /* kind=f16 */ i32 0, /* cta_group= */ i32 1, /* collector=lastuse */ i32 1) + call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 0, i32 1, i32 1) + + ; CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, /* kind=f16 */ i32 0, /* cta_group= */ i32 1, /* collector=fill */ i32 2) + call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 0, i32 1, i32 2) + + ; CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, /* kind=f16 */ i32 0, /* cta_group= */ i32 1, /* collector=use */ i32 
3) + call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 0, i32 1, i32 3) + + ret void +} + +define void @tcgen05_mma_f8f6f4_cta2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d) { + ; CHECK-LABEL: define void @tcgen05_mma_f8f6f4_cta2( + ; CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, /* kind=f8f6f4 */ i32 2, /* cta_group= */ i32 2, /* collector=discard */ i32 0) + call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 2, i32 2, i32 0) + + ; CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, /* kind=f8f6f4 */ i32 2, /* cta_group= */ i32 2, /* collector=lastuse */ i32 1) + call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 2, i32 2, i32 1) + + ; CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, /* kind=f8f6f4 */ i32 2, /* cta_group= */ i32 2, /* collector=fill */ i32 2) + call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 2, i32 2, i32 2) + + ; CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, /* kind=f8f6f4 */ i32 2, /* cta_group= */ i32 2, /* collector=use */ i32 3) + call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 2, i32 2, i32 3) + + ret void +} + +; This test verifies that printImmArg is safe to call on all constant arguments, but only prints comments for arguments that have pretty printing configured. 
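+; The leading i64 42, i32 100, and i1 true operands are also constants, but have no pretty printing configured, so no comment is expected in front of them.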
+define void @test_mixed_constants_edge_case(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor) { + ; CHECK-LABEL: define void @test_mixed_constants_edge_case( + ; CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 42, i32 100, i1 true, /* kind=i8 */ i32 3, /* cta_group= */ i32 1, /* collector=discard */ i32 0) + call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 42, i32 100, i1 true, i32 3, i32 1, i32 0) + + ret void +} + +declare void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6), ptr addrspace(6), i64, i32, i1, i32, i32, i32) diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll index 4ab4ff84dac57..fb26b8b16a290 100644 --- a/llvm/test/CodeGen/RISCV/rv64zba.ll +++ b/llvm/test/CodeGen/RISCV/rv64zba.ll @@ -5016,3 +5016,74 @@ define ptr @shl_add_knownbits(ptr %p, i64 %i) { %r = getelementptr i8, ptr %p, i64 %shr ret ptr %r } + +define i64 @exactashr1mul6(i64 %a) { +; RV64I-LABEL: exactashr1mul6: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a0, 1 +; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: exactashr1mul6: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh1add a0, a0, a0 +; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: exactashr1mul6: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a0 +; RV64XANDESPERF-NEXT: ret + %c = ashr exact i64 %a, 1 + %d = mul i64 %c, 6 + ret i64 %d +} + +define i64 @exactlshr3mul22(i64 %a) { +; RV64I-LABEL: exactlshr3mul22: +; RV64I: # %bb.0: +; RV64I-NEXT: srli a0, a0, 3 +; RV64I-NEXT: li a1, 22 +; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: exactlshr3mul22: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: srli a0, a0, 2 +; RV64ZBA-NEXT: sh2add a1, a0, a0 +; RV64ZBA-NEXT: sh1add a0, a1, a0 +; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: exactlshr3mul22: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: srli a0, a0, 2 +; RV64XANDESPERF-NEXT: nds.lea.w a1, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a1 +; RV64XANDESPERF-NEXT: ret + %c = lshr exact i64 %a, 3 + %d = mul i64 %c, 22 + ret i64 %d +} + +define i64 @exactashr1mul36(i64 %a) { +; RV64I-LABEL: exactashr1mul36: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a0, 1 +; RV64I-NEXT: slli a0, a0, 4 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: exactashr1mul36: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: slli a0, a0, 1 +; RV64ZBA-NEXT: sh3add a0, a0, a0 +; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: exactashr1mul36: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: slli a0, a0, 1 +; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a0 +; RV64XANDESPERF-NEXT: ret + %c = ashr exact i64 %a, 1 + %d = mul i64 %c, 36 + ret i64 %d +} diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll index 0fbfb42d2a4dd..78a02b11b17bb 100644 --- a/llvm/test/CodeGen/X86/O0-pipeline.ll +++ b/llvm/test/CodeGen/X86/O0-pipeline.ll @@ -68,8 +68,6 @@ ; CHECK-NEXT: X86 Indirect Branch Tracking ; CHECK-NEXT: X86 vzeroupper inserter ; CHECK-NEXT: Compressing EVEX instrs when possible -; CHECK-NEXT: X86 Discriminate Memory Operands -; CHECK-NEXT: X86 Insert Cache Prefetches ; CHECK-NEXT: X86 insert wait instruction ; CHECK-NEXT: Contiguously Lay Out Funclets ; CHECK-NEXT: Remove Loads Into Fake Uses diff --git a/llvm/test/CodeGen/X86/discriminate-mem-ops-missing-info.ll b/llvm/test/CodeGen/X86/discriminate-mem-ops-missing-info.ll deleted file mode 100644 index 6bbf3eb307da3..0000000000000 --- 
a/llvm/test/CodeGen/X86/discriminate-mem-ops-missing-info.ll +++ /dev/null @@ -1,55 +0,0 @@ -; RUN: llc -x86-discriminate-memops < %s | FileCheck %s -; -; original source, compiled with -O3 -gmlt -fdebug-info-for-profiling: -; int sum(int* arr, int pos1, int pos2) { -; return arr[pos1] + arr[pos2]; -; } -; -; ModuleID = 'test.cc' -source_filename = "test.cc" -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -declare void @llvm.prefetch(ptr, i32, i32, i32) -; Function Attrs: norecurse nounwind readonly uwtable -define i32 @sum(ptr %arr, i32 %pos1, i32 %pos2) !dbg !7 { -entry: - %idxprom = sext i32 %pos1 to i64 - %arrayidx = getelementptr inbounds i32, ptr %arr, i64 %idxprom - %0 = load i32, ptr %arrayidx, align 4 - %idxprom1 = sext i32 %pos2 to i64 - %arrayidx2 = getelementptr inbounds i32, ptr %arr, i64 %idxprom1 - %1 = load i32, ptr %arrayidx2, align 4 - %add = add nsw i32 %1, %0, !dbg !15 - ret i32 %add -} - -attributes #0 = { "target-cpu"="x86-64" } - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4, !5} -!llvm.ident = !{!6} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, debugInfoForProfiling: true) -!1 = !DIFile(filename: "test.cc", directory: "/tmp") -!2 = !{} -!3 = !{i32 2, !"Dwarf Version", i32 4} -!4 = !{i32 2, !"Debug Info Version", i32 3} -!5 = !{i32 1, !"wchar_size", i32 4} -!6 = !{!"clang version 7.0.0 (trunk 322155) (llvm/trunk 322159)"} -!7 = distinct !DISubprogram(name: "sum", linkageName: "sum", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0) -!8 = !DISubroutineType(types: !2) -!9 = !DILocation(line: 2, column: 10, scope: !7) -!10 = !{!11, !11, i64 0} -!11 = !{!"int", !12, i64 0} -!12 = !{!"omnipotent char", !13, i64 0} -!13 = !{!"Simple C++ TBAA"} -!15 = !DILocation(line: 2, column: 20, scope: !7) - - -;CHECK-LABEL: sum: -;CHECK: # %bb.0: -;CHECK: .loc 1 1 0 {{.*}} discriminator 2 -;CHECK-NEXT: movl (%rdi,%rax,4), %eax -;CHECK-NEXT: .loc 1 2 20 -;CHECK-NEXT: addl (%rdi,%rcx,4), %eax diff --git a/llvm/test/CodeGen/X86/discriminate-mem-ops-skip-pfetch.ll b/llvm/test/CodeGen/X86/discriminate-mem-ops-skip-pfetch.ll deleted file mode 100644 index ca412c590b2e3..0000000000000 --- a/llvm/test/CodeGen/X86/discriminate-mem-ops-skip-pfetch.ll +++ /dev/null @@ -1,68 +0,0 @@ -; RUN: llc -x86-discriminate-memops < %s | FileCheck %s -; RUN: llc -x86-discriminate-memops -x86-bypass-prefetch-instructions=0 < %s | FileCheck %s -check-prefix=NOBYPASS -; -; original source, compiled with -O3 -gmlt -fdebug-info-for-profiling: -; int sum(int* arr, int pos1, int pos2) { -; return arr[pos1] + arr[pos2]; -; } -; -; ModuleID = 'test.cc' -source_filename = "test.cc" -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -declare void @llvm.prefetch(ptr, i32, i32, i32) -; Function Attrs: norecurse nounwind readonly uwtable -define i32 @sum(ptr %arr, i32 %pos1, i32 %pos2) !dbg !7 { -entry: - %idxprom = sext i32 %pos1 to i64, !dbg !9 - %arrayidx = getelementptr inbounds i32, ptr %arr, i64 %idxprom, !dbg !9 - %0 = load i32, ptr %arrayidx, align 4, !dbg !9, !tbaa !10 - %idxprom1 = sext i32 %pos2 to i64, !dbg !14 - %arrayidx2 = getelementptr inbounds i32, ptr %arr, i64 %idxprom1, !dbg !14 - call void @llvm.prefetch(ptr %arrayidx2, i32 0, i32 3, i32 1) - %1 = load i32, ptr %arrayidx2, align 4, 
!dbg !14, !tbaa !10 - %add = add nsw i32 %1, %0, !dbg !15 - ret i32 %add, !dbg !16 -} - -attributes #0 = { "target-cpu"="x86-64" } - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4, !5} -!llvm.ident = !{!6} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, debugInfoForProfiling: true) -!1 = !DIFile(filename: "test.cc", directory: "/tmp") -!2 = !{} -!3 = !{i32 2, !"Dwarf Version", i32 4} -!4 = !{i32 2, !"Debug Info Version", i32 3} -!5 = !{i32 1, !"wchar_size", i32 4} -!6 = !{!"clang version 7.0.0 (trunk 322155) (llvm/trunk 322159)"} -!7 = distinct !DISubprogram(name: "sum", linkageName: "sum", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0) -!8 = !DISubroutineType(types: !2) -!9 = !DILocation(line: 2, column: 10, scope: !7) -!10 = !{!11, !11, i64 0} -!11 = !{!"int", !12, i64 0} -!12 = !{!"omnipotent char", !13, i64 0} -!13 = !{!"Simple C++ TBAA"} -!14 = !DILocation(line: 2, column: 22, scope: !7) -!15 = !DILocation(line: 2, column: 20, scope: !7) -!16 = !DILocation(line: 2, column: 3, scope: !7) - -;CHECK-LABEL: sum: -;CHECK: # %bb.0: -;CHECK: prefetcht0 (%rdi,%rax,4) -;CHECK-NEXT: movl (%rdi,%rax,4), %eax -;CHECK-NEXT: .loc 1 2 20 discriminator 2 # test.cc:2:20 -;CHECK-NEXT: addl (%rdi,%rcx,4), %eax -;CHECK-NEXT: .loc 1 2 3 # test.cc:2:3 - -;NOBYPASS-LABEL: sum: -;NOBYPASS: # %bb.0: -;NOBYPASS: prefetcht0 (%rdi,%rax,4) -;NOBYPASS-NEXT: .loc 1 2 22 -;NOBYPASS-NEXT: movl (%rdi,%rax,4), %eax -;NOBYPASS-NEXT: .loc 1 2 20 {{.*}} discriminator 2 # test.cc:2:20 -;NOBYPASS-NEXT: addl (%rdi,%rcx,4), %eax -;NOBYPASS-NEXT: .loc 1 2 3 # test.cc:2:3 diff --git a/llvm/test/CodeGen/X86/discriminate-mem-ops.ll b/llvm/test/CodeGen/X86/discriminate-mem-ops.ll deleted file mode 100644 index a8421d9506a87..0000000000000 --- a/llvm/test/CodeGen/X86/discriminate-mem-ops.ll +++ /dev/null @@ -1,55 +0,0 @@ -; RUN: llc -x86-discriminate-memops < %s | FileCheck %s -; -; original source, compiled with -O3 -gmlt -fdebug-info-for-profiling: -; int sum(int* arr, int pos1, int pos2) { -; return arr[pos1] + arr[pos2]; -; } -; -; ModuleID = 'test.cc' -source_filename = "test.cc" -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: norecurse nounwind readonly uwtable -define i32 @sum(ptr %arr, i32 %pos1, i32 %pos2) !dbg !7 { -entry: - %idxprom = sext i32 %pos1 to i64, !dbg !9 - %arrayidx = getelementptr inbounds i32, ptr %arr, i64 %idxprom, !dbg !9 - %0 = load i32, ptr %arrayidx, align 4, !dbg !9, !tbaa !10 - %idxprom1 = sext i32 %pos2 to i64, !dbg !14 - %arrayidx2 = getelementptr inbounds i32, ptr %arr, i64 %idxprom1, !dbg !14 - %1 = load i32, ptr %arrayidx2, align 4, !dbg !14, !tbaa !10 - %add = add nsw i32 %1, %0, !dbg !15 - ret i32 %add, !dbg !16 -} - -attributes #0 = { "target-cpu"="x86-64" } - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4, !5} -!llvm.ident = !{!6} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, debugInfoForProfiling: true) -!1 = !DIFile(filename: "test.cc", directory: "/tmp") -!2 = !{} -!3 = !{i32 2, !"Dwarf Version", i32 4} -!4 = !{i32 2, !"Debug Info Version", i32 3} -!5 = !{i32 1, !"wchar_size", i32 4} -!6 = !{!"clang version 7.0.0 (trunk 322155) (llvm/trunk 322159)"} -!7 = distinct !DISubprogram(name: "sum", 
linkageName: "sum", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0) -!8 = !DISubroutineType(types: !2) -!9 = !DILocation(line: 2, column: 10, scope: !7) -!10 = !{!11, !11, i64 0} -!11 = !{!"int", !12, i64 0} -!12 = !{!"omnipotent char", !13, i64 0} -!13 = !{!"Simple C++ TBAA"} -!14 = !DILocation(line: 2, column: 22, scope: !7) -!15 = !DILocation(line: 2, column: 20, scope: !7) -!16 = !DILocation(line: 2, column: 3, scope: !7) - -;CHECK-LABEL: sum: -;CHECK: # %bb.0: -;CHECK: movl (%rdi,%rax,4), %eax -;CHECK-NEXT: .loc 1 2 20 discriminator 2 # test.cc:2:20 -;CHECK-NEXT: addl (%rdi,%rcx,4), %eax -;CHECK-NEXT: .loc 1 2 3 # test.cc:2:3 diff --git a/llvm/test/CodeGen/X86/insert-prefetch-inline.afdo b/llvm/test/CodeGen/X86/insert-prefetch-inline.afdo deleted file mode 100644 index 935b707ff1072..0000000000000 --- a/llvm/test/CodeGen/X86/insert-prefetch-inline.afdo +++ /dev/null @@ -1,4 +0,0 @@ -caller:0:0 - 2: sum:0 - 3: 0 __prefetch_nta_0:23456 - 3.1: 0 __prefetch_nta_0:8764 __prefetch_nta_1:64 \ No newline at end of file diff --git a/llvm/test/CodeGen/X86/insert-prefetch-inline.ll b/llvm/test/CodeGen/X86/insert-prefetch-inline.ll deleted file mode 100644 index 05f542799c08b..0000000000000 --- a/llvm/test/CodeGen/X86/insert-prefetch-inline.ll +++ /dev/null @@ -1,76 +0,0 @@ -; RUN: llc < %s -x86-discriminate-memops -prefetch-hints-file=%S/insert-prefetch-inline.afdo | FileCheck %s -; -; Verify we can insert prefetch instructions in code belonging to inlined -; functions. -; -; ModuleID = 'test.cc' - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: norecurse nounwind readonly uwtable -define dso_local i32 @sum(ptr nocapture readonly %arr, i32 %pos1, i32 %pos2) local_unnamed_addr #0 !dbg !7 { -entry: - %idxprom = sext i32 %pos1 to i64, !dbg !10 - %arrayidx = getelementptr inbounds i32, ptr %arr, i64 %idxprom, !dbg !10 - %0 = load i32, ptr %arrayidx, align 4, !dbg !10, !tbaa !11 - %idxprom1 = sext i32 %pos2 to i64, !dbg !15 - %arrayidx2 = getelementptr inbounds i32, ptr %arr, i64 %idxprom1, !dbg !15 - %1 = load i32, ptr %arrayidx2, align 4, !dbg !15, !tbaa !11 - %add = add nsw i32 %1, %0, !dbg !16 - ret i32 %add, !dbg !17 -} - -; "caller" inlines "sum". The associated .afdo file references instructions -; in "caller" that came from "sum"'s inlining. 
-; -; Function Attrs: norecurse nounwind readonly uwtable -define dso_local i32 @caller(ptr nocapture readonly %arr) local_unnamed_addr #0 !dbg !18 { -entry: - %0 = load i32, ptr %arr, align 4, !dbg !19, !tbaa !11 - %arrayidx2.i = getelementptr inbounds i32, ptr %arr, i64 2, !dbg !21 - %1 = load i32, ptr %arrayidx2.i, align 4, !dbg !21, !tbaa !11 - %add.i = add nsw i32 %1, %0, !dbg !22 - ret i32 %add.i, !dbg !23 -} - -attributes #0 = { "target-cpu"="x86-64" } - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4, !5} -!llvm.ident = !{!6} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 7.0.0 (trunk 324940) (llvm/trunk 324941)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, debugInfoForProfiling: true) -!1 = !DIFile(filename: "test.cc", directory: "/tmp") -!2 = !{} -!3 = !{i32 2, !"Dwarf Version", i32 4} -!4 = !{i32 2, !"Debug Info Version", i32 3} -!5 = !{i32 1, !"wchar_size", i32 4} -!6 = !{!"clang version 7.0.0 (trunk 324940) (llvm/trunk 324941)"} -!7 = distinct !DISubprogram(name: "sum", linkageName: "sum", scope: !8, file: !8, line: 3, type: !9, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0) -!8 = !DIFile(filename: "./test.h", directory: "/tmp") -!9 = !DISubroutineType(types: !2) -!10 = !DILocation(line: 6, column: 10, scope: !7) -!11 = !{!12, !12, i64 0} -!12 = !{!"int", !13, i64 0} -!13 = !{!"omnipotent char", !14, i64 0} -!14 = !{!"Simple C++ TBAA"} -!15 = !DILocation(line: 6, column: 22, scope: !7) -!16 = !DILocation(line: 6, column: 20, scope: !7) -!17 = !DILocation(line: 6, column: 3, scope: !7) -!18 = distinct !DISubprogram(name: "caller", linkageName: "caller", scope: !1, file: !1, line: 4, type: !9, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: true, unit: !0) -!19 = !DILocation(line: 6, column: 10, scope: !7, inlinedAt: !20) -!20 = distinct !DILocation(line: 6, column: 10, scope: !18) -!21 = !DILocation(line: 6, column: 22, scope: !7, inlinedAt: !20) -!22 = !DILocation(line: 6, column: 20, scope: !7, inlinedAt: !20) -!23 = !DILocation(line: 6, column: 3, scope: !18) - -; CHECK-LABEL: caller: -; CHECK-LABEL: # %bb.0: -; CHECK-NEXT: .loc 1 6 22 prologue_end -; CHECK-NEXT: prefetchnta 23464(%rdi) -; CHECK-NEXT: movl 8(%rdi), %eax -; CHECK-NEXT: .loc 1 6 20 is_stmt 0 discriminator 2 -; CHECK-NEXT: prefetchnta 8764(%rdi) -; CHECK-NEXT: prefetchnta 64(%rdi) -; CHECK-NEXT: addl (%rdi), %eax diff --git a/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.afdo b/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.afdo deleted file mode 100644 index 6385a498b8f92..0000000000000 --- a/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.afdo +++ /dev/null @@ -1,2 +0,0 @@ -main:0:0 - 6: 0 __prefetch_nta_0:42 \ No newline at end of file diff --git a/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll b/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll deleted file mode 100644 index f8e25028cfdee..0000000000000 --- a/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll +++ /dev/null @@ -1,41 +0,0 @@ -; RUN: llc < %s -x86-discriminate-memops -prefetch-hints-file=%S/insert-prefetch-invalid-instr.afdo | FileCheck %s -; ModuleID = 'prefetch.cc' -source_filename = "prefetch.cc" -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: norecurse nounwind uwtable -define dso_local i32 @main() local_unnamed_addr #0 !dbg !7 { -entry: - tail 
call void @llvm.prefetch(ptr inttoptr (i64 291 to ptr), i32 0, i32 0, i32 1), !dbg !9 - ret i32 291, !dbg !11 -} - -; Function Attrs: inaccessiblemem_or_argmemonly nounwind -declare void @llvm.prefetch(ptr nocapture readonly, i32, i32, i32) #1 - -attributes #0 = {"target-cpu"="x86-64" "target-features"="+sse4.2,+ssse3"} -attributes #1 = { inaccessiblemem_or_argmemonly nounwind } -attributes #2 = { argmemonly nounwind } - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4, !5} -!llvm.ident = !{!6} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, debugInfoForProfiling: true) -!1 = !DIFile(filename: "prefetch.cc", directory: "/tmp") -!2 = !{} -!3 = !{i32 2, !"Dwarf Version", i32 4} -!4 = !{i32 2, !"Debug Info Version", i32 3} -!5 = !{i32 1, !"wchar_size", i32 4} -!6 = !{!"clang version 7.0.0 (trunk 327078) (llvm/trunk 327086)"} -!7 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 8, type: !8, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: true, unit: !0) -!8 = !DISubroutineType(types: !2) -!9 = !DILocation(line: 12, column: 3, scope: !7) -!10 = !DILocation(line: 14, column: 3, scope: !7) -!11 = !DILocation(line: 15, column: 3, scope: !7) - -;CHECK-LABEL: main: -;CHECK: # %bb.0: -;CHECK: prefetchnta 291 -;CHECK-NOT: prefetchnta 42(%rax,%ymm0) diff --git a/llvm/test/CodeGen/X86/insert-prefetch-other.afdo b/llvm/test/CodeGen/X86/insert-prefetch-other.afdo deleted file mode 100644 index 783da34f7f84c..0000000000000 --- a/llvm/test/CodeGen/X86/insert-prefetch-other.afdo +++ /dev/null @@ -1,3 +0,0 @@ -sum:0:0 - 1: 0 __prefetch_t0_1:0 __prefetch_t2_0:42 - 1.1: 0 __prefetch_t1_0:18446744073709551615 diff --git a/llvm/test/CodeGen/X86/insert-prefetch.afdo b/llvm/test/CodeGen/X86/insert-prefetch.afdo deleted file mode 100644 index 96487e85eaaf2..0000000000000 --- a/llvm/test/CodeGen/X86/insert-prefetch.afdo +++ /dev/null @@ -1,3 +0,0 @@ -sum:0:0 - 1: 0 __prefetch_nta_1:0 __prefetch_nta_0:42 - 1.1: 0 __prefetch_nta_0:18446744073709551615 diff --git a/llvm/test/CodeGen/X86/insert-prefetch.ll b/llvm/test/CodeGen/X86/insert-prefetch.ll deleted file mode 100644 index 971a6193862d0..0000000000000 --- a/llvm/test/CodeGen/X86/insert-prefetch.ll +++ /dev/null @@ -1,101 +0,0 @@ -; RUN: llc < %s -x86-discriminate-memops -prefetch-hints-file=%S/insert-prefetch.afdo | FileCheck %s -; RUN: llc < %s -x86-discriminate-memops -prefetch-hints-file=%S/insert-prefetch-other.afdo | FileCheck %s -check-prefix=OTHERS -; -; original source, compiled with -O3 -gmlt -fdebug-info-for-profiling: -; int sum(int* arr, int pos1, int pos2) { -; return arr[pos1] + arr[pos2]; -; } -; -; NOTE: debug line numbers were adjusted such that the function would start -; at line 15 (an arbitrary number). The sample profile file format uses -; offsets from the start of the symbol instead of file-relative line numbers. -; The .afdo file reflects that - the instructions are offset '1'. 
-; -; ModuleID = 'test.cc' -source_filename = "test.cc" -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -define i32 @sum(ptr %arr, i32 %pos1, i32 %pos2) !dbg !35 !prof !37 { -entry: - %idxprom = sext i32 %pos1 to i64, !dbg !38 - %arrayidx = getelementptr inbounds i32, ptr %arr, i64 %idxprom, !dbg !38 - %0 = load i32, ptr %arrayidx, align 4, !dbg !38, !tbaa !39 - %idxprom1 = sext i32 %pos2 to i64, !dbg !43 - %arrayidx2 = getelementptr inbounds i32, ptr %arr, i64 %idxprom1, !dbg !43 - %1 = load i32, ptr %arrayidx2, align 4, !dbg !43, !tbaa !39 - %add = add nsw i32 %1, %0, !dbg !44 - ret i32 %add, !dbg !45 -} - -attributes #0 = { "target-cpu"="x86-64" } - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4, !5, !6} -!llvm.ident = !{!33} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, debugInfoForProfiling: true) -!1 = !DIFile(filename: "test.cc", directory: "/tmp") -!2 = !{} -!3 = !{i32 2, !"Dwarf Version", i32 4} -!4 = !{i32 2, !"Debug Info Version", i32 3} -!5 = !{i32 1, !"wchar_size", i32 4} -!6 = !{i32 1, !"ProfileSummary", !7} -!7 = !{!8, !9, !10, !11, !12, !13, !14, !15} -!8 = !{!"ProfileFormat", !"SampleProfile"} -!9 = !{!"TotalCount", i64 0} -!10 = !{!"MaxCount", i64 0} -!11 = !{!"MaxInternalCount", i64 0} -!12 = !{!"MaxFunctionCount", i64 0} -!13 = !{!"NumCounts", i64 2} -!14 = !{!"NumFunctions", i64 1} -!15 = !{!"DetailedSummary", !16} -!16 = !{!17, !18, !19, !20, !21, !22, !22, !23, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32} -!17 = !{i32 10000, i64 0, i32 0} -!18 = !{i32 100000, i64 0, i32 0} -!19 = !{i32 200000, i64 0, i32 0} -!20 = !{i32 300000, i64 0, i32 0} -!21 = !{i32 400000, i64 0, i32 0} -!22 = !{i32 500000, i64 0, i32 0} -!23 = !{i32 600000, i64 0, i32 0} -!24 = !{i32 700000, i64 0, i32 0} -!25 = !{i32 800000, i64 0, i32 0} -!26 = !{i32 900000, i64 0, i32 0} -!27 = !{i32 950000, i64 0, i32 0} -!28 = !{i32 990000, i64 0, i32 0} -!29 = !{i32 999000, i64 0, i32 0} -!30 = !{i32 999900, i64 0, i32 0} -!31 = !{i32 999990, i64 0, i32 0} -!32 = !{i32 999999, i64 0, i32 0} -!33 = !{!"clang version 7.0.0 (trunk 322593) (llvm/trunk 322526)"} -!35 = distinct !DISubprogram(name: "sum", linkageName: "sum", scope: !1, file: !1, line: 15, type: !36, isLocal: false, isDefinition: true, scopeLine: 15, flags: DIFlagPrototyped, isOptimized: true, unit: !0) -!36 = !DISubroutineType(types: !2) -!37 = !{!"function_entry_count", i64 -1} -!38 = !DILocation(line: 16, column: 10, scope: !35) -!39 = !{!40, !40, i64 0} -!40 = !{!"int", !41, i64 0} -!41 = !{!"omnipotent char", !42, i64 0} -!42 = !{!"Simple C++ TBAA"} -!43 = !DILocation(line: 16, column: 22, scope: !35) -!44 = !DILocation(line: 16, column: 20, scope: !35) -!45 = !DILocation(line: 16, column: 3, scope: !35) - -;CHECK-LABEL: sum: -;CHECK: # %bb.0: -;CHECK: prefetchnta 42(%rdi,%rax,4) -;CHECK-NEXT: prefetchnta (%rdi,%rax,4) -;CHECK-NEXT: movl (%rdi,%rax,4), %eax -;CHECK-NEXT: .loc 1 16 20 discriminator 2 # test.cc:16:20 -;CHECK-NEXT: prefetchnta -1(%rdi,%rcx,4) -;CHECK-NEXT: addl (%rdi,%rcx,4), %eax -;CHECK-NEXT: .loc 1 16 3 # test.cc:16:3 - -;OTHERS-LABEL: sum: -;OTHERS: # %bb.0: -;OTHERS: prefetcht2 42(%rdi,%rax,4) -;OTHERS-NEXT: prefetcht0 (%rdi,%rax,4) -;OTHERS-NEXT: movl (%rdi,%rax,4), %eax -;OTHERS-NEXT: .loc 1 16 20 discriminator 2 # test.cc:16:20 -;OTHERS-NEXT: prefetcht1 -1(%rdi,%rcx,4) -;OTHERS-NEXT: addl (%rdi,%rcx,4), %eax -;OTHERS-NEXT: .loc 1 16 3 # 
test.cc:16:3
diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll
index 81390e59d0d0a..276232e27c000 100644
--- a/llvm/test/CodeGen/X86/opt-pipeline.ll
+++ b/llvm/test/CodeGen/X86/opt-pipeline.ll
@@ -208,8 +208,6 @@
 ; CHECK-NEXT: X86 Fixup Inst Tuning
 ; CHECK-NEXT: X86 Fixup Vector Constants
 ; CHECK-NEXT: Compressing EVEX instrs when possible
-; CHECK-NEXT: X86 Discriminate Memory Operands
-; CHECK-NEXT: X86 Insert Cache Prefetches
 ; CHECK-NEXT: X86 insert wait instruction
 ; CHECK-NEXT: Contiguously Lay Out Funclets
 ; CHECK-NEXT: Remove Loads Into Fake Uses
diff --git a/llvm/test/MC/AsmParser/directive_base64.s b/llvm/test/MC/AsmParser/directive_base64.s
new file mode 100644
index 0000000000000..46a477eef51dc
--- /dev/null
+++ b/llvm/test/MC/AsmParser/directive_base64.s
@@ -0,0 +1,37 @@
+# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
+# RUN: not llvm-mc -triple i386-unknown-unknown -defsym=ERR=1 -o /dev/null %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
+
+ .data
+# CHECK-LABEL: TEST0:
+# CHECK-NEXT: .byte 0
+TEST0:
+ .base64 "AA=="
+
+# CHECK-LABEL: TEST1:
+# CHECK-NEXT: .ascii "abcxyz"
+TEST1:
+ .base64 "YWJjeHl6"
+
+# CHECK-LABEL: TEST2:
+# CHECK-NEXT: .byte 1
+# CHECK-NEXT: .byte 2
+TEST2:
+ .base64 "AQ=="
+ .base64 "Ag=="
+
+# CHECK-LABEL: TEST3:
+# CHECK-NEXT: .byte 1
+# CHECK-NEXT: .byte 2
+TEST3:
+ .base64 "AQ==", "Ag=="
+
+.ifdef ERR
+# CHECK-ERROR: [[#@LINE+1]]:17: error: expected string
+ .base64 not-a-string
+
+# CHECK-ERROR: [[#@LINE+1]]:17: error: failed to base64 decode string data
+ .base64 "AA"
+
+# CHECK-ERROR: [[#@LINE+1]]:17: error: expected nonempty string
+ .base64 ""
+.endif
diff --git a/llvm/test/TableGen/intrinsic-arginfo.td b/llvm/test/TableGen/intrinsic-arginfo.td
new file mode 100644
index 0000000000000..eab1f5e032bc3
--- /dev/null
+++ b/llvm/test/TableGen/intrinsic-arginfo.td
@@ -0,0 +1,71 @@
+// RUN: llvm-tblgen -gen-intrinsic-impl -I %p/../../include %s | FileCheck %s
+
+// Test ArgInfo property for pretty-printing intrinsic arguments.
+// This test verifies that TableGen generates the correct pretty-printing code
+// for intrinsics that use the ArgInfo property.
+
+include "llvm/IR/Intrinsics.td"
+
+// Simple intrinsic with two arguments that have ArgInfo.
+def int_dummy_foo_bar : DefaultAttrsIntrinsic<
+    [llvm_i32_ty],
+    [llvm_i32_ty,   // data
+     llvm_i32_ty,   // mode
+     llvm_i32_ty],  // stride
+    [IntrNoMem,
+     ImmArg<ArgIndex<1>>,
+     ArgInfo<ArgIndex<1>, [ArgName<"mode">, ImmArgPrinter<"printDummyMode">]>,
+     ArgInfo<ArgIndex<2>, [ArgName<"stride">]>]>;
+
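+// With the ArgInfo annotations above, the generated Intrinsic::printImmArg
+// is expected to behave roughly as follows (a sketch; ModeVal and StrideVal
+// stand for the constant immediate operands of a call to this intrinsic):
+//
+//   printImmArg(Intrinsic::dummy_foo_bar, /*ArgIdx=*/1, OS, ModeVal);
+//   // prints "mode=" and then whatever printDummyMode emits
+//   printImmArg(Intrinsic::dummy_foo_bar, /*ArgIdx=*/2, OS, StrideVal);
+//   // prints "stride=" only, since no ImmArgPrinter is attached
+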
+// A custom floating point add with rounding and sat mode.
+def int_my_fadd_f32 : DefaultAttrsIntrinsic<
+    [llvm_float_ty],
+    [llvm_float_ty,  // a
+     llvm_float_ty,  // b
+     llvm_i32_ty,    // rounding_mode
+     llvm_i1_ty],    // saturation_mode
+    [IntrNoMem,
+     ImmArg<ArgIndex<2>>,
+     ImmArg<ArgIndex<3>>,
+     ArgInfo<ArgIndex<2>, [ArgName<"rounding_mode">, ImmArgPrinter<"printRoundingMode">]>,
+     ArgInfo<ArgIndex<3>, [ArgName<"saturation_mode">]>]>;
+
+// CHECK: #ifdef GET_INTRINSIC_PRETTY_PRINT_TABLE
+// CHECK-NEXT: static constexpr uint8_t PPTable[] = {
+
+// CHECK: #endif // GET_INTRINSIC_PRETTY_PRINT_TABLE
+
+// CHECK: #ifdef GET_INTRINSIC_PRETTY_PRINT_ARGUMENTS
+// CHECK: void Intrinsic::printImmArg(ID IID, unsigned ArgIdx, raw_ostream &OS, const Constant *ImmArgVal) {
+
+// CHECK: case dummy_foo_bar:
+// CHECK-NEXT: switch (ArgIdx) {
+
+// CHECK-NEXT: case 1:
+// CHECK-NEXT: OS << "mode=";
+// CHECK-NEXT: printDummyMode(OS, ImmArgVal);
+// CHECK-NEXT: return;
+
+// CHECK-NEXT: case 2:
+// CHECK-NEXT: OS << "stride=";
+// CHECK-NEXT: return;
+
+// CHECK-NEXT: }
+// CHECK-NEXT: break;
+
+// CHECK: case my_fadd_f32:
+// CHECK-NEXT: switch (ArgIdx) {
+
+// CHECK-NEXT: case 2:
+// CHECK-NEXT: OS << "rounding_mode=";
+// CHECK-NEXT: printRoundingMode(OS, ImmArgVal);
+// CHECK-NEXT: return;
+
+// CHECK-NEXT: case 3:
+// CHECK-NEXT: OS << "saturation_mode=";
+// CHECK-NEXT: return;
+
+// CHECK-NEXT: }
+// CHECK-NEXT: break;
+
+// CHECK: #endif // GET_INTRINSIC_PRETTY_PRINT_ARGUMENTS
diff --git a/llvm/test/ThinLTO/X86/memprof-fixup.ll b/llvm/test/ThinLTO/X86/memprof-fixup.ll
new file mode 100644
index 0000000000000..afed80fc562c1
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/memprof-fixup.ll
@@ -0,0 +1,129 @@
+;; Test fixup of largest cold contexts.
+
+;; This case has multiple recursive cycles in the cold context, which can be
+;; made non-recursive with the inlining in the code.
+
+;; -stats requires asserts
+; REQUIRES: asserts
+
+;; Need context sizes in summary, so enable reporting.
+; RUN: opt -thinlto-bc -memprof-report-hinted-sizes %s >%t.o
+
+;; First try disabling detection of the largest cold contexts.
+;; We will not get any cloning.
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN:   -supports-hot-cold-new \
+; RUN:   -memprof-top-n-important=0 \
+; RUN:   -r=%t.o,E,plx \
+; RUN:   -r=%t.o,DB,plx \
+; RUN:   -r=%t.o,CB,plx \
+; RUN:   -r=%t.o,A,plx \
+; RUN:   -r=%t.o,main,plx \
+; RUN:   -r=%t.o,_Znam, \
+; RUN:   -memprof-verify-ccg -memprof-verify-nodes -stats \
+; RUN:   -pass-remarks=memprof-context-disambiguation \
+; RUN:   -o %t.out 2>&1 | FileCheck %s --implicit-check-not="created clone" \
+; RUN:   --implicit-check-not="Number of cold static allocations" \
+; RUN:   --implicit-check-not="Number of function clones" \
+; RUN:   --implicit-check-not="Number of important context ids" \
+; RUN:   --implicit-check-not="Number of fixup"
+
+;; Allow default detection of the largest cold contexts, but disable fixup.
+;; We should find 1 important context, but still not get cloning.
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ +; RUN: -memprof-fixup-important=false \ +; RUN: -r=%t.o,E,plx \ +; RUN: -r=%t.o,DB,plx \ +; RUN: -r=%t.o,CB,plx \ +; RUN: -r=%t.o,A,plx \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -stats \ +; RUN: -pass-remarks=memprof-context-disambiguation \ +; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=TOPN1-NOFIXUP \ +; RUN: --implicit-check-not="created clone" \ +; RUN: --implicit-check-not="Number of cold static allocations" \ +; RUN: --implicit-check-not="Number of function clones" \ +; RUN: --implicit-check-not="Number of fixup" + +; TOPN1-NOFIXUP: 1 memprof-context-disambiguation - Number of important context ids + +;; Allow default detection of largest cold contexts, fixup is enabled by default. +;; This case should get fixup and cloning. +; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ +; RUN: -r=%t.o,E,plx \ +; RUN: -r=%t.o,DB,plx \ +; RUN: -r=%t.o,CB,plx \ +; RUN: -r=%t.o,A,plx \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -stats \ +; RUN: -pass-remarks=memprof-context-disambiguation \ +; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=TOPN1 + +; TOPN1: created clone E.memprof.1 +; TOPN1: call in clone E marked with memprof allocation attribute notcold +; TOPN1: call in clone E.memprof.1 marked with memprof allocation attribute cold +; TOPN1: created clone DB.memprof.1 +; TOPN1: call in clone DB.memprof.1 assigned to call function clone E.memprof.1 +; TOPN1: created clone CB.memprof.1 +; TOPN1: call in clone CB.memprof.1 assigned to call function clone DB.memprof.1 +; TOPN1: created clone A.memprof.1 +; TOPN1: call in clone A.memprof.1 assigned to call function clone CB.memprof.1 +; TOPN1: call in clone main assigned to call function clone A.memprof.1 + +; TOPN1: 1 memprof-context-disambiguation - Number of contexts with fixed edges +; TOPN1: 2 memprof-context-disambiguation - Number of fixup edges added +; TOPN1: 1 memprof-context-disambiguation - Number of important context ids + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @E() { +entry: + %call = tail call ptr @_Znam(i64 10), !memprof !7, !callsite !14 + ret void +} + +define void @DB() { +entry: + tail call void @E(), !callsite !17 + ret void +} + +define void @CB() { +entry: + tail call void @DB(), !callsite !22 + ret void +} + +define void @A() { +entry: + tail call void @CB(), !callsite !20 + ret void +} + +define i32 @main() { +entry: + tail call void @A(), !callsite !25 + tail call void @A(), !callsite !27 + ret i32 0 +} + +declare ptr @_Znam(i64) + +!7 = !{!8, !10} +!8 = !{!9, !"cold", !2} +!9 = !{i64 123, i64 234, i64 345, i64 234, i64 456, i64 234, i64 567, i64 678} +!2 = !{i64 12345, i64 200} +!10 = !{!11, !"notcold", !3} +!3 = !{i64 23456, i64 200} +!11 = !{i64 123, i64 234, i64 345, i64 234, i64 456, i64 234, i64 567, i64 789} +!14 = !{i64 123} +!17 = !{i64 234, i64 345} +!22 = !{i64 234, i64 456} +!20 = !{i64 234, i64 567} +!25 = !{i64 678} +!27 = !{i64 789} diff --git a/llvm/test/Transforms/LoopVectorize/metadata.ll b/llvm/test/Transforms/LoopVectorize/metadata.ll index fe25d1b231efc..ed027e8b9a895 100644 --- a/llvm/test/Transforms/LoopVectorize/metadata.ll +++ b/llvm/test/Transforms/LoopVectorize/metadata.ll @@ -497,6 +497,129 @@ 
exit: ret void } +define void @noalias_metadata(ptr align 8 %dst, ptr align 8 %src) { +; CHECK-LABEL: define void @noalias_metadata( +; CHECK-SAME: ptr align 8 [[DST:%.*]], ptr align 8 [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[SRC4:%.*]] = ptrtoint ptr [[SRC]] to i64 +; CHECK-NEXT: [[DST3:%.*]] = ptrtoint ptr [[DST]] to i64 +; CHECK-NEXT: [[SRC2:%.*]] = ptrtoint ptr [[SRC]] to i64 +; CHECK-NEXT: [[DST1:%.*]] = ptrtoint ptr [[DST]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[DST3]], [[SRC4]] +; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP4]], 2 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] +; CHECK: [[VECTOR_MEMCHECK]]: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[DST1]], 8 +; CHECK-NEXT: [[TMP22:%.*]] = sub i64 [[TMP5]], [[SRC2]] +; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP22]] +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP5]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 2 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP23:%.*]] = mul i64 [[N_VEC]], 8 +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP23]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP26:%.*]] = mul i64 [[INDEX]], 8 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP26]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x ptr>, ptr [[NEXT_GEP]], align 8, !alias.scope [[META14:![0-9]+]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x ptr> [[WIDE_LOAD]], i32 1 +; CHECK-NEXT: store ptr [[TMP7]], ptr [[DST]], align 8, !alias.scope [[META17:![0-9]+]], !noalias [[META19:![0-9]+]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP24]], %[[MIDDLE_BLOCK]] ], [ [[SRC]], %[[ENTRY]] ], [ [[SRC]], %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ [[PTR_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[VAL:%.*]] = load ptr, ptr [[PTR]], align 8 +; CHECK-NEXT: store ptr [[VAL]], ptr [[DST]], align 8, !noalias [[META23:![0-9]+]] +; CHECK-NEXT: [[PTR_NEXT]] = getelementptr inbounds i8, ptr [[PTR]], i64 8 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[PTR]], [[DST]] +; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP24:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +; INTERLEAVE-LABEL: define void @noalias_metadata( +; INTERLEAVE-SAME: ptr align 8 [[DST:%.*]], ptr align 8 [[SRC:%.*]]) { +; INTERLEAVE-NEXT: 
[[ENTRY:.*]]: +; INTERLEAVE-NEXT: [[SRC4:%.*]] = ptrtoint ptr [[SRC]] to i64 +; INTERLEAVE-NEXT: [[DST3:%.*]] = ptrtoint ptr [[DST]] to i64 +; INTERLEAVE-NEXT: [[SRC2:%.*]] = ptrtoint ptr [[SRC]] to i64 +; INTERLEAVE-NEXT: [[DST1:%.*]] = ptrtoint ptr [[DST]] to i64 +; INTERLEAVE-NEXT: [[TMP2:%.*]] = sub i64 [[DST3]], [[SRC4]] +; INTERLEAVE-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3 +; INTERLEAVE-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 +; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP4]], 4 +; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] +; INTERLEAVE: [[VECTOR_MEMCHECK]]: +; INTERLEAVE-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 8 +; INTERLEAVE-NEXT: [[TMP5:%.*]] = add i64 [[DST1]], 8 +; INTERLEAVE-NEXT: [[TMP22:%.*]] = sub i64 [[TMP5]], [[SRC2]] +; INTERLEAVE-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP22]] +; INTERLEAVE-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP5]] +; INTERLEAVE-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]] +; INTERLEAVE-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; INTERLEAVE-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; INTERLEAVE: [[VECTOR_PH]]: +; INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 4 +; INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]] +; INTERLEAVE-NEXT: [[TMP23:%.*]] = mul i64 [[N_VEC]], 8 +; INTERLEAVE-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP23]] +; INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] +; INTERLEAVE: [[VECTOR_BODY]]: +; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; INTERLEAVE-NEXT: [[TMP26:%.*]] = mul i64 [[INDEX]], 8 +; INTERLEAVE-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP26]] +; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 2 +; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x ptr>, ptr [[TMP7]], align 8, !alias.scope [[META14:![0-9]+]] +; INTERLEAVE-NEXT: [[TMP8:%.*]] = extractelement <2 x ptr> [[WIDE_LOAD]], i32 1 +; INTERLEAVE-NEXT: store ptr [[TMP8]], ptr [[DST]], align 8, !alias.scope [[META17:![0-9]+]], !noalias [[META19:![0-9]+]] +; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; INTERLEAVE-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; INTERLEAVE-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; INTERLEAVE: [[MIDDLE_BLOCK]]: +; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] +; INTERLEAVE-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; INTERLEAVE: [[SCALAR_PH]]: +; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP24]], %[[MIDDLE_BLOCK]] ], [ [[SRC]], %[[ENTRY]] ], [ [[SRC]], %[[VECTOR_MEMCHECK]] ] +; INTERLEAVE-NEXT: br label %[[LOOP:.*]] +; INTERLEAVE: [[LOOP]]: +; INTERLEAVE-NEXT: [[PTR:%.*]] = phi ptr [ [[PTR_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; INTERLEAVE-NEXT: [[VAL:%.*]] = load ptr, ptr [[PTR]], align 8 +; INTERLEAVE-NEXT: store ptr [[VAL]], ptr [[DST]], align 8, !noalias [[META23:![0-9]+]] +; INTERLEAVE-NEXT: [[PTR_NEXT]] = getelementptr inbounds i8, ptr [[PTR]], i64 8 +; INTERLEAVE-NEXT: [[CMP:%.*]] = icmp eq ptr [[PTR]], [[DST]] +; INTERLEAVE-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP24:![0-9]+]] +; INTERLEAVE: [[EXIT]]: +; INTERLEAVE-NEXT: ret void +; +entry: + br label %loop + +loop: + %ptr = phi ptr 
[ %ptr.next, %loop ], [ %src, %entry ] + %val = load ptr, ptr %ptr, align 8 + store ptr %val, ptr %dst, align 8, !noalias !4 + %ptr.next = getelementptr inbounds i8, ptr %ptr, i64 8 + %cmp = icmp eq ptr %ptr, %dst + br i1 %cmp, label %exit, label %loop + +exit: + ret void +} + declare i64 @foo(i64) declare double @bar(double) @@ -510,6 +633,9 @@ attributes #1 = { nounwind "vector-function-abi-variant"="_ZGV_LLVM_N2v_bar(bar_ !1 = !{ i64 0, i64 2 } !2 = !{!"Simple C/C++ TBAA"} !3 = !{!"omnipotent char", !2, i64 0} +!4 = !{!5} +!5 = distinct !{!5, !6, !"g1"} +!6 = distinct !{!6, !"t2"} ;. ; CHECK: [[CHAR_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0, i64 0} @@ -526,6 +652,17 @@ attributes #1 = { nounwind "vector-function-abi-variant"="_ZGV_LLVM_N2v_bar(bar_ ; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META5]], [[META6]]} ; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META5]], [[META6]]} ; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META6]], [[META5]]} +; CHECK: [[META14]] = !{[[META15:![0-9]+]]} +; CHECK: [[META15]] = distinct !{[[META15]], [[META16:![0-9]+]]} +; CHECK: [[META16]] = distinct !{[[META16]], !"LVerDomain"} +; CHECK: [[META17]] = !{[[META18:![0-9]+]]} +; CHECK: [[META18]] = distinct !{[[META18]], [[META16]]} +; CHECK: [[META19]] = !{[[META20:![0-9]+]], [[META15]]} +; CHECK: [[META20]] = distinct !{[[META20]], [[META21:![0-9]+]], !"g1"} +; CHECK: [[META21]] = distinct !{[[META21]], !"t2"} +; CHECK: [[LOOP22]] = distinct !{[[LOOP22]], [[META5]], [[META6]]} +; CHECK: [[META23]] = !{[[META20]]} +; CHECK: [[LOOP24]] = distinct !{[[LOOP24]], [[META5]]} ;. ; INTERLEAVE: [[CHAR_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0, i64 0} ; INTERLEAVE: [[META1]] = !{!"omnipotent char", [[META2]]} @@ -541,4 +678,15 @@ attributes #1 = { nounwind "vector-function-abi-variant"="_ZGV_LLVM_N2v_bar(bar_ ; INTERLEAVE: [[LOOP11]] = distinct !{[[LOOP11]], [[META5]], [[META6]]} ; INTERLEAVE: [[LOOP12]] = distinct !{[[LOOP12]], [[META5]], [[META6]]} ; INTERLEAVE: [[LOOP13]] = distinct !{[[LOOP13]], [[META6]], [[META5]]} +; INTERLEAVE: [[META14]] = !{[[META15:![0-9]+]]} +; INTERLEAVE: [[META15]] = distinct !{[[META15]], [[META16:![0-9]+]]} +; INTERLEAVE: [[META16]] = distinct !{[[META16]], !"LVerDomain"} +; INTERLEAVE: [[META17]] = !{[[META18:![0-9]+]]} +; INTERLEAVE: [[META18]] = distinct !{[[META18]], [[META16]]} +; INTERLEAVE: [[META19]] = !{[[META20:![0-9]+]], [[META15]]} +; INTERLEAVE: [[META20]] = distinct !{[[META20]], [[META21:![0-9]+]], !"g1"} +; INTERLEAVE: [[META21]] = distinct !{[[META21]], !"t2"} +; INTERLEAVE: [[LOOP22]] = distinct !{[[LOOP22]], [[META5]], [[META6]]} +; INTERLEAVE: [[META23]] = !{[[META20]]} +; INTERLEAVE: [[LOOP24]] = distinct !{[[LOOP24]], [[META5]]} ;. diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/fixup.ll b/llvm/test/Transforms/MemProfContextDisambiguation/fixup.ll new file mode 100644 index 0000000000000..a08f89b5bbe97 --- /dev/null +++ b/llvm/test/Transforms/MemProfContextDisambiguation/fixup.ll @@ -0,0 +1,105 @@ +;; Test fixup of largest cold contexts. + +;; This case has multiple recursive cycles in the cold context, which can be +;; made non-recursive with the inlining in the code. + +;; -stats requires asserts +; REQUIRES: asserts + +;; First try disabling detection of the largest cold contexts. +;; We will not get any cloning. 
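+;; (The call chain below is main -> A -> CB -> DB -> E, with the allocation
+;; in E. Stack id 234 repeats across the callsite metadata, which is what
+;; makes the cold context appear recursive.)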
+; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
+; RUN:   -memprof-top-n-important=0 \
+; RUN:   -memprof-verify-ccg -memprof-verify-nodes -stats \
+; RUN:   -pass-remarks=memprof-context-disambiguation \
+; RUN:   %s -S 2>&1 | FileCheck %s --implicit-check-not="created clone" \
+; RUN:   --implicit-check-not="Number of cold static allocations" \
+; RUN:   --implicit-check-not="Number of function clones" \
+; RUN:   --implicit-check-not="Number of important context ids" \
+; RUN:   --implicit-check-not="Number of fixup"
+
+;; Allow default detection of the largest cold contexts, but disable fixup.
+;; We should find 1 important context, but still not get cloning.
+; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
+; RUN:   -memprof-fixup-important=false \
+; RUN:   -memprof-verify-ccg -memprof-verify-nodes -stats \
+; RUN:   -pass-remarks=memprof-context-disambiguation \
+; RUN:   %s -S 2>&1 | FileCheck %s --check-prefix=TOPN1-NOFIXUP \
+; RUN:   --implicit-check-not="created clone" \
+; RUN:   --implicit-check-not="Number of cold static allocations" \
+; RUN:   --implicit-check-not="Number of function clones" \
+; RUN:   --implicit-check-not="Number of fixup"
+
+; TOPN1-NOFIXUP: 1 memprof-context-disambiguation - Number of important context ids
+
+;; Allow default detection of largest cold contexts, fixup is enabled by default.
+;; This case should get fixup and cloning.
+; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
+; RUN:   -memprof-verify-ccg -memprof-verify-nodes -stats \
+; RUN:   -pass-remarks=memprof-context-disambiguation \
+; RUN:   %s -S 2>&1 | FileCheck %s --check-prefix=TOPN1
+
+; TOPN1: created clone E.memprof.1
+; TOPN1: created clone DB.memprof.1
+; TOPN1: created clone CB.memprof.1
+; TOPN1: created clone A.memprof.1
+; TOPN1: call in clone main assigned to call function clone A.memprof.1
+; TOPN1: call in clone A.memprof.1 assigned to call function clone CB.memprof.1
+; TOPN1: call in clone CB.memprof.1 assigned to call function clone DB.memprof.1
+; TOPN1: call in clone DB.memprof.1 assigned to call function clone E.memprof.1
+; TOPN1: call in clone E.memprof.1 marked with memprof allocation attribute cold
+; TOPN1: call in clone E marked with memprof allocation attribute notcold
+
+; TOPN1: 1 memprof-context-disambiguation - Number of contexts with fixed edges
+; TOPN1: 2 memprof-context-disambiguation - Number of fixup edges added
+; TOPN1: 1 memprof-context-disambiguation - Number of important context ids
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @E() {
+entry:
+  %call = tail call ptr @_Znam(i64 10), !memprof !7, !callsite !14
+  ret void
+}
+
+define void @DB() {
+entry:
+  tail call void @E(), !callsite !17
+  ret void
+}
+
+define void @CB() {
+entry:
+  tail call void @DB(), !callsite !22
+  ret void
+}
+
+define void @A() {
+entry:
+  tail call void @CB(), !callsite !20
+  ret void
+}
+
+define i32 @main() {
+entry:
+  tail call void @A(), !callsite !25
+  tail call void @A(), !callsite !27
+  ret i32 0
+}
+
+declare ptr @_Znam(i64)
+
+!7 = !{!8, !10}
+!8 = !{!9, !"cold", !2}
+!9 = !{i64 123, i64 234, i64 345, i64 234, i64 456, i64 234, i64 567, i64 678}
+!2 = !{i64 12345, i64 200}
+!10 = !{!11, !"notcold", !3}
+!3 = !{i64 23456, i64 200}
+!11 = !{i64 123, i64 234, i64 345, i64 234, i64 456, i64 234, i64 567, i64 789}
+!14 = !{i64 123}
+!17 = !{i64 234, i64 345}
+!22 = !{i64 234, i64 456}
+!20 = !{i64 234,
i64 567} +!25 = !{i64 678} +!27 = !{i64 789} diff --git a/llvm/tools/llvm-objdump/OtoolOpts.td b/llvm/tools/llvm-objdump/OtoolOpts.td index dc7a5b445cffe..706d9e0182f58 100644 --- a/llvm/tools/llvm-objdump/OtoolOpts.td +++ b/llvm/tools/llvm-objdump/OtoolOpts.td @@ -14,7 +14,7 @@ def G : Flag<["-"], "G">, HelpText<"print data-in-code table">; def h : Flag<["-"], "h">, HelpText<"print mach header">; def I : Flag<["-"], "I">, HelpText<"print indirect symbol table">; def j : Flag<["-"], "j">, HelpText<"print opcode bytes">; -def l : Flag<["-"], "l">, HelpText<"print load commnads">; +def l : Flag<["-"], "l">, HelpText<"print load commands">; def L : Flag<["-"], "L">, HelpText<"print used shared libraries">; def mcpu_EQ : Joined<["-"], "mcpu=">, HelpText<"select cpu for disassembly">; def o : Flag<["-"], "o">, HelpText<"print Objective-C segment">; diff --git a/llvm/unittests/CodeGen/CMakeLists.txt b/llvm/unittests/CodeGen/CMakeLists.txt index 4d07462babefa..80d10138d7bfe 100644 --- a/llvm/unittests/CodeGen/CMakeLists.txt +++ b/llvm/unittests/CodeGen/CMakeLists.txt @@ -49,7 +49,6 @@ add_llvm_unittest(CodeGenTests TypeTraitsTest.cpp TargetOptionsTest.cpp TestAsmPrinter.cpp - MLRegAllocDevelopmentFeatures.cpp X86MCInstLowerTest.cpp ) diff --git a/llvm/unittests/CodeGen/MLRegAllocDevelopmentFeatures.cpp b/llvm/unittests/CodeGen/MLRegAllocDevelopmentFeatures.cpp deleted file mode 100644 index 00c2c3abf8533..0000000000000 --- a/llvm/unittests/CodeGen/MLRegAllocDevelopmentFeatures.cpp +++ /dev/null @@ -1,293 +0,0 @@ -//===- MLRegAllocDevelopmentFeatures.cpp - test dev MLRegAlloc features ---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "../../lib/CodeGen/MLRegAllocEvictAdvisor.h" -#include "llvm/Analysis/NoInferenceModelRunner.h" -#include "llvm/CodeGen/CodeGenTargetMachineImpl.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/SlotIndexes.h" -#include "llvm/CodeGen/TargetFrameLowering.h" -#include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetLowering.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" -#include "llvm/MC/TargetRegistry.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/CodeGen.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/TargetParser/Triple.h" -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -#include -#include - -using namespace llvm; -using testing::ContainerEq; -using testing::Test; - -namespace { - -#include "MFCommon.inc" - -struct LRPosInfoIndexes { - size_t StartIndex; - size_t EndIndex; - size_t PhysReg; -}; - -class RegAllocDevelopmentFeaturesTest : public ::Test { -protected: - SmallVector - setupOverlapProblem(const SmallVectorImpl &Segments, - simple_ilist &IndexList) { - SmallVector PositionsToReturn; - PositionsToReturn.reserve(Segments.size()); - for (auto CurrentPosIndexInfo : Segments) { - LRStartEndInfo CurrentPosInfo = {}; - CurrentPosInfo.Pos = CurrentPosIndexInfo.PhysReg; - PositionsToReturn.push_back(CurrentPosInfo); - } - size_t CurrentSegmentIndex = 0; - size_t CurrentIndex = 0; - while (CurrentSegmentIndex < Segments.size()) { - auto *CurrentLEMem = static_cast( - 
Allocator.Allocate(sizeof(IndexListEntry), alignof(IndexListEntry))); - auto *CurrentListEntry = - new (CurrentLEMem) IndexListEntry(nullptr, CurrentIndex); - IndexList.push_back(*CurrentListEntry); - for (size_t CurrentPosInfoIndex = 0; - CurrentPosInfoIndex < Segments.size(); ++CurrentPosInfoIndex) { - if ((CurrentIndex / SlotIndex::InstrDist) == - Segments[CurrentPosInfoIndex].StartIndex) { - PositionsToReturn[CurrentPosInfoIndex].Begin = - SlotIndex(CurrentListEntry, 0); - } else if ((CurrentIndex / SlotIndex::InstrDist) == - Segments[CurrentPosInfoIndex].EndIndex) { - PositionsToReturn[CurrentPosInfoIndex].End = - SlotIndex(CurrentListEntry, 0); - ++CurrentSegmentIndex; - } - } - CurrentIndex += SlotIndex::InstrDist; - } - return PositionsToReturn; - } - - NoInferenceModelRunner setupModelRunner() { - const std::vector Inputs{ - TensorSpec::createSpec("instructions", InstructionsShape), - TensorSpec::createSpec("instructions_mapping", - InstructionsMappingShape), - TensorSpec::createSpec("mbb_frequencies", MBBFrequencyShape), - TensorSpec::createSpec("mbb_mapping", InstructionsShape)}; - LLVMContext Ctx; - return NoInferenceModelRunner(Ctx, Inputs); - } - - std::vector - getExpectedMappingMatrix(SmallVectorImpl &OverlapSetup) { - std::vector ExpectedMappingMatrix( - NumberOfInterferences * ModelMaxSupportedInstructionCount, 0); - for (auto NewSegment : OverlapSetup) { - for (size_t CurrentIndex = NewSegment.StartIndex; - CurrentIndex <= NewSegment.EndIndex; ++CurrentIndex) { - ExpectedMappingMatrix[NewSegment.PhysReg * - ModelMaxSupportedInstructionCount + - CurrentIndex] = 1; - } - } - return ExpectedMappingMatrix; - } - - void runOverlapTest(SmallVectorImpl &OverlapSetup) { - simple_ilist IndexList; - auto OverlapProblem = setupOverlapProblem(OverlapSetup, IndexList); - NoInferenceModelRunner ModelRunner = setupModelRunner(); - size_t MaxIndex = 0; - for (size_t CurrentOverlap = 0; CurrentOverlap < OverlapSetup.size(); - ++CurrentOverlap) { - if (OverlapSetup[CurrentOverlap].EndIndex > - OverlapSetup[MaxIndex].EndIndex) { - MaxIndex = CurrentOverlap; - } - } - SlotIndex LastIndex = OverlapProblem[MaxIndex].End; - extractInstructionFeatures( - OverlapProblem, &ModelRunner, - [](SlotIndex InputSlot) -> int { return 0; }, - [](SlotIndex InputSlot) -> float { return 0.0f; }, - [](SlotIndex InputSlot) -> MachineBasicBlock * { return nullptr; }, 0, - 1, 2, 3, LastIndex); - std::vector MappingMatrix( - ModelRunner.getTensor(1), - ModelRunner.getTensor(1) + - NumberOfInterferences * ModelMaxSupportedInstructionCount); - ASSERT_THAT(MappingMatrix, - ContainerEq(getExpectedMappingMatrix(OverlapSetup))); - IndexList.clear(); - } - - BumpPtrAllocator Allocator; -}; - -// meta tests to ensure that test setup works correctly - -TEST_F(RegAllocDevelopmentFeaturesTest, - MetaOverlapInstructionDistancesAreCorrect) { - SmallVector OverlapSetup; - OverlapSetup.push_back({0, 5, 0}); - OverlapSetup.push_back({5, 10, 0}); - simple_ilist IndexList; - auto OverlapProblem = setupOverlapProblem(OverlapSetup, IndexList); - ASSERT_EQ(OverlapProblem[0].End.distance(OverlapProblem[1].End), - 5 * SlotIndex::InstrDist); - ASSERT_EQ(OverlapProblem[0].End.distance(OverlapProblem[1].Begin), 0); -} - -TEST_F(RegAllocDevelopmentFeaturesTest, MetaSlotIndicesAreValid) { - SmallVector OverlapSetup; - OverlapSetup.push_back({0, 10, 0}); - simple_ilist IndexList; - auto OverlapProblem = setupOverlapProblem(OverlapSetup, IndexList); - ASSERT_TRUE(OverlapProblem[0].Begin.isValid()); - 
ASSERT_TRUE(OverlapProblem[0].End.isValid()); -} - -// Testing of feature extraction for per-instruction features - -TEST_F(RegAllocDevelopmentFeaturesTest, InstructionOpcodesAreCorrect) { - SmallVector OverlapSetup; - OverlapSetup.push_back({0, ModelMaxSupportedInstructionCount - 1, 0}); - simple_ilist IndexList; - auto OverlapProblem = setupOverlapProblem(OverlapSetup, IndexList); - NoInferenceModelRunner ModelRunner = setupModelRunner(); - SlotIndex LastIndex = OverlapProblem[0].End; - SlotIndex FirstIndex = OverlapProblem[0].Begin; - extractInstructionFeatures( - OverlapProblem, &ModelRunner, - [FirstIndex](SlotIndex InputSlot) -> int { - return FirstIndex.distance(InputSlot) / SlotIndex::InstrDist; - }, - [](SlotIndex InputSlot) -> float { return 0.0f; }, - [](SlotIndex InputSlot) -> MachineBasicBlock * { return nullptr; }, 0, 1, - 2, 3, LastIndex); - for (size_t CurrentInstructionIndex = 0; - CurrentInstructionIndex < ModelMaxSupportedInstructionCount; - ++CurrentInstructionIndex) { - ASSERT_EQ( - (size_t)ModelRunner.getTensor(0)[CurrentInstructionIndex], - CurrentInstructionIndex); - } -} - -TEST_F(RegAllocDevelopmentFeaturesTest, FullOverlap) { - SmallVector OverlapSetup; - OverlapSetup.push_back({0, ModelMaxSupportedInstructionCount - 1, 0}); - OverlapSetup.push_back({0, ModelMaxSupportedInstructionCount - 1, 1}); - runOverlapTest(OverlapSetup); -} - -TEST_F(RegAllocDevelopmentFeaturesTest, PartialOverlap) { - SmallVector OverlapSetup; - OverlapSetup.push_back({0, 20, 0}); - OverlapSetup.push_back({15, 30, 1}); - runOverlapTest(OverlapSetup); -} - -TEST_F(RegAllocDevelopmentFeaturesTest, PartialOverlapOpposite) { - SmallVector OverlapSetup; - OverlapSetup.push_back({15, 30, 1}); - OverlapSetup.push_back({0, 20, 0}); - runOverlapTest(OverlapSetup); -} - -TEST_F(RegAllocDevelopmentFeaturesTest, InternalOverlap) { - SmallVector OverlapSetup; - OverlapSetup.push_back({0, 30, 0}); - OverlapSetup.push_back({10, 20, 1}); - runOverlapTest(OverlapSetup); -} - -TEST_F(RegAllocDevelopmentFeaturesTest, TripleInternalOverlap) { - SmallVector OverlapSetup; - OverlapSetup.push_back({0, 30, 0}); - OverlapSetup.push_back({10, 25, 1}); - OverlapSetup.push_back({15, 20, 2}); - runOverlapTest(OverlapSetup); -} - -TEST_F(RegAllocDevelopmentFeaturesTest, InternalMultiOverlap) { - SmallVector OverlapSetup; - OverlapSetup.push_back({0, 45, 0}); - OverlapSetup.push_back({30, 40, 1}); - OverlapSetup.push_back({35, 60, 2}); - runOverlapTest(OverlapSetup); -} - -TEST_F(RegAllocDevelopmentFeaturesTest, SingleMBBTest) { - NoInferenceModelRunner ModelRunner = setupModelRunner(); - SlotIndex CurrentIndex; - // set index to 1 so we can ensure that the mapping actually get set - std::map VisitedMBBs = {{nullptr, 1}}; - extractMBBFrequency( - CurrentIndex, 0, VisitedMBBs, - [](SlotIndex InputSlot) -> float { return 1.0f; }, nullptr, &ModelRunner, - 2, 3); - ASSERT_FLOAT_EQ(ModelRunner.getTensor(2)[1], 1.0f); - ASSERT_EQ(ModelRunner.getTensor(3)[0], 1); -} - -TEST_F(RegAllocDevelopmentFeaturesTest, MBBFullTruncated) { - SmallVector OverlapSetup; - OverlapSetup.push_back({0, ModelMaxSupportedInstructionCount - 1, 0}); - simple_ilist IndexList; - auto OverlapProblem = setupOverlapProblem(OverlapSetup, IndexList); - NoInferenceModelRunner ModelRunner = setupModelRunner(); - SlotIndex LastIndex = OverlapProblem[0].End; - SlotIndex FirstIndex = OverlapProblem[0].Begin; - - LLVMContext Ctx; - Module Mod("Module", Ctx); - auto MF = createMachineFunction(Ctx, Mod); - std::array - MBBsForTest; - for (size_t I = 0; I < 
ModelMaxSupportedInstructionCount; ++I) { - MBBsForTest[I] = MF->CreateMachineBasicBlock(); - } - - extractInstructionFeatures( - OverlapProblem, &ModelRunner, - [](SlotIndex InputSlot) -> int { return 0; }, - [FirstIndex](SlotIndex InputSlot) -> float { - return static_cast(FirstIndex.distance(InputSlot) / - SlotIndex::InstrDist); - }, - [FirstIndex, MBBsForTest](SlotIndex InputSlot) -> MachineBasicBlock * { - return MBBsForTest[FirstIndex.distance(InputSlot) / - SlotIndex::InstrDist]; - }, - 0, 1, 2, 3, LastIndex); - for (size_t MBBIndex = 0; MBBIndex < ModelMaxSupportedMBBCount; ++MBBIndex) { - ASSERT_FLOAT_EQ(ModelRunner.getTensor(2)[MBBIndex], - static_cast(MBBIndex)); - ASSERT_EQ(ModelRunner.getTensor(3)[MBBIndex], - static_cast(MBBIndex)); - } - // the rest of the mapping values should be zero (truncated to 100 MBBs) - for (size_t MBBIndex = ModelMaxSupportedMBBCount; - MBBIndex < ModelMaxSupportedInstructionCount; ++MBBIndex) { - ASSERT_EQ(ModelRunner.getTensor(3)[MBBIndex], - static_cast(0)); - } -} - -} // end namespace diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index ee7fa175ca918..0e76c64f09f59 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -1009,7 +1009,7 @@ TEST_F(VPRecipeTest, CastVPWidenRecipeToVPUser) { SmallVector Args; Args.push_back(Op1); Args.push_back(Op2); - VPWidenRecipe WidenR(*AI, make_range(Args.begin(), Args.end())); + VPWidenRecipe WidenR(*AI, Args, VPIRMetadata(), DebugLoc()); checkVPRecipeCastImpl(&WidenR); delete AI; @@ -1092,7 +1092,7 @@ TEST_F(VPRecipeTest, CastVPWidenCastRecipeToVPUser) { IntegerType *Int64 = IntegerType::get(C, 64); auto *Cast = CastInst::CreateZExtOrBitCast(PoisonValue::get(Int32), Int64); VPValue *Op1 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); - VPWidenCastRecipe Recipe(Instruction::ZExt, Op1, Int64, *Cast); + VPWidenCastRecipe Recipe(Instruction::ZExt, Op1, Int64, *Cast, {}); checkVPRecipeCastImpl(&Recipe); delete Cast; @@ -1263,7 +1263,7 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { SmallVector Args; Args.push_back(Op1); Args.push_back(Op2); - VPWidenRecipe Recipe(*AI, make_range(Args.begin(), Args.end())); + VPWidenRecipe Recipe(*AI, Args, VPIRMetadata(), DebugLoc()); EXPECT_FALSE(Recipe.mayHaveSideEffects()); EXPECT_FALSE(Recipe.mayReadFromMemory()); EXPECT_FALSE(Recipe.mayWriteToMemory()); @@ -1468,7 +1468,7 @@ TEST_F(VPRecipeTest, dumpRecipeInPlan) { Args.push_back(ExtVPV1); Args.push_back(ExtVPV2); VPWidenRecipe *WidenR = - new VPWidenRecipe(*AI, make_range(Args.begin(), Args.end())); + new VPWidenRecipe(*AI, Args, VPIRMetadata(), DebugLoc()); VPBB1->appendRecipe(WidenR); { diff --git a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp index ff894853b9771..228969ab37f85 100644 --- a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp +++ b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp @@ -449,6 +449,29 @@ void CodeGenIntrinsic::setProperty(const Record *R) { int64_t Lower = R->getValueAsInt("Lower"); int64_t Upper = R->getValueAsInt("Upper"); addArgAttribute(ArgNo, Range, Lower, Upper); + } else if (R->isSubClassOf("ArgInfo")) { + unsigned ArgNo = R->getValueAsInt("ArgNo"); + if (ArgNo < 1) + PrintFatalError(R->getLoc(), + "ArgInfo requires ArgNo >= 1 (0 is return value)"); + const ListInit *Properties = R->getValueAsListInit("Properties"); + StringRef ArgName; + StringRef FuncName; + + for (const Init 
*PropInit : Properties->getElements()) {
+      if (const auto *PropDef = dyn_cast<DefInit>(PropInit)) {
+        const Record *PropRec = PropDef->getDef();
+
+        if (PropRec->isSubClassOf("ArgName"))
+          ArgName = PropRec->getValueAsString("Name");
+        else if (PropRec->isSubClassOf("ImmArgPrinter"))
+          FuncName = PropRec->getValueAsString("FuncName");
+        else
+          PrintFatalError(PropRec->getLoc(),
+                          "Unknown ArgProperty type: " + PropRec->getName());
+      }
+    }
+    addPrettyPrintFunction(ArgNo - 1, ArgName, FuncName);
   } else {
     llvm_unreachable("Unknown property!");
   }
@@ -476,3 +499,16 @@ void CodeGenIntrinsic::addArgAttribute(unsigned Idx, ArgAttrKind AK, uint64_t V,
     ArgumentAttributes.resize(Idx + 1);
   ArgumentAttributes[Idx].emplace_back(AK, V, V2);
 }
+
+void CodeGenIntrinsic::addPrettyPrintFunction(unsigned ArgIdx,
+                                              StringRef ArgName,
+                                              StringRef FuncName) {
+  auto It = llvm::find_if(PrettyPrintFunctions, [ArgIdx](const auto &Info) {
+    return Info.ArgIdx == ArgIdx;
+  });
+  if (It != PrettyPrintFunctions.end())
+    PrintFatalError(TheDef->getLoc(), "ArgInfo for argument " + Twine(ArgIdx) +
+                                          " is already defined as '" +
+                                          It->FuncName + "'");
+  PrettyPrintFunctions.emplace_back(ArgIdx, ArgName, FuncName);
+}
diff --git a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h
index 15e803c4feba1..6ac6f734326d8 100644
--- a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h
+++ b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h
@@ -152,6 +152,22 @@ struct CodeGenIntrinsic {
   void addArgAttribute(unsigned Idx, ArgAttrKind AK, uint64_t V = 0,
                        uint64_t V2 = 0);
 
+  /// Structure to store pretty print and argument information.
+  struct PrettyPrintArgInfo {
+    unsigned ArgIdx;
+    StringRef ArgName;
+    StringRef FuncName;
+
+    PrettyPrintArgInfo(unsigned Idx, StringRef Name, StringRef Func)
+        : ArgIdx(Idx), ArgName(Name), FuncName(Func) {}
+  };
+
+  /// Vector that stores ArgInfo (ArgIndex, ArgName, FunctionName).
+  SmallVector<PrettyPrintArgInfo> PrettyPrintFunctions;
+
+  void addPrettyPrintFunction(unsigned ArgIdx, StringRef ArgName,
+                              StringRef FuncName);
+
   bool hasProperty(enum SDNP Prop) const { return Properties & (1 << Prop); }
 
   /// Goes through all IntrProperties that have IsDefault value set and sets
diff --git a/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp b/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
index 452d2b08f25c3..3ac23185ef91c 100644
--- a/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
@@ -60,8 +60,16 @@ class IntrinsicEmitter {
                                 raw_ostream &OS);
   void EmitIntrinsicToOverloadTable(const CodeGenIntrinsicTable &Ints,
                                     raw_ostream &OS);
+  void EmitIntrinsicToPrettyPrintTable(const CodeGenIntrinsicTable &Ints,
+                                       raw_ostream &OS);
+  void EmitIntrinsicBitTable(
+      const CodeGenIntrinsicTable &Ints, raw_ostream &OS, StringRef Guard,
+      StringRef TableName, StringRef Comment,
+      function_ref<bool(const CodeGenIntrinsic &)> GetProperty);
   void EmitGenerator(const CodeGenIntrinsicTable &Ints, raw_ostream &OS);
   void EmitAttributes(const CodeGenIntrinsicTable &Ints, raw_ostream &OS);
+  void EmitPrettyPrintArguments(const CodeGenIntrinsicTable &Ints,
+                                raw_ostream &OS);
   void EmitIntrinsicToBuiltinMap(const CodeGenIntrinsicTable &Ints,
                                  bool IsClang, raw_ostream &OS);
 };
@@ -109,6 +117,12 @@ void IntrinsicEmitter::run(raw_ostream &OS, bool Enums) {
   // Emit the intrinsic parameter attributes.
   EmitAttributes(Ints, OS);
 
+  // Emit the intrinsic ID -> pretty print table.
+  EmitIntrinsicToPrettyPrintTable(Ints, OS);
+
+  // Emit the pretty-print routines for annotated immediate arguments.
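+  // The emitted code is guarded by GET_INTRINSIC_PRETTY_PRINT_ARGUMENTS and
+  // defines Intrinsic::printImmArg. A printer is expected to consult the
+  // PPTable bitset first, roughly (a sketch; the caller lives elsewhere):
+  //   if (PPTable bit for IID is set)
+  //     Intrinsic::printImmArg(IID, ArgIdx, OS, ImmArgVal);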
+  EmitPrettyPrintArguments(Ints, OS);
+
   // Emit code to translate Clang builtins into LLVM intrinsics.
   EmitIntrinsicToBuiltinMap(Ints, true, OS);
@@ -240,6 +254,29 @@ static constexpr IntrinsicTargetInfo TargetInfos[] = {
 )";
 }
 
+/// Helper function to emit a bit table for intrinsic properties.
+/// This is used for both overload and pretty print bit tables.
+void IntrinsicEmitter::EmitIntrinsicBitTable(
+    const CodeGenIntrinsicTable &Ints, raw_ostream &OS, StringRef Guard,
+    StringRef TableName, StringRef Comment,
+    function_ref<bool(const CodeGenIntrinsic &)> GetProperty) {
+  OS << formatv("// {}\n", Comment);
+  OS << formatv("#ifdef {}\n", Guard);
+  OS << formatv("static constexpr uint8_t {}[] = {{\n", TableName);
+  OS << "  0\n  ";
+  for (auto [I, Int] : enumerate(Ints)) {
+    // Add one to the index so we emit a null bit for the invalid #0 intrinsic.
+    size_t Idx = I + 1;
+    if (Idx % 8 == 0)
+      OS << ",\n  0";
+    if (GetProperty(Int))
+      OS << " | (1<<" << Idx % 8 << ')';
+  }
+  OS << "\n};\n\n";
+  OS << formatv("return ({}[id/8] & (1 << (id%8))) != 0;\n", TableName);
+  OS << formatv("#endif // {}\n\n", Guard);
+}
+
 void IntrinsicEmitter::EmitIntrinsicToNameTable(
     const CodeGenIntrinsicTable &Ints, raw_ostream &OS) {
   // Built up a table of the intrinsic names.
@@ -276,24 +313,10 @@ static constexpr unsigned IntrinsicNameOffsetTable[] = {
 
 void IntrinsicEmitter::EmitIntrinsicToOverloadTable(
     const CodeGenIntrinsicTable &Ints, raw_ostream &OS) {
-  OS << R"(// Intrinsic ID to overload bitset.
-#ifdef GET_INTRINSIC_OVERLOAD_TABLE
-static constexpr uint8_t OTable[] = {
-  0
-  )";
-  for (auto [I, Int] : enumerate(Ints)) {
-    // Add one to the index so we emit a null bit for the invalid #0 intrinsic.
-    size_t Idx = I + 1;
-
-    if (Idx % 8 == 0)
-      OS << ",\n  0";
-    if (Int.isOverloaded)
-      OS << " | (1<<" << Idx % 8 << ')';
-  }
-  OS << "\n};\n\n";
-  // OTable contains a true bit at the position if the intrinsic is overloaded.
-  OS << "return (OTable[id/8] & (1 << (id%8))) != 0;\n";
-  OS << "#endif\n\n";
+  EmitIntrinsicBitTable(
+      Ints, OS, "GET_INTRINSIC_OVERLOAD_TABLE", "OTable",
+      "Intrinsic ID to overload bitset.",
+      [](const CodeGenIntrinsic &Int) { return Int.isOverloaded; });
 }
 
 using TypeSigTy = SmallVector<unsigned char>;
@@ -809,6 +832,52 @@ AttributeSet Intrinsic::getFnAttributes(LLVMContext &C, ID id) {{
                          NoFunctionAttrsID);
 }
 
+void IntrinsicEmitter::EmitIntrinsicToPrettyPrintTable(
+    const CodeGenIntrinsicTable &Ints, raw_ostream &OS) {
+  EmitIntrinsicBitTable(Ints, OS, "GET_INTRINSIC_PRETTY_PRINT_TABLE", "PPTable",
+                        "Intrinsic ID to pretty print bitset.",
+                        [](const CodeGenIntrinsic &Int) {
+                          return !Int.PrettyPrintFunctions.empty();
+                        });
+}
+
+void IntrinsicEmitter::EmitPrettyPrintArguments(
+    const CodeGenIntrinsicTable &Ints, raw_ostream &OS) {
+  OS << R"(
+#ifdef GET_INTRINSIC_PRETTY_PRINT_ARGUMENTS
+void Intrinsic::printImmArg(ID IID, unsigned ArgIdx, raw_ostream &OS, const Constant *ImmArgVal) {
+  using namespace Intrinsic;
+  switch (IID) {
+)";
+
+  for (const CodeGenIntrinsic &Int : Ints) {
+    if (Int.PrettyPrintFunctions.empty())
+      continue;
+
+    OS << "  case " << Int.EnumName << ":\n";
+    OS << "    switch (ArgIdx) {\n";
+    for (const auto [ArgIdx, ArgName, FuncName] : Int.PrettyPrintFunctions) {
+      OS << "    case " << ArgIdx << ":\n";
+      OS << "      OS << \"" << ArgName << "=\";\n";
+      if (!FuncName.empty()) {
+        OS << "      ";
+        if (!Int.TargetPrefix.empty())
+          OS << Int.TargetPrefix << "::";
+        OS << FuncName << "(OS, ImmArgVal);\n";
+      }
+      OS << "      return;\n";
+    }
+    OS << "    }\n";
+    OS << "    break;\n";
+  }
+  OS << R"(  default:
+    break;
+  }
+}
+#endif // GET_INTRINSIC_PRETTY_PRINT_ARGUMENTS
+)";
+}
+
 void IntrinsicEmitter::EmitIntrinsicToBuiltinMap(
     const CodeGenIntrinsicTable &Ints, bool IsClang, raw_ostream &OS) {
   StringRef CompilerName = IsClang ? "Clang" : "MS";
diff --git a/mlir/include/mlir/Conversion/ArithCommon/AttrToLLVMConverter.h b/mlir/include/mlir/Conversion/ArithCommon/AttrToLLVMConverter.h
index 7ffc861331760..7020e24517d09 100644
--- a/mlir/include/mlir/Conversion/ArithCommon/AttrToLLVMConverter.h
+++ b/mlir/include/mlir/Conversion/ArithCommon/AttrToLLVMConverter.h
@@ -65,11 +65,8 @@ class AttrConvertFastMathToLLVM {
           convertArithFastMathAttrToLLVM(arithFMFAttr));
     }
   }
 
   ArrayRef<NamedAttribute> getAttrs() const { return convertedAttr.getAttrs(); }
-
-  LLVM::IntegerOverflowFlags getOverflowFlags() const {
-    return LLVM::IntegerOverflowFlags::none;
-  }
+  Attribute getPropAttr() const { return {}; }
 
 private:
   NamedAttrList convertedAttr;
@@ -82,23 +79,36 @@ template <typename SourceOp, typename TargetOp>
 class AttrConvertOverflowToLLVM {
 public:
   AttrConvertOverflowToLLVM(SourceOp srcOp) {
+    using IntegerOverflowFlagsAttr = LLVM::IntegerOverflowFlagsAttr;
+
     // Copy the source attributes.
     convertedAttr = NamedAttrList{srcOp->getAttrs()};
     // Get the name of the arith overflow attribute.
     StringRef arithAttrName = SourceOp::getIntegerOverflowAttrName();
-    // Remove the source overflow attribute.
+    // Remove the source overflow attribute from the set that will be present
+    // in the target.
    if (auto arithAttr = dyn_cast_if_present<arith::IntegerOverflowFlagsAttr>(
            convertedAttr.erase(arithAttrName))) {
-      overflowFlags = convertArithOverflowFlagsToLLVM(arithAttr.getValue());
+      auto llvmFlag = convertArithOverflowFlagsToLLVM(arithAttr.getValue());
+      // Create a dictionary attribute holding the overflow flags property.
+      // (In the LLVM dialect, the overflow flags are a property, not an
+      // attribute.)
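+      // The dictionary built here is returned from getPropAttr() and
+      // installed through OperationState::propertiesAttr in oneToOneRewrite,
+      // so the flags become a native property of the created LLVM operation.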
+      MLIRContext *ctx = srcOp.getOperation()->getContext();
+      Builder b(ctx);
+      auto llvmFlagAttr = IntegerOverflowFlagsAttr::get(ctx, llvmFlag);
+      StringRef llvmAttrName = TargetOp::getOverflowFlagsAttrName();
+      NamedAttribute attr{llvmAttrName, llvmFlagAttr};
+      // Set the properties attribute of the operation state so that the
+      // property can be updated when the operation is created.
+      propertiesAttr = b.getDictionaryAttr(ArrayRef(attr));
     }
   }
 
   ArrayRef<NamedAttribute> getAttrs() const { return convertedAttr.getAttrs(); }
-  LLVM::IntegerOverflowFlags getOverflowFlags() const { return overflowFlags; }
+  Attribute getPropAttr() const { return propertiesAttr; }
 
 private:
   NamedAttrList convertedAttr;
-  LLVM::IntegerOverflowFlags overflowFlags = LLVM::IntegerOverflowFlags::none;
+  DictionaryAttr propertiesAttr;
 };
 
 template <typename SourceOp, typename TargetOp>
@@ -129,9 +139,7 @@ class AttrConverterConstrainedFPToLLVM {
   }
 
   ArrayRef<NamedAttribute> getAttrs() const { return convertedAttr.getAttrs(); }
-  LLVM::IntegerOverflowFlags getOverflowFlags() const {
-    return LLVM::IntegerOverflowFlags::none;
-  }
+  Attribute getPropAttr() const { return {}; }
 
 private:
   NamedAttrList convertedAttr;
diff --git a/mlir/include/mlir/Conversion/LLVMCommon/Pattern.h b/mlir/include/mlir/Conversion/LLVMCommon/Pattern.h
index c292e3727f46c..f8e0ccc093f8b 100644
--- a/mlir/include/mlir/Conversion/LLVMCommon/Pattern.h
+++ b/mlir/include/mlir/Conversion/LLVMCommon/Pattern.h
@@ -19,16 +19,14 @@ class CallOpInterface;
 namespace LLVM {
 namespace detail {
 
-/// Handle generically setting flags as native properties on LLVM operations.
-void setNativeProperties(Operation *op, IntegerOverflowFlags overflowFlags);
-
 /// Replaces the given operation "op" with a new operation of type "targetOp"
 /// and given operands.
-LogicalResult oneToOneRewrite(
-    Operation *op, StringRef targetOp, ValueRange operands,
-    ArrayRef<NamedAttribute> targetAttrs,
-    const LLVMTypeConverter &typeConverter, ConversionPatternRewriter &rewriter,
-    IntegerOverflowFlags overflowFlags = IntegerOverflowFlags::none);
+LogicalResult oneToOneRewrite(Operation *op, StringRef targetOp,
+                              ValueRange operands,
+                              ArrayRef<NamedAttribute> targetAttrs,
+                              Attribute propertiesAttr,
+                              const LLVMTypeConverter &typeConverter,
+                              ConversionPatternRewriter &rewriter);
 
 /// Replaces the given operation "op" with a call to an LLVM intrinsic with the
 /// specified name "intrinsic" and operands.
@@ -307,9 +305,9 @@ class OneToOneConvertToLLVMPattern : public ConvertOpToLLVMPattern<SourceOp> {
   LogicalResult
   matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
-    return LLVM::detail::oneToOneRewrite(op, TargetOp::getOperationName(),
-                                         adaptor.getOperands(), op->getAttrs(),
-                                         *this->getTypeConverter(), rewriter);
+    return LLVM::detail::oneToOneRewrite(
+        op, TargetOp::getOperationName(), adaptor.getOperands(), op->getAttrs(),
+        /*propertiesAttr=*/Attribute{}, *this->getTypeConverter(), rewriter);
   }
 };
 
diff --git a/mlir/include/mlir/Conversion/LLVMCommon/VectorPattern.h b/mlir/include/mlir/Conversion/LLVMCommon/VectorPattern.h
index e7ab63abfeaa1..47b8381eefda8 100644
--- a/mlir/include/mlir/Conversion/LLVMCommon/VectorPattern.h
+++ b/mlir/include/mlir/Conversion/LLVMCommon/VectorPattern.h
@@ -54,25 +54,26 @@ LogicalResult handleMultidimensionalVectors(
     std::function<Value(Type, ValueRange)> createOperand,
     ConversionPatternRewriter &rewriter);
 
-LogicalResult vectorOneToOneRewrite(
-    Operation *op, StringRef targetOp, ValueRange operands,
-    ArrayRef<NamedAttribute> targetAttrs,
-    const LLVMTypeConverter &typeConverter, ConversionPatternRewriter &rewriter,
-    IntegerOverflowFlags overflowFlags = IntegerOverflowFlags::none);
+LogicalResult vectorOneToOneRewrite(Operation *op, StringRef targetOp,
+                                    ValueRange operands,
+                                    ArrayRef<NamedAttribute> targetAttrs,
+                                    Attribute propertiesAttr,
+                                    const LLVMTypeConverter &typeConverter,
+                                    ConversionPatternRewriter &rewriter);
 } // namespace detail
 } // namespace LLVM
 
 // Default attribute conversion class, which passes all source attributes
-// through to the target op, unmodified.
+// through to the target op, unmodified. The attribute used to set the
+// properties of the target operation will be null (i.e. any properties the
+// target operation has keep their default values).
 template <typename SourceOp, typename TargetOp>
 class AttrConvertPassThrough {
 public:
   AttrConvertPassThrough(SourceOp srcOp) : srcAttrs(srcOp->getAttrs()) {}
 
   ArrayRef<NamedAttribute> getAttrs() const { return srcAttrs; }
-  LLVM::IntegerOverflowFlags getOverflowFlags() const {
-    return LLVM::IntegerOverflowFlags::none;
-  }
+  Attribute getPropAttr() const { return {}; }
 
 private:
   ArrayRef<NamedAttribute> srcAttrs;
@@ -80,10 +81,13 @@ class AttrConvertPassThrough {
 
 /// Basic lowering implementation to rewrite Ops with just one result to the
 /// LLVM Dialect. This supports higher-dimensional vector types.
-/// The AttrConvert template template parameter should be a template class
-/// with SourceOp and TargetOp type parameters, a constructor that takes
-/// a SourceOp instance, and a getAttrs() method that returns
-/// ArrayRef<NamedAttribute>.
+/// The AttrConvert template template parameter should:
+///   - be a template class with SourceOp and TargetOp type parameters,
+///   - have a constructor that takes a SourceOp instance,
+///   - have a getAttrs() method that returns the ArrayRef<NamedAttribute> of
+///     attributes the target operation will carry, and
+///   - have a getPropAttr() method that returns either a null Attribute or a
+///     DictionaryAttr holding properties that exist on the target operation.
 template <typename SourceOp, typename TargetOp,
           template <typename, typename> typename AttrConvert =
               AttrConvertPassThrough,
@@ -137,8 +141,8 @@ class VectorConvertToLLVMPattern : public ConvertOpToLLVMPattern<SourceOp> {
 
     return LLVM::detail::vectorOneToOneRewrite(
         op, TargetOp::getOperationName(), adaptor.getOperands(),
-        attrConvert.getAttrs(), *this->getTypeConverter(), rewriter,
-        attrConvert.getOverflowFlags());
+        attrConvert.getAttrs(), attrConvert.getPropAttr(),
+        *this->getTypeConverter(), rewriter);
   }
 };
 } // namespace mlir
diff --git a/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td b/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td
index a38cf41a3e09b..77d780425c3c3 100644
--- a/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td
+++ b/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td
@@ -158,6 +158,18 @@ class Arith_IntBinaryOpWithOverflowFlags<string mnemonic, list<Trait> traits = [
     attr-dict `:` type($result) }];
 }
 
+class Arith_IntBinaryOpWithExactFlag<string mnemonic, list<Trait> traits = []> :
+    Arith_BinaryOp]>,
+    Arguments<(ins SignlessIntegerOrIndexLike:$lhs,
+                   SignlessIntegerOrIndexLike:$rhs,
+                   UnitAttr:$isExact)>,
+    Results<(outs SignlessIntegerOrIndexLike:$result)> {
+
+  let assemblyFormat = [{ $lhs `,` $rhs (`exact` $isExact^)?
+    attr-dict `:` type($result) }];
+}
+
 //===----------------------------------------------------------------------===//
 // ConstantOp
 //===----------------------------------------------------------------------===//
@@ -482,7 +494,8 @@ def Arith_MulUIExtendedOp : Arith_Op<"mului_extended", [Pure, Commutative,
 // DivUIOp
 //===----------------------------------------------------------------------===//
 
-def Arith_DivUIOp : Arith_IntBinaryOp<"divui", [ConditionallySpeculatable]> {
+def Arith_DivUIOp : Arith_IntBinaryOpWithExactFlag<"divui",
+    [ConditionallySpeculatable]> {
   let summary = "unsigned integer division operation";
   let description = [{
     Unsigned integer division. Rounds towards zero. Treats the leading bit as
@@ -493,12 +506,18 @@ def Arith_DivUIOp : Arith_IntBinaryOp<"divui", [ConditionallySpeculatable]> {
     `tensor` values, the behavior is undefined if _any_ elements are divided by
     zero.
 
+    If the `exact` attribute is present, the result value is poison if `lhs` is
+    not a multiple of `rhs`.
+
     Example:
 
     ```mlir
     // Scalar unsigned integer division.
     %a = arith.divui %b, %c : i64
 
+    // Scalar unsigned integer division where %b is known to be a multiple of %c.
+    %a = arith.divui %b, %c exact : i64
+
     // SIMD vector element-wise division.
     %f = arith.divui %g, %h : vector<4xi32>
 
@@ -519,7 +538,8 @@ def Arith_DivUIOp : Arith_IntBinaryOp<"divui", [ConditionallySpeculatable]> {
 // DivSIOp
 //===----------------------------------------------------------------------===//
 
-def Arith_DivSIOp : Arith_IntBinaryOp<"divsi", [ConditionallySpeculatable]> {
+def Arith_DivSIOp : Arith_IntBinaryOpWithExactFlag<"divsi",
+    [ConditionallySpeculatable]> {
   let summary = "signed integer division operation";
   let description = [{
    Signed integer division. Rounds towards zero. Treats the leading bit as
@@ -530,12 +550,18 @@ def Arith_DivSIOp : Arith_IntBinaryOp<"divsi", [ConditionallySpeculatable]> {
     behavior is undefined if _any_ of its elements are divided by zero or has a
     signed division overflow.
 
+    If the `exact` attribute is present, the result value is poison if `lhs` is
+    not a multiple of `rhs`.
+
     Example:
 
     ```mlir
     // Scalar signed integer division.
     %a = arith.divsi %b, %c : i64
 
+    // Scalar signed integer division where %b is known to be a multiple of %c.
+    %a = arith.divsi %b, %c exact : i64
+
     // SIMD vector element-wise division.
     %f = arith.divsi %g, %h : vector<4xi32>
 
@@ -821,7 +847,7 @@ def Arith_ShLIOp : Arith_IntBinaryOpWithOverflowFlags<"shli"> {
 // ShRUIOp
 //===----------------------------------------------------------------------===//
 
-def Arith_ShRUIOp : Arith_TotalIntBinaryOp<"shrui"> {
+def Arith_ShRUIOp : Arith_IntBinaryOpWithExactFlag<"shrui", [Pure]> {
   let summary = "unsigned integer right-shift";
   let description = [{
     The `shrui` operation shifts an integer value of the first operand to the right
@@ -830,12 +856,17 @@ def Arith_ShRUIOp : Arith_TotalIntBinaryOp<"shrui"> {
     filled with zeros. If the value of the second operand is greater or equal
     than the bitwidth of the first operand, then the operation returns poison.
 
+    If the `exact` attribute is present, the result value of shrui is a poison
+    value if any of the bits shifted out are non-zero.
+
     Example:
 
     ```mlir
-    %1 = arith.constant 160 : i8               // %1 is 0b10100000
+    %1 = arith.constant 160 : i8                 // %1 is 0b10100000
     %2 = arith.constant 3 : i8
-    %3 = arith.shrui %1, %2 : (i8, i8) -> i8   // %3 is 0b00010100
+    %3 = arith.constant 6 : i8
+    %4 = arith.shrui %1, %2 exact : i8           // %4 is 0b00010100
+    %5 = arith.shrui %1, %3 : i8                 // %5 is 0b00000010
     ```
   }];
   let hasFolder = 1;
@@ -845,7 +876,7 @@ def Arith_ShRUIOp : Arith_TotalIntBinaryOp<"shrui"> {
 // ShRSIOp
 //===----------------------------------------------------------------------===//
 
-def Arith_ShRSIOp : Arith_TotalIntBinaryOp<"shrsi"> {
+def Arith_ShRSIOp : Arith_IntBinaryOpWithExactFlag<"shrsi", [Pure]> {
   let summary = "signed integer right-shift";
   let description = [{
     The `shrsi` operation shifts an integer value of the first operand to the right
@@ -856,14 +887,17 @@ def Arith_ShRSIOp : Arith_TotalIntBinaryOp<"shrsi"> {
     operand is greater or equal than bitwidth of the first operand, then the
     operation returns poison.
 
+    If the `exact` attribute is present, the result value of shrsi is a poison
+    value if any of the bits shifted out are non-zero.
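+    Equivalently, `exact` asserts that the first operand is a multiple of
+    2^(shift amount), so the shift discards no set bits.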
+
     Example:
 
     ```mlir
-    %1 = arith.constant 160 : i8              // %1 is 0b10100000
+    %1 = arith.constant 160 : i8                 // %1 is 0b10100000
     %2 = arith.constant 3 : i8
-    %3 = arith.shrsi %1, %2 : (i8, i8) -> i8  // %3 is 0b11110100
-    %4 = arith.constant 96 : i8               // %4 is 0b01100000
-    %5 = arith.shrsi %4, %2 : (i8, i8) -> i8  // %5 is 0b00001100
+    %3 = arith.shrsi %1, %2 exact : i8           // %3 is 0b11110100
+    %4 = arith.constant 98 : i8                  // %4 is 0b01100010
+    %5 = arith.shrsi %4, %2 : i8                 // %5 is 0b00001100
     ```
   }];
   let hasFolder = 1;
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
index e425e16a4b1a6..971710fa3ee13 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
@@ -39,7 +39,7 @@ class LLVM_TerminatorOp<string mnemonic, list<Trait> traits = []> :
 class LLVM_ArithmeticOpBase<Type type, string mnemonic, string instName,
                             list<Trait> traits = []> :
     LLVM_Op<mnemonic,
-        !listconcat([SameOperandsAndResultType, Pure], traits)>,
+        !listconcat([SameOperandsAndResultType, NoMemoryEffect], traits)>,
     LLVM_Builder<"$res = builder.Create" # instName # "($lhs, $rhs);"> {
   dag commonArgs = (ins LLVM_ScalarOrVectorOf<type>:$lhs,
                         LLVM_ScalarOrVectorOf<type>:$rhs);
@@ -116,7 +117,8 @@ class LLVM_IntArithmeticOpWithDisjointFlag<string mnemonic, string instName,
                             list<Trait> traits = []> :
     LLVM_ArithmeticOpBase<LLVM_AnyFloat, mnemonic, instName,
-        !listconcat([DeclareOpInterfaceMethods<FastmathFlagsInterface>], traits)> {
+        !listconcat([DeclareOpInterfaceMethods<FastmathFlagsInterface>, Pure],
+                    traits)> {
   dag fmfArg = (
       ins DefaultValuedAttr<LLVM_FastmathFlagsAttr, "{}">:$fastmathFlags);
   let arguments = !con(commonArgs, fmfArg);
@@ -149,24 +150,26 @@ class LLVM_UnaryFloatArithmeticOp<Type type, string mnemonic, string instName> :
 def LLVM_AddOp : LLVM_IntArithmeticOpWithOverflowFlag<"add", "Add",
-                                                      [Commutative]>;
+                                                      [Commutative, Pure]>;
-def LLVM_SubOp : LLVM_IntArithmeticOpWithOverflowFlag<"sub", "Sub", []>;
+def LLVM_SubOp : LLVM_IntArithmeticOpWithOverflowFlag<"sub", "Sub", [Pure]>;
 def LLVM_MulOp : LLVM_IntArithmeticOpWithOverflowFlag<"mul", "Mul",
-                                                      [Commutative]>;
-def LLVM_UDivOp : LLVM_IntArithmeticOpWithExactFlag<"udiv", "UDiv">;
-def LLVM_SDivOp : LLVM_IntArithmeticOpWithExactFlag<"sdiv", "SDiv">;
-def LLVM_URemOp : LLVM_IntArithmeticOp<"urem", "URem">;
-def LLVM_SRemOp : LLVM_IntArithmeticOp<"srem", "SRem">;
-def LLVM_AndOp : LLVM_IntArithmeticOp<"and", "And">;
-def LLVM_OrOp : LLVM_IntArithmeticOpWithDisjointFlag<"or", "Or"> {
+                                                      [Commutative, Pure]>;
+def LLVM_UDivOp : LLVM_IntArithmeticOpWithExactFlag<"udiv", "UDiv",
+    [DeclareOpInterfaceMethods<ConditionallySpeculatable>]>;
+def LLVM_SDivOp : LLVM_IntArithmeticOpWithExactFlag<"sdiv", "SDiv",
+    [DeclareOpInterfaceMethods<ConditionallySpeculatable>]>;
+def LLVM_URemOp : LLVM_IntArithmeticOp<"urem", "URem", [Pure]>;
+def LLVM_SRemOp : LLVM_IntArithmeticOp<"srem", "SRem", [Pure]>;
+def LLVM_AndOp : LLVM_IntArithmeticOp<"and", "And", [Pure]>;
+def LLVM_OrOp : LLVM_IntArithmeticOpWithDisjointFlag<"or", "Or", [Pure]> {
   let hasFolder = 1;
 }
-def LLVM_XOrOp : LLVM_IntArithmeticOp<"xor", "Xor">;
-def LLVM_ShlOp : LLVM_IntArithmeticOpWithOverflowFlag<"shl", "Shl", []> {
+def LLVM_XOrOp : LLVM_IntArithmeticOp<"xor", "Xor", [Pure]>;
+def LLVM_ShlOp : LLVM_IntArithmeticOpWithOverflowFlag<"shl", "Shl", [Pure]> {
   let hasFolder = 1;
 }
-def LLVM_LShrOp : LLVM_IntArithmeticOpWithExactFlag<"lshr", "LShr">;
-def LLVM_AShrOp : LLVM_IntArithmeticOpWithExactFlag<"ashr", "AShr">;
+def LLVM_LShrOp : LLVM_IntArithmeticOpWithExactFlag<"lshr", "LShr", [Pure]>;
+def LLVM_AShrOp : LLVM_IntArithmeticOpWithExactFlag<"ashr", "AShr", [Pure]>;
 
 // Base class for compare operations. A compare operation takes two operands
 // of the same type and returns a boolean result. If the operands are
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index d4ef5104d3c1f..6e3a92b5bde42 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -79,6 +79,40 @@ def NVVM_Dialect : Dialect {
     sequence must be expressed directly, NVVM provides an `nvvm.inline_ptx` op
     to embed PTX inline as a last-resort escape hatch, with explicit operands
     and results.
+
+    **Memory Spaces:** The NVVM dialect introduces the following memory spaces,
+    each with distinct scopes and lifetimes:
+```
+    | Memory Space      | Address Space | Scope                | Lifetime          |
+    |-------------------|---------------|----------------------|-------------------|
+    | `generic`         | 0             | All threads          | Context-dependent |
+    | `global`          | 1             | All threads (device) | Application       |
+    | `shared`          | 3             | Thread block (CTA)   | Kernel execution  |
+    | `constant`        | 4             | All threads (RO)     | Application       |
+    | `local`           | 5             | Single thread        | Kernel execution  |
+    | `tensor`          | 6             | Thread block (CTA)   | Kernel execution  |
+    | `shared_cluster`  | 7             | Thread block cluster | Kernel execution  |
+```
+    **Memory Space Details:**
+    - **generic**: Can point to any memory space; requires runtime resolution of
+      the actual address space. Use when the pointer origin is unknown at compile
+      time. Performance varies based on the underlying memory space.
+    - **global**: Accessible by all threads across all blocks; persists across
+      kernel launches. Highest latency but largest capacity (device memory). Best
+      for large data and inter-kernel communication.
+    - **shared**: Shared within a thread block (CTA); very fast on-chip memory for
+      cooperation between threads in the same block. Limited capacity. Ideal for
+      block-level collaboration, caching, and reducing global memory traffic.
+    - **constant**: Read-only memory cached per SM. Size typically limited to
+      64KB. Best for read-only data and uniform values accessed by all threads.
+    - **local**: Private to each thread. Use for per-thread private data and
+      automatic variables that don't fit in registers.
+    - **tensor**: Special memory space for tensor core operations. Used by
+      `tcgen05` instructions on SM 100+ for tensor input/output operations.
+    - **shared_cluster**: Distributed shared memory across thread blocks within
+      a cluster (SM 90+). Enables collaboration beyond single-block scope with
+      fast access across cluster threads.
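+
+    For illustration (a minimal sketch; it assumes the LLVM dialect's
+    `!llvm.ptr<N>` spelling, where `N` is the address space from the table
+    above), a kernel taking a global-memory input and a shared-memory scratch
+    pointer would be typed as:
+```mlir
+    // %in lives in global (1) memory, %scratch in shared (3) memory.
+    llvm.func @kernel(%in: !llvm.ptr<1>, %scratch: !llvm.ptr<3>) {
+      llvm.return
+    }
+```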
  }];
 
  let name = "nvvm";
@@ -228,6 +262,33 @@ def NVVMMemorySpaceAttr :
   let assemblyFormat = "`<` $value `>`";
 }
 
+// Attrs describing the scope of the Memory Operation
+def MemScopeKindCTA : I32EnumAttrCase<"CTA", 0, "cta">;
+def MemScopeKindCluster : I32EnumAttrCase<"CLUSTER", 1, "cluster">;
+def MemScopeKindGPU : I32EnumAttrCase<"GPU", 2, "gpu">;
+def MemScopeKindSYS : I32EnumAttrCase<"SYS", 3, "sys">;
+
+def MemScopeKind : I32EnumAttr<"MemScopeKind", "NVVM Memory Scope kind",
+  [MemScopeKindCTA, MemScopeKindCluster, MemScopeKindGPU, MemScopeKindSYS]> {
+  let genSpecializedAttr = 0;
+  let cppNamespace = "::mlir::NVVM";
+}
+def MemScopeKindAttr : EnumAttr<NVVM_Dialect, MemScopeKind, "mem_scope"> {
+  let assemblyFormat = "`<` $value `>`";
+}
+
+// Attrs to disambiguate the cta or cluster space within shared memory
+def SharedSpaceCTA : I32EnumAttrCase<"shared_cta", 0, "cta">;
+def SharedSpaceCluster : I32EnumAttrCase<"shared_cluster", 1, "cluster">;
+def SharedSpace : I32EnumAttr<"SharedSpace", "Shared memory space",
+  [SharedSpaceCTA, SharedSpaceCluster]> {
+  let genSpecializedAttr = 0;
+  let cppNamespace = "::mlir::NVVM";
+}
+def SharedSpaceAttr : EnumAttr<NVVM_Dialect, SharedSpace, "shared_space"> {
+  let assemblyFormat = "`<` $value `>`";
+}
+
 //===----------------------------------------------------------------------===//
 // NVVM intrinsic operations
 //===----------------------------------------------------------------------===//
@@ -1107,17 +1168,6 @@ def NVVM_FenceScClusterOp : NVVM_Op<"fence.sc.cluster"> {
   let assemblyFormat = "attr-dict";
 }
 
-def SharedSpaceCTA : I32EnumAttrCase<"shared_cta", 0, "cta">;
-def SharedSpaceCluster : I32EnumAttrCase<"shared_cluster", 1, "cluster">;
-def SharedSpace : I32EnumAttr<"SharedSpace", "Shared memory space",
-  [SharedSpaceCTA, SharedSpaceCluster]> {
-  let genSpecializedAttr = 0;
-  let cppNamespace = "::mlir::NVVM";
-}
-def SharedSpaceAttr : EnumAttr<NVVM_Dialect, SharedSpace, "shared_space"> {
-  let assemblyFormat = "`<` $value `>`";
-}
-
 def ProxyAlias : I32EnumAttrCase<"alias", 0, "alias">;
 def ProxyAsync : I32EnumAttrCase<"async", 1, "async">;
 def ProxyAsyncGlobal : I32EnumAttrCase<"async_global", 2, "async.global">;
@@ -1158,21 +1208,6 @@ def NVVM_FenceProxyOp : NVVM_PTXBuilder_Op<"fence.proxy">,
 
   let hasVerifier = 1;
 }
 
-// Attrs describing the scope of the Memory Operation
-def MemScopeKindCTA : I32EnumAttrCase<"CTA", 0, "cta">;
-def MemScopeKindCluster : I32EnumAttrCase<"CLUSTER", 1, "cluster">;
-def MemScopeKindGPU : I32EnumAttrCase<"GPU", 2, "gpu">;
-def MemScopeKindSYS : I32EnumAttrCase<"SYS", 3, "sys">;
-
-def MemScopeKind : I32EnumAttr<"MemScopeKind", "NVVM Memory Scope kind",
-  [MemScopeKindCTA, MemScopeKindCluster, MemScopeKindGPU, MemScopeKindSYS]> {
-  let genSpecializedAttr = 0;
-  let cppNamespace = "::mlir::NVVM";
-}
-def MemScopeKindAttr : EnumAttr<NVVM_Dialect, MemScopeKind, "mem_scope"> {
-  let assemblyFormat = "`<` $value `>`";
-}
-
 def NVVM_FenceProxyAcquireOp : NVVM_Op<"fence.proxy.acquire">,
   Arguments<(ins MemScopeKindAttr:$scope, LLVM_PointerGeneric:$addr, I32:$size,
              DefaultValuedAttr
diff --git a/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.td b/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.td
   ];
 }
 
+def ACCImplicitRoutine : Pass<"acc-implicit-routine", "mlir::ModuleOp"> {
+  let summary = "Generate implicit acc routine for functions in acc regions";
+  let description = [{
+    This pass implements the implicit rules described in the OpenACC
+    specification for the `Routine Directive` (OpenACC 3.4 spec, section 2.15.1).
+
+    "If no explicit routine directive applies to a procedure whose definition
+    appears in the program unit being compiled, then the implementation applies
+    an implicit routine directive to that procedure if any of the following
+    conditions holds:
+    - The procedure is called or its address is accessed in a compute region."
+
+    The specification further states:
+    "When the implementation applies an implicit routine directive to a procedure,
+    it must recursively apply implicit routine directives to other procedures for
+    which the above rules specify relevant dependencies. Such dependencies can
+    form a cycle, so the implementation must take care to avoid infinite recursion."
+
+    This pass implements these requirements by:
+    1. Walking through all OpenACC compute constructs and functions already
+       marked with `acc routine` in the module and identifying function calls
+       within these regions.
+    2. Creating implicit `acc.routine` operations for functions that don't already
+       have routine declarations.
+    3. Recursively walking through all existing `acc routine` operations and
+       creating implicit routine operations for function calls within these
+       routines, while avoiding infinite recursion through proper tracking.
+  }];
+  let dependentDialects = ["mlir::acc::OpenACCDialect"];
+  let options = [
+    Option<"deviceType", "device-type", "mlir::acc::DeviceType",
+           "mlir::acc::DeviceType::None",
+           "Target device type for implicit routine generation. "
+           "Ensures that `acc routine` device_type clauses are "
+           "properly considered, not just default clauses.">
+  ];
+}
+
 #endif // MLIR_DIALECT_OPENACC_TRANSFORMS_PASSES
diff --git a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h
index af64370a62dd7..419ecda80e9a5 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h
@@ -58,9 +58,10 @@ enum class SparseEmitStrategy {
 namespace sparse_tensor {
 
 /// Defines a strategy for loop ordering during sparse code generation.
+/// See Passes.td for strategy descriptions.
 enum class LoopOrderingStrategy : unsigned {
-  kDefault, ///< Default strategy (eagerly selects last loop in topological
-            ///< sort).
+  kDefault,
+  kDenseOuter,
 };
 
 } // namespace sparse_tensor
diff --git a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td
index 75e77d67db1b3..0b8562e484f51 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td
@@ -85,7 +85,9 @@ def SparseReinterpretMap : Pass<"sparse-reinterpret-map", "ModuleOp"> {
            "mlir::sparse_tensor::LoopOrderingStrategy::kDefault",
            "Set the loop ordering strategy for sparse code generation", [{llvm::cl::values(
              clEnumValN(mlir::sparse_tensor::LoopOrderingStrategy::kDefault, "default",
-                        "Default strategy (eagerly selects last loop in topological sort)"))}]>,
+                        "Default strategy (eagerly selects last loop in topological sort)"),
+             clEnumValN(mlir::sparse_tensor::LoopOrderingStrategy::kDenseOuter, "dense-outer",
+                        "Prefer dense, then compressed, then singleton dimensions outermost"))}]>,
   ];
 }
 
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index 689ebd0d1179a..4c67856b559b1 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -844,7 +844,7 @@ def XeGPU_LoadGatherOp : XeGPU_Op<"load", [MemoryEffects<[MemRead]>]> {
                        OptionalAttr<XeGPU_CacheHintAttr>:$l1_hint,
                        OptionalAttr<XeGPU_CacheHintAttr>:$l2_hint,
                        OptionalAttr<XeGPU_CacheHintAttr>:$l3_hint,
-                       OptionalAttr<XeGPU_LayoutAttr>:$layout);
+                       OptionalAttr<DistributeLayoutAttr>:$layout);
   let results = (outs AnyTypeOf<[XeGPU_ValueType, XeGPU_ScalarType]>:$value);
 
   let extraClassDeclaration = extraBaseClassDeclaration # [{
@@ -903,7 +903,7 @@ def XeGPU_LoadGatherOp : XeGPU_Op<"load", [MemoryEffects<[MemRead]>]> {
                    "xegpu::CachePolicyAttr": $l1_hint,
                    "xegpu::CachePolicyAttr": $l2_hint,
                    "xegpu::CachePolicyAttr": $l3_hint,
-                   "xegpu::LayoutAttr": $layout)>
+                   "xegpu::DistributeLayoutAttr": $layout)>
   ];
 
   let hasVerifier = 1;
@@ -988,7 +988,7 @@ def XeGPU_StoreScatterOp : XeGPU_Op<"store", [MemoryEffects<[MemWrite]>]> {
                        OptionalAttr<XeGPU_CacheHintAttr>:$l1_hint,
                        OptionalAttr<XeGPU_CacheHintAttr>:$l2_hint,
                        OptionalAttr<XeGPU_CacheHintAttr>:$l3_hint,
-                       OptionalAttr<XeGPU_LayoutAttr>:$layout);
+                       OptionalAttr<DistributeLayoutAttr>:$layout);
 
   let extraClassDeclaration = extraBaseClassDeclaration#[{
     Type getDestType() {
@@ -1046,7 +1046,7 @@ def XeGPU_StoreScatterOp : XeGPU_Op<"store", [MemoryEffects<[MemWrite]>]> {
                    "xegpu::CachePolicyAttr": $l1_hint,
                    "xegpu::CachePolicyAttr": $l2_hint,
                    "xegpu::CachePolicyAttr": $l3_hint,
-                   "xegpu::LayoutAttr": $layout)>
+                   "xegpu::DistributeLayoutAttr": $layout)>
   ];
 
   let hasVerifier = 1;
diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
index a5831559558ac..edc6565f44f00 100644
--- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
+++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
@@ -43,6 +43,7 @@ constexpr Chipset kGfx908 = Chipset(9, 0, 8);
 constexpr Chipset kGfx90a = Chipset(9, 0, 0xa);
 constexpr Chipset kGfx942 = Chipset(9, 4, 2);
 constexpr Chipset kGfx950 = Chipset(9, 5, 0);
+constexpr Chipset kGfx1250 = Chipset(12, 5, 0);
 
 /// Convert an unsigned number `val` to i32.
 static Value convertUnsignedToI32(ConversionPatternRewriter &rewriter,
@@ -1149,7 +1150,7 @@ static std::optional<StringRef> wmmaOpToIntrinsic(WMMAOp wmma,
                                     k, isRDNA3);
 
   // Handle gfx1250.
-  if (chipset == Chipset{12, 5, 0})
+  if (chipset == kGfx1250)
     return wmmaOpToIntrinsicGfx1250(elemSourceType, elemBSourceType,
                                     elemDestType, k);
 
@@ -1300,7 +1301,7 @@ struct WMMAOpLowering : public ConvertOpToLLVMPattern<WMMAOp> {
     if (chipset.majorVersion != 11 && chipset.majorVersion != 12)
       return op->emitOpError("WMMA only supported on gfx11 and gfx12");
 
-    bool isGFX1250 = chipset >= Chipset(12, 5, 0);
+    bool isGFX1250 = chipset >= kGfx1250;
 
     // The WMMA operations represent vectors of bf16s as vectors of i16s
     // (except on gfx1250), so we need to bitcast bfloats to i16 and then
@@ -1505,6 +1506,19 @@ struct ExtPackedFp8OpLowering final
                   ConversionPatternRewriter &rewriter) const override;
 };
 
+struct ScaledExtPacked816OpLowering final
+    : public ConvertOpToLLVMPattern<ScaledExtPacked816Op> {
+  ScaledExtPacked816OpLowering(const LLVMTypeConverter &converter,
+                               Chipset chipset)
+      : ConvertOpToLLVMPattern<ScaledExtPacked816Op>(converter),
+        chipset(chipset) {}
+  Chipset chipset;
+
+  LogicalResult
+  matchAndRewrite(ScaledExtPacked816Op op, ScaledExtPacked816OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override;
+};
+
 struct PackedTrunc2xFp8OpLowering final
     : public ConvertOpToLLVMPattern<PackedTrunc2xFp8Op> {
   PackedTrunc2xFp8OpLowering(const LLVMTypeConverter &converter,
@@ -1613,6 +1627,170 @@ LogicalResult ExtPackedFp8OpLowering::matchAndRewrite(
   return success();
 }
 
+int32_t getScaleSel(int32_t blockSize, unsigned bitWidth,
+                    int32_t firstScaleLane, int32_t firstScaleByte) {
+  // When lowering amdgpu.scaled_ext_packed816 to rocdl.cvt.scale.pk*.f*.f*
+  // operations, the attributes blockSize, sourceType, firstScaleLane and
+  // firstScaleByte are merged into a single attribute scaleSel. This is how
+  // those values are merged together.
+  assert(llvm::is_contained({16, 32}, blockSize));
+  assert(llvm::is_contained(llvm::ArrayRef{4, 6, 8}, bitWidth));
+
+  const bool is_fp8 = bitWidth == 8;
+  const bool is_block_16 = blockSize == 16;
+
+  if (!is_fp8) {
+    int bit_0 = is_block_16;
+    assert(llvm::is_contained({0, 1, 2}, firstScaleByte));
+    int bit_1 = (firstScaleByte == 2) << 1;
+    assert(llvm::is_contained({0, 1}, firstScaleLane));
+    int bit_2 = firstScaleLane << 2;
+    return bit_2 | bit_1 | bit_0;
+  }
+
+  int bit_0 = is_block_16;
+  // firstScaleByte is guaranteed to be defined by two bits.
+  assert(llvm::is_contained({0, 1, 2, 3}, firstScaleByte));
+  int bit_2_and_1 = firstScaleByte << 1;
+  assert(llvm::is_contained({0, 1}, firstScaleLane));
+  int bit_3 = firstScaleLane << 3;
+  int bits = bit_3 | bit_2_and_1 | bit_0;
+  // These are invalid cases.
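+  // For example, 0b0011 decodes to blockSize 16 with firstScaleByte 1, a
+  // combination the op verifier rejects for fp8 (only (firstScaleLane,
+  // firstScaleByte) of (0, 0) or (1, 2) is accepted for 16-element fp8 blocks).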
+  assert(!llvm::is_contained(
+      {0b0011, 0b0101, 0b0111, 0b1000, 0b1001, 0b1011, 0b1111}, bits));
+  return bits;
+}
+
+static std::optional<StringRef>
+scaledExtPacked816ToIntrinsic(Type srcElemType, Type destElemType) {
+  using fp4 = Float4E2M1FNType;
+  using fp8 = Float8E4M3FNType;
+  using bf8 = Float8E5M2Type;
+  using fp6 = Float6E2M3FNType;
+  using bf6 = Float6E3M2FNType;
+  if (isa<fp4>(srcElemType)) {
+    if (destElemType.isF16())
+      return ROCDL::CvtPkScalePk8F16Fp4Op::getOperationName();
+    if (destElemType.isBF16())
+      return ROCDL::CvtPkScalePk8Bf16Fp4Op::getOperationName();
+    if (destElemType.isF32())
+      return ROCDL::CvtPkScalePk8F32Fp4Op::getOperationName();
+    return std::nullopt;
+  }
+  if (isa<fp8>(srcElemType)) {
+    if (destElemType.isF16())
+      return ROCDL::CvtPkScalePk8F16Fp8Op::getOperationName();
+    if (destElemType.isBF16())
+      return ROCDL::CvtPkScalePk8Bf16Fp8Op::getOperationName();
+    if (destElemType.isF32())
+      return ROCDL::CvtPkScalePk8F32Fp8Op::getOperationName();
+    return std::nullopt;
+  }
+  if (isa<bf8>(srcElemType)) {
+    if (destElemType.isF16())
+      return ROCDL::CvtPkScalePk8F16Bf8Op::getOperationName();
+    if (destElemType.isBF16())
+      return ROCDL::CvtPkScalePk8Bf16Bf8Op::getOperationName();
+    if (destElemType.isF32())
+      return ROCDL::CvtPkScalePk8F32Bf8Op::getOperationName();
+    return std::nullopt;
+  }
+  if (isa<fp6>(srcElemType)) {
+    if (destElemType.isF16())
+      return ROCDL::CvtPkScalePk16F16Fp6Op::getOperationName();
+    if (destElemType.isBF16())
+      return ROCDL::CvtPkScalePk16Bf16Fp6Op::getOperationName();
+    if (destElemType.isF32())
+      return ROCDL::CvtPkScalePk16F32Fp6Op::getOperationName();
+    return std::nullopt;
+  }
+  if (isa<bf6>(srcElemType)) {
+    if (destElemType.isF16())
+      return ROCDL::CvtPkScalePk16F16Bf6Op::getOperationName();
+    if (destElemType.isBF16())
+      return ROCDL::CvtPkScalePk16Bf16Bf6Op::getOperationName();
+    if (destElemType.isF32())
+      return ROCDL::CvtPkScalePk16F32Bf6Op::getOperationName();
+    return std::nullopt;
+  }
+  llvm_unreachable("invalid combination of element types for packed conversion "
+                   "instructions");
+}
+
+LogicalResult ScaledExtPacked816OpLowering::matchAndRewrite(
+    ScaledExtPacked816Op op, ScaledExtPacked816OpAdaptor adaptor,
+    ConversionPatternRewriter &rewriter) const {
+  using fp4 = Float4E2M1FNType;
+  using fp8 = Float8E4M3FNType;
+  using bf8 = Float8E5M2Type;
+  using fp6 = Float6E2M3FNType;
+  using bf6 = Float6E3M2FNType;
+  Location loc = op.getLoc();
+  if (chipset != kGfx1250) {
+    return rewriter.notifyMatchFailure(
+        loc,
+        "Scaled fp packed conversion instructions are not available on target "
+        "architecture and their emulation is not implemented");
+  }
+  int32_t firstScaleLane = op.getFirstScaleLane();
+  int32_t firstScaleByte = op.getFirstScaleByte();
+  int32_t blockSize = op.getBlockSize();
+  auto sourceType = cast<VectorType>(op.getSource().getType());
+  auto srcElemType = cast<FloatType>(sourceType.getElementType());
+  unsigned bitWidth = srcElemType.getWidth();
+  int32_t scaleSel =
+      getScaleSel(blockSize, bitWidth, firstScaleLane, firstScaleByte);
+
+  auto targetType = cast<VectorType>(op.getResult().getType());
+  auto destElemType = cast<FloatType>(targetType.getElementType());
+  IntegerType i32 = rewriter.getI32Type();
+  Value castedScale =
+      LLVM::BitcastOp::create(rewriter, loc, i32, adaptor.getScale());
+
+  Value source = adaptor.getSource();
+  Type llvmResultType = typeConverter->convertType(op.getResult().getType());
+  Type packedType = nullptr;
+  if (isa<fp4>(srcElemType)) {
+    packedType = i32;
+    packedType = getTypeConverter()->convertType(packedType);
+  } else if (isa<fp8, bf8>(srcElemType)) {
+    packedType = VectorType::get(2, i32);
+    packedType = getTypeConverter()->convertType(packedType);
+  } else if (isa<fp6, bf6>(srcElemType)) {
+    packedType = VectorType::get(3, i32);
+    packedType = getTypeConverter()->convertType(packedType);
+  } else {
+    llvm_unreachable("invalid element type for packed scaled ext");
+  }
+
+  if (!packedType || !llvmResultType) {
+    return rewriter.notifyMatchFailure(op, "type conversion failed");
+  }
+
+  Value castedSource =
+      LLVM::BitcastOp::create(rewriter, loc, packedType, source);
+
+  std::optional<StringRef> maybeIntrinsic =
+      scaledExtPacked816ToIntrinsic(srcElemType, destElemType);
+  if (!maybeIntrinsic.has_value())
+    return op.emitOpError(
+        "no intrinsic matching packed scaled conversion on the given chipset");
+
+  OperationState loweredOp(loc, *maybeIntrinsic);
+  loweredOp.addTypes({llvmResultType});
+  loweredOp.addOperands({castedSource, castedScale});
+
+  SmallVector<NamedAttribute> attrs;
+  attrs.push_back(
+      NamedAttribute("scaleSel", rewriter.getI32IntegerAttr(scaleSel)));
+
+  loweredOp.addAttributes(attrs);
+  Operation *lowered = rewriter.create(loweredOp);
+  rewriter.replaceOp(op, lowered);
+
+  return success();
+}
+
 LogicalResult ScaledExtPackedOpLowering::matchAndRewrite(
     ScaledExtPackedOp op, ScaledExtPackedOpAdaptor adaptor,
     ConversionPatternRewriter &rewriter) const {
@@ -2151,9 +2329,10 @@ void mlir::populateAMDGPUToROCDLConversionPatterns(LLVMTypeConverter &converter,
            ROCDL::RawPtrBufferAtomicCmpSwap>,
        AMDGPUDPPLowering, MemoryCounterWaitOpLowering, LDSBarrierOpLowering,
        SchedBarrierOpLowering, MFMAOpLowering, ScaledMFMAOpLowering,
-       WMMAOpLowering, ExtPackedFp8OpLowering, ScaledExtPackedOpLowering,
-       PackedScaledTruncOpLowering, PackedTrunc2xFp8OpLowering,
-       PackedStochRoundFp8OpLowering, GatherToLDSOpLowering,
-       TransposeLoadOpLowering, AMDGPUPermlaneLowering>(converter, chipset);
+       WMMAOpLowering, ExtPackedFp8OpLowering, ScaledExtPacked816OpLowering,
+       ScaledExtPackedOpLowering, PackedScaledTruncOpLowering,
+       PackedTrunc2xFp8OpLowering, PackedStochRoundFp8OpLowering,
+       GatherToLDSOpLowering, TransposeLoadOpLowering,
+       AMDGPUPermlaneLowering>(converter, chipset);
   patterns.add(converter);
 }
diff --git a/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp b/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp
index f2bacc3399144..cc3e8468f298b 100644
--- a/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp
+++ b/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp
@@ -281,6 +281,7 @@ ConstantOpLowering::matchAndRewrite(arith::ConstantOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const {
   return LLVM::detail::oneToOneRewrite(op, LLVM::ConstantOp::getOperationName(),
                                        adaptor.getOperands(), op->getAttrs(),
+                                       /*propAttr=*/Attribute{},
                                        *getTypeConverter(), rewriter);
 }
 
diff --git a/mlir/lib/Conversion/ComplexToLLVM/ComplexToLLVM.cpp b/mlir/lib/Conversion/ComplexToLLVM/ComplexToLLVM.cpp
index 86d02e6c6209f..6a0c21185983e 100644
--- a/mlir/lib/Conversion/ComplexToLLVM/ComplexToLLVM.cpp
+++ b/mlir/lib/Conversion/ComplexToLLVM/ComplexToLLVM.cpp
@@ -96,7 +96,8 @@ struct ConstantOpLowering : public ConvertOpToLLVMPattern<complex::ConstantOp> {
                   ConversionPatternRewriter &rewriter) const override {
     return LLVM::detail::oneToOneRewrite(
         op, LLVM::ConstantOp::getOperationName(), adaptor.getOperands(),
-        op->getAttrs(), *getTypeConverter(), rewriter);
+        op->getAttrs(), /*propAttr=*/Attribute{}, *getTypeConverter(),
+        rewriter);
   }
 };
 
diff --git a/mlir/lib/Conversion/LLVMCommon/Pattern.cpp b/mlir/lib/Conversion/LLVMCommon/Pattern.cpp
index 48a03198fd465..f28a6ccb42455 100644
--- a/mlir/lib/Conversion/LLVMCommon/Pattern.cpp
+++ b/mlir/lib/Conversion/LLVMCommon/Pattern.cpp
@@ -296,19 +296,13 @@ LogicalResult ConvertToLLVMPattern::copyUnrankedDescriptors(
 // Detail methods
 //===----------------------------------------------------------------------===//
 
-void LLVM::detail::setNativeProperties(Operation *op,
-                                       IntegerOverflowFlags overflowFlags) {
-  if (auto iface = dyn_cast<IntegerOverflowFlagsInterface>(op))
-    iface.setOverflowFlags(overflowFlags);
-}
-
 /// Replaces the given operation "op" with a new operation of type "targetOp"
 /// and given operands.
 LogicalResult LLVM::detail::oneToOneRewrite(
     Operation *op, StringRef targetOp, ValueRange operands,
-    ArrayRef<NamedAttribute> targetAttrs,
-    const LLVMTypeConverter &typeConverter, ConversionPatternRewriter &rewriter,
-    IntegerOverflowFlags overflowFlags) {
+    ArrayRef<NamedAttribute> targetAttrs, Attribute propertiesAttr,
+    const LLVMTypeConverter &typeConverter,
+    ConversionPatternRewriter &rewriter) {
   unsigned numResults = op->getNumResults();
 
   SmallVector<Type> resultTypes;
@@ -320,11 +314,10 @@ LogicalResult LLVM::detail::oneToOneRewrite(
   }
 
   // Create the operation through state since we don't know its C++ type.
-  Operation *newOp =
-      rewriter.create(op->getLoc(), rewriter.getStringAttr(targetOp), operands,
-                      resultTypes, targetAttrs);
-
-  setNativeProperties(newOp, overflowFlags);
+  OperationState state(op->getLoc(), rewriter.getStringAttr(targetOp), operands,
+                       resultTypes, targetAttrs);
+  state.propertiesAttr = propertiesAttr;
+  Operation *newOp = rewriter.create(state);
 
   // If the operation produced 0 or 1 result, return them immediately.
   if (numResults == 0)
diff --git a/mlir/lib/Conversion/LLVMCommon/VectorPattern.cpp b/mlir/lib/Conversion/LLVMCommon/VectorPattern.cpp
index e7dd0b506e12d..24b01259f0499 100644
--- a/mlir/lib/Conversion/LLVMCommon/VectorPattern.cpp
+++ b/mlir/lib/Conversion/LLVMCommon/VectorPattern.cpp
@@ -105,9 +105,9 @@ LogicalResult LLVM::detail::handleMultidimensionalVectors(
 
 LogicalResult LLVM::detail::vectorOneToOneRewrite(
     Operation *op, StringRef targetOp, ValueRange operands,
-    ArrayRef<NamedAttribute> targetAttrs,
-    const LLVMTypeConverter &typeConverter, ConversionPatternRewriter &rewriter,
-    IntegerOverflowFlags overflowFlags) {
+    ArrayRef<NamedAttribute> targetAttrs, Attribute propertiesAttr,
+    const LLVMTypeConverter &typeConverter,
+    ConversionPatternRewriter &rewriter) {
   assert(!operands.empty());
 
   // Cannot convert ops if their operands are not of LLVM type.
@@ -116,15 +116,14 @@ LogicalResult LLVM::detail::vectorOneToOneRewrite(
 
   auto llvmNDVectorTy = operands[0].getType();
   if (!isa<LLVM::LLVMArrayType>(llvmNDVectorTy))
-    return oneToOneRewrite(op, targetOp, operands, targetAttrs, typeConverter,
-                           rewriter, overflowFlags);
-
-  auto callback = [op, targetOp, targetAttrs, overflowFlags,
+    return oneToOneRewrite(op, targetOp, operands, targetAttrs, propertiesAttr,
+                           typeConverter, rewriter);
+  auto callback = [op, targetOp, targetAttrs, propertiesAttr,
                    &rewriter](Type llvm1DVectorTy, ValueRange operands) {
-    Operation *newOp =
-        rewriter.create(op->getLoc(), rewriter.getStringAttr(targetOp),
-                        operands, llvm1DVectorTy, targetAttrs);
-    LLVM::detail::setNativeProperties(newOp, overflowFlags);
+    OperationState state(op->getLoc(), rewriter.getStringAttr(targetOp),
+                         operands, llvm1DVectorTy, targetAttrs);
+    state.propertiesAttr = propertiesAttr;
+    Operation *newOp = rewriter.create(state);
     return newOp->getResult(0);
   };
 
diff --git a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
index 5c35823678576..d55f3cec47c1f 100644
--- a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
+++ b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
@@ -343,28 +343,41 @@ void RawBufferAtomicCmpswapOp::getCanonicalizationPatterns(
 //===----------------------------------------------------------------------===//
 
 LogicalResult ScaledExtPacked816Op::verify() {
   int blockSize = getBlockSize();
-  assert((blockSize == 16 || blockSize == 32) && "invalid block size");
+  assert(llvm::is_contained({16, 32}, blockSize) && "invalid block size");
   int firstScaleByte = getFirstScaleByte();
+  int firstScaleLane = getFirstScaleLane();
   auto sourceType = cast<VectorType>(getSource().getType());
   Type elementType = sourceType.getElementType();
   auto floatType = cast<FloatType>(elementType);
-  int bitWidth = floatType.getWidth();
+  unsigned bitWidth = floatType.getWidth();
 
-  if (llvm::is_contained({4, 6}, bitWidth) && blockSize == 16 &&
-      !llvm::is_contained({0, 1}, firstScaleByte)) {
-    return emitOpError("blockSize of 16 can only have firstScaleByte be 0 or 1 "
-                       "for f4 and f6.");
-  }
-  if (llvm::is_contained({4, 6}, bitWidth) && blockSize == 32 &&
-      !llvm::is_contained({0, 2}, firstScaleByte)) {
-    return emitOpError("blockSize of 32 can only have firstScaleByte be 0 or 2 "
-                       "for f4 and f6.");
-  }
-  if (bitWidth == 8 && blockSize == 16 &&
-      !llvm::is_contained({0, 2}, firstScaleByte)) {
-    return emitOpError(
-        "blockSize of 16 can only have firstScaleByte be 0 or 2 for f8.");
+  assert(llvm::is_contained(llvm::ArrayRef{4, 6, 8}, bitWidth));
+
+  const bool is_fp8 = bitWidth == 8;
+  const bool is_block_16 = blockSize == 16;
+
+  if (!is_fp8) {
+    if (is_block_16) {
+      if (!llvm::is_contained({0, 1}, firstScaleByte)) {
+        return emitOpError("blockSize of 16 can only have firstScaleByte be 0 "
+                           "or 1 for f4 and f6.");
+      }
+    } else {
+      if (!llvm::is_contained({0, 2}, firstScaleByte)) {
+        return emitOpError("blockSize of 32 can only have firstScaleByte be 0 "
+                           "or 2 for f4 and f6.");
+      }
+    }
+  } else {
+    if (is_block_16) {
+      bool is_valid = ((firstScaleLane == 0) && (firstScaleByte == 0)) ||
+                      ((firstScaleLane == 1) && (firstScaleByte == 2));
+      if (!is_valid) {
+        return emitOpError("blockSize of 16 can only have (firstScaleLane, "
+                           "firstScaleByte) be (0, 0) or (1, 2) for f8.");
+      }
+    }
   }
 
   return success();
diff --git a/mlir/lib/Dialect/Arith/IR/ArithCanonicalization.td b/mlir/lib/Dialect/Arith/IR/ArithCanonicalization.td
index de3efc9fe3506..e256915933a71 100644
--- a/mlir/lib/Dialect/Arith/IR/ArithCanonicalization.td
+++ b/mlir/lib/Dialect/Arith/IR/ArithCanonicalization.td
@@ -389,8 +389,8 @@ def TruncIExtUIToExtUI :
 // trunci(shrsi(x, c)) -> trunci(shrui(x, c))
 def TruncIShrSIToTrunciShrUI :
     Pat<(Arith_TruncIOp:$tr
-           (Arith_ShRSIOp $x, (ConstantLikeMatcher TypedAttrInterface:$c0)), $overflow),
-        (Arith_TruncIOp (Arith_ShRUIOp $x, (Arith_ConstantOp (cast<"TypedAttr"> $c0))), $overflow),
+           (Arith_ShRSIOp $x, (ConstantLikeMatcher TypedAttrInterface:$c0), $exact), $overflow),
+        (Arith_TruncIOp (Arith_ShRUIOp $x, (Arith_ConstantOp (cast<"TypedAttr"> $c0)), $exact), $overflow),
        [(TruncationMatchesShiftAmount $x, $tr, $c0)]>;
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
index 7bfc3f6664d74..3ea9b772398ee 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
@@ -4223,6 +4223,34 @@ LogicalResult InlineAsmOp::verify() {
   return success();
 }
 
+//===----------------------------------------------------------------------===//
+// UDivOp
+//===----------------------------------------------------------------------===//
+
+Speculation::Speculatability UDivOp::getSpeculatability() {
+  // X / 0 => UB
+  Value divisor = getRhs();
+  if (matchPattern(divisor, m_IntRangeWithoutZeroU()))
+    return Speculation::Speculatable;
+
+  return Speculation::NotSpeculatable;
+}
+
+//===----------------------------------------------------------------------===//
+// SDivOp
+//===----------------------------------------------------------------------===//
+
+Speculation::Speculatability SDivOp::getSpeculatability() {
+  // This function conservatively assumes that all signed divisions by -1 are
+  // not speculatable.
+  // X / 0 => UB
+  // INT_MIN / -1 => UB
+  Value divisor = getRhs();
+  if (matchPattern(divisor, m_IntRangeWithoutZeroS()) &&
+      matchPattern(divisor, m_IntRangeWithoutNegOneS()))
+    return Speculation::Speculatable;
+
+  return Speculation::NotSpeculatable;
+}
+
 //===----------------------------------------------------------------------===//
 // LLVMDialect initialization, type parsing, and registration.
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitRoutine.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitRoutine.cpp
new file mode 100644
index 0000000000000..12efaf487a8ca
--- /dev/null
+++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitRoutine.cpp
@@ -0,0 +1,237 @@
+//===- ACCImplicitRoutine.cpp - OpenACC Implicit Routine Transform -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements the implicit rules described in the OpenACC
+// specification for the `Routine Directive` (OpenACC 3.4 spec, section 2.15.1).
+//
+// "If no explicit routine directive applies to a procedure whose definition
+// appears in the program unit being compiled, then the implementation applies
+// an implicit routine directive to that procedure if any of the following
+// conditions holds:
+// - The procedure is called or its address is accessed in a compute region."
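+// (Here a compute region is the body of one of the OpenACC compute
+// constructs: acc.parallel, acc.kernels, or acc.serial.)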
+//
+// The specification further states:
+// "When the implementation applies an implicit routine directive to a
+// procedure, it must recursively apply implicit routine directives to other
+// procedures for which the above rules specify relevant dependencies. Such
+// dependencies can form a cycle, so the implementation must take care to avoid
+// infinite recursion."
+//
+// This pass implements these requirements by:
+// 1. Walking through all OpenACC compute constructs and functions already
+//    marked with `acc routine` in the module and identifying function calls
+//    within these regions.
+// 2. Creating implicit `acc.routine` operations for functions that don't
+//    already have routine declarations.
+// 3. Recursively walking through all existing `acc routine` operations and
+//    creating implicit routine operations for function calls within these
+//    routines, while avoiding infinite recursion through proper tracking.
+//
+// Requirements:
+// -------------
+// To use this pass in a pipeline, the following requirements must be met:
+//
+// 1. Operation Interface Implementation: Operations that define functions
+//    or call functions should implement `mlir::FunctionOpInterface` and
+//    `mlir::CallOpInterface` respectively.
+//
+// 2. Analysis Registration (Optional): If custom behavior is needed for
+//    determining if a symbol use is valid within GPU regions, the dialect
+//    should pre-register the `acc::OpenACCSupport` analysis.
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/OpenACC/Transforms/Passes.h"
+
+#include "mlir/Dialect/OpenACC/Analysis/OpenACCSupport.h"
+#include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/BuiltinAttributes.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/IR/Value.h"
+#include "mlir/Interfaces/CallInterfaces.h"
+#include "mlir/Interfaces/FunctionInterfaces.h"
+#include <queue>
+
+#define DEBUG_TYPE "acc-implicit-routine"
+
+namespace mlir {
+namespace acc {
+#define GEN_PASS_DEF_ACCIMPLICITROUTINE
+#include "mlir/Dialect/OpenACC/Transforms/Passes.h.inc"
+} // namespace acc
+} // namespace mlir
+
+namespace {
+
+using namespace mlir;
+
+class ACCImplicitRoutine
+    : public acc::impl::ACCImplicitRoutineBase<ACCImplicitRoutine> {
+private:
+  unsigned routineCounter = 0;
+  static constexpr llvm::StringRef accRoutinePrefix = "acc_routine_";
+
+  // Count existing routine operations and update the counter.
+  void initRoutineCounter(ModuleOp module) {
+    module.walk([&](acc::RoutineOp routineOp) { routineCounter++; });
+  }
+
+  // Returns true if the `acc routine` has a default bind clause or a
+  // device-type specific bind clause for the given device type.
+  bool isACCRoutineBindDefaultOrDeviceType(acc::RoutineOp op,
+                                           acc::DeviceType deviceType) {
+    // Fast check to avoid device-type specific lookups.
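+    // getBindIdName()/getBindStrName() cover the default bind clause; the
+    // device-type specific variants are reached through the
+    // getBindNameValue() calls below.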
+    if (!op.getBindIdName() && !op.getBindStrName())
+      return false;
+    return op.getBindNameValue().has_value() ||
+           op.getBindNameValue(deviceType).has_value();
+  }
+
+  // Generate a unique name for the routine and create the routine operation.
+  acc::RoutineOp createRoutineOp(OpBuilder &builder, Location loc,
+                                 FunctionOpInterface &callee) {
+    std::string routineName =
+        (accRoutinePrefix + std::to_string(routineCounter++)).str();
+    auto routineOp = acc::RoutineOp::create(
+        builder, loc,
+        /* sym_name=*/builder.getStringAttr(routineName),
+        /* func_name=*/
+        mlir::SymbolRefAttr::get(builder.getContext(),
+                                 builder.getStringAttr(callee.getName())),
+        /* bindIdName=*/nullptr,
+        /* bindStrName=*/nullptr,
+        /* bindIdNameDeviceType=*/nullptr,
+        /* bindStrNameDeviceType=*/nullptr,
+        /* worker=*/nullptr,
+        /* vector=*/nullptr,
+        /* seq=*/nullptr,
+        /* nohost=*/nullptr,
+        /* implicit=*/builder.getUnitAttr(),
+        /* gang=*/nullptr,
+        /* gangDim=*/nullptr,
+        /* gangDimDeviceType=*/nullptr);
+
+    // Assert that the callee does not already have a routine info attribute.
+    assert(!callee->hasAttr(acc::getRoutineInfoAttrName()) &&
+           "function is already associated with a routine");
+
+    callee->setAttr(
+        acc::getRoutineInfoAttrName(),
+        mlir::acc::RoutineInfoAttr::get(
+            builder.getContext(),
+            {mlir::SymbolRefAttr::get(builder.getContext(),
+                                      builder.getStringAttr(routineName))}));
+    return routineOp;
+  }
+
+  // Used to walk through a compute region looking for function calls.
+  void
+  implicitRoutineForCallsInComputeRegions(Operation *op, SymbolTable &symTab,
+                                          mlir::OpBuilder &builder,
+                                          acc::OpenACCSupport &accSupport) {
+    op->walk([&](CallOpInterface callOp) {
+      if (!callOp.getCallableForCallee())
+        return;
+
+      auto calleeSymbolRef =
+          dyn_cast<SymbolRefAttr>(callOp.getCallableForCallee());
+      // When the call is done through an SSA value, the callee is not a
+      // symbol. Skip it because we don't know the call target.
+      if (!calleeSymbolRef)
+        return;
+
+      auto callee = symTab.lookup<FunctionOpInterface>(
+          calleeSymbolRef.getLeafReference().str());
+      assert(callee && "callee function must be found in symbol table");
+      // If the callee is already a valid symbol for GPU regions, skip it.
+      if (accSupport.isValidSymbolUse(callOp.getOperation(), calleeSymbolRef))
+        return;
+      builder.setInsertionPoint(callee);
+      createRoutineOp(builder, callee.getLoc(), callee);
+    });
+  }
+
+  // Recursively handle calls within a routine operation.
+  void implicitRoutineForCallsInRoutine(acc::RoutineOp routineOp,
+                                        mlir::OpBuilder &builder,
+                                        acc::OpenACCSupport &accSupport,
+                                        acc::DeviceType targetDeviceType) {
+    // When a bind clause is used, it means that the target is different from
+    // the function to which the `acc routine` is applied. Skip this case to
+    // avoid implicitly and recursively marking calls that would not end up
+    // on the device.
+    if (isACCRoutineBindDefaultOrDeviceType(routineOp, targetDeviceType))
+      return;
+
+    SymbolTable symTab(routineOp->getParentOfType<ModuleOp>());
+    std::queue<acc::RoutineOp> routineQueue;
+    routineQueue.push(routineOp);
+    while (!routineQueue.empty()) {
+      auto currentRoutine = routineQueue.front();
+      routineQueue.pop();
+      auto func = symTab.lookup<FunctionOpInterface>(
+          currentRoutine.getFuncName().getLeafReference());
+      func.walk([&](CallOpInterface callOp) {
+        if (!callOp.getCallableForCallee())
+          return;
+
+        auto calleeSymbolRef =
+            dyn_cast<SymbolRefAttr>(callOp.getCallableForCallee());
+        // When the call is done through an SSA value, the callee is not a
+        // symbol. Skip it because we don't know the call target.
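+        // (e.g. an indirect call through a function-pointer SSA value)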
+        if (!calleeSymbolRef)
+          return;
+
+        auto callee = symTab.lookup<FunctionOpInterface>(
+            calleeSymbolRef.getLeafReference().str());
+        assert(callee && "callee function must be found in symbol table");
+        // If the callee is already a valid symbol for GPU regions, skip it.
+        if (accSupport.isValidSymbolUse(callOp.getOperation(), calleeSymbolRef))
+          return;
+        builder.setInsertionPoint(callee);
+        auto newRoutineOp = createRoutineOp(builder, callee.getLoc(), callee);
+        routineQueue.push(newRoutineOp);
+      });
+    }
+  }
+
+public:
+  using ACCImplicitRoutineBase::ACCImplicitRoutineBase;
+
+  void runOnOperation() override {
+    auto module = getOperation();
+    mlir::OpBuilder builder(module.getContext());
+    SymbolTable symTab(module);
+    initRoutineCounter(module);
+
+    acc::OpenACCSupport &accSupport = getAnalysis<acc::OpenACCSupport>();
+
+    // Handle compute regions.
+    module.walk([&](Operation *op) {
+      if (isa<ACC_COMPUTE_CONSTRUCT_OPS>(op))
+        implicitRoutineForCallsInComputeRegions(op, symTab, builder,
+                                                accSupport);
+    });
+
+    // Use the device type option from the pass options.
+    acc::DeviceType targetDeviceType = deviceType;
+
+    // Handle existing routines.
+    module.walk([&](acc::RoutineOp routineOp) {
+      implicitRoutineForCallsInRoutine(routineOp, builder, accSupport,
+                                       targetDeviceType);
+    });
+  }
+};
+
+} // namespace
diff --git a/mlir/lib/Dialect/OpenACC/Transforms/CMakeLists.txt b/mlir/lib/Dialect/OpenACC/Transforms/CMakeLists.txt
index f8fff5958f8c7..028af0362f26e 100644
--- a/mlir/lib/Dialect/OpenACC/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/OpenACC/Transforms/CMakeLists.txt
@@ -1,5 +1,6 @@
 add_mlir_dialect_library(MLIROpenACCTransforms
   ACCImplicitData.cpp
+  ACCImplicitRoutine.cpp
   LegalizeDataValues.cpp
 
   ADDITIONAL_HEADER_DIRS
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/IterationGraphSorter.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/IterationGraphSorter.cpp
index ffa8b402e0b6b..99048034b4f0c 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/IterationGraphSorter.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/IterationGraphSorter.cpp
@@ -80,6 +80,53 @@ inline static bool includesDenseOutput(SortMask mask) {
   return includesAny(mask, SortMask::kIncludeDenseOutput);
 }
 
+/// Returns a sparsity rank for loop ordering: lower values indicate
+/// dimensions that should be placed in outer loops.
+/// 0 = Dense, 1 = Compressed, 2 = Singleton, 3 = Other/Unknown.
+static unsigned getLoopSparsityRank(unsigned loop, ArrayRef<Value> allTensors,
+                                    ArrayRef<AffineMap> allMaps) {
+  // Start with the highest rank.
+  unsigned minRank = 3;
+
+  for (auto [tensor, map] : llvm::zip(allTensors, allMaps)) {
+    // Check if this loop accesses this tensor.
+    bool loopAccessesTensor = false;
+    unsigned tensorDim = 0;
+    for (AffineExpr expr : map.getResults()) {
+      if (auto dimExpr = dyn_cast<AffineDimExpr>(expr)) {
+        if (dimExpr.getPosition() == loop) {
+          loopAccessesTensor = true;
+          break;
+        }
+      }
+      tensorDim++;
+    }
+
+    if (loopAccessesTensor) {
+      const auto enc = getSparseTensorEncoding(tensor.getType());
+      if (!enc) {
+        // Dense tensor - lowest rank.
+        return 0;
+      } else {
+        // Sparse tensor - check the level type for this dimension.
+        auto lvlTypes = enc.getLvlTypes();
+        if (tensorDim < lvlTypes.size()) {
+          auto lvlType = lvlTypes[tensorDim];
+          if (isDenseLT(lvlType)) {
+            return 0; // Dense level.
+          } else if (isCompressedLT(lvlType)) {
+            minRank = std::min(minRank, 1u); // Compressed level.
+          } else if (isSingletonLT(lvlType)) {
+            minRank = std::min(minRank, 2u); // Singleton level.
+          }
+        }
+      }
+    }
+  }
+
+  return minRank;
+}
+
 AffineMap IterationGraphSorter::topoSort() {
   // The sorted result will put the first Reduction iterator to the
   // latest possible position.
@@ -107,10 +154,33 @@ AffineMap IterationGraphSorter::topoSort() {
     case sparse_tensor::LoopOrderingStrategy::kDefault:
       src = it.back();
       break;
+    case sparse_tensor::LoopOrderingStrategy::kDenseOuter: {
+      // Prefer dense, then compressed, then singleton dimensions outermost.
+      // Create combined tensor and map lists for analysis.
+      SmallVector<Value> allTensors = ins;
+      allTensors.push_back(out);
+      SmallVector<AffineMap> allMaps = loop2InsLvl;
+      allMaps.push_back(loop2OutLvl);
+
+      // Find loop with minimum (lowest) sparsity rank.
+      unsigned minLoop = it[0];
+      unsigned minRank = getLoopSparsityRank(minLoop, allTensors, allMaps);
+
+      for (auto candidateLoop : it) {
+        unsigned rank = getLoopSparsityRank(candidateLoop, allTensors, allMaps);
+        if (rank < minRank || (rank == minRank && candidateLoop < minLoop)) {
+          minLoop = candidateLoop;
+          minRank = rank;
+        }
+      }
+      src = minLoop;
+      break;
+    }
     }
     loopOrder.push_back(src);
-    it.pop_back();
+    // Remove the selected loop from the worklist.
+    it.erase(std::find(it.begin(), it.end(), src));
 
     // Update in-degree, and push 0-degree node into worklist.
     for (unsigned dst = 0; dst < numLoops; dst++) {
       if (itGraph[src][dst] && --inDegree[dst] == 0) {
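A note on the `kDenseOuter` strategy added above: `getLoopSparsityRank` returns 0 as soon as a loop indexes any un-encoded (all-dense) operand, so only loops that exclusively touch sparse levels get pushed inward, and ties keep the smaller loop index. A tiny standalone walk-through of the selection, with an assumed rank table (illustrative sketch only, not part of the patch):

#include <cstdio>

int main() {
  // Ranks as getLoopSparsityRank would compute them for x += A(i, j), with A
  // stored as CSR (level 0 dense, level 1 compressed) and a scalar output
  // indexed by neither loop: loop 0 (i) -> 0, loop 1 (j) -> 1.
  const unsigned rank[2] = {0, 1};
  // Mirror of the kDenseOuter selection: the minimum rank wins; with a
  // strict comparison, ties keep the smaller loop index.
  unsigned minLoop = 0;
  for (unsigned loop = 1; loop < 2; ++loop)
    if (rank[loop] < rank[minLoop])
      minLoop = loop;
  std::printf("outermost loop: %u\n", minLoop); // prints 0: i runs outermost
  return 0;
}

diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index 4dd10bedc6d84..85c9a966f0fe8 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -901,7 +901,7 @@ void LoadGatherOp::build(OpBuilder &builder, OperationState &state,
                          IntegerAttr chunk_size, xegpu::CachePolicyAttr l1_hint,
                          xegpu::CachePolicyAttr l2_hint,
                          xegpu::CachePolicyAttr l3_hint,
-                         xegpu::LayoutAttr layout) {
+                         DistributeLayoutAttr layout) {
   auto loc = source.getLoc();
   int64_t size = static_cast<int64_t>(offsets.size());
   auto type = VectorType::get(size, builder.getIndexType());
@@ -985,7 +985,7 @@ void StoreScatterOp::build(
     OpBuilder &builder, OperationState &state, Value value, Value dest,
     ArrayRef<OpFoldResult> offsets, Value mask, IntegerAttr chunk_size,
     xegpu::CachePolicyAttr l1_hint, xegpu::CachePolicyAttr l2_hint,
-    xegpu::CachePolicyAttr l3_hint, xegpu::LayoutAttr layout) {
+    xegpu::CachePolicyAttr l3_hint, DistributeLayoutAttr layout) {
   auto loc = dest.getLoc();
   int64_t size = static_cast<int64_t>(offsets.size());
   auto type = VectorType::get(size, builder.getIndexType());
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp
index c3bf9606693a8..330553564f81a 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp
@@ -678,7 +678,7 @@ struct UnrollLoadGatherOpWithOffset
           pack(offsets, convertedOffsetTypes, *targetShape, loc, rewriter);
     }
 
-    auto layout = dyn_cast_if_present<xegpu::LayoutAttr>(op.getLayoutAttr());
+    auto layout = op.getLayoutAttr();
     if (layout)
       layout = layout.dropInstData();
 
@@ -778,7 +778,7 @@ struct UnrollStoreScatterOpWithOffsets
     SmallVector<Value> convertedValues =
         pack(op.getValue(), convertedValTypes, *targetShape, loc, rewriter);
 
-    auto layout = dyn_cast_if_present<xegpu::LayoutAttr>(op.getLayoutAttr());
+    auto layout = op.getLayoutAttr();
     if (layout)
       layout = layout.dropInstData();

The switch from the concrete `xegpu::LayoutAttr` to the `DistributeLayoutAttr` interface above (and in the workgroup-to-subgroup patterns below) is what lets `#xegpu.slice` layouts survive these rewrites. A minimal sketch, assuming (as this patch arranges) that `getLayoutAttr()` returns the interface; the helper name is hypothetical:

// Illustrative only; `op` stands for a matched xegpu::LoadGatherOp.
static xegpu::DistributeLayoutAttr getUnrolledLayout(xegpu::LoadGatherOp op) {
  // Before: a #xegpu.slice layout failed the dyn_cast to the concrete
  // xegpu::LayoutAttr and was silently dropped:
  //   auto layout = dyn_cast_if_present<xegpu::LayoutAttr>(op.getLayoutAttr());
  // After: both #xegpu.layout and #xegpu.slice flow through, with their
  // instruction-level tiling stripped for the unrolled ops:
  xegpu::DistributeLayoutAttr layout = op.getLayoutAttr();
  if (layout)
    layout = layout.dropInstData();
  return layout;
}

diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
index 0a9ef0aa6df96..33d4b0457e5d3 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp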
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
@@ -889,8 +889,8 @@ struct WgToSgLoadGatherOpWithOffset
       return failure();
 
     ArrayRef<int64_t> wgShape = resultType.getShape();
-    xegpu::LayoutAttr layout = dyn_cast_if_present<xegpu::LayoutAttr>(
-        xegpu::getDistributeLayoutAttr(op.getResult()));
+    xegpu::DistributeLayoutAttr layout =
+        xegpu::getDistributeLayoutAttr(op.getResult());
     if (!layout || !layout.isForWorkgroup())
       return failure();
 
@@ -913,10 +913,12 @@ struct WgToSgLoadGatherOpWithOffset
     VectorType newTy = VectorType::get(sgShape, resultType.getElementType());
     for (auto [offsets, mask] :
          llvm::zip(adaptor.getOffsets(), adaptor.getMask())) {
+      auto newLayout = layout.dropSgLayoutAndData();
       auto newLoadOp = xegpu::LoadGatherOp::create(
           rewriter, loc, newTy, op.getSource(), offsets, mask, chunkSizeAttr,
           op.getL1HintAttr(), op.getL2HintAttr(), op.getL3HintAttr(),
-          layout.dropSgLayoutAndData());
+          newLayout);
+      xegpu::setDistributeLayoutAttr(newLoadOp->getResult(0), newLayout);
       newLoadOps.push_back(newLoadOp);
     }
     rewriter.replaceOpWithMultiple(op, {newLoadOps});
@@ -941,8 +943,8 @@ struct WgToSgStoreScatterOpWithOffset
     if (!valueType)
       return failure();
 
-    xegpu::LayoutAttr layout = dyn_cast_if_present<xegpu::LayoutAttr>(
-        xegpu::getDistributeLayoutAttr(op.getOperand(0)));
+    xegpu::DistributeLayoutAttr layout =
+        xegpu::getDistributeLayoutAttr(op.getOperand(0));
     if (!layout || !layout.isForWorkgroup())
       return failure();
 
diff --git a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
index 2fd3df6dcfa71..432b8876696a9 100644
--- a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
+++ b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
@@ -456,3 +456,4 @@ func.func @sched_barrier() {
   amdgpu.sched_barrier allow =
   func.return
 }
+
diff --git a/mlir/test/Conversion/AMDGPUToROCDL/cvt_scale_pk-gfx1250.mlir b/mlir/test/Conversion/AMDGPUToROCDL/cvt_scale_pk-gfx1250.mlir
new file mode 100644
index 0000000000000..d2391140ce056
--- /dev/null
+++ b/mlir/test/Conversion/AMDGPUToROCDL/cvt_scale_pk-gfx1250.mlir
@@ -0,0 +1,164 @@
+// RUN: mlir-opt %s --convert-amdgpu-to-rocdl=chipset=gfx1250 --split-input-file --verify-diagnostics \
+// RUN: | FileCheck %s
+
+// CHECK-LABEL: @scaled_ext_packed816_fp4
+// CHECK-SAME: (%[[SOURCE:.+]]: vector<8xf4E2M1FN>, %[[SCALE:.+]]: vector<4xf8E8M0FNU>)
+func.func @scaled_ext_packed816_fp4(%v: vector<8xf4E2M1FN>, %scale: vector<4xf8E8M0FNU>) -> (vector<8xf16>, vector<8xbf16>, vector<8xf32>) {
+  // CHECK: %[[SCALE_4xi8:.+]] = builtin.unrealized_conversion_cast %[[SCALE]] : vector<4xf8E8M0FNU> to vector<4xi8>
+  // CHECK: %[[SOURCE_8xi4:.+]] = builtin.unrealized_conversion_cast %[[SOURCE]] : vector<8xf4E2M1FN> to vector<8xi4>
+  // CHECK: %[[SCALE_i32:.+]] = llvm.bitcast %[[SCALE_4xi8]] : vector<4xi8> to i32
+  // CHECK: %[[SOURCE_i32:.+]] = llvm.bitcast %[[SOURCE_8xi4]] : vector<8xi4> to i32
+  // CHECK: rocdl.cvt.scale.pk8.f16.fp4 %[[SOURCE_i32]], %[[SCALE_i32]][0] : vector<8xf16>
+  %ret0 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<8xf4E2M1FN>, vector<4xf8E8M0FNU> -> vector<8xf16>
+
+  // CHECK: %[[SCALE_i32:.+]] = llvm.bitcast %[[SCALE_4xi8]] : vector<4xi8> to i32
+  // CHECK: %[[SOURCE_i32:.+]] = llvm.bitcast %[[SOURCE_8xi4]] : vector<8xi4> to i32
+  // CHECK: rocdl.cvt.scale.pk8.bf16.fp4 %[[SOURCE_i32]], %[[SCALE_i32]][0] : vector<8xbf16>
+  %ret1 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<8xf4E2M1FN>,
vector<4xf8E8M0FNU> -> vector<8xbf16> + + // CHECK: %[[SCALE_i32:.+]] = llvm.bitcast %[[SCALE_4xi8]] : vector<4xi8> to i32 + // CHECK: %[[SOURCE_i32:.+]] = llvm.bitcast %[[SOURCE_8xi4]] : vector<8xi4> to i32 + // CHECK: rocdl.cvt.scale.pk8.f32.fp4 %[[SOURCE_i32]], %[[SCALE_i32]][0] : vector<8xf32> + %ret2 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<8xf4E2M1FN>, vector<4xf8E8M0FNU> -> vector<8xf32> + func.return %ret0, %ret1, %ret2: vector<8xf16>, vector<8xbf16>, vector<8xf32> +} + +// CHECK-LABEL: @scaled_ext_packed816_fp8 +// CHECK-SAME: (%[[SOURCE:.+]]: vector<8xf8E4M3FN>, %[[SCALE:.+]]: vector<4xf8E8M0FNU>) +func.func @scaled_ext_packed816_fp8(%v: vector<8xf8E4M3FN>, %scale: vector<4xf8E8M0FNU>) -> (vector<8xf16>, vector<8xbf16>, vector<8xf32>) { + // CHECK: %[[SCALE_4xi8:.+]] = builtin.unrealized_conversion_cast %[[SCALE]] : vector<4xf8E8M0FNU> to vector<4xi8> + // CHECK: %[[SOURCE_8xi8:.+]] = builtin.unrealized_conversion_cast %[[SOURCE]] : vector<8xf8E4M3FN> to vector<8xi8> + // CHECK: %[[SCALE_i32:.+]] = llvm.bitcast %[[SCALE_4xi8]] : vector<4xi8> to i32 + // CHECK: %[[SOURCE_v2xi32:.+]] = llvm.bitcast %[[SOURCE_8xi8]] : vector<8xi8> to vector<2xi32> + // CHECK: rocdl.cvt.scale.pk8.f16.fp8 %[[SOURCE_v2xi32]], %[[SCALE_i32]][0] : vector<8xf16> + %ret0 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<8xf8E4M3FN>, vector<4xf8E8M0FNU> -> vector<8xf16> + + // CHECK: %[[SCALE_i32:.+]] = llvm.bitcast %[[SCALE_4xi8]] : vector<4xi8> to i32 + // CHECK: %[[SOURCE_v2xi32:.+]] = llvm.bitcast %[[SOURCE_8xi8]] : vector<8xi8> to vector<2xi32> + // CHECK: rocdl.cvt.scale.pk8.bf16.fp8 %[[SOURCE_v2xi32]], %[[SCALE_i32]][0] : vector<8xbf16> + %ret1 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<8xf8E4M3FN>, vector<4xf8E8M0FNU> -> vector<8xbf16> + + // CHECK: %[[SCALE_i32:.+]] = llvm.bitcast %[[SCALE_4xi8]] : vector<4xi8> to i32 + // CHECK: %[[SOURCE_v2xi32:.+]] = llvm.bitcast %[[SOURCE_8xi8]] : vector<8xi8> to vector<2xi32> + // CHECK: rocdl.cvt.scale.pk8.f32.fp8 %[[SOURCE_v2xi32]], %[[SCALE_i32]][0] : vector<8xf32> + %ret2 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<8xf8E4M3FN>, vector<4xf8E8M0FNU> -> vector<8xf32> + + func.return %ret0, %ret1, %ret2 : vector<8xf16>, vector<8xbf16>, vector<8xf32> +} + +// CHECK-LABEL: @scaled_ext_packed816_bf8 +// CHECK-SAME: (%[[SOURCE:.+]]: vector<8xf8E5M2>, %[[SCALE:.+]]: vector<4xf8E8M0FNU>) +func.func @scaled_ext_packed816_bf8(%v: vector<8xf8E5M2>, %scale: vector<4xf8E8M0FNU>) -> (vector<8xf16>, vector<8xbf16>, vector<8xf32>) { + // CHECK: %[[SCALE_4xi8:.+]] = builtin.unrealized_conversion_cast %[[SCALE]] : vector<4xf8E8M0FNU> to vector<4xi8> + // CHECK: %[[SOURCE_8xi8:.+]] = builtin.unrealized_conversion_cast %[[SOURCE]] : vector<8xf8E5M2> to vector<8xi8> + // CHECK: %[[SCALE_i32:.+]] = llvm.bitcast %[[SCALE_4xi8]] : vector<4xi8> to i32 + // CHECK: %[[SOURCE_v2xi32:.+]] = llvm.bitcast %[[SOURCE_8xi8]] : vector<8xi8> to vector<2xi32> + // CHECK: %[[RES:.+]] = rocdl.cvt.scale.pk8.f16.bf8 %[[SOURCE_v2xi32]], %[[SCALE_i32]][0] : vector<8xf16> + %ret0 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<8xf8E5M2>, vector<4xf8E8M0FNU> -> vector<8xf16> + + // CHECK: %[[SCALE_i32:.+]] = llvm.bitcast %[[SCALE_4xi8]] : vector<4xi8> to i32 + // CHECK: %[[SOURCE_v2xi32:.+]] = 
llvm.bitcast %[[SOURCE_8xi8]] : vector<8xi8> to vector<2xi32> + // CHECK: rocdl.cvt.scale.pk8.bf16.bf8 %[[SOURCE_v2xi32]], %[[SCALE_i32]][0] : vector<8xbf16> + %ret1 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<8xf8E5M2>, vector<4xf8E8M0FNU> -> vector<8xbf16> + + // CHECK: %[[SCALE_i32:.+]] = llvm.bitcast %[[SCALE_4xi8]] : vector<4xi8> to i32 + // CHECK: %[[SOURCE_v2xi32:.+]] = llvm.bitcast %[[SOURCE_8xi8]] : vector<8xi8> to vector<2xi32> + // CHECK: rocdl.cvt.scale.pk8.f32.bf8 %[[SOURCE_v2xi32]], %[[SCALE_i32]][0] : vector<8xf32> + %ret2 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<8xf8E5M2>, vector<4xf8E8M0FNU> -> vector<8xf32> + func.return %ret0, %ret1, %ret2 : vector<8xf16>, vector<8xbf16>, vector<8xf32> +} + + +// CHECK-LABEL: @scaled_ext_packed816_fp6 +// CHECK-SAME: (%[[SOURCE:.+]]: vector<16xf6E2M3FN>, %[[SCALE:.+]]: vector<4xf8E8M0FNU>) +func.func @scaled_ext_packed816_fp6(%v: vector<16xf6E2M3FN>, %scale: vector<4xf8E8M0FNU>) -> (vector<16xf16>, vector<16xbf16>, vector<16xf32>) { + // CHECK-DAG: %[[SCALE_4xi8:.+]] = builtin.unrealized_conversion_cast %[[SCALE]] : vector<4xf8E8M0FNU> to vector<4xi8> + // CHECK-DAG: %[[SOURCE_16xi6:.+]] = builtin.unrealized_conversion_cast %[[SOURCE]] : vector<16xf6E2M3FN> to vector<16xi6> + + // CHECK: %[[SCALE_i32:.+]] = llvm.bitcast %[[SCALE_4xi8]] : vector<4xi8> to i32 + // CHECK: %[[SOURCE_v3xi32:.+]] = llvm.bitcast %[[SOURCE_16xi6]] : vector<16xi6> to vector<3xi32> + // CHECK: rocdl.cvt.scale.pk16.f16.fp6 %[[SOURCE_v3xi32]], %[[SCALE_i32]][0] : vector<16xf16> + %ret0 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<16xf6E2M3FN>, vector<4xf8E8M0FNU> -> vector<16xf16> + + // CHECK: %[[SCALE_i32:.+]] = llvm.bitcast %[[SCALE_4xi8]] : vector<4xi8> to i32 + // CHECK: %[[SOURCE_v3xi32:.+]] = llvm.bitcast %[[SOURCE_16xi6]] : vector<16xi6> to vector<3xi32> + // CHECK: rocdl.cvt.scale.pk16.bf16.fp6 %[[SOURCE_v3xi32]], %[[SCALE_i32]][0] : vector<16xbf16> + %ret1 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<16xf6E2M3FN>, vector<4xf8E8M0FNU> -> vector<16xbf16> + + // CHECK: %[[SCALE_i32:.+]] = llvm.bitcast %[[SCALE_4xi8]] : vector<4xi8> to i32 + // CHECK: %[[SOURCE_v3xi32:.+]] = llvm.bitcast %[[SOURCE_16xi6]] : vector<16xi6> to vector<3xi32> + // CHECK: rocdl.cvt.scale.pk16.f32.fp6 %[[SOURCE_v3xi32]], %[[SCALE_i32]][0] : vector<16xf32> + %ret2 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<16xf6E2M3FN>, vector<4xf8E8M0FNU> -> vector<16xf32> + return %ret0, %ret1, %ret2: vector<16xf16>, vector<16xbf16>, vector<16xf32> +} + +// CHECK-LABEL: @scaled_ext_packed816_bf6 +// CHECK-SAME: (%[[SOURCE:.+]]: vector<16xf6E3M2FN>, %[[SCALE:.+]]: vector<4xf8E8M0FNU>) +func.func @scaled_ext_packed816_bf6(%v: vector<16xf6E3M2FN>, %scale: vector<4xf8E8M0FNU>) -> (vector<16xf16>, vector<16xbf16>, vector<16xf32>) { + // CHECK-DAG: %[[SCALE_4xi8:.+]] = builtin.unrealized_conversion_cast %[[SCALE]] : vector<4xf8E8M0FNU> to vector<4xi8> + // CHECK-DAG: %[[SOURCE_16xi6:.+]] = builtin.unrealized_conversion_cast %[[SOURCE]] : vector<16xf6E3M2FN> to vector<16xi6> + + // CHECK: %[[SCALE_i32:.+]] = llvm.bitcast %[[SCALE_4xi8]] : vector<4xi8> to i32 + // CHECK: %[[SOURCE_v3xi32:.+]] = llvm.bitcast %[[SOURCE_16xi6]] : vector<16xi6> to vector<3xi32> + // CHECK: rocdl.cvt.scale.pk16.f16.bf6 
%[[SOURCE_v3xi32]], %[[SCALE_i32]][0] : vector<16xf16> + %ret0 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<16xf6E3M2FN>, vector<4xf8E8M0FNU> -> vector<16xf16> + + // CHECK: %[[SCALE_i32:.+]] = llvm.bitcast %[[SCALE_4xi8]] : vector<4xi8> to i32 + // CHECK: %[[SOURCE_v3xi32:.+]] = llvm.bitcast %[[SOURCE_16xi6]] : vector<16xi6> to vector<3xi32> + // CHECK: rocdl.cvt.scale.pk16.bf16.bf6 %[[SOURCE_v3xi32]], %[[SCALE_i32]][0] : vector<16xbf16> + %ret1 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<16xf6E3M2FN>, vector<4xf8E8M0FNU> -> vector<16xbf16> + + // CHECK: %[[SCALE_i32:.+]] = llvm.bitcast %[[SCALE_4xi8]] : vector<4xi8> to i32 + // CHECK: %[[SOURCE_v3xi32:.+]] = llvm.bitcast %[[SOURCE_16xi6]] : vector<16xi6> to vector<3xi32> + // CHECK: rocdl.cvt.scale.pk16.f32.bf6 %[[SOURCE_v3xi32]], %[[SCALE_i32]][0] : vector<16xf32> + %ret2 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<16xf6E3M2FN>, vector<4xf8E8M0FNU> -> vector<16xf32> + return %ret0, %ret1, %ret2: vector<16xf16>, vector<16xbf16>, vector<16xf32> +} + +// ----- + +func.func @amdgpu.scaled_ext_packed816_invalid_block_size_and_first_scale_byte_16(%v: vector<8xf4E2M1FN>, %scale: vector<4xf8E8M0FNU>) { + // expected-error@+1 {{'amdgpu.scaled_ext_packed816' op blockSize of 16 can only have firstScaleByte be 0 or 1 for f4 and f6}} + %ret0 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(16) firstScaleLane(0) firstScaleByte(2) : vector<8xf4E2M1FN>, vector<4xf8E8M0FNU> -> vector<8xf16> + func.return +} + +// ----- + +func.func @amdgpu.scaled_ext_packed816_invalid_block_size_and_first_scale_byte_32(%v: vector<8xf4E2M1FN>, %scale: vector<4xf8E8M0FNU>) { + // expected-error@+1 {{'amdgpu.scaled_ext_packed816' op blockSize of 32 can only have firstScaleByte be 0 or 2 for f4 and f6.}} + %ret0 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(1) : vector<8xf4E2M1FN>, vector<4xf8E8M0FNU> -> vector<8xf16> + func.return +} + +// ----- + +func.func @amdgpu.scaled_ext_packed816_invalid_attributes_for_f8(%v: vector<8xf8E5M2>, %scale: vector<4xf8E8M0FNU>) { + // expected-error@+1 {{'amdgpu.scaled_ext_packed816' op blockSize of 16 can only have (firstScaleLane, firstScaleByte) be (0, 0) or (1, 2) for f8.}} + %ret0 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(16) firstScaleLane(0) firstScaleByte(1) : vector<8xf8E5M2>, vector<4xf8E8M0FNU> -> vector<8xf16> + func.return +} + +// ----- + +func.func @amdgpu.scaled_ext_packed816_invalid_input_output_sizes(%v: vector<8xf8E5M2>, %scale: vector<4xf8E8M0FNU>) { + // expected-error@+1 {{'amdgpu.scaled_ext_packed816' op failed to verify that all of {source, res} have same shape}} + %ret0 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(16) firstScaleLane(0) firstScaleByte(0) : vector<8xf8E5M2>, vector<4xf8E8M0FNU> -> vector<16xf16> + func.return +} + +// ----- + +func.func @amdgpu.scaled_ext_packed816_invalid_src_elem_type(%v: vector<16xf16>, %scale: vector<4xf8E8M0FNU>) -> (vector<16xf16>) { + // expected-error@+1 {{'amdgpu.scaled_ext_packed816' op operand #0 must be}} + %ret0 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<16xf16>, vector<4xf8E8M0FNU> -> vector<16xf16> + return %ret0: vector<16xf16> +} + +// ----- + +func.func @amdgpu.scaled_ext_packed816_invalid_dst_elem_type(%v: vector<16xf6E3M2FN>, %scale: 
vector<4xf8E8M0FNU>) -> (vector<16xf64>) { + // expected-error@+1 {{'amdgpu.scaled_ext_packed816' op result #0 must be vector}} + %ret0 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<16xf6E3M2FN>, vector<4xf8E8M0FNU> -> vector<16xf64> + return %ret0: vector<16xf64> +} diff --git a/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir b/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir index 5f1ec66234df2..6fdc1104d2609 100644 --- a/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir +++ b/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir @@ -738,6 +738,22 @@ func.func @ops_supporting_overflow(%arg0: i64, %arg1: i64) { // ----- +// CHECK-LABEL: @ops_supporting_exact +func.func @ops_supporting_exact(i32, i32) { +^bb0(%arg0: i32, %arg1: i32): +// CHECK: = llvm.ashr exact %arg0, %arg1 : i32 + %0 = arith.shrsi %arg0, %arg1 exact : i32 +// CHECK: = llvm.lshr exact %arg0, %arg1 : i32 + %1 = arith.shrui %arg0, %arg1 exact : i32 +// CHECK: = llvm.sdiv exact %arg0, %arg1 : i32 + %2 = arith.divsi %arg0, %arg1 exact : i32 +// CHECK: = llvm.udiv exact %arg0, %arg1 : i32 + %3 = arith.divui %arg0, %arg1 exact : i32 + return +} + +// ----- + // CHECK-LABEL: func @memref_bitcast // CHECK-SAME: (%[[ARG:.*]]: memref) // CHECK: %[[V1:.*]] = builtin.unrealized_conversion_cast %[[ARG]] : memref to !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> diff --git a/mlir/test/Dialect/AMDGPU/invalid.mlir b/mlir/test/Dialect/AMDGPU/invalid.mlir index 5c8cc8b67c4b3..61fdf29a78cbd 100644 --- a/mlir/test/Dialect/AMDGPU/invalid.mlir +++ b/mlir/test/Dialect/AMDGPU/invalid.mlir @@ -333,38 +333,6 @@ func.func @gather_to_lds_non_lds(%idx1 : index, %mem1 : memref<32xf16>, %mem2 : // ----- -func.func @amdgpu.scaled_ext_packed816_invalid_block_size_and_first_scale_byte_16(%v: vector<8xf4E2M1FN>, %scale: vector<4xf8E8M0FNU>) { - // expected-error@+1 {{'amdgpu.scaled_ext_packed816' op blockSize of 16 can only have firstScaleByte be 0 or 1 for f4 and f6}} - %ret0 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(16) firstScaleLane(0) firstScaleByte(2) : vector<8xf4E2M1FN>, vector<4xf8E8M0FNU> -> vector<8xf16> - func.return -} - -// ----- - -func.func @amdgpu.scaled_ext_packed816_invalid_block_size_and_first_scale_byte_32(%v: vector<8xf4E2M1FN>, %scale: vector<4xf8E8M0FNU>) { - // expected-error@+1 {{'amdgpu.scaled_ext_packed816' op blockSize of 32 can only have firstScaleByte be 0 or 2 for f4 and f6.}} - %ret0 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(1) : vector<8xf4E2M1FN>, vector<4xf8E8M0FNU> -> vector<8xf16> - func.return -} - -// ----- - -func.func @amdgpu.scaled_ext_packed816_invalid_attributes_for_f8(%v: vector<8xf8E5M2>, %scale: vector<4xf8E8M0FNU>) { - // expected-error@+1 {{'amdgpu.scaled_ext_packed816' op blockSize of 16 can only have firstScaleByte be 0 or 2 for f8.}} - %ret0 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(16) firstScaleLane(0) firstScaleByte(1) : vector<8xf8E5M2>, vector<4xf8E8M0FNU> -> vector<8xf16> - func.return -} - -// ----- - -func.func @amdgpu.scaled_ext_packed816_invalid_input_output_sizes(%v: vector<8xf8E5M2>, %scale: vector<4xf8E8M0FNU>) { - // expected-error@+1 {{'amdgpu.scaled_ext_packed816' op failed to verify that all of {source, res} have same shape}} - %ret0 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(16) firstScaleLane(0) firstScaleByte(0) : vector<8xf8E5M2>, vector<4xf8E8M0FNU> -> vector<16xf16> - func.return -} - -// ----- - func.func 
@scaled_mfma_invalid_m(%arg0 : vector<4xf8E8M0FNU>, %arg1 : vector<32xf4E2M1FN>, %arg2 : vector<16xf32>) -> vector<16xf32> { // expected-error@+1 {{'amdgpu.scaled_mfma' op attribute 'm' failed to satisfy constraint: 32-bit signless integer attribute whose value is one of {16, 32}}} %0 = amdgpu.scaled_mfma 8x32x64 (%arg0[0] * %arg1) * (%arg0[1] * %arg1) + %arg2 : vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<16xf32> diff --git a/mlir/test/Dialect/Arith/canonicalize.mlir b/mlir/test/Dialect/Arith/canonicalize.mlir index 2fe0995c9d4df..3ad1530248809 100644 --- a/mlir/test/Dialect/Arith/canonicalize.mlir +++ b/mlir/test/Dialect/Arith/canonicalize.mlir @@ -2958,6 +2958,19 @@ func.func @truncIShrSIToTrunciShrUI(%a: i64) -> i32 { return %hi : i32 } +// CHECK-LABEL: @truncIShrSIExactToTrunciShrUIExact +// CHECK-SAME: (%[[A:.+]]: i64) +// CHECK-NEXT: %[[C32:.+]] = arith.constant 32 : i64 +// CHECK-NEXT: %[[SHR:.+]] = arith.shrui %[[A]], %[[C32]] exact : i64 +// CHECK-NEXT: %[[TRU:.+]] = arith.trunci %[[SHR]] : i64 to i32 +// CHECK-NEXT: return %[[TRU]] : i32 +func.func @truncIShrSIExactToTrunciShrUIExact(%a: i64) -> i32 { + %c32 = arith.constant 32: i64 + %sh = arith.shrsi %a, %c32 exact : i64 + %hi = arith.trunci %sh: i64 to i32 + return %hi : i32 +} + // CHECK-LABEL: @truncIShrSIToTrunciShrUIBadShiftAmt1 // CHECK: arith.shrsi func.func @truncIShrSIToTrunciShrUIBadShiftAmt1(%a: i64) -> i32 { diff --git a/mlir/test/Dialect/Arith/ops.mlir b/mlir/test/Dialect/Arith/ops.mlir index 1e656e84da836..58eadfda17060 100644 --- a/mlir/test/Dialect/Arith/ops.mlir +++ b/mlir/test/Dialect/Arith/ops.mlir @@ -151,6 +151,12 @@ func.func @test_divui(%arg0 : i64, %arg1 : i64) -> i64 { return %0 : i64 } +// CHECK-LABEL: test_divui_exact +func.func @test_divui_exact(%arg0 : i64, %arg1 : i64) -> i64 { + %0 = arith.divui %arg0, %arg1 exact : i64 + return %0 : i64 +} + // CHECK-LABEL: test_divui_tensor func.func @test_divui_tensor(%arg0 : tensor<8x8xi64>, %arg1 : tensor<8x8xi64>) -> tensor<8x8xi64> { %0 = arith.divui %arg0, %arg1 : tensor<8x8xi64> @@ -175,6 +181,12 @@ func.func @test_divsi(%arg0 : i64, %arg1 : i64) -> i64 { return %0 : i64 } +// CHECK-LABEL: test_divsi_exact +func.func @test_divsi_exact(%arg0 : i64, %arg1 : i64) -> i64 { + %0 = arith.divsi %arg0, %arg1 exact : i64 + return %0 : i64 +} + // CHECK-LABEL: test_divsi_tensor func.func @test_divsi_tensor(%arg0 : tensor<8x8xi64>, %arg1 : tensor<8x8xi64>) -> tensor<8x8xi64> { %0 = arith.divsi %arg0, %arg1 : tensor<8x8xi64> @@ -391,6 +403,12 @@ func.func @test_shrui(%arg0 : i64, %arg1 : i64) -> i64 { return %0 : i64 } +// CHECK-LABEL: test_shrui_exact +func.func @test_shrui_exact(%arg0 : i64, %arg1 : i64) -> i64 { + %0 = arith.shrui %arg0, %arg1 exact : i64 + return %0 : i64 +} + // CHECK-LABEL: test_shrui_tensor func.func @test_shrui_tensor(%arg0 : tensor<8x8xi64>, %arg1 : tensor<8x8xi64>) -> tensor<8x8xi64> { %0 = arith.shrui %arg0, %arg1 : tensor<8x8xi64> @@ -415,6 +433,12 @@ func.func @test_shrsi(%arg0 : i64, %arg1 : i64) -> i64 { return %0 : i64 } +// CHECK-LABEL: test_shrsi_exact +func.func @test_shrsi_exact(%arg0 : i64, %arg1 : i64) -> i64 { + %0 = arith.shrsi %arg0, %arg1 exact : i64 + return %0 : i64 +} + // CHECK-LABEL: test_shrsi_tensor func.func @test_shrsi_tensor(%arg0 : tensor<8x8xi64>, %arg1 : tensor<8x8xi64>) -> tensor<8x8xi64> { %0 = arith.shrsi %arg0, %arg1 : tensor<8x8xi64> diff --git a/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir b/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir index 
4fbb566cfbe73..5dde84e8e0bc2 100644 --- a/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir +++ b/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir @@ -547,4 +547,21 @@ gpu.module @test_distribution { %broadcast = vector.broadcast %arg0 {layout_result_0 = #xegpu.layout} : index to vector<4x1x1xindex> gpu.return } + + // CHECK-LABEL: distribute_load_slice_attr + gpu.func @distribute_load_slice_attr() { + %2 = memref.alloca() {alignment = 1024} : memref<4096xf32> + %offset = arith.constant {layout_result_0 = #xegpu.layout } dense<0> : vector<256xindex> + %mask = arith.constant {layout_result_0 = #xegpu.layout } dense<1> : vector<256xi1> + + // CHECK: %[[LOAD:.*]] = xegpu.load {{.*}} <{chunk_size = 1 : i64, layout = #xegpu.slice<#xegpu.layout, dims = [0]>}> + // CHECK-SAME: {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [0]>} : + // CHECK-SAME: memref<4096xf32>, vector<32xindex>, vector<32xi1> -> vector<32xf32> + %3 = xegpu.load %2[%offset], %mask {chunk_size = 1, layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [0]> } : memref<4096xf32>, vector<256xindex>, vector<256xi1> -> vector<256xf32> + + // CHECK: %[[BROADCAST:.*]] = vector.broadcast %[[LOAD]] {layout_result_0 = #xegpu.layout} : vector<32xf32> to vector<32x32xf32> + %4 = vector.broadcast %3 {layout_result_0 = + #xegpu.layout} : vector<256xf32> to vector<256x256xf32> + gpu.return + } } diff --git a/mlir/test/Transforms/loop-invariant-code-motion.mlir b/mlir/test/Transforms/loop-invariant-code-motion.mlir index c1604e226a334..31a4f64dd7de0 100644 --- a/mlir/test/Transforms/loop-invariant-code-motion.mlir +++ b/mlir/test/Transforms/loop-invariant-code-motion.mlir @@ -880,6 +880,18 @@ func.func @no_speculate_divui( return } +func.func @no_speculate_udiv( +// CHECK-LABEL: @no_speculate_udiv( + %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) { + scf.for %i = %lb to %ub step %step { +// CHECK: scf.for +// CHECK: llvm.udiv + %val = llvm.udiv %num, %denom : i32 + } + + return +} + func.func @no_speculate_divsi( // CHECK-LABEL: @no_speculate_divsi( %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) { @@ -892,6 +904,18 @@ func.func @no_speculate_divsi( return } +func.func @no_speculate_sdiv( +// CHECK-LABEL: @no_speculate_sdiv( + %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) { + scf.for %i = %lb to %ub step %step { +// CHECK: scf.for +// CHECK: llvm.sdiv + %val = llvm.sdiv %num, %denom : i32 + } + + return +} + func.func @no_speculate_ceildivui( // CHECK-LABEL: @no_speculate_ceildivui( %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) { @@ -928,6 +952,18 @@ func.func @no_speculate_divui_const(%num: i32, %lb: index, %ub: index, %step: in return } +func.func @no_speculate_udiv_const(%num: i32, %lb: index, %ub: index, %step: index) { +// CHECK-LABEL: @no_speculate_udiv_const( + %c0 = arith.constant 0 : i32 + scf.for %i = %lb to %ub step %step { +// CHECK: scf.for +// CHECK: llvm.udiv + %val = llvm.udiv %num, %c0 : i32 + } + + return +} + func.func @speculate_divui_const( // CHECK-LABEL: @speculate_divui_const( %num: i32, %lb: index, %ub: index, %step: index) { @@ -941,6 +977,19 @@ func.func @speculate_divui_const( return } +func.func @speculate_udiv_const( +// CHECK-LABEL: @speculate_udiv_const( + %num: i32, %lb: index, %ub: index, %step: index) { + %c5 = llvm.mlir.constant(5 : i32) : i32 +// CHECK: llvm.udiv +// CHECK: scf.for + scf.for %i = %lb to %ub step %step { + %val = llvm.udiv %num, %c5 : i32 + } + + return +} + func.func @no_speculate_ceildivui_const(%num: i32, %lb: 
index, %ub: index, %step: index) { // CHECK-LABEL: @no_speculate_ceildivui_const( %c0 = arith.constant 0 : i32 @@ -979,6 +1028,19 @@ func.func @no_speculate_divsi_const0( return } +func.func @no_speculate_sdiv_const0( +// CHECK-LABEL: @no_speculate_sdiv_const0( + %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) { + %c0 = arith.constant 0 : i32 + scf.for %i = %lb to %ub step %step { +// CHECK: scf.for +// CHECK: llvm.sdiv + %val = llvm.sdiv %num, %c0 : i32 + } + + return +} + func.func @no_speculate_divsi_const_minus1( // CHECK-LABEL: @no_speculate_divsi_const_minus1( %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) { @@ -992,6 +1054,19 @@ func.func @no_speculate_divsi_const_minus1( return } +func.func @no_speculate_sdiv_const_minus1( +// CHECK-LABEL: @no_speculate_sdiv_const_minus1( + %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) { + %cm1 = arith.constant -1 : i32 + scf.for %i = %lb to %ub step %step { +// CHECK: scf.for +// CHECK: llvm.sdiv + %val = llvm.sdiv %num, %cm1 : i32 + } + + return +} + func.func @speculate_divsi_const( // CHECK-LABEL: @speculate_divsi_const( %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) { @@ -1005,6 +1080,19 @@ func.func @speculate_divsi_const( return } +func.func @speculate_sdiv_const( +// CHECK-LABEL: @speculate_sdiv_const( + %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) { + %c5 = arith.constant 5 : i32 + scf.for %i = %lb to %ub step %step { +// CHECK: llvm.sdiv +// CHECK: scf.for + %val = llvm.sdiv %num, %c5 : i32 + } + + return +} + func.func @no_speculate_ceildivsi_const0( // CHECK-LABEL: @no_speculate_ceildivsi_const0( %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) { @@ -1057,6 +1145,19 @@ func.func @no_speculate_divui_range( return } +func.func @no_speculate_udiv_range( +// CHECK-LABEL: @no_speculate_udiv_range( + %num: i8, %lb: index, %ub: index, %step: index) { + %denom = test.with_bounds {smax = 127 : i8, smin = -128 : i8, umax = 255 : i8, umin = 0 : i8} : i8 + scf.for %i = %lb to %ub step %step { +// CHECK: scf.for +// CHECK: llvm.udiv + %val = llvm.udiv %num, %denom : i8 + } + + return +} + func.func @no_speculate_divsi_range( // CHECK-LABEL: @no_speculate_divsi_range( %num: i8, %lb: index, %ub: index, %step: index) { @@ -1072,6 +1173,21 @@ func.func @no_speculate_divsi_range( return } +func.func @no_speculate_sdiv_range( +// CHECK-LABEL: @no_speculate_sdiv_range( + %num: i8, %lb: index, %ub: index, %step: index) { + %denom0 = test.with_bounds {smax = -1: i8, smin = -128 : i8, umax = 255 : i8, umin = 0 : i8} : i8 + %denom1 = test.with_bounds {smax = 127 : i8, smin = 0 : i8, umax = 255 : i8, umin = 0 : i8} : i8 + scf.for %i = %lb to %ub step %step { +// CHECK: scf.for +// CHECK-COUNT-2: llvm.sdiv + %val0 = llvm.sdiv %num, %denom0 : i8 + %val1 = llvm.sdiv %num, %denom1 : i8 + } + + return +} + func.func @no_speculate_ceildivui_range( // CHECK-LABEL: @no_speculate_ceildivui_range( %num: i8, %lb: index, %ub: index, %step: index) { @@ -1113,6 +1229,19 @@ func.func @speculate_divui_range( return } +func.func @speculate_udiv_range( +// CHECK-LABEL: @speculate_udiv_range( + %num: i8, %lb: index, %ub: index, %step: index) { + %denom = test.with_bounds {smax = 127 : i8, smin = -128 : i8, umax = 255 : i8, umin = 1 : i8} : i8 + scf.for %i = %lb to %ub step %step { +// CHECK: llvm.udiv +// CHECK: scf.for + %val = llvm.udiv %num, %denom : i8 + } + + return +} + func.func @speculate_divsi_range( // CHECK-LABEL: @speculate_divsi_range( %num: i8, %lb: index, %ub: index, %step: 
index) { @@ -1129,6 +1258,22 @@ func.func @speculate_divsi_range( return } +func.func @speculate_sdiv_range( +// CHECK-LABEL: @speculate_sdiv_range( + %num: i8, %lb: index, %ub: index, %step: index) { + %denom0 = test.with_bounds {smax = 127 : i8, smin = 1 : i8, umax = 255 : i8, umin = 0 : i8} : i8 + %denom1 = test.with_bounds {smax = -2 : i8, smin = -128 : i8, umax = 255 : i8, umin = 0 : i8} : i8 + scf.for %i = %lb to %ub step %step { +// CHECK-COUNT-2: llvm.sdiv +// CHECK: scf.for + %val0 = llvm.sdiv %num, %denom0 : i8 + %val1 = llvm.sdiv %num, %denom1 : i8 + + } + + return +} + func.func @speculate_ceildivui_range( // CHECK-LABEL: @speculate_ceildivui_range( %num: i8, %lb: index, %ub: index, %step: index) { diff --git a/mlir/unittests/Dialect/LLVMIR/CMakeLists.txt b/mlir/unittests/Dialect/LLVMIR/CMakeLists.txt index 7cc130d02ad74..568126fd342cc 100644 --- a/mlir/unittests/Dialect/LLVMIR/CMakeLists.txt +++ b/mlir/unittests/Dialect/LLVMIR/CMakeLists.txt @@ -4,4 +4,5 @@ add_mlir_unittest(MLIRLLVMIRTests mlir_target_link_libraries(MLIRLLVMIRTests PRIVATE MLIRLLVMDialect + MLIRInferIntRangeInterface ) diff --git a/openmp/tools/omptest/test/CMakeLists.txt b/openmp/tools/omptest/test/CMakeLists.txt index 1e07a1044f7d6..2b4aa78b0bc16 100644 --- a/openmp/tools/omptest/test/CMakeLists.txt +++ b/openmp/tools/omptest/test/CMakeLists.txt @@ -9,7 +9,7 @@ set(UNITTEST_SOURCES unittests/asserter-seq-test.cpp unittests/internal-event-eq-test.cpp unittests/internal-event-tostring-test.cpp - unittests/internal-util-test + unittests/internal-util-test.cpp unittests/main-test.cpp ) add_executable(omptest-unittests ${UNITTEST_SOURCES}) diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 788c6570081a2..a27abbd5b386a 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -1103,6 +1103,7 @@ libc_support_library( ":func_realloc", ":hdr_stdio_macros", ":hdr_stdio_overlay", + ":string_memory_utils", ":types_off_t", ], ) diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index 67c397e34b8c7..85c64ffd58ca6 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -2193,92 +2193,253 @@ llvm_target_lib_list = [lib for lib in [ { "name": "AArch64", "short_name": "AArch64", - "tbl_outs": { - "lib/Target/AArch64/AArch64GenRegisterBank.inc": ["-gen-register-bank"], - "lib/Target/AArch64/AArch64GenRegisterInfo.inc": ["-gen-register-info"], - "lib/Target/AArch64/AArch64GenInstrInfo.inc": ["-gen-instr-info"], - "lib/Target/AArch64/AArch64GenMCCodeEmitter.inc": ["-gen-emitter"], - "lib/Target/AArch64/AArch64GenMCPseudoLowering.inc": ["-gen-pseudo-lowering"], - "lib/Target/AArch64/AArch64GenAsmWriter.inc": ["-gen-asm-writer"], - "lib/Target/AArch64/AArch64GenAsmWriter1.inc": [ - "-gen-asm-writer", - "-asmwriternum=1", - ], - "lib/Target/AArch64/AArch64GenAsmMatcher.inc": ["-gen-asm-matcher"], - "lib/Target/AArch64/AArch64GenDAGISel.inc": ["-gen-dag-isel"], - "lib/Target/AArch64/AArch64GenFastISel.inc": ["-gen-fast-isel"], - "lib/Target/AArch64/AArch64GenGlobalISel.inc": ["-gen-global-isel"], - "lib/Target/AArch64/AArch64GenO0PreLegalizeGICombiner.inc": [ - "-gen-global-isel-combiner", - "-combiners=AArch64O0PreLegalizerCombiner", - ], - "lib/Target/AArch64/AArch64GenPreLegalizeGICombiner.inc": [ - "-gen-global-isel-combiner", - 
"-combiners=AArch64PreLegalizerCombiner", - ], - "lib/Target/AArch64/AArch64GenPostLegalizeGICombiner.inc": [ - "-gen-global-isel-combiner", - "-combiners=AArch64PostLegalizerCombiner", - ], - "lib/Target/AArch64/AArch64GenPostLegalizeGILowering.inc": [ - "-gen-global-isel-combiner", - "-combiners=AArch64PostLegalizerLowering", - ], - "lib/Target/AArch64/AArch64GenCallingConv.inc": ["-gen-callingconv"], - "lib/Target/AArch64/AArch64GenSDNodeInfo.inc": ["-gen-sd-node-info"], - "lib/Target/AArch64/AArch64GenSubtargetInfo.inc": ["-gen-subtarget"], - "lib/Target/AArch64/AArch64GenDisassemblerTables.inc": [ - "-gen-disassembler", - ], - "lib/Target/AArch64/AArch64GenSystemOperands.inc": ["-gen-searchable-tables"], - "lib/Target/AArch64/AArch64GenExegesis.inc": ["-gen-exegesis"], - }, + "tbl_outs": [ + ( + ["-gen-register-bank"], + "lib/Target/AArch64/AArch64GenRegisterBank.inc", + ), + ( + ["-gen-register-info"], + [ + "lib/Target/AArch64/AArch64GenRegisterInfo.inc", + "lib/Target/AArch64/AArch64GenRegisterInfoEnums.inc", + "lib/Target/AArch64/AArch64GenRegisterInfoMCDesc.inc", + "lib/Target/AArch64/AArch64GenRegisterInfoHeader.inc", + "lib/Target/AArch64/AArch64GenRegisterInfoTargetDesc.inc", + ], + ), + ( + ["-gen-instr-info"], + "lib/Target/AArch64/AArch64GenInstrInfo.inc", + ), + ( + ["-gen-emitter"], + "lib/Target/AArch64/AArch64GenMCCodeEmitter.inc", + ), + ( + ["-gen-pseudo-lowering"], + "lib/Target/AArch64/AArch64GenMCPseudoLowering.inc", + ), + ( + ["-gen-asm-writer"], + "lib/Target/AArch64/AArch64GenAsmWriter.inc", + ), + ( + [ + "-gen-asm-writer", + "-asmwriternum=1", + ], + "lib/Target/AArch64/AArch64GenAsmWriter1.inc", + ), + ( + ["-gen-asm-matcher"], + "lib/Target/AArch64/AArch64GenAsmMatcher.inc", + ), + ( + ["-gen-dag-isel"], + "lib/Target/AArch64/AArch64GenDAGISel.inc", + ), + ( + ["-gen-fast-isel"], + "lib/Target/AArch64/AArch64GenFastISel.inc", + ), + ( + ["-gen-global-isel"], + "lib/Target/AArch64/AArch64GenGlobalISel.inc", + ), + ( + [ + "-gen-global-isel-combiner", + "-combiners=AArch64O0PreLegalizerCombiner", + ], + "lib/Target/AArch64/AArch64GenO0PreLegalizeGICombiner.inc", + ), + ( + [ + "-gen-global-isel-combiner", + "-combiners=AArch64PreLegalizerCombiner", + ], + "lib/Target/AArch64/AArch64GenPreLegalizeGICombiner.inc", + ), + ( + [ + "-gen-global-isel-combiner", + "-combiners=AArch64PostLegalizerCombiner", + ], + "lib/Target/AArch64/AArch64GenPostLegalizeGICombiner.inc", + ), + ( + [ + "-gen-global-isel-combiner", + "-combiners=AArch64PostLegalizerLowering", + ], + "lib/Target/AArch64/AArch64GenPostLegalizeGILowering.inc", + ), + ( + ["-gen-callingconv"], + "lib/Target/AArch64/AArch64GenCallingConv.inc", + ), + ( + ["-gen-sd-node-info"], + "lib/Target/AArch64/AArch64GenSDNodeInfo.inc", + ), + ( + ["-gen-subtarget"], + "lib/Target/AArch64/AArch64GenSubtargetInfo.inc", + ), + ( + ["-gen-disassembler"], + "lib/Target/AArch64/AArch64GenDisassemblerTables.inc", + ), + ( + ["-gen-searchable-tables"], + "lib/Target/AArch64/AArch64GenSystemOperands.inc", + ), + ( + ["-gen-exegesis"], + "lib/Target/AArch64/AArch64GenExegesis.inc", + ), + ], }, { "name": "ARM", "short_name": "ARM", - "tbl_outs": { - "lib/Target/ARM/ARMGenRegisterBank.inc": ["-gen-register-bank"], - "lib/Target/ARM/ARMGenRegisterInfo.inc": ["-gen-register-info"], - "lib/Target/ARM/ARMGenSystemRegister.inc": ["-gen-searchable-tables"], - "lib/Target/ARM/ARMGenInstrInfo.inc": ["-gen-instr-info"], - "lib/Target/ARM/ARMGenMCCodeEmitter.inc": ["-gen-emitter"], - "lib/Target/ARM/ARMGenMCPseudoLowering.inc": 
["-gen-pseudo-lowering"], - "lib/Target/ARM/ARMGenAsmWriter.inc": ["-gen-asm-writer"], - "lib/Target/ARM/ARMGenAsmMatcher.inc": ["-gen-asm-matcher"], - "lib/Target/ARM/ARMGenDAGISel.inc": ["-gen-dag-isel"], - "lib/Target/ARM/ARMGenFastISel.inc": ["-gen-fast-isel"], - "lib/Target/ARM/ARMGenGlobalISel.inc": ["-gen-global-isel"], - "lib/Target/ARM/ARMGenCallingConv.inc": ["-gen-callingconv"], - "lib/Target/ARM/ARMGenSubtargetInfo.inc": ["-gen-subtarget"], - "lib/Target/ARM/ARMGenDisassemblerTables.inc": [ - "-gen-disassembler", - "-ignore-non-decodable-operands", - ], - }, + "tbl_outs": [ + ( + ["-gen-register-bank"], + "lib/Target/ARM/ARMGenRegisterBank.inc", + ), + ( + ["-gen-register-info"], + [ + "lib/Target/ARM/ARMGenRegisterInfo.inc", + "lib/Target/ARM/ARMGenRegisterInfoEnums.inc", + "lib/Target/ARM/ARMGenRegisterInfoMCDesc.inc", + "lib/Target/ARM/ARMGenRegisterInfoHeader.inc", + "lib/Target/ARM/ARMGenRegisterInfoTargetDesc.inc", + ], + ), + ( + ["-gen-searchable-tables"], + "lib/Target/ARM/ARMGenSystemRegister.inc", + ), + ( + ["-gen-instr-info"], + "lib/Target/ARM/ARMGenInstrInfo.inc", + ), + ( + ["-gen-emitter"], + "lib/Target/ARM/ARMGenMCCodeEmitter.inc", + ), + ( + ["-gen-pseudo-lowering"], + "lib/Target/ARM/ARMGenMCPseudoLowering.inc", + ), + ( + ["-gen-asm-writer"], + "lib/Target/ARM/ARMGenAsmWriter.inc", + ), + ( + ["-gen-asm-matcher"], + "lib/Target/ARM/ARMGenAsmMatcher.inc", + ), + ( + ["-gen-dag-isel"], + "lib/Target/ARM/ARMGenDAGISel.inc", + ), + ( + ["-gen-fast-isel"], + "lib/Target/ARM/ARMGenFastISel.inc", + ), + ( + ["-gen-global-isel"], + "lib/Target/ARM/ARMGenGlobalISel.inc", + ), + ( + ["-gen-callingconv"], + "lib/Target/ARM/ARMGenCallingConv.inc", + ), + ( + ["-gen-subtarget"], + "lib/Target/ARM/ARMGenSubtargetInfo.inc", + ), + ( + [ + "-gen-disassembler", + "-ignore-non-decodable-operands", + ], + "lib/Target/ARM/ARMGenDisassemblerTables.inc", + ), + ], }, { "name": "AMDGPU", "short_name": "AMDGPU", - "tbl_outs": { - "lib/Target/AMDGPU/AMDGPUGenRegisterBank.inc": ["-gen-register-bank"], - "lib/Target/AMDGPU/AMDGPUGenRegisterInfo.inc": ["-gen-register-info"], - "lib/Target/AMDGPU/AMDGPUGenInstrInfo.inc": ["-gen-instr-info"], - "lib/Target/AMDGPU/AMDGPUGenMCCodeEmitter.inc": ["-gen-emitter"], - "lib/Target/AMDGPU/AMDGPUGenMCPseudoLowering.inc": ["-gen-pseudo-lowering"], - "lib/Target/AMDGPU/AMDGPUGenAsmWriter.inc": ["-gen-asm-writer"], - "lib/Target/AMDGPU/AMDGPUGenAsmMatcher.inc": ["-gen-asm-matcher"], - "lib/Target/AMDGPU/AMDGPUGenDAGISel.inc": ["-gen-dag-isel"], - "lib/Target/AMDGPU/AMDGPUGenCallingConv.inc": ["-gen-callingconv"], - "lib/Target/AMDGPU/AMDGPUGenSubtargetInfo.inc": ["-gen-subtarget"], - "lib/Target/AMDGPU/AMDGPUGenDisassemblerTables.inc": [ - "-gen-disassembler", - "--specialize-decoders-per-bitwidth", - "-ignore-non-decodable-operands", - "-ignore-fully-defined-operands", - ], - "lib/Target/AMDGPU/AMDGPUGenSearchableTables.inc": ["-gen-searchable-tables"], - }, + "tbl_outs": [ + ( + ["-gen-register-bank"], + "lib/Target/AMDGPU/AMDGPUGenRegisterBank.inc", + ), + ( + ["-gen-register-info"], + [ + "lib/Target/AMDGPU/AMDGPUGenRegisterInfo.inc", + "lib/Target/AMDGPU/AMDGPUGenRegisterInfoEnums.inc", + "lib/Target/AMDGPU/AMDGPUGenRegisterInfoMCDesc.inc", + "lib/Target/AMDGPU/AMDGPUGenRegisterInfoHeader.inc", + "lib/Target/AMDGPU/AMDGPUGenRegisterInfoTargetDesc.inc", + ], + ), + ( + ["-gen-instr-info"], + "lib/Target/AMDGPU/AMDGPUGenInstrInfo.inc", + ), + ( + ["-gen-emitter"], + "lib/Target/AMDGPU/AMDGPUGenMCCodeEmitter.inc", + ), + ( + 
["-gen-pseudo-lowering"], + "lib/Target/AMDGPU/AMDGPUGenMCPseudoLowering.inc", + ), + ( + ["-gen-asm-writer"], + "lib/Target/AMDGPU/AMDGPUGenAsmWriter.inc", + ), + ( + ["-gen-asm-matcher"], + "lib/Target/AMDGPU/AMDGPUGenAsmMatcher.inc", + ), + ( + ["-gen-dag-isel"], + "lib/Target/AMDGPU/AMDGPUGenDAGISel.inc", + ), + ( + ["-gen-callingconv"], + "lib/Target/AMDGPU/AMDGPUGenCallingConv.inc", + ), + ( + ["-gen-subtarget"], + "lib/Target/AMDGPU/AMDGPUGenSubtargetInfo.inc", + ), + ( + [ + "-gen-disassembler", + "--specialize-decoders-per-bitwidth", + "-ignore-non-decodable-operands", + "-ignore-fully-defined-operands", + ], + "lib/Target/AMDGPU/AMDGPUGenDisassemblerTables.inc", + ), + ( + ["-gen-searchable-tables"], + "lib/Target/AMDGPU/AMDGPUGenSearchableTables.inc", + ), + ( + ["-gen-sd-node-info"], + "lib/Target/AMDGPU/AMDGPUGenSDNodeInfo.inc", + ), + ], "tbl_deps": [ ":InstCombineTableGen", ":amdgpu_isel_target_gen", @@ -2288,184 +2449,567 @@ llvm_target_lib_list = [lib for lib in [ { "name": "AVR", "short_name": "AVR", - "tbl_outs": { - "lib/Target/AVR/AVRGenAsmMatcher.inc": ["-gen-asm-matcher"], - "lib/Target/AVR/AVRGenAsmWriter.inc": ["-gen-asm-writer"], - "lib/Target/AVR/AVRGenCallingConv.inc": ["-gen-callingconv"], - "lib/Target/AVR/AVRGenDAGISel.inc": ["-gen-dag-isel"], - "lib/Target/AVR/AVRGenDisassemblerTables.inc": [ - "-gen-disassembler", - ], - "lib/Target/AVR/AVRGenMCCodeEmitter.inc": ["-gen-emitter"], - "lib/Target/AVR/AVRGenInstrInfo.inc": ["-gen-instr-info"], - "lib/Target/AVR/AVRGenRegisterInfo.inc": ["-gen-register-info"], - "lib/Target/AVR/AVRGenSDNodeInfo.inc": ["-gen-sd-node-info"], - "lib/Target/AVR/AVRGenSubtargetInfo.inc": ["-gen-subtarget"], - }, + "tbl_outs": [ + ( + ["-gen-asm-matcher"], + "lib/Target/AVR/AVRGenAsmMatcher.inc", + ), + ( + ["-gen-asm-writer"], + "lib/Target/AVR/AVRGenAsmWriter.inc", + ), + ( + ["-gen-callingconv"], + "lib/Target/AVR/AVRGenCallingConv.inc", + ), + ( + ["-gen-dag-isel"], + "lib/Target/AVR/AVRGenDAGISel.inc", + ), + ( + ["-gen-disassembler"], + "lib/Target/AVR/AVRGenDisassemblerTables.inc", + ), + ( + ["-gen-emitter"], + "lib/Target/AVR/AVRGenMCCodeEmitter.inc", + ), + ( + ["-gen-instr-info"], + "lib/Target/AVR/AVRGenInstrInfo.inc", + ), + ( + ["-gen-register-info"], + [ + "lib/Target/AVR/AVRGenRegisterInfo.inc", + "lib/Target/AVR/AVRGenRegisterInfoEnums.inc", + "lib/Target/AVR/AVRGenRegisterInfoMCDesc.inc", + "lib/Target/AVR/AVRGenRegisterInfoHeader.inc", + "lib/Target/AVR/AVRGenRegisterInfoTargetDesc.inc", + ], + ), + ( + ["-gen-sd-node-info"], + "lib/Target/AVR/AVRGenSDNodeInfo.inc", + ), + ( + ["-gen-subtarget"], + "lib/Target/AVR/AVRGenSubtargetInfo.inc", + ), + ], }, { "name": "BPF", "short_name": "BPF", - "tbl_outs": { - "lib/Target/BPF/BPFGenRegisterBank.inc": ["-gen-register-bank"], - "lib/Target/BPF/BPFGenAsmWriter.inc": ["-gen-asm-writer"], - "lib/Target/BPF/BPFGenAsmMatcher.inc": ["-gen-asm-matcher"], - "lib/Target/BPF/BPFGenCallingConv.inc": ["-gen-callingconv"], - "lib/Target/BPF/BPFGenDAGISel.inc": ["-gen-dag-isel"], - "lib/Target/BPF/BPFGenGlobalISel.inc": ["-gen-global-isel"], - "lib/Target/BPF/BPFGenDisassemblerTables.inc": ["-gen-disassembler"], - "lib/Target/BPF/BPFGenMCCodeEmitter.inc": ["-gen-emitter"], - "lib/Target/BPF/BPFGenInstrInfo.inc": ["-gen-instr-info"], - "lib/Target/BPF/BPFGenRegisterInfo.inc": ["-gen-register-info"], - "lib/Target/BPF/BPFGenSubtargetInfo.inc": ["-gen-subtarget"], - "lib/Target/BPF/BPFGenSDNodeInfo.inc": ["-gen-sd-node-info"], - }, + "tbl_outs": [ + ( + ["-gen-register-bank"], + 
"lib/Target/BPF/BPFGenRegisterBank.inc", + ), + ( + ["-gen-asm-writer"], + "lib/Target/BPF/BPFGenAsmWriter.inc", + ), + ( + ["-gen-asm-matcher"], + "lib/Target/BPF/BPFGenAsmMatcher.inc", + ), + ( + ["-gen-callingconv"], + "lib/Target/BPF/BPFGenCallingConv.inc", + ), + ( + ["-gen-dag-isel"], + "lib/Target/BPF/BPFGenDAGISel.inc", + ), + ( + ["-gen-global-isel"], + "lib/Target/BPF/BPFGenGlobalISel.inc", + ), + ( + ["-gen-disassembler"], + "lib/Target/BPF/BPFGenDisassemblerTables.inc", + ), + ( + ["-gen-emitter"], + "lib/Target/BPF/BPFGenMCCodeEmitter.inc", + ), + ( + ["-gen-instr-info"], + "lib/Target/BPF/BPFGenInstrInfo.inc", + ), + ( + ["-gen-register-info"], + [ + "lib/Target/BPF/BPFGenRegisterInfo.inc", + "lib/Target/BPF/BPFGenRegisterInfoEnums.inc", + "lib/Target/BPF/BPFGenRegisterInfoMCDesc.inc", + "lib/Target/BPF/BPFGenRegisterInfoHeader.inc", + "lib/Target/BPF/BPFGenRegisterInfoTargetDesc.inc", + ], + ), + ( + ["-gen-subtarget"], + "lib/Target/BPF/BPFGenSubtargetInfo.inc", + ), + ( + ["-gen-sd-node-info"], + "lib/Target/BPF/BPFGenSDNodeInfo.inc", + ), + ], }, { "name": "Hexagon", "short_name": "Hexagon", - "tbl_outs": { - "lib/Target/Hexagon/HexagonGenAsmMatcher.inc": ["-gen-asm-matcher"], - "lib/Target/Hexagon/HexagonGenAsmWriter.inc": ["-gen-asm-writer"], - "lib/Target/Hexagon/HexagonGenCallingConv.inc": ["-gen-callingconv"], - "lib/Target/Hexagon/HexagonGenDAGISel.inc": ["-gen-dag-isel"], - "lib/Target/Hexagon/HexagonGenDFAPacketizer.inc": ["-gen-dfa-packetizer"], - "lib/Target/Hexagon/HexagonGenDisassemblerTables.inc": [ - "-gen-disassembler", - ], - "lib/Target/Hexagon/HexagonGenInstrInfo.inc": ["-gen-instr-info"], - "lib/Target/Hexagon/HexagonGenMCCodeEmitter.inc": ["-gen-emitter"], - "lib/Target/Hexagon/HexagonGenRegisterInfo.inc": ["-gen-register-info"], - "lib/Target/Hexagon/HexagonGenSubtargetInfo.inc": ["-gen-subtarget"], - }, + "tbl_outs": [ + ( + ["-gen-asm-matcher"], + "lib/Target/Hexagon/HexagonGenAsmMatcher.inc", + ), + ( + ["-gen-asm-writer"], + "lib/Target/Hexagon/HexagonGenAsmWriter.inc", + ), + ( + ["-gen-callingconv"], + "lib/Target/Hexagon/HexagonGenCallingConv.inc", + ), + ( + ["-gen-dag-isel"], + "lib/Target/Hexagon/HexagonGenDAGISel.inc", + ), + ( + ["-gen-dfa-packetizer"], + "lib/Target/Hexagon/HexagonGenDFAPacketizer.inc", + ), + ( + ["-gen-disassembler"], + "lib/Target/Hexagon/HexagonGenDisassemblerTables.inc", + ), + ( + ["-gen-instr-info"], + "lib/Target/Hexagon/HexagonGenInstrInfo.inc", + ), + ( + ["-gen-emitter"], + "lib/Target/Hexagon/HexagonGenMCCodeEmitter.inc", + ), + ( + ["-gen-register-info"], + [ + "lib/Target/Hexagon/HexagonGenRegisterInfo.inc", + "lib/Target/Hexagon/HexagonGenRegisterInfoEnums.inc", + "lib/Target/Hexagon/HexagonGenRegisterInfoMCDesc.inc", + "lib/Target/Hexagon/HexagonGenRegisterInfoHeader.inc", + "lib/Target/Hexagon/HexagonGenRegisterInfoTargetDesc.inc", + ], + ), + ( + ["-gen-subtarget"], + "lib/Target/Hexagon/HexagonGenSubtargetInfo.inc", + ), + ], }, { "name": "Lanai", "short_name": "Lanai", - "tbl_outs": { - "lib/Target/Lanai/LanaiGenAsmMatcher.inc": ["-gen-asm-matcher"], - "lib/Target/Lanai/LanaiGenAsmWriter.inc": ["-gen-asm-writer"], - "lib/Target/Lanai/LanaiGenCallingConv.inc": ["-gen-callingconv"], - "lib/Target/Lanai/LanaiGenDAGISel.inc": ["-gen-dag-isel"], - "lib/Target/Lanai/LanaiGenDisassemblerTables.inc": ["-gen-disassembler"], - "lib/Target/Lanai/LanaiGenMCCodeEmitter.inc": ["-gen-emitter"], - "lib/Target/Lanai/LanaiGenInstrInfo.inc": ["-gen-instr-info"], - "lib/Target/Lanai/LanaiGenRegisterInfo.inc": 
["-gen-register-info"], - "lib/Target/Lanai/LanaiGenSDNodeInfo.inc": ["-gen-sd-node-info"], - "lib/Target/Lanai/LanaiGenSubtargetInfo.inc": ["-gen-subtarget"], - }, + "tbl_outs": [ + ( + ["-gen-asm-matcher"], + "lib/Target/Lanai/LanaiGenAsmMatcher.inc", + ), + ( + ["-gen-asm-writer"], + "lib/Target/Lanai/LanaiGenAsmWriter.inc", + ), + ( + ["-gen-callingconv"], + "lib/Target/Lanai/LanaiGenCallingConv.inc", + ), + ( + ["-gen-dag-isel"], + "lib/Target/Lanai/LanaiGenDAGISel.inc", + ), + ( + ["-gen-disassembler"], + "lib/Target/Lanai/LanaiGenDisassemblerTables.inc", + ), + ( + ["-gen-emitter"], + "lib/Target/Lanai/LanaiGenMCCodeEmitter.inc", + ), + ( + ["-gen-instr-info"], + "lib/Target/Lanai/LanaiGenInstrInfo.inc", + ), + ( + ["-gen-register-info"], + [ + "lib/Target/Lanai/LanaiGenRegisterInfo.inc", + "lib/Target/Lanai/LanaiGenRegisterInfoEnums.inc", + "lib/Target/Lanai/LanaiGenRegisterInfoMCDesc.inc", + "lib/Target/Lanai/LanaiGenRegisterInfoHeader.inc", + "lib/Target/Lanai/LanaiGenRegisterInfoTargetDesc.inc", + ], + ), + ( + ["-gen-sd-node-info"], + "lib/Target/Lanai/LanaiGenSDNodeInfo.inc", + ), + ( + ["-gen-subtarget"], + "lib/Target/Lanai/LanaiGenSubtargetInfo.inc", + ), + ], }, { "name": "LoongArch", "short_name": "LoongArch", - "tbl_outs": { - "lib/Target/LoongArch/LoongArchGenAsmMatcher.inc": ["-gen-asm-matcher"], - "lib/Target/LoongArch/LoongArchGenAsmWriter.inc": ["-gen-asm-writer"], - "lib/Target/LoongArch/LoongArchGenDAGISel.inc": ["-gen-dag-isel"], - "lib/Target/LoongArch/LoongArchGenDisassemblerTables.inc": ["-gen-disassembler"], - "lib/Target/LoongArch/LoongArchGenMCCodeEmitter.inc": ["-gen-emitter"], - "lib/Target/LoongArch/LoongArchGenInstrInfo.inc": ["-gen-instr-info"], - "lib/Target/LoongArch/LoongArchGenMCPseudoLowering.inc": ["-gen-pseudo-lowering"], - "lib/Target/LoongArch/LoongArchGenRegisterInfo.inc": ["-gen-register-info"], - "lib/Target/LoongArch/LoongArchGenSubtargetInfo.inc": ["-gen-subtarget"], - }, + "tbl_outs": [ + ( + ["-gen-asm-matcher"], + "lib/Target/LoongArch/LoongArchGenAsmMatcher.inc", + ), + ( + ["-gen-asm-writer"], + "lib/Target/LoongArch/LoongArchGenAsmWriter.inc", + ), + ( + ["-gen-dag-isel"], + "lib/Target/LoongArch/LoongArchGenDAGISel.inc", + ), + ( + ["-gen-disassembler"], + "lib/Target/LoongArch/LoongArchGenDisassemblerTables.inc", + ), + ( + ["-gen-emitter"], + "lib/Target/LoongArch/LoongArchGenMCCodeEmitter.inc", + ), + ( + ["-gen-instr-info"], + "lib/Target/LoongArch/LoongArchGenInstrInfo.inc", + ), + ( + ["-gen-pseudo-lowering"], + "lib/Target/LoongArch/LoongArchGenMCPseudoLowering.inc", + ), + ( + ["-gen-register-info"], + [ + "lib/Target/LoongArch/LoongArchGenRegisterInfo.inc", + "lib/Target/LoongArch/LoongArchGenRegisterInfoEnums.inc", + "lib/Target/LoongArch/LoongArchGenRegisterInfoMCDesc.inc", + "lib/Target/LoongArch/LoongArchGenRegisterInfoHeader.inc", + "lib/Target/LoongArch/LoongArchGenRegisterInfoTargetDesc.inc", + ], + ), + ( + ["-gen-subtarget"], + "lib/Target/LoongArch/LoongArchGenSubtargetInfo.inc", + ), + ], }, { "name": "Mips", "short_name": "Mips", - "tbl_outs": { - "lib/Target/Mips/MipsGenAsmMatcher.inc": ["-gen-asm-matcher"], - "lib/Target/Mips/MipsGenAsmWriter.inc": ["-gen-asm-writer"], - "lib/Target/Mips/MipsGenCallingConv.inc": ["-gen-callingconv"], - "lib/Target/Mips/MipsGenDAGISel.inc": ["-gen-dag-isel"], - "lib/Target/Mips/MipsGenDisassemblerTables.inc": [ - "-gen-disassembler", - "-ignore-non-decodable-operands", - ], - "lib/Target/Mips/MipsGenMCCodeEmitter.inc": ["-gen-emitter"], - "lib/Target/Mips/MipsGenExegesis.inc": 
["-gen-exegesis"], - "lib/Target/Mips/MipsGenFastISel.inc": ["-gen-fast-isel"], - "lib/Target/Mips/MipsGenGlobalISel.inc": ["-gen-global-isel"], - "lib/Target/Mips/MipsGenPostLegalizeGICombiner.inc": [ - "-gen-global-isel-combiner", - "-combiners=MipsPostLegalizerCombiner", - ], - "lib/Target/Mips/MipsGenInstrInfo.inc": ["-gen-instr-info"], - "lib/Target/Mips/MipsGenMCPseudoLowering.inc": ["-gen-pseudo-lowering"], - "lib/Target/Mips/MipsGenRegisterBank.inc": ["-gen-register-bank"], - "lib/Target/Mips/MipsGenRegisterInfo.inc": ["-gen-register-info"], - "lib/Target/Mips/MipsGenSubtargetInfo.inc": ["-gen-subtarget"], - }, + "tbl_outs": [ + ( + ["-gen-asm-matcher"], + "lib/Target/Mips/MipsGenAsmMatcher.inc", + ), + ( + ["-gen-asm-writer"], + "lib/Target/Mips/MipsGenAsmWriter.inc", + ), + ( + ["-gen-callingconv"], + "lib/Target/Mips/MipsGenCallingConv.inc", + ), + ( + ["-gen-dag-isel"], + "lib/Target/Mips/MipsGenDAGISel.inc", + ), + ( + [ + "-gen-disassembler", + "-ignore-non-decodable-operands", + ], + "lib/Target/Mips/MipsGenDisassemblerTables.inc", + ), + ( + ["-gen-emitter"], + "lib/Target/Mips/MipsGenMCCodeEmitter.inc", + ), + ( + ["-gen-exegesis"], + "lib/Target/Mips/MipsGenExegesis.inc", + ), + ( + ["-gen-fast-isel"], + "lib/Target/Mips/MipsGenFastISel.inc", + ), + ( + ["-gen-global-isel"], + "lib/Target/Mips/MipsGenGlobalISel.inc", + ), + ( + [ + "-gen-global-isel-combiner", + "-combiners=MipsPostLegalizerCombiner", + ], + "lib/Target/Mips/MipsGenPostLegalizeGICombiner.inc", + ), + ( + ["-gen-instr-info"], + "lib/Target/Mips/MipsGenInstrInfo.inc", + ), + ( + ["-gen-pseudo-lowering"], + "lib/Target/Mips/MipsGenMCPseudoLowering.inc", + ), + ( + ["-gen-register-bank"], + "lib/Target/Mips/MipsGenRegisterBank.inc", + ), + ( + ["-gen-register-info"], + [ + "lib/Target/Mips/MipsGenRegisterInfo.inc", + "lib/Target/Mips/MipsGenRegisterInfoEnums.inc", + "lib/Target/Mips/MipsGenRegisterInfoMCDesc.inc", + "lib/Target/Mips/MipsGenRegisterInfoHeader.inc", + "lib/Target/Mips/MipsGenRegisterInfoTargetDesc.inc", + ], + ), + ( + ["-gen-subtarget"], + "lib/Target/Mips/MipsGenSubtargetInfo.inc", + ), + ], }, { "name": "MSP430", "short_name": "MSP430", - "tbl_outs": { - "lib/Target/MSP430/MSP430GenAsmMatcher.inc": ["-gen-asm-matcher"], - "lib/Target/MSP430/MSP430GenAsmWriter.inc": ["-gen-asm-writer"], - "lib/Target/MSP430/MSP430GenCallingConv.inc": ["-gen-callingconv"], - "lib/Target/MSP430/MSP430GenDAGISel.inc": ["-gen-dag-isel"], - "lib/Target/MSP430/MSP430GenDisassemblerTables.inc": ["-gen-disassembler"], - "lib/Target/MSP430/MSP430GenInstrInfo.inc": ["-gen-instr-info"], - "lib/Target/MSP430/MSP430GenMCCodeEmitter.inc": ["-gen-emitter"], - "lib/Target/MSP430/MSP430GenRegisterInfo.inc": ["-gen-register-info"], - "lib/Target/MSP430/MSP430GenSDNodeInfo.inc": ["-gen-sd-node-info"], - "lib/Target/MSP430/MSP430GenSubtargetInfo.inc": ["-gen-subtarget"], - }, + "tbl_outs": [ + ( + ["-gen-asm-matcher"], + "lib/Target/MSP430/MSP430GenAsmMatcher.inc", + ), + ( + ["-gen-asm-writer"], + "lib/Target/MSP430/MSP430GenAsmWriter.inc", + ), + ( + ["-gen-callingconv"], + "lib/Target/MSP430/MSP430GenCallingConv.inc", + ), + ( + ["-gen-dag-isel"], + "lib/Target/MSP430/MSP430GenDAGISel.inc", + ), + ( + ["-gen-disassembler"], + "lib/Target/MSP430/MSP430GenDisassemblerTables.inc", + ), + ( + ["-gen-instr-info"], + "lib/Target/MSP430/MSP430GenInstrInfo.inc", + ), + ( + ["-gen-emitter"], + "lib/Target/MSP430/MSP430GenMCCodeEmitter.inc", + ), + ( + ["-gen-register-info"], + [ + "lib/Target/MSP430/MSP430GenRegisterInfo.inc", + 
"lib/Target/MSP430/MSP430GenRegisterInfoEnums.inc", + "lib/Target/MSP430/MSP430GenRegisterInfoMCDesc.inc", + "lib/Target/MSP430/MSP430GenRegisterInfoHeader.inc", + "lib/Target/MSP430/MSP430GenRegisterInfoTargetDesc.inc", + ], + ), + ( + ["-gen-sd-node-info"], + "lib/Target/MSP430/MSP430GenSDNodeInfo.inc", + ), + ( + ["-gen-subtarget"], + "lib/Target/MSP430/MSP430GenSubtargetInfo.inc", + ), + ], }, { "name": "NVPTX", "short_name": "NVPTX", - "tbl_outs": { - "lib/Target/NVPTX/NVPTXGenRegisterInfo.inc": ["-gen-register-info"], - "lib/Target/NVPTX/NVPTXGenInstrInfo.inc": ["-gen-instr-info"], - "lib/Target/NVPTX/NVPTXGenAsmWriter.inc": ["-gen-asm-writer"], - "lib/Target/NVPTX/NVPTXGenDAGISel.inc": ["-gen-dag-isel"], - "lib/Target/NVPTX/NVPTXGenSubtargetInfo.inc": ["-gen-subtarget"], - }, + "tbl_outs": [ + ( + ["-gen-register-info"], + [ + "lib/Target/NVPTX/NVPTXGenRegisterInfo.inc", + "lib/Target/NVPTX/NVPTXGenRegisterInfoEnums.inc", + "lib/Target/NVPTX/NVPTXGenRegisterInfoMCDesc.inc", + "lib/Target/NVPTX/NVPTXGenRegisterInfoHeader.inc", + "lib/Target/NVPTX/NVPTXGenRegisterInfoTargetDesc.inc", + ], + ), + ( + ["-gen-instr-info"], + "lib/Target/NVPTX/NVPTXGenInstrInfo.inc", + ), + ( + ["-gen-asm-writer"], + "lib/Target/NVPTX/NVPTXGenAsmWriter.inc", + ), + ( + ["-gen-dag-isel"], + "lib/Target/NVPTX/NVPTXGenDAGISel.inc", + ), + ( + ["-gen-subtarget"], + "lib/Target/NVPTX/NVPTXGenSubtargetInfo.inc", + ), + ], }, { "name": "PowerPC", "short_name": "PPC", - "tbl_outs": { - "lib/Target/PowerPC/PPCGenAsmWriter.inc": ["-gen-asm-writer"], - "lib/Target/PowerPC/PPCGenAsmMatcher.inc": ["-gen-asm-matcher"], - "lib/Target/PowerPC/PPCGenMCCodeEmitter.inc": ["-gen-emitter"], - "lib/Target/PowerPC/PPCGenRegisterInfo.inc": ["-gen-register-info"], - "lib/Target/PowerPC/PPCGenInstrInfo.inc": ["-gen-instr-info"], - "lib/Target/PowerPC/PPCGenDAGISel.inc": ["-gen-dag-isel"], - "lib/Target/PowerPC/PPCGenFastISel.inc": ["-gen-fast-isel"], - "lib/Target/PowerPC/PPCGenCallingConv.inc": ["-gen-callingconv"], - "lib/Target/PowerPC/PPCGenSubtargetInfo.inc": ["-gen-subtarget"], - "lib/Target/PowerPC/PPCGenDisassemblerTables.inc": ["-gen-disassembler"], - "lib/Target/PowerPC/PPCGenRegisterBank.inc": ["-gen-register-bank"], - "lib/Target/PowerPC/PPCGenGlobalISel.inc": ["-gen-global-isel"], - "lib/Target/PowerPC/PPCGenExegesis.inc": ["-gen-exegesis"], - }, + "tbl_outs": [ + ( + ["-gen-asm-writer"], + "lib/Target/PowerPC/PPCGenAsmWriter.inc", + ), + ( + ["-gen-asm-matcher"], + "lib/Target/PowerPC/PPCGenAsmMatcher.inc", + ), + ( + ["-gen-emitter"], + "lib/Target/PowerPC/PPCGenMCCodeEmitter.inc", + ), + ( + ["-gen-register-info"], + [ + "lib/Target/PowerPC/PPCGenRegisterInfo.inc", + "lib/Target/PowerPC/PPCGenRegisterInfoEnums.inc", + "lib/Target/PowerPC/PPCGenRegisterInfoMCDesc.inc", + "lib/Target/PowerPC/PPCGenRegisterInfoHeader.inc", + "lib/Target/PowerPC/PPCGenRegisterInfoTargetDesc.inc", + ], + ), + ( + ["-gen-instr-info"], + "lib/Target/PowerPC/PPCGenInstrInfo.inc", + ), + ( + ["-gen-dag-isel"], + "lib/Target/PowerPC/PPCGenDAGISel.inc", + ), + ( + ["-gen-fast-isel"], + "lib/Target/PowerPC/PPCGenFastISel.inc", + ), + ( + ["-gen-callingconv"], + "lib/Target/PowerPC/PPCGenCallingConv.inc", + ), + ( + ["-gen-subtarget"], + "lib/Target/PowerPC/PPCGenSubtargetInfo.inc", + ), + ( + ["-gen-disassembler"], + "lib/Target/PowerPC/PPCGenDisassemblerTables.inc", + ), + ( + ["-gen-register-bank"], + "lib/Target/PowerPC/PPCGenRegisterBank.inc", + ), + ( + ["-gen-global-isel"], + "lib/Target/PowerPC/PPCGenGlobalISel.inc", + ), + ( + 
["-gen-exegesis"], + "lib/Target/PowerPC/PPCGenExegesis.inc", + ), + ], }, { "name": "RISCV", "short_name": "RISCV", - "tbl_outs": { - "lib/Target/RISCV/RISCVGenAsmMatcher.inc": ["-gen-asm-matcher"], - "lib/Target/RISCV/RISCVGenAsmWriter.inc": ["-gen-asm-writer"], - "lib/Target/RISCV/RISCVGenCompressInstEmitter.inc": ["-gen-compress-inst-emitter"], - "lib/Target/RISCV/RISCVGenDAGISel.inc": ["-gen-dag-isel"], - "lib/Target/RISCV/RISCVGenDisassemblerTables.inc": [ - "-gen-disassembler", - "--specialize-decoders-per-bitwidth", - ], - "lib/Target/RISCV/RISCVGenInstrInfo.inc": ["-gen-instr-info"], - "lib/Target/RISCV/RISCVGenMacroFusion.inc": ["-gen-macro-fusion-pred"], - "lib/Target/RISCV/RISCVGenMCCodeEmitter.inc": ["-gen-emitter"], - "lib/Target/RISCV/RISCVGenMCPseudoLowering.inc": ["-gen-pseudo-lowering"], - "lib/Target/RISCV/RISCVGenRegisterBank.inc": ["-gen-register-bank"], - "lib/Target/RISCV/RISCVGenRegisterInfo.inc": ["-gen-register-info"], - "lib/Target/RISCV/RISCVGenSubtargetInfo.inc": ["-gen-subtarget"], - "lib/Target/RISCV/RISCVGenSearchableTables.inc": ["-gen-searchable-tables"], - "lib/Target/RISCV/RISCVGenExegesis.inc": ["-gen-exegesis"], - "lib/Target/RISCV/RISCVGenSDNodeInfo.inc": ["-gen-sd-node-info"], - }, + "tbl_outs": [ + ( + ["-gen-asm-matcher"], + "lib/Target/RISCV/RISCVGenAsmMatcher.inc", + ), + ( + ["-gen-asm-writer"], + "lib/Target/RISCV/RISCVGenAsmWriter.inc", + ), + ( + ["-gen-compress-inst-emitter"], + "lib/Target/RISCV/RISCVGenCompressInstEmitter.inc", + ), + ( + ["-gen-dag-isel"], + "lib/Target/RISCV/RISCVGenDAGISel.inc", + ), + ( + [ + "-gen-disassembler", + "--specialize-decoders-per-bitwidth", + ], + "lib/Target/RISCV/RISCVGenDisassemblerTables.inc", + ), + ( + ["-gen-instr-info"], + "lib/Target/RISCV/RISCVGenInstrInfo.inc", + ), + ( + ["-gen-macro-fusion-pred"], + "lib/Target/RISCV/RISCVGenMacroFusion.inc", + ), + ( + ["-gen-emitter"], + "lib/Target/RISCV/RISCVGenMCCodeEmitter.inc", + ), + ( + ["-gen-pseudo-lowering"], + "lib/Target/RISCV/RISCVGenMCPseudoLowering.inc", + ), + ( + ["-gen-register-bank"], + "lib/Target/RISCV/RISCVGenRegisterBank.inc", + ), + ( + ["-gen-register-info"], + [ + "lib/Target/RISCV/RISCVGenRegisterInfo.inc", + "lib/Target/RISCV/RISCVGenRegisterInfoEnums.inc", + "lib/Target/RISCV/RISCVGenRegisterInfoMCDesc.inc", + "lib/Target/RISCV/RISCVGenRegisterInfoHeader.inc", + "lib/Target/RISCV/RISCVGenRegisterInfoTargetDesc.inc", + ], + ), + ( + ["-gen-subtarget"], + "lib/Target/RISCV/RISCVGenSubtargetInfo.inc", + ), + ( + ["-gen-searchable-tables"], + "lib/Target/RISCV/RISCVGenSearchableTables.inc", + ), + ( + ["-gen-exegesis"], + "lib/Target/RISCV/RISCVGenExegesis.inc", + ), + ( + ["-gen-sd-node-info"], + "lib/Target/RISCV/RISCVGenSDNodeInfo.inc", + ), + ], "tbl_deps": [ ":riscv_isel_target_gen", ], @@ -2473,135 +3017,404 @@ llvm_target_lib_list = [lib for lib in [ { "name": "Sparc", "short_name": "Sparc", - "tbl_outs": { - "lib/Target/Sparc/SparcGenAsmWriter.inc": ["-gen-asm-writer"], - "lib/Target/Sparc/SparcGenAsmMatcher.inc": ["-gen-asm-matcher"], - "lib/Target/Sparc/SparcGenMCCodeEmitter.inc": ["-gen-emitter"], - "lib/Target/Sparc/SparcGenRegisterInfo.inc": ["-gen-register-info"], - "lib/Target/Sparc/SparcGenInstrInfo.inc": ["-gen-instr-info"], - "lib/Target/Sparc/SparcGenDAGISel.inc": ["-gen-dag-isel"], - "lib/Target/Sparc/SparcGenCallingConv.inc": ["-gen-callingconv"], - "lib/Target/Sparc/SparcGenSubtargetInfo.inc": ["-gen-subtarget"], - "lib/Target/Sparc/SparcGenDisassemblerTables.inc": ["-gen-disassembler"], - 
"lib/Target/Sparc/SparcGenSearchableTables.inc": ["-gen-searchable-tables"], - "lib/Target/Sparc/SparcGenSDNodeInfo.inc": [ - "-gen-sd-node-info", - "-sdnode-namespace=SPISD", - ], - }, + "tbl_outs": [ + ( + ["-gen-asm-writer"], + "lib/Target/Sparc/SparcGenAsmWriter.inc", + ), + ( + ["-gen-asm-matcher"], + "lib/Target/Sparc/SparcGenAsmMatcher.inc", + ), + ( + ["-gen-emitter"], + "lib/Target/Sparc/SparcGenMCCodeEmitter.inc", + ), + ( + ["-gen-register-info"], + [ + "lib/Target/Sparc/SparcGenRegisterInfo.inc", + "lib/Target/Sparc/SparcGenRegisterInfoEnums.inc", + "lib/Target/Sparc/SparcGenRegisterInfoMCDesc.inc", + "lib/Target/Sparc/SparcGenRegisterInfoHeader.inc", + "lib/Target/Sparc/SparcGenRegisterInfoTargetDesc.inc", + ], + ), + ( + ["-gen-instr-info"], + "lib/Target/Sparc/SparcGenInstrInfo.inc", + ), + ( + ["-gen-dag-isel"], + "lib/Target/Sparc/SparcGenDAGISel.inc", + ), + ( + ["-gen-callingconv"], + "lib/Target/Sparc/SparcGenCallingConv.inc", + ), + ( + ["-gen-subtarget"], + "lib/Target/Sparc/SparcGenSubtargetInfo.inc", + ), + ( + ["-gen-disassembler"], + "lib/Target/Sparc/SparcGenDisassemblerTables.inc", + ), + ( + ["-gen-searchable-tables"], + "lib/Target/Sparc/SparcGenSearchableTables.inc", + ), + ( + [ + "-gen-sd-node-info", + "-sdnode-namespace=SPISD", + ], + "lib/Target/Sparc/SparcGenSDNodeInfo.inc", + ), + ], }, { "name": "SPIRV", "short_name": "SPIRV", - "tbl_outs": { - "lib/Target/SPIRV/SPIRVGenAsmWriter.inc": ["-gen-asm-writer"], - "lib/Target/SPIRV/SPIRVGenMCCodeEmitter.inc": ["-gen-emitter"], - "lib/Target/SPIRV/SPIRVGenGlobalISel.inc": ["-gen-global-isel"], - "lib/Target/SPIRV/SPIRVGenPreLegalizeGICombiner.inc": [ - "-gen-global-isel-combiner", - "-combiners=SPIRVPreLegalizerCombiner", - ], - "lib/Target/SPIRV/SPIRVGenInstrInfo.inc": ["-gen-instr-info"], - "lib/Target/SPIRV/SPIRVGenRegisterBank.inc": ["-gen-register-bank"], - "lib/Target/SPIRV/SPIRVGenRegisterInfo.inc": ["-gen-register-info"], - "lib/Target/SPIRV/SPIRVGenTables.inc": ["-gen-searchable-tables"], - "lib/Target/SPIRV/SPIRVGenSubtargetInfo.inc": ["-gen-subtarget"], - }, + "tbl_outs": [ + ( + ["-gen-asm-writer"], + "lib/Target/SPIRV/SPIRVGenAsmWriter.inc", + ), + ( + ["-gen-emitter"], + "lib/Target/SPIRV/SPIRVGenMCCodeEmitter.inc", + ), + ( + ["-gen-global-isel"], + "lib/Target/SPIRV/SPIRVGenGlobalISel.inc", + ), + ( + [ + "-gen-global-isel-combiner", + "-combiners=SPIRVPreLegalizerCombiner", + ], + "lib/Target/SPIRV/SPIRVGenPreLegalizeGICombiner.inc", + ), + ( + ["-gen-instr-info"], + "lib/Target/SPIRV/SPIRVGenInstrInfo.inc", + ), + ( + ["-gen-register-bank"], + "lib/Target/SPIRV/SPIRVGenRegisterBank.inc", + ), + ( + ["-gen-register-info"], + [ + "lib/Target/SPIRV/SPIRVGenRegisterInfo.inc", + "lib/Target/SPIRV/SPIRVGenRegisterInfoEnums.inc", + "lib/Target/SPIRV/SPIRVGenRegisterInfoMCDesc.inc", + "lib/Target/SPIRV/SPIRVGenRegisterInfoHeader.inc", + "lib/Target/SPIRV/SPIRVGenRegisterInfoTargetDesc.inc", + ], + ), + ( + ["-gen-searchable-tables"], + "lib/Target/SPIRV/SPIRVGenTables.inc", + ), + ( + ["-gen-subtarget"], + "lib/Target/SPIRV/SPIRVGenSubtargetInfo.inc", + ), + ], }, { "name": "SystemZ", "short_name": "SystemZ", - "tbl_outs": { - "lib/Target/SystemZ/SystemZGenAsmMatcher.inc": ["-gen-asm-matcher"], - "lib/Target/SystemZ/SystemZGenGNUAsmWriter.inc": ["-gen-asm-writer"], - "lib/Target/SystemZ/SystemZGenHLASMAsmWriter.inc": [ - "-gen-asm-writer", - "-asmwriternum=1", - ], - "lib/Target/SystemZ/SystemZGenCallingConv.inc": ["-gen-callingconv"], - "lib/Target/SystemZ/SystemZGenDAGISel.inc": ["-gen-dag-isel"], 
- "lib/Target/SystemZ/SystemZGenDisassemblerTables.inc": ["-gen-disassembler"], - "lib/Target/SystemZ/SystemZGenMCCodeEmitter.inc": ["-gen-emitter"], - "lib/Target/SystemZ/SystemZGenInstrInfo.inc": ["-gen-instr-info"], - "lib/Target/SystemZ/SystemZGenRegisterInfo.inc": ["-gen-register-info"], - "lib/Target/SystemZ/SystemZGenSubtargetInfo.inc": ["-gen-subtarget"], - }, + "tbl_outs": [ + ( + ["-gen-asm-matcher"], + "lib/Target/SystemZ/SystemZGenAsmMatcher.inc", + ), + ( + ["-gen-asm-writer"], + "lib/Target/SystemZ/SystemZGenGNUAsmWriter.inc", + ), + ( + [ + "-gen-asm-writer", + "-asmwriternum=1", + ], + "lib/Target/SystemZ/SystemZGenHLASMAsmWriter.inc", + ), + ( + ["-gen-callingconv"], + "lib/Target/SystemZ/SystemZGenCallingConv.inc", + ), + ( + ["-gen-dag-isel"], + "lib/Target/SystemZ/SystemZGenDAGISel.inc", + ), + ( + ["-gen-disassembler"], + "lib/Target/SystemZ/SystemZGenDisassemblerTables.inc", + ), + ( + ["-gen-emitter"], + "lib/Target/SystemZ/SystemZGenMCCodeEmitter.inc", + ), + ( + ["-gen-instr-info"], + "lib/Target/SystemZ/SystemZGenInstrInfo.inc", + ), + ( + ["-gen-register-info"], + [ + "lib/Target/SystemZ/SystemZGenRegisterInfo.inc", + "lib/Target/SystemZ/SystemZGenRegisterInfoEnums.inc", + "lib/Target/SystemZ/SystemZGenRegisterInfoMCDesc.inc", + "lib/Target/SystemZ/SystemZGenRegisterInfoHeader.inc", + "lib/Target/SystemZ/SystemZGenRegisterInfoTargetDesc.inc", + ], + ), + ( + ["-gen-subtarget"], + "lib/Target/SystemZ/SystemZGenSubtargetInfo.inc", + ), + ( + ["-gen-sd-node-info"], + "lib/Target/SystemZ/SystemZGenSDNodeInfo.inc", + ), + ], }, { "name": "VE", "short_name": "VE", - "tbl_outs": { - "lib/Target/VE/VEGenAsmMatcher.inc": ["-gen-asm-matcher"], - "lib/Target/VE/VEGenAsmWriter.inc": ["-gen-asm-writer"], - "lib/Target/VE/VEGenCallingConv.inc": ["-gen-callingconv"], - "lib/Target/VE/VEGenDAGISel.inc": ["-gen-dag-isel"], - "lib/Target/VE/VEGenDisassemblerTables.inc": ["-gen-disassembler"], - "lib/Target/VE/VEGenMCCodeEmitter.inc": ["-gen-emitter"], - "lib/Target/VE/VEGenInstrInfo.inc": ["-gen-instr-info"], - "lib/Target/VE/VEGenRegisterInfo.inc": ["-gen-register-info"], - "lib/Target/VE/VEGenSubtargetInfo.inc": ["-gen-subtarget"], - }, + "tbl_outs": [ + ( + ["-gen-asm-matcher"], + "lib/Target/VE/VEGenAsmMatcher.inc", + ), + ( + ["-gen-asm-writer"], + "lib/Target/VE/VEGenAsmWriter.inc", + ), + ( + ["-gen-callingconv"], + "lib/Target/VE/VEGenCallingConv.inc", + ), + ( + ["-gen-dag-isel"], + "lib/Target/VE/VEGenDAGISel.inc", + ), + ( + ["-gen-disassembler"], + "lib/Target/VE/VEGenDisassemblerTables.inc", + ), + ( + ["-gen-emitter"], + "lib/Target/VE/VEGenMCCodeEmitter.inc", + ), + ( + ["-gen-instr-info"], + "lib/Target/VE/VEGenInstrInfo.inc", + ), + ( + ["-gen-register-info"], + [ + "lib/Target/VE/VEGenRegisterInfo.inc", + "lib/Target/VE/VEGenRegisterInfoEnums.inc", + "lib/Target/VE/VEGenRegisterInfoMCDesc.inc", + "lib/Target/VE/VEGenRegisterInfoHeader.inc", + "lib/Target/VE/VEGenRegisterInfoTargetDesc.inc", + ], + ), + ( + ["-gen-subtarget"], + "lib/Target/VE/VEGenSubtargetInfo.inc", + ), + ( + ["-gen-sd-node-info"], + "lib/Target/VE/VEGenSDNodeInfo.inc", + ), + ], }, { "name": "WebAssembly", "short_name": "WebAssembly", - "tbl_outs": { - "lib/Target/WebAssembly/WebAssemblyGenDisassemblerTables.inc": ["-gen-disassembler"], - "lib/Target/WebAssembly/WebAssemblyGenAsmWriter.inc": ["-gen-asm-writer"], - "lib/Target/WebAssembly/WebAssemblyGenInstrInfo.inc": ["-gen-instr-info"], - "lib/Target/WebAssembly/WebAssemblyGenDAGISel.inc": ["-gen-dag-isel"], - 
"lib/Target/WebAssembly/WebAssemblyGenFastISel.inc": ["-gen-fast-isel"], - "lib/Target/WebAssembly/WebAssemblyGenMCCodeEmitter.inc": ["-gen-emitter"], - "lib/Target/WebAssembly/WebAssemblyGenRegisterInfo.inc": ["-gen-register-info"], - "lib/Target/WebAssembly/WebAssemblyGenSubtargetInfo.inc": ["-gen-subtarget"], - "lib/Target/WebAssembly/WebAssemblyGenAsmMatcher.inc": ["-gen-asm-matcher"], - "lib/Target/WebAssembly/WebAssemblyGenSDNodeInfo.inc": ["-gen-sd-node-info"], - }, + "tbl_outs": [ + ( + ["-gen-disassembler"], + "lib/Target/WebAssembly/WebAssemblyGenDisassemblerTables.inc", + ), + ( + ["-gen-asm-writer"], + "lib/Target/WebAssembly/WebAssemblyGenAsmWriter.inc", + ), + ( + ["-gen-instr-info"], + "lib/Target/WebAssembly/WebAssemblyGenInstrInfo.inc", + ), + ( + ["-gen-dag-isel"], + "lib/Target/WebAssembly/WebAssemblyGenDAGISel.inc", + ), + ( + ["-gen-fast-isel"], + "lib/Target/WebAssembly/WebAssemblyGenFastISel.inc", + ), + ( + ["-gen-emitter"], + "lib/Target/WebAssembly/WebAssemblyGenMCCodeEmitter.inc", + ), + ( + ["-gen-register-info"], + [ + "lib/Target/WebAssembly/WebAssemblyGenRegisterInfo.inc", + "lib/Target/WebAssembly/WebAssemblyGenRegisterInfoEnums.inc", + "lib/Target/WebAssembly/WebAssemblyGenRegisterInfoMCDesc.inc", + "lib/Target/WebAssembly/WebAssemblyGenRegisterInfoHeader.inc", + "lib/Target/WebAssembly/WebAssemblyGenRegisterInfoTargetDesc.inc", + ], + ), + ( + ["-gen-subtarget"], + "lib/Target/WebAssembly/WebAssemblyGenSubtargetInfo.inc", + ), + ( + ["-gen-asm-matcher"], + "lib/Target/WebAssembly/WebAssemblyGenAsmMatcher.inc", + ), + ( + ["-gen-sd-node-info"], + "lib/Target/WebAssembly/WebAssemblyGenSDNodeInfo.inc", + ), + ], }, { "name": "X86", "short_name": "X86", - "tbl_outs": { - "lib/Target/X86/X86GenRegisterBank.inc": ["-gen-register-bank"], - "lib/Target/X86/X86GenRegisterInfo.inc": ["-gen-register-info"], - "lib/Target/X86/X86GenDisassemblerTables.inc": ["-gen-disassembler"], - "lib/Target/X86/X86GenInstrInfo.inc": ["-gen-instr-info"], - "lib/Target/X86/X86GenAsmWriter.inc": ["-gen-asm-writer"], - "lib/Target/X86/X86GenAsmWriter1.inc": [ - "-gen-asm-writer", - "-asmwriternum=1", - ], - "lib/Target/X86/X86GenAsmMatcher.inc": ["-gen-asm-matcher"], - "lib/Target/X86/X86GenDAGISel.inc": ["-gen-dag-isel"], - "lib/Target/X86/X86GenFastISel.inc": ["-gen-fast-isel"], - "lib/Target/X86/X86GenGlobalISel.inc": ["-gen-global-isel"], - "lib/Target/X86/X86GenCallingConv.inc": ["-gen-callingconv"], - "lib/Target/X86/X86GenSubtargetInfo.inc": ["-gen-subtarget"], - "lib/Target/X86/X86GenFoldTables.inc": [ - "-gen-x86-fold-tables", - "-asmwriternum=1", - ], - "lib/Target/X86/X86GenInstrMapping.inc": ["-gen-x86-instr-mapping"], - "lib/Target/X86/X86GenExegesis.inc": ["-gen-exegesis"], - "lib/Target/X86/X86GenMnemonicTables.inc": [ - "-gen-x86-mnemonic-tables", - "-asmwriternum=1", - ], - }, + "tbl_outs": [ + ( + ["-gen-register-bank"], + "lib/Target/X86/X86GenRegisterBank.inc", + ), + ( + ["-gen-register-info"], + [ + "lib/Target/X86/X86GenRegisterInfo.inc", + "lib/Target/X86/X86GenRegisterInfoEnums.inc", + "lib/Target/X86/X86GenRegisterInfoMCDesc.inc", + "lib/Target/X86/X86GenRegisterInfoHeader.inc", + "lib/Target/X86/X86GenRegisterInfoTargetDesc.inc", + ], + ), + ( + ["-gen-disassembler"], + "lib/Target/X86/X86GenDisassemblerTables.inc", + ), + ( + ["-gen-instr-info"], + "lib/Target/X86/X86GenInstrInfo.inc", + ), + ( + ["-gen-asm-writer"], + "lib/Target/X86/X86GenAsmWriter.inc", + ), + ( + [ + "-gen-asm-writer", + "-asmwriternum=1", + ], + "lib/Target/X86/X86GenAsmWriter1.inc", + 
), + ( + ["-gen-asm-matcher"], + "lib/Target/X86/X86GenAsmMatcher.inc", + ), + ( + ["-gen-dag-isel"], + "lib/Target/X86/X86GenDAGISel.inc", + ), + ( + ["-gen-fast-isel"], + "lib/Target/X86/X86GenFastISel.inc", + ), + ( + ["-gen-global-isel"], + "lib/Target/X86/X86GenGlobalISel.inc", + ), + ( + ["-gen-callingconv"], + "lib/Target/X86/X86GenCallingConv.inc", + ), + ( + ["-gen-subtarget"], + "lib/Target/X86/X86GenSubtargetInfo.inc", + ), + ( + [ + "-gen-x86-fold-tables", + "-asmwriternum=1", + ], + "lib/Target/X86/X86GenFoldTables.inc", + ), + ( + ["-gen-x86-instr-mapping"], + "lib/Target/X86/X86GenInstrMapping.inc", + ), + ( + ["-gen-exegesis"], + "lib/Target/X86/X86GenExegesis.inc", + ), + ( + [ + "-gen-x86-mnemonic-tables", + "-asmwriternum=1", + ], + "lib/Target/X86/X86GenMnemonicTables.inc", + ), + ], }, { "name": "XCore", "short_name": "XCore", - "tbl_outs": { - "lib/Target/XCore/XCoreGenAsmWriter.inc": ["-gen-asm-writer"], - "lib/Target/XCore/XCoreGenCallingConv.inc": ["-gen-callingconv"], - "lib/Target/XCore/XCoreGenDAGISel.inc": ["-gen-dag-isel"], - "lib/Target/XCore/XCoreGenDisassemblerTables.inc": ["-gen-disassembler"], - "lib/Target/XCore/XCoreGenInstrInfo.inc": ["-gen-instr-info"], - "lib/Target/XCore/XCoreGenRegisterInfo.inc": ["-gen-register-info"], - "lib/Target/XCore/XCoreGenSDNodeInfo.inc": ["-gen-sd-node-info"], - "lib/Target/XCore/XCoreGenSubtargetInfo.inc": ["-gen-subtarget"], - }, + "tbl_outs": [ + ( + ["-gen-asm-writer"], + "lib/Target/XCore/XCoreGenAsmWriter.inc", + ), + ( + ["-gen-callingconv"], + "lib/Target/XCore/XCoreGenCallingConv.inc", + ), + ( + ["-gen-dag-isel"], + "lib/Target/XCore/XCoreGenDAGISel.inc", + ), + ( + ["-gen-disassembler"], + "lib/Target/XCore/XCoreGenDisassemblerTables.inc", + ), + ( + ["-gen-instr-info"], + "lib/Target/XCore/XCoreGenInstrInfo.inc", + ), + ( + ["-gen-register-info"], + [ + "lib/Target/XCore/XCoreGenRegisterInfo.inc", + "lib/Target/XCore/XCoreGenRegisterInfoEnums.inc", + "lib/Target/XCore/XCoreGenRegisterInfoMCDesc.inc", + "lib/Target/XCore/XCoreGenRegisterInfoHeader.inc", + "lib/Target/XCore/XCoreGenRegisterInfoTargetDesc.inc", + ], + ), + ( + ["-gen-sd-node-info"], + "lib/Target/XCore/XCoreGenSDNodeInfo.inc", + ), + ( + ["-gen-subtarget"], + "lib/Target/XCore/XCoreGenSubtargetInfo.inc", + ), + ], }, ] if lib["name"] in llvm_targets] @@ -2639,16 +3452,46 @@ gentbl_cc_library( gentbl_cc_library( name = "r600_target_gen", strip_include_prefix = "lib/Target/AMDGPU", - tbl_outs = { - "lib/Target/AMDGPU/R600GenAsmWriter.inc": ["-gen-asm-writer"], - "lib/Target/AMDGPU/R600GenCallingConv.inc": ["-gen-callingconv"], - "lib/Target/AMDGPU/R600GenDAGISel.inc": ["-gen-dag-isel"], - "lib/Target/AMDGPU/R600GenDFAPacketizer.inc": ["-gen-dfa-packetizer"], - "lib/Target/AMDGPU/R600GenInstrInfo.inc": ["-gen-instr-info"], - "lib/Target/AMDGPU/R600GenMCCodeEmitter.inc": ["-gen-emitter"], - "lib/Target/AMDGPU/R600GenRegisterInfo.inc": ["-gen-register-info"], - "lib/Target/AMDGPU/R600GenSubtargetInfo.inc": ["-gen-subtarget"], - }, + tbl_outs = [ + ( + ["-gen-asm-writer"], + "lib/Target/AMDGPU/R600GenAsmWriter.inc", + ), + ( + ["-gen-callingconv"], + "lib/Target/AMDGPU/R600GenCallingConv.inc", + ), + ( + ["-gen-dag-isel"], + "lib/Target/AMDGPU/R600GenDAGISel.inc", + ), + ( + ["-gen-dfa-packetizer"], + "lib/Target/AMDGPU/R600GenDFAPacketizer.inc", + ), + ( + ["-gen-instr-info"], + "lib/Target/AMDGPU/R600GenInstrInfo.inc", + ), + ( + ["-gen-emitter"], + "lib/Target/AMDGPU/R600GenMCCodeEmitter.inc", + ), + ( + ["-gen-register-info"], + [ + 
"lib/Target/AMDGPU/R600GenRegisterInfo.inc", + "lib/Target/AMDGPU/R600GenRegisterInfoEnums.inc", + "lib/Target/AMDGPU/R600GenRegisterInfoMCDesc.inc", + "lib/Target/AMDGPU/R600GenRegisterInfoHeader.inc", + "lib/Target/AMDGPU/R600GenRegisterInfoTargetDesc.inc", + ], + ), + ( + ["-gen-subtarget"], + "lib/Target/AMDGPU/R600GenSubtargetInfo.inc", + ), + ], tblgen = ":llvm-tblgen", td_file = "lib/Target/AMDGPU/R600.td", deps = [ @@ -3381,7 +4224,10 @@ cc_library( gentbl_cc_library( name = "LibOptionsTableGen", strip_include_prefix = "lib/ToolDrivers/llvm-lib", - tbl_outs = {"lib/ToolDrivers/llvm-lib/Options.inc": ["-gen-opt-parser-defs"]}, + tbl_outs = [( + ["-gen-opt-parser-defs"], + "lib/ToolDrivers/llvm-lib/Options.inc", + )], tblgen = ":llvm-tblgen", td_file = "lib/ToolDrivers/llvm-lib/Options.td", deps = [":OptParserTdFiles"], diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 153c7eeedd0ab..1421ec553f251 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -10318,6 +10318,8 @@ cc_library( ), includes = ["include"], deps = [ + ":FunctionInterfaces", + ":IR", ":OpenACCDialect", ":OpenACCOpsIncGen", ":OpenACCPassIncGen", @@ -10325,6 +10327,7 @@ cc_library( ":Support", ":ViewLikeInterface", "//llvm:Support", + "//llvm:ir_headers", ], ) @@ -13105,6 +13108,7 @@ cc_library( ":RuntimeVerifiableOpInterface", ":ShapedOpInterfaces", ":SideEffectInterfaces", + ":UBDialect", ":ValueBoundsOpInterface", ":ViewLikeInterface", "//llvm:Support", diff --git a/utils/bazel/llvm-project-overlay/mlir/tblgen.bzl b/utils/bazel/llvm-project-overlay/mlir/tblgen.bzl index 35888aac37e17..d28a8854fa896 100644 --- a/utils/bazel/llvm-project-overlay/mlir/tblgen.bzl +++ b/utils/bazel/llvm-project-overlay/mlir/tblgen.bzl @@ -153,7 +153,7 @@ def _gentbl_rule_impl(ctx): args.add("-o", ctx.outputs.out) ctx.actions.run( - outputs = [ctx.outputs.out], + outputs = [ctx.outputs.out] + ctx.outputs.additional_outputs, inputs = trans_srcs, executable = ctx.executable.tblgen, execution_requirements = {"supports-path-mapping": "1"}, @@ -195,6 +195,9 @@ gentbl_rule = rule( doc = "The output file for the TableGen invocation.", mandatory = True, ), + "additional_outputs": attr.output_list( + doc = "Extra output files from the TableGen invocation. The primary 'out' is used for the -o argument.", + ), "opts": attr.string_list( doc = "Additional command line options to add to the TableGen" + " invocation. For include arguments, prefer to use" + @@ -313,9 +316,12 @@ def gentbl_filegroup( name: The name of the generated filegroup rule for use in dependencies. tblgen: The binary used to produce the output. td_file: The primary table definitions file. - tbl_outs: Either a dict {out: [opts]} or a list of tuples ([opts], out), - where each 'opts' is a list of options passed to tblgen, each option - being a string, and 'out' is the corresponding output file produced. + tbl_outs: Either a dict {out: [opts]}, a list of tuples ([opts], out), + or a list of tuples ([opts], [outs]). Each 'opts' is a list of options + passed to tblgen, each option being a string, + and 'out' is the corresponding output file produced. If 'outs' are used, + the first path in the list is passed to '-o' but tblgen is expected + to produce all listed outputs. 
td_srcs: See gentbl_rule.td_srcs includes: See gentbl_rule.includes deps: See gentbl_rule.deps @@ -325,9 +331,14 @@ def gentbl_filegroup( **kwargs: Extra keyword arguments to pass to all generated rules. """ + included_srcs = [] if type(tbl_outs) == type({}): tbl_outs = [(v, k) for k, v in tbl_outs.items()] - for (opts, out) in tbl_outs: + for (opts, output_or_outputs) in tbl_outs: + outs = output_or_outputs if type(output_or_outputs) == type([]) else [output_or_outputs] + out = outs[0] + if not any([skip_opt in opts for skip_opt in skip_opts]): + included_srcs.extend(outs) first_opt = opts[0] if opts else "" rule_suffix = "_{}_{}".format( first_opt.replace("-", "_").replace("=", "_"), @@ -343,6 +354,7 @@ def gentbl_filegroup( deps = deps, includes = includes, out = out, + additional_outputs = outs[1:], **kwargs ) @@ -364,7 +376,6 @@ def gentbl_filegroup( **kwargs ) - included_srcs = [f for (opts, f) in tbl_outs if not any([skip_opt in opts for skip_opt in skip_opts])] native.filegroup( name = name, srcs = included_srcs,
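
For context on the tbl_outs migration above: under the updated contract, an entry can map one set of tblgen options to several declared outputs, only the first of which is named on the tblgen command line. A minimal sketch of the three accepted forms, using a hypothetical Example target (the rule name and all file paths below are illustrative, not part of this change):

    # Sketch only: "example_tables_gen" and the Example/* paths are hypothetical.
    gentbl_cc_library(
        name = "example_tables_gen",
        tbl_outs = [
            # Form 1: ([opts], out) - a single output, passed to tblgen via -o.
            (
                ["-gen-instr-info"],
                "lib/Target/Example/ExampleGenInstrInfo.inc",
            ),
            # Form 2: ([opts], [outs]) - outs[0] goes on -o; the remaining paths
            # are declared via additional_outputs, and tblgen is expected to
            # emit them as side outputs of the same invocation.
            (
                ["-gen-register-info"],
                [
                    "lib/Target/Example/ExampleGenRegisterInfo.inc",
                    "lib/Target/Example/ExampleGenRegisterInfoEnums.inc",
                ],
            ),
        ],
        tblgen = ":llvm-tblgen",
        td_file = "lib/Target/Example/Example.td",
    )

    # Form 3: the legacy dict {out: [opts]} is still accepted and is normalized
    # to the tuple form at the top of gentbl_filegroup:
    #     tbl_outs = {"lib/Target/Example/ExampleGenInstrInfo.inc": ["-gen-instr-info"]},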
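
And a rough expansion of how the loop in gentbl_filegroup splits a multi-output entry when it instantiates gentbl_rule: the first path becomes the mandatory out (and hence the -o argument assembled in _gentbl_rule_impl), while the tail is declared through the new additional_outputs attribute so Bazel still tracks every generated file. The name and paths below are again hypothetical, and the generated rule name is abbreviated (the real suffix is derived from the first option plus a second component not shown in this hunk):

    # Approximate expansion of Form 2 above (sketch only; deps/td_srcs elided).
    gentbl_rule(
        name = "example_tables_gen_gen_register_info_genrule",
        out = "lib/Target/Example/ExampleGenRegisterInfo.inc",  # outs[0] -> -o
        additional_outputs = [
            # outs[1:] -> declared outputs, not named on the command line
            "lib/Target/Example/ExampleGenRegisterInfoEnums.inc",
        ],
        opts = ["-gen-register-info"],
        tblgen = ":llvm-tblgen",
        td_file = "lib/Target/Example/Example.td",
    )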