diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index dd34e6ce788df..e974bca18452e 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -602,6 +602,8 @@ Improvements to Clang's diagnostics
   Moved the warning for a missing (though implied) attribute on a redeclaration
   into this group. Added a new warning in this group for the case where the
   attribute is missing/implicit on an override of a virtual method.
+- Removed ``-Wperf-constraint-implies-noexcept`` from ``-Wall``. This warning is somewhat nit-picky,
+  and attempts to resolve it by adding ``noexcept`` can create new ways for programs to crash. (#GH167540)
 - Implemented diagnostics when retrieving the tuple size for types where its
   specialization of `std::tuple_size` produces an invalid size (either negative
   or greater than the implementation limit). (#GH159563)
 - Fixed fix-it hint for fold expressions. Clang now correctly places the suggested right
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index 74df66a25e6f3..71f35b597d965 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -1312,7 +1312,7 @@ def Consumed : DiagGroup<"consumed">;
 // DefaultIgnore in addition to putting it here.
 def All : DiagGroup<"all", [Most, Parentheses, Switch, SwitchBool,
                             MisleadingIndentation, PackedNonPod,
-                            VLACxxExtension, PerfConstraintImpliesNoexcept]>;
+                            VLACxxExtension]>;

 // Warnings that should be in clang-cl /w4.
 def : DiagGroup<"CL4", [All, Extra]>;
diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index c55fcabef0b3f..c67493a913d58 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -631,10 +631,49 @@ RValue CIRGenFunction::emitLoadOfLValue(LValue lv, SourceLocation loc) {
                                        lv.getVectorIdx()));
   }

+  if (lv.isExtVectorElt())
+    return emitLoadOfExtVectorElementLValue(lv);
+
   cgm.errorNYI(loc, "emitLoadOfLValue");
   return RValue::get(nullptr);
 }

+int64_t CIRGenFunction::getAccessedFieldNo(unsigned int idx,
+                                           const mlir::ArrayAttr elts) {
+  auto elt = mlir::cast<mlir::IntegerAttr>(elts[idx]);
+  return elt.getInt();
+}
+
+// If this is a reference to a subset of the elements of a vector, create an
+// appropriate shufflevector.
+RValue CIRGenFunction::emitLoadOfExtVectorElementLValue(LValue lv) {
+  mlir::Location loc = lv.getExtVectorPointer().getLoc();
+  mlir::Value vec = builder.createLoad(loc, lv.getExtVectorAddress());
+
+  // HLSL allows treating scalars as one-element vectors. Converting the scalar
+  // IR value to a vector here allows the rest of codegen to behave as normal.
+  if (getLangOpts().HLSL && !mlir::isa<cir::VectorType>(vec.getType())) {
+    cgm.errorNYI(loc, "emitLoadOfExtVectorElementLValue: HLSL");
+    return {};
+  }
+
+  const mlir::ArrayAttr elts = lv.getExtVectorElts();
+
+  // If the result of the expression is a non-vector type, we must be
+  // extracting a single element. Just codegen as an extractelement.
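+  // E.g. (illustrative): for `vi4 a`, the access `a.y` encodes the single
+  // element index {1}, so the value below becomes one cir.vec.extract of
+  // element 1 from the loaded vector.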
+  const auto *exprVecTy = lv.getType()->getAs<clang::VectorType>();
+  if (!exprVecTy) {
+    int64_t indexValue = getAccessedFieldNo(0, elts);
+    cir::ConstantOp index =
+        builder.getConstInt(loc, builder.getSInt64Ty(), indexValue);
+    return RValue::get(cir::VecExtractOp::create(builder, loc, vec, index));
+  }
+
+  cgm.errorNYI(
+      loc, "emitLoadOfExtVectorElementLValue: Result of expr is vector type");
+  return {};
+}
+
 static cir::FuncOp emitFunctionDeclPointer(CIRGenModule &cgm, GlobalDecl gd) {
   assert(!cir::MissingFeatures::weakRefReference());
   return cgm.getAddrOfFunction(gd);
@@ -1120,6 +1159,46 @@ CIRGenFunction::emitArraySubscriptExpr(const clang::ArraySubscriptExpr *e) {
   return lv;
 }

+LValue CIRGenFunction::emitExtVectorElementExpr(const ExtVectorElementExpr *e) {
+  // Emit the base vector as an l-value.
+  LValue base;
+
+  // ExtVectorElementExpr's base can either be a vector or pointer to vector.
+  if (e->isArrow()) {
+    cgm.errorNYI(e->getSourceRange(),
+                 "emitExtVectorElementExpr: pointer to vector");
+    return {};
+  } else if (e->getBase()->isGLValue()) {
+    // Otherwise, if the base is an lvalue ( as in the case of foo.x.x),
+    // emit the base as an lvalue.
+    assert(e->getBase()->getType()->isVectorType());
+    base = emitLValue(e->getBase());
+  } else {
+    // Otherwise, the base is a normal rvalue (as in (V+V).x), emit it as such.
+    cgm.errorNYI(e->getSourceRange(),
+                 "emitExtVectorElementExpr: base is a normal rvalue");
+    return {};
+  }
+
+  QualType type =
+      e->getType().withCVRQualifiers(base.getQuals().getCVRQualifiers());
+
+  // Encode the element access list into a vector of unsigned indices.
+  SmallVector<uint32_t, 4> indices;
+  e->getEncodedElementAccess(indices);
+
+  if (base.isSimple()) {
+    SmallVector<int64_t> attrElts(indices.begin(), indices.end());
+    mlir::ArrayAttr elts = builder.getI64ArrayAttr(attrElts);
+    return LValue::makeExtVectorElt(base.getAddress(), elts, type,
+                                    base.getBaseInfo());
+  }
+
+  cgm.errorNYI(e->getSourceRange(),
+               "emitExtVectorElementExpr: isSimple is false");
+  return {};
+}
+
 LValue CIRGenFunction::emitStringLiteralLValue(const StringLiteral *e,
                                                llvm::StringRef name) {
   cir::GlobalOp globalOp = cgm.getGlobalForStringLiteral(e, name);
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp b/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp
index dcded94b012f4..872fc8d14ad95 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp
@@ -839,6 +839,9 @@ void AggExprEmitter::visitCXXParenListOrInitListExpr(
     }
   }

+  // Prepare a 'this' for CXXDefaultInitExprs.
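+  // E.g. (illustrative): in `struct Parent { int a = getA(); };` the
+  // CXXDefaultInitExpr for 'a' calls getA() on the object under construction,
+  // so it needs 'this' to refer to dest's address (see struct-init.cpp below).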
+  CIRGenFunction::FieldConstructionScope fcScope(cgf, dest.getAddress());
+
   LValue destLV = cgf.makeAddrLValue(dest.getAddress(), e->getType());

   if (record->isUnion()) {
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
index 1c4f51c11dc5e..5d9188777741d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
@@ -199,6 +199,10 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> {
     return emitNullValue(e->getType(), cgf.getLoc(e->getSourceRange()));
   }

+  mlir::Value VisitGNUNullExpr(const GNUNullExpr *e) {
+    return emitNullValue(e->getType(), cgf.getLoc(e->getSourceRange()));
+  }
+
   mlir::Value VisitOpaqueValueExpr(OpaqueValueExpr *e) {
     if (e->isGLValue())
       return emitLoadOfLValue(cgf.getOrCreateOpaqueLValueMapping(e),
@@ -279,6 +283,8 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> {
                              e->getSourceRange().getBegin());
   }

+  mlir::Value VisitExtVectorElementExpr(Expr *e) { return emitLoadOfLValue(e); }
+
   mlir::Value VisitMemberExpr(MemberExpr *e);

   mlir::Value VisitCompoundLiteralExpr(CompoundLiteralExpr *e) {
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
index cc75acc18c211..b73071af2a5d4 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
@@ -887,6 +887,8 @@ LValue CIRGenFunction::emitLValue(const Expr *e) {
     return emitConditionalOperatorLValue(cast<ConditionalOperator>(e));
   case Expr::ArraySubscriptExprClass:
     return emitArraySubscriptExpr(cast<ArraySubscriptExpr>(e));
+  case Expr::ExtVectorElementExprClass:
+    return emitExtVectorElementExpr(cast<ExtVectorElementExpr>(e));
   case Expr::UnaryOperatorClass:
     return emitUnaryOpLValue(cast<UnaryOperator>(e));
   case Expr::StringLiteralClass:
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index 4f5948b6e4467..3984f288b9bb0 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1277,6 +1277,8 @@ class CIRGenFunction : public CIRGenTypeCache {
                                QualType &baseType, Address &addr);
   LValue emitArraySubscriptExpr(const clang::ArraySubscriptExpr *e);

+  LValue emitExtVectorElementExpr(const ExtVectorElementExpr *e);
+
   Address emitArrayToPointerDecay(const Expr *e,
                                   LValueBaseInfo *baseInfo = nullptr);

@@ -1342,6 +1344,8 @@ class CIRGenFunction : public CIRGenTypeCache {
                                  mlir::Value emittedE, bool isDynamic);

+  int64_t getAccessedFieldNo(unsigned idx, mlir::ArrayAttr elts);
+
   RValue emitCall(const CIRGenFunctionInfo &funcInfo,
                   const CIRGenCallee &callee, ReturnValueSlot returnValue,
                   const CallArgList &args, cir::CIRCallOpInterface *callOp,
@@ -1637,6 +1641,8 @@ class CIRGenFunction : public CIRGenTypeCache {
   /// Load a complex number from the specified l-value.
   mlir::Value emitLoadOfComplex(LValue src, SourceLocation loc);

+  RValue emitLoadOfExtVectorElementLValue(LValue lv);
+
   /// Given an expression that represents a value lvalue, this method emits
   /// the address of the lvalue, then loads the result as an rvalue,
   /// returning the rvalue.
diff --git a/clang/lib/CIR/CodeGen/CIRGenValue.h b/clang/lib/CIR/CodeGen/CIRGenValue.h
index ab245a771d72c..20a3d0ef61341 100644
--- a/clang/lib/CIR/CodeGen/CIRGenValue.h
+++ b/clang/lib/CIR/CodeGen/CIRGenValue.h
@@ -166,7 +166,8 @@ class LValue {
   //  this is the alignment of the whole vector)
   unsigned alignment;
   mlir::Value v;
-  mlir::Value vectorIdx; // Index for vector subscript
+  mlir::Value vectorIdx;      // Index for vector subscript
+  mlir::Attribute vectorElts; // ExtVector element subset: V.xyx
   mlir::Type elementType;
   LValueBaseInfo baseInfo;
   const CIRGenBitFieldInfo *bitFieldInfo{nullptr};
@@ -190,6 +191,7 @@ class LValue {
   bool isSimple() const { return lvType == Simple; }
   bool isVectorElt() const { return lvType == VectorElt; }
   bool isBitField() const { return lvType == BitField; }
+  bool isExtVectorElt() const { return lvType == ExtVectorElt; }
   bool isGlobalReg() const { return lvType == GlobalReg; }
   bool isVolatile() const { return quals.hasVolatile(); }

@@ -254,6 +256,22 @@ class LValue {
     return vectorIdx;
   }

+  // extended vector elements.
+  Address getExtVectorAddress() const {
+    assert(isExtVectorElt());
+    return Address(getExtVectorPointer(), elementType, getAlignment());
+  }
+
+  mlir::Value getExtVectorPointer() const {
+    assert(isExtVectorElt());
+    return v;
+  }
+
+  mlir::ArrayAttr getExtVectorElts() const {
+    assert(isExtVectorElt());
+    return mlir::cast<mlir::ArrayAttr>(vectorElts);
+  }
+
   static LValue makeVectorElt(Address vecAddress, mlir::Value index,
                               clang::QualType t, LValueBaseInfo baseInfo) {
     LValue r;
@@ -265,6 +283,19 @@ class LValue {
     return r;
   }

+  static LValue makeExtVectorElt(Address vecAddress, mlir::ArrayAttr elts,
+                                 clang::QualType type,
+                                 LValueBaseInfo baseInfo) {
+    LValue r;
+    r.lvType = ExtVectorElt;
+    r.v = vecAddress.getPointer();
+    r.elementType = vecAddress.getElementType();
+    r.vectorElts = elts;
+    r.initialize(type, type.getQualifiers(), vecAddress.getAlignment(),
+                 baseInfo);
+    return r;
+  }
+
   // bitfield lvalue
   Address getBitFieldAddress() const {
     return Address(getBitFieldPointer(), elementType, getAlignment());
diff --git a/clang/test/CIR/CodeGen/gnu-null.cpp b/clang/test/CIR/CodeGen/gnu-null.cpp
new file mode 100644
index 0000000000000..d1d15f2007621
--- /dev/null
+++ b/clang/test/CIR/CodeGen/gnu-null.cpp
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
+
+void gnu_null_expr() {
+  long a = __null;
+  int *b = __null;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["a", init]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["b", init]
+// CIR: %[[CONST_0:.*]] = cir.const #cir.int<0> : !s64i
+// CIR: cir.store {{.*}} %[[CONST_0]], %[[A_ADDR]] : !s64i, !cir.ptr<!s64i>
+// CIR: %[[CONST_NULL:.*]] = cir.const #cir.ptr<null> : !cir.ptr<!s32i>
+// CIR: cir.store {{.*}} %[[CONST_NULL]], %[[B_ADDR]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca i64, i64 1, align 8
+// LLVM: %[[B_ADDR:.*]] = alloca ptr, i64 1, align 8
+// LLVM: store i64 0, ptr %[[A_ADDR]], align 8
+// LLVM: store ptr null, ptr %[[B_ADDR]], align 8
+
+// OGCG: %[[A_ADDR:.*]] = alloca i64, align 8
+// OGCG: %[[B_ADDR:.*]] = alloca ptr, align 8
+// OGCG: store i64 0, ptr %[[A_ADDR]], align 8
+// OGCG: store ptr null, ptr %[[B_ADDR]], align 8
diff --git a/clang/test/CIR/CodeGen/struct-init.cpp b/clang/test/CIR/CodeGen/struct-init.cpp
index 8f146684ffb10..f5c013a599a40 100644
--- a/clang/test/CIR/CodeGen/struct-init.cpp
+++ b/clang/test/CIR/CodeGen/struct-init.cpp
@@ -205,3 +205,26 @@ void init_expr(int a, int b, int c) {
 // OGCG: %[[C_PLUS_THREE:.*]] = add nsw i32 %[[C]], 3
 // OGCG: store i32 %[[C_PLUS_THREE]], ptr %[[S_C]]
 // OGCG: ret void
+
+void cxx_default_init_with_struct_field() {
+  struct Parent {
+    int getA();
+    int a = getA();
+  };
+  Parent p = Parent{};
+}
+
+// CIR: %[[P_ADDR:.*]] = cir.alloca !rec_Parent, !cir.ptr<!rec_Parent>, ["p", init]
+// CIR: %[[P_ELEM_0_PTR:.*]] = cir.get_member %[[P_ADDR]][0] {name = "a"} : !cir.ptr<!rec_Parent> -> !cir.ptr<!s32i>
+// CIR: %[[METHOD_CALL:.*]] = cir.call @_ZZ34cxx_default_init_with_struct_fieldvEN6Parent4getAEv(%[[P_ADDR]]) : (!cir.ptr<!rec_Parent>) -> !s32i
+// CIR: cir.store{{.*}} %[[METHOD_CALL]], %[[P_ELEM_0_PTR]] : !s32i, !cir.ptr<!s32i>
+
+// LLVM: %[[P_ADDR:.*]] = alloca %struct.Parent, i64 1, align 4
+// LLVM: %[[P_ELEM_0_PTR:.*]] = getelementptr %struct.Parent, ptr %[[P_ADDR]], i32 0, i32 0
+// LLVM: %[[METHOD_CALL:.*]] = call i32 @_ZZ34cxx_default_init_with_struct_fieldvEN6Parent4getAEv(ptr %[[P_ADDR]])
+// LLVM: store i32 %[[METHOD_CALL]], ptr %[[P_ELEM_0_PTR]], align 4
+
+// OGCG: %[[P_ADDR:.*]] = alloca %struct.Parent, align 4
+// OGCG: %[[P_ELEM_0_PTR:.*]] = getelementptr inbounds nuw %struct.Parent, ptr %[[P_ADDR]], i32 0, i32 0
+// OGCG: %[[METHOD_CALL:.*]] = call noundef i32 @_ZZ34cxx_default_init_with_struct_fieldvEN6Parent4getAEv(ptr {{.*}} %[[P_ADDR]])
+// OGCG: store i32 %[[METHOD_CALL]], ptr %[[P_ELEM_0_PTR]], align 4
diff --git a/clang/test/CIR/CodeGen/vector-ext-element.cpp b/clang/test/CIR/CodeGen/vector-ext-element.cpp
new file mode 100644
index 0000000000000..de9d53936d2eb
--- /dev/null
+++ b/clang/test/CIR/CodeGen/vector-ext-element.cpp
@@ -0,0 +1,46 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
+
+typedef int vi4 __attribute__((ext_vector_type(4)));
+
+void element_expr_from_gl() {
+  vi4 a;
+  int x = a.x;
+  int y = a.y;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"]
+// CIR: %[[X_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init]
+// CIR: %[[Y_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["y", init]
+// CIR: %[[TMP_A:.*]] = cir.load {{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CIR: %[[CONST_0:.*]] = cir.const #cir.int<0> : !s64i
+// CIR: %[[ELEM_0:.*]] = cir.vec.extract %[[TMP_A]][%[[CONST_0]] : !s64i] : !cir.vector<4 x !s32i>
+// CIR: cir.store {{.*}} %[[ELEM_0]], %[[X_ADDR]] : !s32i, !cir.ptr<!s32i>
+// CIR: %[[TMP_A:.*]] = cir.load {{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CIR: %[[CONST_1:.*]] = cir.const #cir.int<1> : !s64i
+// CIR: %[[ELEM_1:.*]] = cir.vec.extract %[[TMP_A]][%[[CONST_1]] : !s64i] : !cir.vector<4 x !s32i>
+// CIR: cir.store {{.*}} %[[ELEM_1]], %[[Y_ADDR]] : !s32i, !cir.ptr<!s32i>
+
+// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[X_ADDR:.*]] = alloca i32, i64 1, align 4
+// LLVM: %[[Y_ADDR:.*]] = alloca i32, i64 1, align 4
+// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// LLVM: %[[ELEM_0:.*]] = extractelement <4 x i32> %[[TMP_A]], i64 0
+// LLVM: store i32 %[[ELEM_0]], ptr %[[X_ADDR]], align 4
+// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// LLVM: %[[ELEM_1:.*]] = extractelement <4 x i32> %[[TMP_A]], i64 1
+// LLVM: store i32 %[[ELEM_1]], ptr %[[Y_ADDR]], align 4
+
+// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[X_ADDR:.*]] = alloca i32, align 4
+// OGCG: %[[Y_ADDR:.*]] = alloca i32, align 4
+// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// OGCG: %[[ELEM_0:.*]] = extractelement <4 x i32> %[[TMP_A]], i64 0
+// OGCG: store i32 %[[ELEM_0]], ptr %[[X_ADDR]], align 4
+// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// OGCG: %[[ELEM_1:.*]] = extractelement <4 x i32> %[[TMP_A]], i64 1
+// OGCG: store i32 %[[ELEM_1]], ptr %[[Y_ADDR]], align 4
diff --git a/clang/test/Misc/warning-wall.c b/clang/test/Misc/warning-wall.c
index 91de843f88c91..4909ab034ef30 100644
--- a/clang/test/Misc/warning-wall.c
+++ b/clang/test/Misc/warning-wall.c
@@ -108,6 +108,5 @@ CHECK-NEXT:   -Wmisleading-indentation
 CHECK-NEXT:   -Wpacked-non-pod
 CHECK-NEXT:   -Wvla-cxx-extension
 CHECK-NEXT:     -Wvla-extension-static-assert
-CHECK-NEXT:   -Wperf-constraint-implies-noexcept

 CHECK-NOT:-W
diff --git a/flang/include/flang/Lower/OpenMP.h b/flang/include/flang/Lower/OpenMP.h
index 962abd8952073..df01a7b82c66c 100644
--- a/flang/include/flang/Lower/OpenMP.h
+++ b/flang/include/flang/Lower/OpenMP.h
@@ -97,13 +97,6 @@ bool markOpenMPDeferredDeclareTargetFunctions(
     AbstractConverter &);
 void genOpenMPRequires(mlir::Operation *, const Fortran::semantics::Symbol *);

-// Materialize omp.declare_mapper ops for mapper declarations found in
-// imported modules. If \p scope is null, materialize for the whole
-// semantics global scope; otherwise, operate recursively starting at \p scope.
-void materializeOpenMPDeclareMappers(
-    Fortran::lower::AbstractConverter &, Fortran::semantics::SemanticsContext &,
-    const Fortran::semantics::Scope *scope = nullptr);
-
 } // namespace lower
 } // namespace Fortran
diff --git a/flang/include/flang/Semantics/symbol.h b/flang/include/flang/Semantics/symbol.h
index 95efe1ae2bd5e..cb27d544ed9f5 100644
--- a/flang/include/flang/Semantics/symbol.h
+++ b/flang/include/flang/Semantics/symbol.h
@@ -777,24 +777,6 @@ class UserReductionDetails {
   DeclVector declList_;
 };

-// Used for OpenMP DECLARE MAPPER, it holds the declaration constructs
-// so they can be serialized into module files and later re-parsed when
-// USE-associated.
-class MapperDetails {
-public:
-  using DeclVector = std::vector<const parser::OpenMPDeclarativeConstruct *>;
-
-  MapperDetails() = default;
-
-  void AddDecl(const parser::OpenMPDeclarativeConstruct *decl) {
-    declList_.emplace_back(decl);
-  }
-  const DeclVector &GetDeclList() const { return declList_; }
-
-private:
-  DeclVector declList_;
-};
-
 class UnknownDetails {};

 using Details = std::variant<UnknownDetails, MainProgramDetails, ModuleDetails,
     SubprogramDetails, SubprogramNameDetails, EntityDetails,
     ObjectEntityDetails, ProcEntityDetails, AssocEntityDetails,
     DerivedTypeDetails, UseDetails, UseErrorDetails, HostAssocDetails,
     GenericDetails, ProcBindingDetails, NamelistDetails, CommonBlockDetails,
-    TypeParamDetails, MiscDetails, UserReductionDetails, MapperDetails>;
+    TypeParamDetails, MiscDetails, UserReductionDetails>;
 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Details &);
 std::string DetailsToString(const Details &);
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 5bfcff310c232..20e85a940b182 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -448,13 +448,6 @@ class FirConverter : public Fortran::lower::AbstractConverter {
       }
     });

-    // Ensure imported OpenMP declare mappers are materialized at module
-    // scope before lowering any constructs that may reference them.
-    createBuilderOutsideOfFuncOpAndDo([&]() {
-      Fortran::lower::materializeOpenMPDeclareMappers(
-          *this, bridge.getSemanticsContext());
-    });
-
     // Create definitions of intrinsic module constants.
     createBuilderOutsideOfFuncOpAndDo(
         [&]() { createIntrinsicModuleDefinitions(pft); });
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index 0026a7e5eb3b5..9cdd46137adbf 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -1363,14 +1363,10 @@ bool ClauseProcessor::processMap(
     }
     if (mappers) {
       assert(mappers->size() == 1 && "more than one mapper");
-      const semantics::Symbol *mapperSym = mappers->front().v.id().symbol;
-      mapperIdName = mapperSym->name().ToString();
-      if (mapperIdName != "default") {
-        // Mangle with the ultimate owner so that use-associated mapper
-        // identifiers resolve to the same symbol as their defining scope.
-        const semantics::Symbol &ultimate = mapperSym->GetUltimate();
-        mapperIdName = converter.mangleName(mapperIdName, ultimate.owner());
-      }
+      mapperIdName = mappers->front().v.id().symbol->name().ToString();
+      if (mapperIdName != "default")
+        mapperIdName = converter.mangleName(
+            mapperIdName, mappers->front().v.id().symbol->owner());
     }

     processMapObjects(stmtCtx, clauseLocation,
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 5b95474775661..1f7084ab4315d 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -3579,10 +3579,10 @@ genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
   TODO(converter.getCurrentLocation(), "OpenMPDeclareSimdConstruct");
 }

-static void genOpenMPDeclareMapperImpl(
-    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
-    const parser::OpenMPDeclareMapperConstruct &construct,
-    const semantics::Symbol *mapperSymOpt = nullptr) {
+static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
+                   semantics::SemanticsContext &semaCtx,
+                   lower::pft::Evaluation &eval,
+                   const parser::OpenMPDeclareMapperConstruct &construct) {
   mlir::Location loc = converter.genLocation(construct.source);
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   const parser::OmpArgumentList &args = construct.v.Arguments();
@@ -3598,17 +3598,8 @@ static void genOpenMPDeclareMapperImpl(
          "Expected derived type");

   std::string mapperNameStr = mapperName;
-  if (mapperSymOpt && mapperNameStr != "default") {
-    mapperNameStr = converter.mangleName(mapperNameStr, mapperSymOpt->owner());
-  } else if (auto *sym =
-                 converter.getCurrentScope().FindSymbol(mapperNameStr)) {
+  if (auto *sym = converter.getCurrentScope().FindSymbol(mapperNameStr))
     mapperNameStr = converter.mangleName(mapperNameStr, sym->owner());
-  }
-
-  // If the mapper op already exists (e.g., created by regular lowering or by
-  // materialization of imported mappers), do not recreate it.
-  if (converter.getModuleOp().lookupSymbol<mlir::omp::DeclareMapperOp>(
-          mapperNameStr))
-    return;

   // Save current insertion point before moving to the module scope to create
   // the DeclareMapperOp
@@ -3631,13 +3622,6 @@ static void genOpenMPDeclareMapperImpl(
   mlir::omp::DeclareMapperInfoOp::create(firOpBuilder, loc, clauseOps.mapVars);
 }

-static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
-                   semantics::SemanticsContext &semaCtx,
-                   lower::pft::Evaluation &eval,
-                   const parser::OpenMPDeclareMapperConstruct &construct) {
-  genOpenMPDeclareMapperImpl(converter, semaCtx, construct);
-}
-
 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                    semantics::SemanticsContext &semaCtx,
                    lower::pft::Evaluation &eval,
@@ -4273,36 +4257,3 @@ void Fortran::lower::genOpenMPRequires(mlir::Operation *mod,
     offloadMod.setRequires(mlirFlags);
   }
 }
-
-// Walk scopes and materialize omp.declare_mapper ops for mapper declarations
-// found in imported modules. If \p scope is null, start from the global scope.
-void Fortran::lower::materializeOpenMPDeclareMappers(
-    Fortran::lower::AbstractConverter &converter,
-    semantics::SemanticsContext &semaCtx, const semantics::Scope *scope) {
-  const semantics::Scope &root = scope ? *scope : semaCtx.globalScope();
-
-  // Recurse into child scopes first (modules, submodules, etc.).
-  for (const semantics::Scope &child : root.children())
-    materializeOpenMPDeclareMappers(converter, semaCtx, &child);
-
-  // Only consider module scopes to avoid duplicating local constructs.
-  if (!root.IsModule())
-    return;
-
-  // Only materialize for modules coming from mod files to avoid duplicates.
-  if (!root.symbol() || !root.symbol()->test(semantics::Symbol::Flag::ModFile))
-    return;
-
-  // Scan symbols in this module scope for MapperDetails.
-  for (auto &it : root) {
-    const semantics::Symbol &sym = *it.second;
-    if (auto *md = sym.detailsIf<semantics::MapperDetails>()) {
-      for (const auto *decl : md->GetDeclList()) {
-        if (const auto *mapperDecl =
-                std::get_if<parser::OpenMPDeclareMapperConstruct>(&decl->u)) {
-          genOpenMPDeclareMapperImpl(converter, semaCtx, *mapperDecl, &sym);
-        }
-      }
-    }
-  }
-}
diff --git a/flang/lib/Semantics/mod-file.cpp b/flang/lib/Semantics/mod-file.cpp
index 840b98dd42139..b419864f73b8e 100644
--- a/flang/lib/Semantics/mod-file.cpp
+++ b/flang/lib/Semantics/mod-file.cpp
@@ -59,7 +59,6 @@ static void PutBound(llvm::raw_ostream &, const Bound &);
 static void PutShapeSpec(llvm::raw_ostream &, const ShapeSpec &);
 static void PutShape(
     llvm::raw_ostream &, const ArraySpec &, char open, char close);
-static void PutMapper(llvm::raw_ostream &, const Symbol &, SemanticsContext &);
 static llvm::raw_ostream &PutAttr(llvm::raw_ostream &, Attr);
 static llvm::raw_ostream &PutType(llvm::raw_ostream &, const DeclTypeSpec &);
@@ -939,7 +938,6 @@ void ModFileWriter::PutEntity(llvm::raw_ostream &os, const Symbol &symbol) {
       [&](const ProcEntityDetails &) { PutProcEntity(os, symbol); },
       [&](const TypeParamDetails &) { PutTypeParam(os, symbol); },
       [&](const UserReductionDetails &) { PutUserReduction(os, symbol); },
-      [&](const MapperDetails &) { PutMapper(decls_, symbol, context_); },
       [&](const auto &) {
         common::die("PutEntity: unexpected details: %s",
             DetailsToString(symbol.details()).c_str());
@@ -1103,16 +1101,6 @@ void ModFileWriter::PutUserReduction(
   }
 }

-static void PutMapper(
-    llvm::raw_ostream &os, const Symbol &symbol, SemanticsContext &context) {
-  const auto &details{symbol.get<MapperDetails>()};
-  // Emit each saved DECLARE MAPPER construct as-is, so that consumers of the
-  // module can reparse it and recreate the mapper symbol and semantics state.
-  for (const auto *decl : details.GetDeclList()) {
-    Unparse(os, *decl, context.langOptions());
-  }
-}
-
 void PutInit(llvm::raw_ostream &os, const Symbol &symbol, const MaybeExpr &init,
     const parser::Expr *unanalyzed, SemanticsContext &context) {
   if (IsNamedConstant(symbol) || symbol.owner().IsDerivedType()) {
diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index ea0d38c573af9..09ec951a422ca 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -1852,25 +1852,21 @@ bool OmpVisitor::Pre(const parser::OmpMapClause &x) {
       // TODO: Do we need a specific flag or type here, to distinghuish against
       // other ConstructName things? Leaving this for the full implementation
       // of mapper lowering.
-      auto &ultimate{symbol->GetUltimate()};
-      auto *misc{ultimate.detailsIf<MiscDetails>()};
-      auto *md{ultimate.detailsIf<MapperDetails>()};
-      if (!md && (!misc || misc->kind() != MiscDetails::Kind::ConstructName))
+      auto *misc{symbol->detailsIf<MiscDetails>()};
+      if (!misc || misc->kind() != MiscDetails::Kind::ConstructName)
         context().Say(mapper->v.source,
             "Name '%s' should be a mapper name"_err_en_US, mapper->v.source);
       else
         mapper->v.symbol = symbol;
     } else {
-      // Allow the special 'default' mapper identifier without prior
-      // declaration so lowering can recognize and handle it. Emit an
-      // error for any other missing mapper identifier.
-      if (mapper->v.source.ToString() == "default") {
-        mapper->v.symbol = &MakeSymbol(
-            mapper->v, MiscDetails{MiscDetails::Kind::ConstructName});
-      } else {
-        context().Say(
-            mapper->v.source, "'%s' not declared"_err_en_US, mapper->v.source);
-      }
+      mapper->v.symbol =
+          &MakeSymbol(mapper->v, MiscDetails{MiscDetails::Kind::ConstructName});
+      // TODO: When completing the implementation, we probably want to error if
+      // the symbol is not declared, but right now, testing that the TODO for
+      // OmpMapClause happens is obscured by the TODO for declare mapper, so
+      // leaving this out. Remove the above line once the declare mapper is
+      // implemented. context().Say(mapper->v.source, "'%s' not
+      // declared"_err_en_US, mapper->v.source);
     }
   }
   return true;
@@ -1884,15 +1880,8 @@ void OmpVisitor::ProcessMapperSpecifier(const parser::OmpMapperSpecifier &spec,
   // the type has been fully processed.
   BeginDeclTypeSpec();
   auto &mapperName{std::get<std::string>(spec.t)};
-  // Create or update the mapper symbol with MapperDetails and
-  // keep track of the declarative construct for module emission.
-  Symbol &mapperSym{MakeSymbol(parser::CharBlock(mapperName), Attrs{})};
-  if (auto *md{mapperSym.detailsIf<MapperDetails>()}) {
-    md->AddDecl(declaratives_.back());
-  } else if (mapperSym.has<UnknownDetails>() || mapperSym.has<MiscDetails>()) {
-    mapperSym.set_details(MapperDetails{});
-    mapperSym.get<MapperDetails>().AddDecl(declaratives_.back());
-  }
+  MakeSymbol(parser::CharBlock(mapperName), Attrs{},
+      MiscDetails{MiscDetails::Kind::ConstructName});
   PushScope(Scope::Kind::OtherConstruct, nullptr);
   Walk(std::get<parser::TypeSpec>(spec.t));
   auto &varName{std::get<parser::ObjectName>(spec.t)};
@@ -3622,20 +3611,10 @@ void ModuleVisitor::Post(const parser::UseStmt &x) {
         rename.u);
   }
   for (const auto &[name, symbol] : *useModuleScope_) {
-    // Default USE imports public names, excluding intrinsic-only and most
-    // miscellaneous details. Allow OpenMP mapper identifiers represented
-    // as MapperDetails, and also legacy MiscDetails::ConstructName.
-    bool isMapper{symbol->has<MapperDetails>()};
-    if (!isMapper) {
-      if (const auto *misc{symbol->detailsIf<MiscDetails>()}) {
-        isMapper = misc->kind() == MiscDetails::Kind::ConstructName;
-      }
-    }
     if (symbol->attrs().test(Attr::PUBLIC) && !IsUseRenamed(symbol->name()) &&
         (!symbol->implicitAttrs().test(Attr::INTRINSIC) ||
             symbol->has<UseDetails>()) &&
-        (!symbol->has<MiscDetails>() || isMapper) &&
-        useNames.count(name) == 0) {
+        !symbol->has<MiscDetails>() && useNames.count(name) == 0) {
       SourceName location{x.moduleName.source};
       if (auto *localSymbol{FindInScope(name)}) {
         DoAddUse(location, localSymbol->name(), *localSymbol, *symbol);
diff --git a/flang/lib/Semantics/symbol.cpp b/flang/lib/Semantics/symbol.cpp
index ed0715a422e78..0ec44b7c40491 100644
--- a/flang/lib/Semantics/symbol.cpp
+++ b/flang/lib/Semantics/symbol.cpp
@@ -338,8 +338,7 @@ std::string DetailsToString(const Details &details) {
       [](const TypeParamDetails &) { return "TypeParam"; },
       [](const MiscDetails &) { return "Misc"; },
       [](const AssocEntityDetails &) { return "AssocEntity"; },
-      [](const UserReductionDetails &) { return "UserReductionDetails"; },
-      [](const MapperDetails &) { return "MapperDetails"; }},
+      [](const UserReductionDetails &) { return "UserReductionDetails"; }},
      details);
 }
@@ -380,7 +379,6 @@ bool Symbol::CanReplaceDetails(const Details &details) const {
           [&](const UserReductionDetails &) {
             return has<UserReductionDetails>();
           },
-          [&](const MapperDetails &) { return has<MapperDetails>(); },
           [](const auto &) { return false; },
       },
       details);
@@ -687,8 +685,6 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const Details &details) {
             DumpType(os, type);
           }
         },
-        // Avoid recursive streaming for MapperDetails; nothing more to dump
-        [&](const MapperDetails &) {},
         [&](const auto &x) { os << x; },
       },
       details);
diff --git a/flang/test/Lower/Intrinsics/adjustl.f90 b/flang/test/Lower/Intrinsics/adjustl.f90
index a742f58db5c48..b66a8409c083d 100644
--- a/flang/test/Lower/Intrinsics/adjustl.f90
+++ b/flang/test/Lower/Intrinsics/adjustl.f90
@@ -16,4 +16,3 @@ subroutine adjustl_test
 ! CHECK: fir.call @_FortranAAdjustl(%[[r3]], %[[r4]], %[[r5]], %{{.*}}) {{.*}}: (!fir.ref>, !fir.box, !fir.ref, i32) -> ()
   adjust_str = adjustl(adjust_str)
 end subroutine
-
diff --git a/flang/test/Lower/Intrinsics/adjustr.f90 b/flang/test/Lower/Intrinsics/adjustr.f90
index a929ab17ab9ff..8e823718db9f2 100644
--- a/flang/test/Lower/Intrinsics/adjustr.f90
+++ b/flang/test/Lower/Intrinsics/adjustr.f90
@@ -16,4 +16,3 @@ subroutine adjustr_test
 ! CHECK: fir.call @_FortranAAdjustr(%[[r3]], %[[r4]], %[[r5]], %{{.*}}) {{.*}}: (!fir.ref>, !fir.box, !fir.ref, i32) -> ()
   adjust_str = adjustr(adjust_str)
 end subroutine
-
diff --git a/flang/test/Lower/Intrinsics/associated.f90 b/flang/test/Lower/Intrinsics/associated.f90
index 9308ec7cc1a15..b32e0abd9bb0a 100644
--- a/flang/test/Lower/Intrinsics/associated.f90
+++ b/flang/test/Lower/Intrinsics/associated.f90
@@ -23,11 +23,11 @@ subroutine associated_test(scalar, array)
 ! CHECK: fir.call @_FortranAPointerIsAssociatedWith(%[[sbox]], %[[zbox]]) {{.*}}: (!fir.box, !fir.box) -> i1
   print *, associated(scalar, ziel)
 end subroutine
- 
+
 subroutine test_func_results()
   interface
     function get_pointer()
-      real, pointer :: get_pointer(:) 
+      real, pointer :: get_pointer(:)
     end function
   end interface
 ! CHECK: %[[result:.*]] = fir.call @_QPget_pointer() {{.*}}: () -> !fir.box>>
@@ -38,7 +38,7 @@ function get_pointer()
 ! CHECK: arith.cmpi ne, %[[addr_cast]], %c0{{.*}} : i64
   print *, associated(get_pointer())
 end subroutine
- 
+
 ! CHECK-LABEL: func @_QPtest_optional_target_1(
 ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>>> {fir.bindc_name = "p"},
 ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref> {fir.bindc_name = "optionales_ziel", fir.optional, fir.target}) {
@@ -61,7 +61,7 @@ subroutine test_optional_target_1(p, optionales_ziel)
 ! CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_4]] : (!fir.box>) -> !fir.box
 ! CHECK: fir.call @_FortranAPointerIsAssociatedWith(%[[VAL_14]], %[[VAL_15]]) {{.*}}: (!fir.box, !fir.box) -> i1
 end subroutine
- 
+
 ! CHECK-LABEL: func @_QPtest_optional_target_2(
 ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>>> {fir.bindc_name = "p"},
 ! CHECK-SAME: %[[VAL_1:.*]]: !fir.box> {fir.bindc_name = "optionales_ziel", fir.optional, fir.target}) {
@@ -81,7 +81,7 @@ subroutine test_optional_target_2(p, optionales_ziel)
 ! CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_8]] : (!fir.box>) -> !fir.box
 ! CHECK: fir.call @_FortranAPointerIsAssociatedWith(%[[VAL_11]], %[[VAL_12]]) {{.*}}: (!fir.box, !fir.box) -> i1
 end subroutine
- 
+
 ! CHECK-LABEL: func @_QPtest_optional_target_3(
 ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>>> {fir.bindc_name = "p"},
 ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref>>> {fir.bindc_name = "optionales_ziel", fir.optional}) {
@@ -102,7 +102,7 @@ subroutine test_optional_target_3(p, optionales_ziel)
 ! CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_9]] : (!fir.box>>) -> !fir.box
 ! CHECK: fir.call @_FortranAPointerIsAssociatedWith(%[[VAL_12]], %[[VAL_13]]) {{.*}}: (!fir.box, !fir.box) -> i1
 end subroutine
- 
+
 ! CHECK-LABEL: func @_QPtest_optional_target_4(
 ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>>> {fir.bindc_name = "p"},
 ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref>>> {fir.bindc_name = "optionales_ziel", fir.optional, fir.target}) {
@@ -123,7 +123,7 @@ subroutine test_optional_target_4(p, optionales_ziel)
 ! CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_9]] : (!fir.box>>) -> !fir.box
 ! CHECK: fir.call @_FortranAPointerIsAssociatedWith(%[[VAL_12]], %[[VAL_13]]) {{.*}}: (!fir.box, !fir.box) -> i1
 end subroutine
- 
+
 ! CHECK-LABEL: func @_QPtest_pointer_target(
 ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>>> {fir.bindc_name = "p"},
 ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref>>> {fir.bindc_name = "pointer_ziel"}) {
@@ -137,7 +137,7 @@ subroutine test_pointer_target(p, pointer_ziel)
 ! CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_7]] : (!fir.box>>) -> !fir.box
 ! CHECK: fir.call @_FortranAPointerIsAssociatedWith(%[[VAL_9]], %[[VAL_10]]) {{.*}}: (!fir.box, !fir.box) -> i1
 end subroutine
- 
+
 ! CHECK-LABEL: func @_QPtest_allocatable_target(
 ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>>> {fir.bindc_name = "p"},
 ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref>>> {fir.bindc_name = "allocatable_ziel", fir.target}) {
diff --git a/flang/test/Lower/Intrinsics/btest.f90 b/flang/test/Lower/Intrinsics/btest.f90
index 6c0fccd0f5a9f..b10850ef0b5e6 100644
--- a/flang/test/Lower/Intrinsics/btest.f90
+++ b/flang/test/Lower/Intrinsics/btest.f90
@@ -15,4 +15,3 @@ function btest_test(i, j)
 ! CHECK: return %[[VAL_9]] : !fir.logical<4>
   btest_test = btest(i, j)
 end
- 
\ No newline at end of file
diff --git a/flang/test/Lower/Intrinsics/ceiling.f90 b/flang/test/Lower/Intrinsics/ceiling.f90
index 8c283de762e28..3c87bec3032e0 100644
--- a/flang/test/Lower/Intrinsics/ceiling.f90
+++ b/flang/test/Lower/Intrinsics/ceiling.f90
@@ -16,5 +16,3 @@ subroutine ceiling_test2(i, a)
 ! CHECK: %[[f:.*]] = math.ceil %{{.*}} : f32
 ! CHECK: fir.convert %[[f]] : (f32) -> i64
 end subroutine
-
-
diff --git a/flang/test/Lower/Intrinsics/count.f90 b/flang/test/Lower/Intrinsics/count.f90
index c3efe6b4bf077..064d01163985d 100644
--- a/flang/test/Lower/Intrinsics/count.f90
+++ b/flang/test/Lower/Intrinsics/count.f90
@@ -11,7 +11,7 @@ subroutine count_test1(rslt, mask)
   rslt = count(mask)
 ! CHECK: %[[a5:.*]] = fir.call @_FortranACount(%[[a2]], %{{.*}}, %{{.*}}, %[[a4]]) {{.*}}: (!fir.box, !fir.ref, i32, i32) -> i64
 end subroutine
- 
+
 ! CHECK-LABEL: test_count2
 ! CHECK-SAME: %[[arg0:.*]]: !fir.box>{{.*}}, %[[arg1:.*]]: !fir.box>>{{.*}})
 subroutine test_count2(rslt, mask)
@@ -29,7 +29,7 @@ subroutine test_count2(rslt, mask)
 ! CHECK: %[[a12:.*]] = fir.box_addr %[[a10]] : (!fir.box>>) -> !fir.heap>
 ! CHECK: fir.freemem %[[a12]]
 end subroutine
- 
+
 ! CHECK-LABEL: test_count3
 ! CHECK-SAME: %[[arg0:.*]]: !fir.ref{{.*}}, %[[arg1:.*]]: !fir.box>>{{.*}})
 subroutine test_count3(rslt, mask)
diff --git a/flang/test/Lower/Intrinsics/cpu_time.f90 b/flang/test/Lower/Intrinsics/cpu_time.f90
index 25ff4f8821145..73eead3796444 100644
--- a/flang/test/Lower/Intrinsics/cpu_time.f90
+++ b/flang/test/Lower/Intrinsics/cpu_time.f90
@@ -8,4 +8,3 @@ subroutine cpu_time_test(t)
 ! CHECK: fir.store %[[result32]] to %arg0 : !fir.ref
   call cpu_time(t)
 end subroutine
-
diff --git a/flang/test/Lower/Intrinsics/date_and_time.f90 b/flang/test/Lower/Intrinsics/date_and_time.f90
index 55b1383766cb8..d9ca46ea83dc6 100644
--- a/flang/test/Lower/Intrinsics/date_and_time.f90
+++ b/flang/test/Lower/Intrinsics/date_and_time.f90
@@ -18,13 +18,13 @@ subroutine date_and_time_test(date, time, zone, values)
 ! CHECK: fir.call @_FortranADateAndTime(%[[dateBuffer]], %[[dateLen]], %[[timeBuffer]], %[[timeLen]], %[[zoneBuffer]], %[[zoneLen]], %{{.*}}, %{{.*}}, %[[valuesCast]]) {{.*}}: (!fir.ref, i64, !fir.ref, i64, !fir.ref, i64, !fir.ref, i32, !fir.box) -> ()
   call date_and_time(date, time, zone, values)
 end subroutine
- 
+
 ! CHECK-LABEL: func @_QPdate_and_time_test2(
 ! CHECK-SAME: %[[date:.*]]: !fir.boxchar<1>{{.*}})
 subroutine date_and_time_test2(date)
   character(*) :: date
 ! CHECK: %[[dateUnbox:.*]]:2 = fir.unboxchar %[[date]] : (!fir.boxchar<1>) -> (!fir.ref>, index)
-! CHECK: %[[values:.*]] = fir.absent !fir.box 
+! CHECK: %[[values:.*]] = fir.absent !fir.box
 ! CHECK: %[[dateBuffer:.*]] = fir.convert %[[dateUnbox]]#0 : (!fir.ref>) -> !fir.ref
 ! CHECK: %[[dateLen:.*]] = fir.convert %[[dateUnbox]]#1 : (index) -> i64
 ! CHECK: %[[timeBuffer:.*]] = fir.convert %c0{{.*}} : (index) -> !fir.ref
@@ -34,7 +34,7 @@ subroutine date_and_time_test2(date)
 ! CHECK: fir.call @_FortranADateAndTime(%[[dateBuffer]], %[[dateLen]], %[[timeBuffer]], %[[timeLen]], %[[zoneBuffer]], %[[zoneLen]], %{{.*}}, %{{.*}}, %[[values]]) {{.*}}: (!fir.ref, i64, !fir.ref, i64, !fir.ref, i64, !fir.ref, i32, !fir.box) -> ()
   call date_and_time(date)
 end subroutine
- 
+
 ! CHECK-LABEL: func @_QPdate_and_time_dynamic_optional(
 ! CHECK-SAME: %[[VAL_0:[^:]*]]: !fir.boxchar<1>
 ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref>>>
diff --git a/flang/test/Lower/Intrinsics/eoshift.f90 b/flang/test/Lower/Intrinsics/eoshift.f90
index 9cd0b86fadc52..4f01ce989c9f0 100644
--- a/flang/test/Lower/Intrinsics/eoshift.f90
+++ b/flang/test/Lower/Intrinsics/eoshift.f90
@@ -13,16 +13,16 @@ subroutine eoshift_test1(arr, shift)
 ! CHECK: fir.store %[[init]] to %[[resBox]] : !fir.ref>>>>
 ! CHECK: %[[boundBox:.*]] = fir.absent !fir.box
 ! CHECK: %[[shift:.*]] = fir.load %arg1 : !fir.ref
- 
+
   res = eoshift(arr, shift)
- 
+
 ! CHECK: %[[resIRBox:.*]] = fir.convert %[[resBox]] : (!fir.ref>>>>) -> !fir.ref>
 ! CHECK: %[[arrBox:.*]] = fir.convert %[[arr]] : (!fir.box>>) -> !fir.box
 ! CHECK: %[[shiftBox:.*]] = fir.convert %[[shift]] : (i32) -> i64
 ! CHECK: fir.call @_FortranAEoshiftVector(%[[resIRBox]], %[[arrBox]], %[[shiftBox]], %[[boundBox]], {{.*}}, {{.*}}) {{.*}}: (!fir.ref>, !fir.box, i64, !fir.box, !fir.ref, i32) -> ()
 ! CHECK: fir.array_merge_store %[[resLoad]], {{.*}} to %[[res]] : !fir.array<3x!fir.logical<4>>, !fir.array<3x!fir.logical<4>>, !fir.ref>>
 end subroutine eoshift_test1
- 
+
 ! CHECK-LABEL: eoshift_test2
 subroutine eoshift_test2(arr, shift, bound, dim)
   integer, dimension(3,3) :: arr, res
   integer :: shift, bound, dim
 ! CHECK: %[[resBox:.*]] = fir.alloca !fir.box>>
 ! CHECK: %[[res:.*]] = fir.alloca !fir.array<3x3xi32> {bindc_name = "res", uniq_name = "_QFeoshift_test2Eres"}
 !CHECK: %[[resLoad:.*]] = fir.array_load %[[res]]({{.*}}) : (!fir.ref>, !fir.shape<2>) -> !fir.array<3x3xi32>
- 
+
   res = eoshift(arr, shift, bound, dim)
- 
+
 ! CHECK: %[[arr:.*]] = fir.embox %arg0({{.*}}) : (!fir.ref>, !fir.shape<2>) -> !fir.box>
 ! CHECK: %[[boundBox:.*]] = fir.embox %arg2 : (!fir.ref) -> !fir.box
 ! CHECK: %[[dim:.*]] = fir.load %arg3 : !fir.ref
 ! CHECK: %[[resIRBox:.*]] = fir.convert %[[resBox]] : (!fir.ref>>>) -> !fir.ref>
 ! CHECK: %[[arrBox:.*]] = fir.convert %[[arr]] : (!fir.box>) -> !fir.box
 ! CHECK: %[[shiftBoxNone:.*]] = fir.convert %[[shiftBox]] : (!fir.box>) -> !fir.box
 ! CHECK: %[[boundBoxNone:.*]] = fir.convert %[[boundBox]] : (!fir.box) -> !fir.box
- 
+
 ! CHECK: fir.call @_FortranAEoshift(%[[resIRBox]], %[[arrBox]], %[[shiftBoxNone]], %[[boundBoxNone]], %[[dim]], {{.*}}, {{.*}}) {{.*}}: (!fir.ref>, !fir.box, !fir.box, !fir.box, i32, !fir.ref, i32) -> ()
 ! CHECK: fir.array_merge_store %[[resLoad]], {{.*}} to %[[res]] : !fir.array<3x3xi32>, !fir.array<3x3xi32>, !fir.ref>
 end subroutine eoshift_test2
- 
+
 ! CHECK-LABEL: eoshift_test3
 subroutine eoshift_test3(arr, shift, dim)
   character(4), dimension(3,3) :: arr, res
   integer :: shift, dim
- 
+
 ! CHECK: %[[resBox:.*]] = fir.alloca !fir.box>>>
 ! CHECK: %[[arr:.*]]:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref>, index)
 ! CHECK: %[[array:.*]] = fir.convert %[[arr]]#0 : (!fir.ref>) -> !fir.ref>>
 ! CHECK: %[[resLoad:.*]] = fir.array_load %[[res]]({{.*}}) : (!fir.ref>>, !fir.shape<2>) -> !fir.array<3x3x!fir.char<1,4>>
 ! CHECK: %[[arrayBox:.*]] = fir.embox %[[array]]({{.*}}) : (!fir.ref>>, !fir.shape<2>) -> !fir.box>>
 ! CHECK: %[[dim:.*]] = fir.load %arg2 : !fir.ref
- 
+
   res = eoshift(arr, SHIFT=shift, DIM=dim)
- 
+
 ! CHECK: %[[boundBox:.*]] = fir.absent !fir.box
 ! CHECK: %[[shiftBox:.*]] = fir.embox %arg1 : (!fir.ref) -> !fir.box
 ! CHECK: %[[resIRBox:.*]] = fir.convert %[[resBox]] : (!fir.ref>>>>) -> !fir.ref>
 ! CHECK: %[[arrayBoxNone:.*]] = fir.convert %[[arrayBox]]
 ! CHECK: fir.call @_FortranAEoshift(%[[resIRBox]], %[[arrayBoxNone]], %[[shiftBoxNone]], %[[boundBox]], %[[dim]], {{.*}}, {{.*}}) {{.*}}: (!fir.ref>, !fir.box, !fir.box, !fir.box, i32, !fir.ref, i32) -> ()
 ! CHECK: fir.array_merge_store %[[resLoad]], {{.*}} to %[[res]] : !fir.array<3x3x!fir.char<1,4>>, !fir.array<3x3x!fir.char<1,4>>, !fir.ref>>
 end subroutine eoshift_test3
- 
+
 ! CHECK-LABEL: func @_QPeoshift_test_dynamic_optional(
 ! CHECK-SAME: %[[VAL_0:.*]]: !fir.box>
 ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref
diff --git a/flang/test/Lower/Intrinsics/execute_command_line-optional.f90 b/flang/test/Lower/Intrinsics/execute_command_line-optional.f90
index f8c667f3fa82d..a4137dfd47f79 100644
--- a/flang/test/Lower/Intrinsics/execute_command_line-optional.f90
+++ b/flang/test/Lower/Intrinsics/execute_command_line-optional.f90
@@ -12,9 +12,9 @@ subroutine all_args_optional(command, isWait, exitVal, cmdVal, msg)
   LOGICAL, OPTIONAL :: isWait
   ! Note: command is not optional in execute_command_line and must be present
   call execute_command_line(command, isWait, exitVal, cmdVal, msg)
-! CHECK-NEXT: %[[c14:.*]] = arith.constant 14 : i32 
-! CHECK-NEXT: %true = arith.constant true 
-! CHECK-NEXT: %[[c0:.*]] = arith.constant 0 : i64 
+! CHECK-NEXT: %[[c14:.*]] = arith.constant 14 : i32
+! CHECK-NEXT: %true = arith.constant true
+! CHECK-NEXT: %[[c0:.*]] = arith.constant 0 : i64
 ! CHECK-NEXT: %[[DSCOPE:.*]] = fir.dummy_scope : !fir.dscope
 ! CHECK-NEXT: %[[cmdstatDeclare:.*]] = fir.declare %[[cmdstatArg]] dummy_scope %[[DSCOPE]] arg {{[0-9]+}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QFall_args_optionalEcmdval"} : (!fir.ref, !fir.dscope) -> !fir.ref
 ! CHECK-NEXT: %[[commandUnbox:.*]]:2 = fir.unboxchar %[[commandArg]] : (!fir.boxchar<1>) -> (!fir.ref>, index)
@@ -35,7 +35,7 @@ subroutine all_args_optional(command, isWait, exitVal, cmdVal, msg)
 ! CHECK-NEXT: %[[cmdstatArgBox:.*]] = fir.embox %[[cmdstatDeclare]] : (!fir.ref) -> !fir.box
 ! CHECK-NEXT: %[[cmdstatBox:.*]] = arith.select %[[cmdstatIsPresent]], %[[cmdstatArgBox]], %[[absentBoxi32]] : !fir.box
 ! CHECK-NEXT: %[[cmdmsgArgBox:.*]] = fir.embox %[[cmdmsgDeclare]] typeparams %[[cmdmsgUnbox]]#1 : (!fir.ref>, index) -> !fir.box>
-! CHECK-NEXT: %[[absentBox:.*]] = fir.absent !fir.box> 
+! CHECK-NEXT: %[[absentBox:.*]] = fir.absent !fir.box>
 ! CHECK-NEXT: %[[cmdmsgBox:.*]] = arith.select %[[cmdmsgIsPresent]], %[[cmdmsgArgBox]], %[[absentBox]] : !fir.box>
 ! CHECK-NEXT: %[[waitCast:.*]] = fir.convert %[[waitDeclare]] : (!fir.ref>) -> i64
 ! CHECK-NEXT: %[[waitPresent:.*]] = arith.cmpi ne, %[[waitCast]], %[[c0]] : i64
diff --git a/flang/test/Lower/Intrinsics/execute_command_line.f90 b/flang/test/Lower/Intrinsics/execute_command_line.f90
index e70513068ab3e..e29c09688e6d1 100644
--- a/flang/test/Lower/Intrinsics/execute_command_line.f90
+++ b/flang/test/Lower/Intrinsics/execute_command_line.f90
@@ -11,9 +11,9 @@ subroutine all_args(command, isWait, exitVal, cmdVal, msg)
   INTEGER :: exitVal, cmdVal
   LOGICAL :: isWait
   call execute_command_line(command, isWait, exitVal, cmdVal, msg)
-! CHECK-NEXT: %[[c13:.*]] = arith.constant 13 : i32 
-! CHECK-NEXT: %true = arith.constant true 
-! CHECK-NEXT: %[[c0:.*]] = arith.constant 0 : i64 
+! CHECK-NEXT: %[[c13:.*]] = arith.constant 13 : i32
+! CHECK-NEXT: %true = arith.constant true
+! CHECK-NEXT: %[[c0:.*]] = arith.constant 0 : i64
 ! CHECK-NEXT: %[[c30:.*]] = arith.constant 30 : index
 ! CHECK-NEXT: %[[DSCOPE:.*]] = fir.dummy_scope : !fir.dscope
 ! CHECK-NEXT: %[[cmdstatsDeclare:.*]] = fir.declare %[[cmdstatArg]] dummy_scope %[[DSCOPE]] arg {{[0-9]+}} {uniq_name = "_QFall_argsEcmdval"} : (!fir.ref, !fir.dscope) -> !fir.ref
@@ -51,8 +51,8 @@ end subroutine all_args
 subroutine only_command_default_wait_true(command)
   CHARACTER(30) :: command
   call execute_command_line(command)
-! CHECK-NEXT: %[[c52:.*]] = arith.constant 53 : i32 
-! CHECK-NEXT: %true = arith.constant true 
+! CHECK-NEXT: %[[c52:.*]] = arith.constant 53 : i32
+! CHECK-NEXT: %true = arith.constant true
 ! CHECK-NEXT: %[[c30:.*]] = arith.constant 30 : index
 ! CHECK-NEXT: %[[DSCOPE:.*]] = fir.dummy_scope : !fir.dscope
 ! CHECK-NEXT: %[[commandUnbox:.*]]:2 = fir.unboxchar %[[cmdArg]] : (!fir.boxchar<1>) -> (!fir.ref>, index)
@@ -60,7 +60,7 @@ subroutine only_command_default_wait_true(command)
 ! CHECK-NEXT: %[[commandDeclare:.*]] = fir.declare %[[commandCast]] typeparams %[[c30]] dummy_scope %[[DSCOPE]] arg {{[0-9]+}} {uniq_name = "_QFonly_command_default_wait_trueEcommand"} : (!fir.ref>, index, !fir.dscope) -> !fir.ref>
 ! CHECK-NEXT: %[[commandBox:.*]] = fir.embox %[[commandDeclare]] : (!fir.ref>) -> !fir.box>
 ! CHECK-NEXT: %[[absent:.*]] = fir.absent !fir.box
-! CHECK: %[[command:.*]] = fir.convert %[[commandBox]] : (!fir.box>) -> !fir.box 
+! CHECK: %[[command:.*]] = fir.convert %[[commandBox]] : (!fir.box>) -> !fir.box
 ! CHECK: fir.call @_FortranAExecuteCommandLine(%[[command]], %true, %[[absent]], %[[absent]], %[[absent]], %[[VAL_7:.*]], %[[c52]]) fastmath<contract> : (!fir.box, i1, !fir.box, !fir.box, !fir.box, !fir.ref, i32) -> ()
 ! CHECK-NEXT: return
 end subroutine only_command_default_wait_true
diff --git a/flang/test/Lower/Intrinsics/exit.f90 b/flang/test/Lower/Intrinsics/exit.f90
index d80efc556f95e..49b41346cac0d 100644
--- a/flang/test/Lower/Intrinsics/exit.f90
+++ b/flang/test/Lower/Intrinsics/exit.f90
@@ -10,7 +10,7 @@ subroutine exit_test1
 ! CHECK-32: fir.call @_FortranAExit(%[[status]]) {{.*}}: (i32) -> ()
 ! CHECK-64: fir.call @_FortranAExit(%[[statusConvert]]) {{.*}}: (i32) -> ()
 end subroutine exit_test1
- 
+
 ! CHECK-LABEL: func @_QPexit_test2(
 ! CHECK-SAME: %[[statusArg:.*]]: !fir.ref{{.*}}) {
 subroutine exit_test2(status)
diff --git a/flang/test/Lower/Intrinsics/extends_type_of.f90 b/flang/test/Lower/Intrinsics/extends_type_of.f90
index f99a63e30a552..d69e35ff934d9 100644
--- a/flang/test/Lower/Intrinsics/extends_type_of.f90
+++ b/flang/test/Lower/Intrinsics/extends_type_of.f90
@@ -9,7 +9,7 @@ module extends_type_of_mod
   type, extends(p1) :: p2
     integer :: b
   end type
- 
+
   type k1(a)
     integer, kind :: a
   end type
diff --git a/flang/test/Lower/Intrinsics/floor.f90 b/flang/test/Lower/Intrinsics/floor.f90
index 63d6d2fccee86..b478b6732efeb 100644
--- a/flang/test/Lower/Intrinsics/floor.f90
+++ b/flang/test/Lower/Intrinsics/floor.f90
@@ -16,4 +16,3 @@ subroutine floor_test2(i, a)
 ! CHECK: %[[f:.*]] = math.floor %{{.*}} : f32
 ! CHECK: fir.convert %[[f]] : (f32) -> i64
 end subroutine
-
diff --git a/flang/test/Lower/Intrinsics/get_command_argument-optional.f90 b/flang/test/Lower/Intrinsics/get_command_argument-optional.f90
index c1b081b6112b9..545ca663feeb7 100644
--- a/flang/test/Lower/Intrinsics/get_command_argument-optional.f90
+++ b/flang/test/Lower/Intrinsics/get_command_argument-optional.f90
@@ -7,11 +7,11 @@
 ! CHECK-SAME: %[[lengthParam:.*]]: !fir.ref {fir.bindc_name = "length", fir.optional},
 ! CHECK-SAME: %[[statusParam:.*]]: !fir.ref {fir.bindc_name = "status", fir.optional},
 ! CHECK-SAME: %[[errmsgParam:.*]]: !fir.boxchar<1> {fir.bindc_name = "errmsg", fir.optional}) {
-subroutine test(number, value, length, status, errmsg) 
+subroutine test(number, value, length, status, errmsg)
   integer, optional :: number, status, length
   character(*), optional :: value, errmsg
   ! Note: number cannot be absent
-  call get_command_argument(number, value, length, status, errmsg) 
+  call get_command_argument(number, value, length, status, errmsg)
 ! CHECK: %[[errmsgUnboxed:.*]]:2 = fir.unboxchar %[[errmsgParam]] : (!fir.boxchar<1>) -> (!fir.ref>, index)
 ! CHECK: %[[valueUnboxed:.*]]:2 = fir.unboxchar %[[valueParam]] : (!fir.boxchar<1>) -> (!fir.ref>, index)
 ! CHECK: %[[number:.*]] = fir.load %[[numberParam]] : !fir.ref
diff --git a/flang/test/Lower/Intrinsics/ichar.f90 b/flang/test/Lower/Intrinsics/ichar.f90
index 99284455be097..eb7e03873e6b7 100644
--- a/flang/test/Lower/Intrinsics/ichar.f90
+++ b/flang/test/Lower/Intrinsics/ichar.f90
@@ -37,7 +37,7 @@ subroutine ichar_test(c)
 subroutine no_extui(ch)
   integer, parameter :: kind = selected_char_kind('ISO_10646')
   character(*, kind), intent(in) :: ch(:)
-  integer :: i, j 
+  integer :: i, j
 ! CHECK-NOT: arith.extui
   j = ichar(ch(i)(i:i))
 end subroutine
diff --git a/flang/test/Lower/Intrinsics/ishftc.f90 b/flang/test/Lower/Intrinsics/ishftc.f90
index 70d71128cf9cf..f13d9c06a8197 100644
--- a/flang/test/Lower/Intrinsics/ishftc.f90
+++ b/flang/test/Lower/Intrinsics/ishftc.f90
@@ -40,7 +40,7 @@ function ishftc_test(i, j, k)
 ! CHECK: return %[[VAL_36]] : i32
   ishftc_test = ishftc(i, j, k)
 end
- 
+
 ! Test cases where the size argument presence can only be know at runtime
 module test_ishftc
 contains
@@ -67,9 +67,9 @@ subroutine dyn_optional_scalar(i, shift, size)
 ! CHECK: %[[VAL_19:.*]] = arith.xori %[[VAL_9]], %[[VAL_18]] : i32
 ! CHECK: %[[VAL_20:.*]] = arith.subi %[[VAL_19]], %[[VAL_18]] : i32
 ! CHECK: %[[VAL_21:.*]] = arith.subi %[[VAL_11]], %[[VAL_20]] : i32
-  ! ... as in non optional case 
+  ! ... as in non optional case
 end subroutine
- 
+
 ! CHECK-LABEL: func @_QMtest_ishftcPdyn_optional_array_scalar(
 ! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "i"},
 ! CHECK-SAME: %[[VAL_1:.*]]: !fir.box> {fir.bindc_name = "shift"},
@@ -90,11 +90,11 @@ subroutine dyn_optional_array_scalar(i, shift, size)
 ! CHECK: %[[VAL_26:.*]] = arith.constant 32 : i32
 ! CHECK: fir.result %[[VAL_26]] : i32
 ! CHECK: }
-  ! ... as in non optional case 
+  ! ... as in non optional case
 ! CHECK: }
   print *, ishftc(i, shift, size)
 end subroutine
- 
+
 ! CHECK-LABEL: func @_QMtest_ishftcPdyn_optional_array(
 ! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "i"},
 ! CHECK-SAME: %[[VAL_1:.*]]: !fir.box> {fir.bindc_name = "shift"},
@@ -117,22 +117,22 @@ subroutine dyn_optional_array(i, shift, size)
 ! CHECK: %[[VAL_32:.*]] = arith.constant 32 : i32
 ! CHECK: fir.result %[[VAL_32]] : i32
 ! CHECK: }
-  ! ... as in non optional case 
+  ! ... as in non optional case
 ! CHECK: }
   print *, ishftc(i, shift, size)
 end subroutine
 end module
- 
+
 use test_ishftc
 integer :: i(4) = [333, 334, 335, 336]
 integer :: shift(4) = [2, 1, -1, -2]
 integer :: size(4) = [2, 4, 8, 16]
 call dyn_optional_scalar(i(1), shift(1))
 call dyn_optional_scalar(i(1), shift(1), size(1))
- 
+
 call dyn_optional_array_scalar(i, shift)
 call dyn_optional_array_scalar(i, shift, size(1))
- 
+
 call dyn_optional_array(i, shift)
 call dyn_optional_array(i, shift, size)
 end
diff --git a/flang/test/Lower/Intrinsics/max.f90 b/flang/test/Lower/Intrinsics/max.f90
index 1909a4eca3f67..c3d2457ef1af1 100644
--- a/flang/test/Lower/Intrinsics/max.f90
+++ b/flang/test/Lower/Intrinsics/max.f90
@@ -31,8 +31,8 @@ subroutine dynamic_optional(a, b, c)
 ! CHECK: fir.result %[[VAL_36]] : !fir.array
 ! CHECK: }
   print *, max(a, b, c)
-  end subroutine 
- 
+  end subroutine
+
 ! CHECK-LABEL: func @_QMmax_testPdynamic_optional_array_expr_scalar_optional(
 ! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "a"},
 ! CHECK-SAME: %[[VAL_1:.*]]: !fir.box> {fir.bindc_name = "b"},
 ! CHECK-SAME: %[[VAL_2:.*]]: !fir.ref {fir.bindc_name = "c", fir.optional})
 subroutine dynamic_optional_array_expr_scalar_optional(a, b, c)
@@ -60,8 +60,8 @@ subroutine dynamic_optional_array_expr_scalar_optional(a, b, c)
 ! CHECK: %[[VAL_30:.*]] = fir.array_update %[[VAL_21]], %[[VAL_26]], %[[VAL_20]] : (!fir.array, i32, index) -> !fir.array
 ! CHECK: fir.result %[[VAL_30]] : !fir.array
 ! CHECK: }
-  end subroutine 
- 
+  end subroutine
+
 ! CHECK-LABEL: func @_QMmax_testPdynamic_optional_scalar(
 ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref {fir.bindc_name = "a"},
 ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "b"},
@@ -84,8 +84,8 @@ subroutine dynamic_optional_scalar(a, b, c)
 ! CHECK: fir.result %[[VAL_12]] : i32
 ! CHECK: }
 ! CHECK: fir.call @_FortranAioOutputInteger32(%{{.*}}, %[[VAL_13]]) {{.*}}: (!fir.ref, i32) -> i1
-  end subroutine 
- 
+  end subroutine
+
 ! CHECK-LABEL: func @_QMmax_testPdynamic_optional_weird(
 ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref {fir.bindc_name = "a"},
 ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "b"},
@@ -123,9 +123,9 @@ subroutine dynamic_optional_weird(a, b, c, d, e)
 ! CHECK: fir.result %[[VAL_23]] : i32
 ! CHECK: }
 ! CHECK: fir.call @_FortranAioOutputInteger32(%{{.*}}, %[[VAL_24]]) {{.*}}: (!fir.ref, i32) -> i1
-  end subroutine 
+  end subroutine
 end module
- 
+
 use :: max_test
 integer :: a(4) = [1,12,23, 34]
 integer :: b(4) = [31,22,13, 4]
diff --git a/flang/test/Lower/Intrinsics/maxloc.f90 b/flang/test/Lower/Intrinsics/maxloc.f90
index 87f17881e0476..13dbe984043b6 100644
--- a/flang/test/Lower/Intrinsics/maxloc.f90
+++ b/flang/test/Lower/Intrinsics/maxloc.f90
@@ -18,7 +18,7 @@ subroutine maxloc_test(arr,res)
 ! CHECK-DAG: %[[a14:.*]] = fir.box_addr %[[a12]] : (!fir.box>>) -> !fir.heap>
 ! CHECK-DAG: fir.freemem %[[a14]]
 end subroutine
- 
+
 ! CHECK-LABEL: func @_QPmaxloc_test2(
 ! CHECK-SAME: %[[arg0:.*]]: !fir.box>{{.*}}, %[[arg1:.*]]: !fir.box>{{.*}}, %[[arg2:.*]]: !fir.ref{{.*}}) {
 subroutine maxloc_test2(arr,res,d)
@@ -39,7 +39,7 @@ subroutine maxloc_test2(arr,res,d)
 ! CHECK: %[[a13:.*]] = fir.box_addr %[[a12]] : (!fir.box>) -> !fir.heap
 ! CHECK: fir.freemem %[[a13]]
 end subroutine
- 
+
 ! CHECK-LABEL: func @_QPtest_maxloc_optional_scalar_mask(
 ! CHECK-SAME: %[[VAL_0:[^:]+]]: !fir.ref>
 ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref>
@@ -65,7 +65,7 @@ subroutine test_maxloc_optional_scalar_mask(mask, back, array)
 ! CHECK: %[[VAL_30:.*]] = fir.convert %[[VAL_14]] : (!fir.logical<4>) -> i1
 ! CHECK: fir.call @_FortranAMaxlocInteger4(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[VAL_29]], %[[VAL_30]]) {{.*}}: (!fir.ref>, !fir.box, i32, !fir.ref, i32, !fir.box, i1) -> ()
 end subroutine
- 
+
 ! CHECK-LABEL: func @_QPtest_maxloc_optional_array_mask(
 ! CHECK-SAME: %[[VAL_0:.*]]: !fir.box>>
 ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref>
diff --git a/flang/test/Lower/Intrinsics/merge.f90 b/flang/test/Lower/Intrinsics/merge.f90
index 2e17efcaf5c2a..52417f83294b6 100644
--- a/flang/test/Lower/Intrinsics/merge.f90
+++ b/flang/test/Lower/Intrinsics/merge.f90
@@ -9,7 +9,7 @@ function merge_test(o1, o2, mask)
   merge_test = merge(o1, o2, mask)
 ! CHECK: %[[a0:.*]]:2 = fir.unboxchar %[[arg2]] : (!fir.boxchar<1>) -> (!fir.ref>, index)
 ! CHECK: %[[a0_cast:.*]] = fir.convert %[[a0]]#0 : (!fir.ref>) -> !fir.ref>
-! CHECK: %[[a1:.*]]:2 = fir.unboxchar %[[arg3]] : (!fir.boxchar<1>) -> (!fir.ref>, index) 
+! CHECK: %[[a1:.*]]:2 = fir.unboxchar %[[arg3]] : (!fir.boxchar<1>) -> (!fir.ref>, index)
 ! CHECK: %[[a1_cast:.*]] = fir.convert %[[a1]]#0 : (!fir.ref>) -> !fir.ref>
 ! CHECK: %[[a2:.*]] = fir.load %[[arg4]] : !fir.ref>
 ! CHECK: %[[a3:.*]] = fir.convert %[[a2]] : (!fir.logical<4>) -> i1
diff --git a/flang/test/Lower/Intrinsics/minloc.f90 b/flang/test/Lower/Intrinsics/minloc.f90
index caab36d0f8138..fa3bc9b67ad91 100644
--- a/flang/test/Lower/Intrinsics/minloc.f90
+++ b/flang/test/Lower/Intrinsics/minloc.f90
@@ -18,7 +18,7 @@ subroutine minloc_test(arr,res)
 ! CHECK-DAG: %[[a14:.*]] = fir.box_addr %[[a12]] : (!fir.box>>) -> !fir.heap>
 ! CHECK-DAG: fir.freemem %[[a14]]
 end subroutine
- 
+
 ! CHECK-LABEL: func @_QPminloc_test2(
 ! CHECK-SAME: %[[arg0:.*]]: !fir.box>{{.*}}, %[[arg1:.*]]: !fir.box>{{.*}}, %[[arg2:.*]]: !fir.ref
 subroutine minloc_test2(arr,res,d)
@@ -39,7 +39,7 @@ subroutine minloc_test2(arr,res,d)
 ! CHECK: %[[a13:.*]] = fir.box_addr %[[a12]] : (!fir.box>) -> !fir.heap
 ! CHECK: fir.freemem %[[a13]]
 end subroutine
- 
+
 ! CHECK-LABEL: func @_QPtest_minloc_optional_scalar_mask(
 ! CHECK-SAME: %[[VAL_0:[^:]+]]: !fir.ref>
 ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref>
@@ -65,7 +65,7 @@ subroutine test_minloc_optional_scalar_mask(mask, back, array)
 ! CHECK: %[[VAL_30:.*]] = fir.convert %[[VAL_14]] : (!fir.logical<4>) -> i1
 ! CHECK: fir.call @_FortranAMinlocInteger4(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[VAL_29]], %[[VAL_30]]) {{.*}}: (!fir.ref>, !fir.box, i32, !fir.ref, i32, !fir.box, i1) -> ()
 end subroutine
- 
+
 ! CHECK-LABEL: func @_QPtest_minloc_optional_array_mask(
 ! CHECK-SAME: %[[VAL_0:.*]]: !fir.box>>
 ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref>
diff --git a/flang/test/Lower/Intrinsics/modulo.f90 b/flang/test/Lower/Intrinsics/modulo.f90
index b4ead32c687e8..10b40e4bd00f3 100644
--- a/flang/test/Lower/Intrinsics/modulo.f90
+++ b/flang/test/Lower/Intrinsics/modulo.f90
@@ -20,7 +20,7 @@ subroutine modulo_testr(r, a, p)
 ! ALL: fir.store %[[res]] to %[[arg0]] : !fir.ref
   r = modulo(a, p)
 end subroutine
- 
+
 ! ALL-LABEL: func @_QPmodulo_testi(
 ! ALL-SAME: %[[arg0:.*]]: !fir.ref{{.*}}, %[[arg1:.*]]: !fir.ref{{.*}}, %[[arg2:.*]]: !fir.ref{{.*}}) {
 subroutine modulo_testi(r, a, p)
diff --git a/flang/test/Lower/Intrinsics/nint.f90 b/flang/test/Lower/Intrinsics/nint.f90
index 2f25eda5d20b2..166fdac90d8e1 100644
--- a/flang/test/Lower/Intrinsics/nint.f90
+++ b/flang/test/Lower/Intrinsics/nint.f90
@@ -14,4 +14,3 @@ subroutine nint_test2(i, a)
   i = nint(a, 8)
 ! CHECK: fir.call @llvm.lround.i64.f64
 end subroutine
- 
\ No newline at end of file
diff --git a/flang/test/Lower/Intrinsics/not.f90 b/flang/test/Lower/Intrinsics/not.f90
index 140800c27e878..b772e8b8b37cf 100644
--- a/flang/test/Lower/Intrinsics/not.f90
+++ b/flang/test/Lower/Intrinsics/not.f90
@@ -13,4 +13,3 @@ subroutine not_test
 ! CHECK: return
   destination = not(source)
 end subroutine
- 
\ No newline at end of file
diff --git a/flang/test/Lower/Intrinsics/pack.f90 b/flang/test/Lower/Intrinsics/pack.f90
index a00c10dc2e959..f4eeef7747a82 100644
--- a/flang/test/Lower/Intrinsics/pack.f90
+++ b/flang/test/Lower/Intrinsics/pack.f90
@@ -21,7 +21,7 @@ subroutine pack_test(a,m,v,r)
 ! CHECK: %[[a13:.*]] = fir.box_addr %[[a11]] : (!fir.box>>) -> !fir.heap>
 ! CHECK: fir.freemem %[[a13]]
 end subroutine
- 
+
 ! CHECK-LABEL: func @_QPtest_pack_optional(
 ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>>>
 subroutine test_pack_optional(vector, array, mask)
diff --git a/flang/test/Lower/Intrinsics/perror.f90 b/flang/test/Lower/Intrinsics/perror.f90
index e746e73a5f9bc..a595ab54746bf 100644
--- a/flang/test/Lower/Intrinsics/perror.f90
+++ b/flang/test/Lower/Intrinsics/perror.f90
@@ -11,13 +11,13 @@ subroutine test_perror()
 ! CHECK: %[[C10:.*]] = arith.constant 10 : index
CHECK: %[[VAL_2:.*]] = fir.alloca !fir.char<1,10> {bindc_name = "string", uniq_name = "_QFtest_perrorEstring"} ! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] typeparams %[[C10]] {uniq_name = "_QFtest_perrorEstring"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) - + call perror(string) ! CHECK: %[[VAL_4:.*]] = fir.embox %[[VAL_3]]#0 : (!fir.ref>) -> !fir.box> ! CHECK: %[[VAL_5:.*]] = fir.box_addr %[[VAL_4]] : (!fir.box>) -> !fir.ref> ! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_5]] : (!fir.ref>) -> !fir.ref - ! CHECK: fir.call @_FortranAPerror(%[[VAL_6]]) fastmath : (!fir.ref) -> () - + ! CHECK: fir.call @_FortranAPerror(%[[VAL_6]]) fastmath : (!fir.ref) -> () + call perror("prefix") ! CHECK: %[[VAL_7:.*]] = fir.address_of(@{{.*}}) : !fir.ref> ! CHECK: %[[C6:.*]] = arith.constant 6 : index @@ -25,13 +25,13 @@ subroutine test_perror() ! CHECK: %[[VAL_9:.*]] = fir.embox %[[VAL_8]]#0 : (!fir.ref>) -> !fir.box> ! CHECK: %[[VAL_10:.*]] = fir.box_addr %[[VAL_9]] : (!fir.box>) -> !fir.ref> ! CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (!fir.ref>) -> !fir.ref - ! CHECK: fir.call @_FortranAPerror(%[[VAL_11]]) fastmath : (!fir.ref) -> () - + ! CHECK: fir.call @_FortranAPerror(%[[VAL_11]]) fastmath : (!fir.ref) -> () + call perror(one) ! CHECK: %[[VAL_12:.*]] = fir.embox %[[VAL_1]]#0 : (!fir.ref>) -> !fir.box> ! CHECK: %[[VAL_13:.*]] = fir.box_addr %[[VAL_12]] : (!fir.box>) -> !fir.ref> ! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (!fir.ref>) -> !fir.ref - ! CHECK: fir.call @_FortranAPerror(%[[VAL_14]]) fastmath : (!fir.ref) -> () + ! CHECK: fir.call @_FortranAPerror(%[[VAL_14]]) fastmath : (!fir.ref) -> () end subroutine test_perror ! CHECK-LABEL: func @_QPtest_perror_unknown_length( diff --git a/flang/test/Lower/Intrinsics/product.f90 b/flang/test/Lower/Intrinsics/product.f90 index df7c1e4ce7eaa..c64982e435f07 100644 --- a/flang/test/Lower/Intrinsics/product.f90 +++ b/flang/test/Lower/Intrinsics/product.f90 @@ -111,7 +111,7 @@ real function product_test_optional_4(x, use_mask) real :: x(:) logical :: use_mask logical, allocatable :: mask(:) -if (use_mask) then +if (use_mask) then allocate(mask(size(x, 1))) call set_mask(mask) ! CHECK: fir.call @_QPset_mask diff --git a/flang/test/Lower/Intrinsics/reduce.f90 b/flang/test/Lower/Intrinsics/reduce.f90 index 083dca5c3cd9f..27c4277ffebeb 100644 --- a/flang/test/Lower/Intrinsics/reduce.f90 +++ b/flang/test/Lower/Intrinsics/reduce.f90 @@ -19,7 +19,7 @@ pure function red_int1_interface_value(a, b) integer, parameter :: kind10 = merge(10, 4, selected_real_kind(p=18).eq.10) integer, parameter :: kind16 = merge(16, 4, selected_real_kind(p=33).eq.16) - + contains @@ -46,11 +46,11 @@ subroutine integer1(a, id, d1, d2) res = reduce(a, red_int1) res = reduce(a, red_int1, identity=id) - + res = reduce(a, red_int1, identity=id, ordered = .true.) res = reduce(a, red_int1, [.true., .true., .false.]) - + res = reduce(a, red_int1_value) fptr => red_int1 diff --git a/flang/test/Lower/Intrinsics/reshape.f90 b/flang/test/Lower/Intrinsics/reshape.f90 index 4f4f50965dd1b..b960a3e380786 100644 --- a/flang/test/Lower/Intrinsics/reshape.f90 +++ b/flang/test/Lower/Intrinsics/reshape.f90 @@ -24,16 +24,16 @@ subroutine reshape_test(x, source, pd, sh, ord) ! CHECK-DAG: %[[a18:.*]] = fir.box_addr %[[a15]] : (!fir.box>>) -> !fir.heap> ! CHECK-DAG: fir.freemem %[[a18]] end subroutine - + ! CHECK-LABEL: func @_QPtest_reshape_optional( ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>>> ! 
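The REDUCE calls above fold the user-supplied pure function red_int1 over the array, optionally seeded with IDENTITY; shape-wise this is std::reduce with a binary operation and an initial value. A sketch with addition standing in for red_int1 (C++17; illustrative only):

#include <cstdio>
#include <numeric>
#include <vector>

int main() {
  std::vector<int> a{1, 2, 3};
  // reduce(a, red_int1, identity=id) with red_int1 = addition, id = 0.
  int res = std::reduce(a.begin(), a.end(), /*identity=*/0,
                        [](int x, int y) { return x + y; });
  std::printf("%d\n", res); // prints 6
  return 0;
}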
CHECK-SAME: %[[VAL_1:.*]]: !fir.ref>>> subroutine test_reshape_optional(pad, order, source, shape) - real, pointer :: pad(:, :) - integer, pointer :: order(:) + real, pointer :: pad(:, :) + integer, pointer :: order(:) real :: source(:, :, :) - integer :: shape(4) - print *, reshape(source=source, shape=shape, pad=pad, order=order) + integer :: shape(4) + print *, reshape(source=source, shape=shape, pad=pad, order=order) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_0]] : !fir.ref>>> ! CHECK: %[[VAL_14:.*]] = fir.box_addr %[[VAL_13]] : (!fir.box>>) -> !fir.ptr> ! CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (!fir.ptr>) -> i64 diff --git a/flang/test/Lower/Intrinsics/scale.f90 b/flang/test/Lower/Intrinsics/scale.f90 index 9c97349d1dd57..9034c48e639b5 100644 --- a/flang/test/Lower/Intrinsics/scale.f90 +++ b/flang/test/Lower/Intrinsics/scale.f90 @@ -14,7 +14,7 @@ subroutine scale_test1(x, i) ! CHECK: %[[tmp:.*]] = fir.call @_FortranAScale4(%[[x_val]], %[[i_cast]]) {{.*}}: (f32, i64) -> f32 ! CHECK: hlfir.assign %[[tmp]] to %[[res]]#0 : f32, !fir.ref end subroutine scale_test1 - + ! CHECK-LABEL: scale_test2 subroutine scale_test2(x, i) real(kind=8) :: x, res diff --git a/flang/test/Lower/Intrinsics/spread.f90 b/flang/test/Lower/Intrinsics/spread.f90 index 3c20ec29ebc11..d4d16a6637767 100644 --- a/flang/test/Lower/Intrinsics/spread.f90 +++ b/flang/test/Lower/Intrinsics/spread.f90 @@ -30,7 +30,7 @@ subroutine spread_test(s,d,n,r) ! CHECK-DAG: %[[a15:.*]] = fir.box_addr %[[a13]] : (!fir.box>>) -> !fir.heap> ! CHECK: fir.freemem %[[a15]] end subroutine - + ! CHECK-LABEL: func @_QMspread_modPspread_test2( ! CHECK-SAME: %[[arg0:.*]]: !fir.box>{{.*}}, %[[arg1:[^:]+]]: !fir.ref{{.*}}, %[[arg2:[^:]+]]: !fir.ref{{.*}}, %[[arg3:.*]]: !fir.box>{{.*}}) { subroutine spread_test2(s,d,n,r) diff --git a/flang/test/Lower/Intrinsics/sum.f90 b/flang/test/Lower/Intrinsics/sum.f90 index 3167617b60457..454d564684e89 100644 --- a/flang/test/Lower/Intrinsics/sum.f90 +++ b/flang/test/Lower/Intrinsics/sum.f90 @@ -111,7 +111,7 @@ integer function sum_test_optional_4(x, use_mask) integer :: x(:) logical :: use_mask logical, allocatable :: mask(:) -if (use_mask) then +if (use_mask) then allocate(mask(size(x, 1))) call set_mask(mask) ! CHECK: fir.call @_QPset_mask diff --git a/flang/test/Lower/Intrinsics/system.f90 b/flang/test/Lower/Intrinsics/system.f90 index 6ea98bca7de72..183725cf29133 100644 --- a/flang/test/Lower/Intrinsics/system.f90 +++ b/flang/test/Lower/Intrinsics/system.f90 @@ -1,8 +1,8 @@ ! RUN: bbc -emit-hlfir %s -o - | FileCheck %s ! CHECK-LABEL: func.func @_QPall_args( -! CHECK-SAME: %[[commandArg:.*]]: !fir.boxchar<1> {fir.bindc_name = "command"}, -! CHECK-SAME: %[[exitstatArg:.*]]: !fir.ref {fir.bindc_name = "exitstat"}) { +! CHECK-SAME: %[[commandArg:.*]]: !fir.boxchar<1> {fir.bindc_name = "command"}, +! CHECK-SAME: %[[exitstatArg:.*]]: !fir.ref {fir.bindc_name = "exitstat"}) { subroutine all_args(command, exitstat) CHARACTER(*) :: command INTEGER :: exitstat diff --git a/flang/test/Lower/Intrinsics/transfer.f90 b/flang/test/Lower/Intrinsics/transfer.f90 index 2cc7e93f86f51..a792c8e91ba01 100644 --- a/flang/test/Lower/Intrinsics/transfer.f90 +++ b/flang/test/Lower/Intrinsics/transfer.f90 @@ -27,7 +27,7 @@ subroutine trans_test(store, word) real :: word store = transfer(word, store) end subroutine - + ! CHECK-LABEL: func @_QPtrans_test2( ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>{{.*}}, %[[VAL_1:.*]]: !fir.ref{{.*}}) { ! 
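TRANSFER(word, store) reinterprets the bits of word as the type of store, which is why the lowering above is mostly box/convert plumbing rather than arithmetic. The scalar case in portable C++ (the memcpy idiom is the pre-C++20 spelling of std::bit_cast; assumes IEEE-754 float):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  float word = 1.0f;
  std::uint32_t store = 0;
  static_assert(sizeof store == sizeof word, "same size required");
  std::memcpy(&store, &word, sizeof store); // TRANSFER(word, store)
  std::printf("0x%08x\n", static_cast<unsigned>(store)); // 0x3f800000
  return 0;
}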
CHECK: %[[VAL_2:.*]] = fir.alloca !fir.box>> @@ -69,13 +69,13 @@ subroutine trans_test(store, word) ! CHECK: fir.freemem %[[VAL_25]] ! CHECK: return ! CHECK: } - + subroutine trans_test2(store, word) integer :: store(3) real :: word store = transfer(word, store, 3) end subroutine - + integer function trans_test3(p) ! CHECK-LABEL: func @_QPtrans_test3( ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref{{.*}}) -> i32 { diff --git a/flang/test/Lower/Intrinsics/unlink-sub.f90 b/flang/test/Lower/Intrinsics/unlink-sub.f90 index ac535005fd442..3b5c22adf58ea 100644 --- a/flang/test/Lower/Intrinsics/unlink-sub.f90 +++ b/flang/test/Lower/Intrinsics/unlink-sub.f90 @@ -41,7 +41,7 @@ subroutine all_arguments(path, status) !CHECK: %[[unlink_result:.*]] = fir.call @_FortranAUnlink(%[[path]], %[[path_len]], %[[src_path]], %[[line]]) !CHECK-SAME: : (!fir.ref, i64, !fir.ref, i32) !CHECK-SAME: -> i32 - + !CHECK-DAG: %[[status_i64:.*]] = fir.convert %[[status_decl]]#0 : (!fir.ref) -> i64 !CHECK-DAG: %[[c_null:.*]] = arith.constant 0 : i64 !CHECK-DAG: %[[cmp_result:.*]] = arith.cmpi ne, %[[status_i64]], %[[c_null]] : i64 diff --git a/flang/test/Lower/OpenMP/declare-mapper.f90 b/flang/test/Lower/OpenMP/declare-mapper.f90 index f58f4ba0fe80d..2653e8064279e 100644 --- a/flang/test/Lower/OpenMP/declare-mapper.f90 +++ b/flang/test/Lower/OpenMP/declare-mapper.f90 @@ -6,9 +6,7 @@ ! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 %t/omp-declare-mapper-3.f90 -o - | FileCheck %t/omp-declare-mapper-3.f90 ! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 %t/omp-declare-mapper-4.f90 -o - | FileCheck %t/omp-declare-mapper-4.f90 ! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 %t/omp-declare-mapper-5.f90 -o - | FileCheck %t/omp-declare-mapper-5.f90 -! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 %t/omp-declare-mapper-6.f90 -o - | FileCheck %t/omp-declare-mapper-6.f90 -! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 -module-dir %t %t/omp-declare-mapper-7.mod.f90 -o - >/dev/null -! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 -J %t %t/omp-declare-mapper-7.use.f90 -o - | FileCheck %t/omp-declare-mapper-7.use.f90 +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 %t/omp-declare-mapper-6.f90 -o - | FileCheck %t/omp-declare-mapper-6.f90 !--- omp-declare-mapper-1.f90 subroutine declare_mapper_1 @@ -303,25 +301,3 @@ subroutine declare_mapper_nested_parent r%real_arr = r%base_arr(1) + r%inner%deep_arr(1) !$omp end target end subroutine declare_mapper_nested_parent - -!--- omp-declare-mapper-7.mod.f90 -! Module with DECLARE MAPPER to be compiled separately -module m_mod - implicit none - type :: mty - integer :: x - end type mty - !$omp declare mapper(mymap : mty :: v) map(tofrom: v%x) -end module m_mod - -!--- omp-declare-mapper-7.use.f90 -! Consumer program that USEs the module and applies the mapper by name. -! 
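For readers who know the C/C++ side of OpenMP better than the Fortran side, the pattern the deleted test covered (a DECLARE MAPPER in one place, applied by name via map(mapper(...)) elsewhere) looks roughly like this; the type mty and the mapper name mymap mirror the removed Fortran, and the sketch is illustrative rather than part of this patch:

#include <cstdio>

struct mty {
  int x;
};

// Named mapper: when 'mymap' is requested, map only the member v.x.
#pragma omp declare mapper(mymap : mty v) map(tofrom : v.x)

int main() {
  mty a{0};
  // Apply the mapper by name in the map clause (OpenMP 5.0).
  #pragma omp target map(mapper(mymap) : a)
  { a.x = 42; }
  std::printf("a.x = %d\n", a.x); // 42 when compiled with -fopenmp
  return 0;
}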
CHECK: %{{.*}} = omp.map.info {{.*}} mapper(@{{.*mymap}}) {{.*}} {name = "a"} -program use_module_mapper - use m_mod - implicit none - type(mty) :: a - !$omp target map(mapper(mymap) : a) - a%x = 42 - !$omp end target -end program use_module_mapper diff --git a/flang/test/Parser/OpenMP/map-modifiers.f90 b/flang/test/Parser/OpenMP/map-modifiers.f90 index 7d9b8856ac833..83662b70f08f5 100644 --- a/flang/test/Parser/OpenMP/map-modifiers.f90 +++ b/flang/test/Parser/OpenMP/map-modifiers.f90 @@ -320,7 +320,7 @@ subroutine f21(x, y) integer :: x(10) integer :: y integer, parameter :: p = 23 - !$omp target map(mapper(default), from: x) + !$omp target map(mapper(xx), from: x) x = x + 1 !$omp end target end @@ -329,7 +329,7 @@ subroutine f21(x, y) !UNPARSE: INTEGER x(10_4) !UNPARSE: INTEGER y !UNPARSE: INTEGER, PARAMETER :: p = 23_4 -!UNPARSE: !$OMP TARGET MAP(MAPPER(DEFAULT), FROM: X) +!UNPARSE: !$OMP TARGET MAP(MAPPER(XX), FROM: X) !UNPARSE: x=x+1_4 !UNPARSE: !$OMP END TARGET !UNPARSE: END SUBROUTINE @@ -337,7 +337,7 @@ subroutine f21(x, y) !PARSE-TREE: OmpBeginDirective !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause -!PARSE-TREE: | | Modifier -> OmpMapper -> Name = 'default' +!PARSE-TREE: | | Modifier -> OmpMapper -> Name = 'xx' !PARSE-TREE: | | Modifier -> OmpMapType -> Value = From !PARSE-TREE: | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'x' @@ -375,3 +375,4 @@ subroutine f22(x) !PARSE-TREE: | | SectionSubscript -> Integer -> Expr = 'i' !PARSE-TREE: | | | Designator -> DataRef -> Name = 'i' !PARSE-TREE: | bool = 'true' + diff --git a/flang/test/Semantics/OpenMP/declare-mapper-modfile.f90 b/flang/test/Semantics/OpenMP/declare-mapper-modfile.f90 deleted file mode 100644 index 480f87bc0f8e9..0000000000000 --- a/flang/test/Semantics/OpenMP/declare-mapper-modfile.f90 +++ /dev/null @@ -1,14 +0,0 @@ -! RUN: split-file %s %t -! RUN: %flang_fc1 -fsyntax-only -fopenmp -fopenmp-version=50 -module-dir %t %t/m.f90 -! RUN: cat %t/m.mod | FileCheck --ignore-case %s - -!--- m.f90 -module m - implicit none - type :: t - integer :: x - end type t - !$omp declare mapper(mymap : t :: v) map(v%x) -end module m - -!CHECK: !$OMP DECLARE MAPPER(mymap:t::v) MAP(v%x) diff --git a/flang/test/Semantics/OpenMP/declare-mapper-symbols.f90 b/flang/test/Semantics/OpenMP/declare-mapper-symbols.f90 index 5d77540aa6453..e57a5c0c1cea6 100644 --- a/flang/test/Semantics/OpenMP/declare-mapper-symbols.f90 +++ b/flang/test/Semantics/OpenMP/declare-mapper-symbols.f90 @@ -11,9 +11,9 @@ program main !$omp declare mapper(ty :: maptwo) map(maptwo, maptwo%x) !! Note, symbols come out in their respective scope, but not in declaration order. -!CHECK: mymapper: MapperDetails +!CHECK: mymapper: Misc ConstructName !CHECK: ty: DerivedType components: x -!CHECK: ty.omp.default.mapper: MapperDetails +!CHECK: ty.omp.default.mapper: Misc ConstructName !CHECK: DerivedType scope: ty !CHECK: OtherConstruct scope: !CHECK: mapped (OmpMapToFrom) {{.*}} ObjectEntity type: TYPE(ty) @@ -21,3 +21,4 @@ program main !CHECK: maptwo (OmpMapToFrom) {{.*}} ObjectEntity type: TYPE(ty) end program main + diff --git a/flang/test/Semantics/OpenMP/map-clause-symbols.f90 b/flang/test/Semantics/OpenMP/map-clause-symbols.f90 index 3b723e817ce87..1d6315b4a2312 100644 --- a/flang/test/Semantics/OpenMP/map-clause-symbols.f90 +++ b/flang/test/Semantics/OpenMP/map-clause-symbols.f90 @@ -1,16 +1,14 @@ ! 
RUN: %flang_fc1 -fdebug-dump-symbols -fopenmp -fopenmp-version=50 %s | FileCheck %s program main !CHECK-LABEL: MainProgram scope: MAIN - type ty - real(4) :: x - end type ty - !$omp declare mapper(xx : ty :: v) map(v) integer, parameter :: n = 256 - type(ty) :: a(256) + real(8) :: a(256) !$omp target map(mapper(xx), from:a) do i=1,n - a(i)%x = 4.2 + a(i) = 4.2 end do !$omp end target -!CHECK: xx: MapperDetails +!CHECK: OtherConstruct scope: size=0 alignment=1 sourceRange=74 bytes +!CHECK: OtherClause scope: size=0 alignment=1 sourceRange=0 bytes +!CHECK: xx: Misc ConstructName end program main diff --git a/libc/shared/rpc_opcodes.h b/libc/shared/rpc_opcodes.h index 583d622e1fa0d..a9c4f5521021e 100644 --- a/libc/shared/rpc_opcodes.h +++ b/libc/shared/rpc_opcodes.h @@ -47,9 +47,9 @@ typedef enum { LIBC_SYSTEM = LLVM_LIBC_OPCODE(29), // Internal opcodes for testing. - LIBC_TEST_INCREMENT = LLVM_LIBC_OPCODE(1 << 15), - LIBC_TEST_INTERFACE = LLVM_LIBC_OPCODE((1 << 15) + 1), - LIBC_TEST_STREAM = LLVM_LIBC_OPCODE((1 << 15) + 2), + LIBC_TEST_INCREMENT = LLVM_LIBC_OPCODE((1 << 15)), + LIBC_TEST_INTERFACE = LLVM_LIBC_OPCODE(((1 << 15) + 1)), + LIBC_TEST_STREAM = LLVM_LIBC_OPCODE(((1 << 15) + 2)), LIBC_LAST = 0xFFFFFFFF, } rpc_opcode_t; diff --git a/libc/src/__support/RPC/rpc_server.h b/libc/src/__support/RPC/rpc_server.h index 4c8242acafd28..abd604ae48146 100644 --- a/libc/src/__support/RPC/rpc_server.h +++ b/libc/src/__support/RPC/rpc_server.h @@ -298,7 +298,7 @@ LIBC_INLINE static void handle_printf(rpc::Server::Port &port, results[lane] = static_cast( fwrite(buffer, 1, writer.get_chars_written(), files[lane])); - if (results[lane] != writer.get_chars_written() || ret == -1) + if (size_t(results[lane]) != writer.get_chars_written() || ret == -1) results[lane] = -1; } diff --git a/libcxx/include/fstream b/libcxx/include/fstream index b07ca636094af..90e35740c17cf 100644 --- a/libcxx/include/fstream +++ b/libcxx/include/fstream @@ -315,8 +315,14 @@ protected: traits_type::copy(__str, this->gptr(), __n); this->__gbump_ptrdiff(__n); } - if (__len - __n >= this->egptr() - this->eback()) - return std::fread(__str + __n, sizeof(char_type), __len - __n, __file_); + const streamsize __remainder = __len - __n; + const streamsize __buffer_space = this->egptr() - this->eback(); + + if (__remainder >= __buffer_space) + return std::fread(__str + __n, sizeof(char_type), __remainder, __file_) + __n; + else if (__remainder > 0) + return basic_streambuf<_CharT, _Traits>::xsgetn(__str + __n, __remainder) + __n; + return __n; } return basic_streambuf<_CharT, _Traits>::xsgetn(__str, __len); } diff --git a/lldb/unittests/Expression/CMakeLists.txt b/lldb/unittests/Expression/CMakeLists.txt index 2600557b6b376..0e0b002500eb4 100644 --- a/lldb/unittests/Expression/CMakeLists.txt +++ b/lldb/unittests/Expression/CMakeLists.txt @@ -10,6 +10,7 @@ add_lldb_unittest(ExpressionTests DWARFExpressionTest.cpp CppModuleConfigurationTest.cpp ExpressionTest.cpp + ValueMatcher.cpp LINK_COMPONENTS Support diff --git a/lldb/unittests/Expression/DWARFExpressionTest.cpp b/lldb/unittests/Expression/DWARFExpressionTest.cpp index 9d11060becfae..8c5568d9e4e65 100644 --- a/lldb/unittests/Expression/DWARFExpressionTest.cpp +++ b/lldb/unittests/Expression/DWARFExpressionTest.cpp @@ -5,8 +5,8 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - #include "lldb/Expression/DWARFExpression.h" +#include "ValueMatcher.h" #ifdef ARCH_AARCH64 #include 
"Plugins/ABI/AArch64/ABISysV_arm64.h" #endif @@ -135,40 +135,18 @@ class MockRegisterContext : public RegisterContext { }; } // namespace -static llvm::Expected Evaluate(llvm::ArrayRef expr, - lldb::ModuleSP module_sp = {}, - DWARFUnit *unit = nullptr, - ExecutionContext *exe_ctx = nullptr, - RegisterContext *reg_ctx = nullptr) { +static llvm::Expected Evaluate(llvm::ArrayRef expr, + lldb::ModuleSP module_sp = {}, + DWARFUnit *unit = nullptr, + ExecutionContext *exe_ctx = nullptr, + RegisterContext *reg_ctx = nullptr) { DataExtractor extractor(expr.data(), expr.size(), lldb::eByteOrderLittle, /*addr_size*/ 4); - llvm::Expected result = DWARFExpression::Evaluate( - exe_ctx, reg_ctx, module_sp, extractor, unit, lldb::eRegisterKindLLDB, - /*initial_value_ptr=*/nullptr, - /*object_address_ptr=*/nullptr); - if (!result) - return result.takeError(); - - switch (result->GetValueType()) { - case Value::ValueType::Scalar: - return result->GetScalar(); - case Value::ValueType::LoadAddress: - return LLDB_INVALID_ADDRESS; - case Value::ValueType::HostAddress: { - // Convert small buffers to scalars to simplify the tests. - DataBufferHeap &buf = result->GetBuffer(); - if (buf.GetByteSize() <= 8) { - uint64_t val = 0; - memcpy(&val, buf.GetBytes(), buf.GetByteSize()); - return Scalar(llvm::APInt(buf.GetByteSize() * 8, val, false)); - } - } - [[fallthrough]]; - default: - break; - } - return llvm::createStringError("unsupported value type"); + return DWARFExpression::Evaluate(exe_ctx, reg_ctx, module_sp, extractor, unit, + lldb::eRegisterKindLLDB, + /*initial_value_ptr=*/nullptr, + /*object_address_ptr=*/nullptr); } class DWARFExpressionTester : public YAMLModuleTester { @@ -177,18 +155,11 @@ class DWARFExpressionTester : public YAMLModuleTester { : YAMLModuleTester(yaml_data, cu_index) {} using YAMLModuleTester::YAMLModuleTester; - llvm::Expected Eval(llvm::ArrayRef expr) { + llvm::Expected Eval(llvm::ArrayRef expr) { return ::Evaluate(expr, m_module_sp, m_dwarf_unit); } }; -/// Unfortunately Scalar's operator==() is really picky. -static Scalar GetScalar(unsigned bits, uint64_t value, bool sign) { - Scalar scalar(value); - scalar.TruncOrExtendTo(bits, sign); - return scalar; -} - /// This is needed for the tests that use a mock process. class DWARFExpressionMockProcessTest : public ::testing::Test { public: @@ -255,48 +226,48 @@ class MockTarget : public Target { TEST(DWARFExpression, DW_OP_pick) { EXPECT_THAT_EXPECTED(Evaluate({DW_OP_lit1, DW_OP_lit0, DW_OP_pick, 0}), - llvm::HasValue(0)); + ExpectScalar(0)); EXPECT_THAT_EXPECTED(Evaluate({DW_OP_lit1, DW_OP_lit0, DW_OP_pick, 1}), - llvm::HasValue(1)); + ExpectScalar(1)); EXPECT_THAT_EXPECTED(Evaluate({DW_OP_lit1, DW_OP_lit0, DW_OP_pick, 2}), llvm::Failed()); } TEST(DWARFExpression, DW_OP_const) { // Extend to address size. 
- EXPECT_THAT_EXPECTED(Evaluate({DW_OP_const1u, 0x88}), llvm::HasValue(0x88)); + EXPECT_THAT_EXPECTED(Evaluate({DW_OP_const1u, 0x88}), ExpectScalar(0x88)); EXPECT_THAT_EXPECTED(Evaluate({DW_OP_const1s, 0x88}), - llvm::HasValue(0xffffff88)); + ExpectScalar(0xffffff88)); EXPECT_THAT_EXPECTED(Evaluate({DW_OP_const2u, 0x47, 0x88}), - llvm::HasValue(0x8847)); + ExpectScalar(0x8847)); EXPECT_THAT_EXPECTED(Evaluate({DW_OP_const2s, 0x47, 0x88}), - llvm::HasValue(0xffff8847)); + ExpectScalar(0xffff8847)); EXPECT_THAT_EXPECTED(Evaluate({DW_OP_const4u, 0x44, 0x42, 0x47, 0x88}), - llvm::HasValue(0x88474244)); + ExpectScalar(0x88474244)); EXPECT_THAT_EXPECTED(Evaluate({DW_OP_const4s, 0x44, 0x42, 0x47, 0x88}), - llvm::HasValue(0x88474244)); + ExpectScalar(0x88474244)); // Truncate to address size. EXPECT_THAT_EXPECTED( Evaluate({DW_OP_const8u, 0x00, 0x11, 0x22, 0x33, 0x44, 0x42, 0x47, 0x88}), - llvm::HasValue(0x33221100)); + ExpectScalar(0x33221100)); EXPECT_THAT_EXPECTED( Evaluate({DW_OP_const8s, 0x00, 0x11, 0x22, 0x33, 0x44, 0x42, 0x47, 0x88}), - llvm::HasValue(0x33221100)); + ExpectScalar(0x33221100)); // Don't truncate to address size for compatibility with clang (pr48087). EXPECT_THAT_EXPECTED( Evaluate({DW_OP_constu, 0x81, 0x82, 0x84, 0x88, 0x90, 0xa0, 0x40}), - llvm::HasValue(0x01010101010101)); + ExpectScalar(0x01010101010101)); EXPECT_THAT_EXPECTED( Evaluate({DW_OP_consts, 0x81, 0x82, 0x84, 0x88, 0x90, 0xa0, 0x40}), - llvm::HasValue(0xffff010101010101)); + ExpectScalar(0xffff010101010101)); } TEST(DWARFExpression, DW_OP_skip) { EXPECT_THAT_EXPECTED(Evaluate({DW_OP_const1u, 0x42, DW_OP_skip, 0x02, 0x00, DW_OP_const1u, 0xff}), - llvm::HasValue(0x42)); + ExpectScalar(0x42)); } TEST(DWARFExpression, DW_OP_bra) { @@ -309,7 +280,7 @@ TEST(DWARFExpression, DW_OP_bra) { DW_OP_const1u, 0xff, // push 0xff }), // clang-format on - llvm::HasValue(0x42)); + ExpectScalar(0x42)); EXPECT_THAT_ERROR(Evaluate({DW_OP_bra, 0x01, 0x00}).takeError(), llvm::Failed()); @@ -414,42 +385,42 @@ TEST(DWARFExpression, DW_OP_convert) { EXPECT_THAT_EXPECTED( t.Eval({DW_OP_const4u, 0x11, 0x22, 0x33, 0x44, // DW_OP_convert, offs_uint32_t, DW_OP_stack_value}), - llvm::HasValue(GetScalar(64, 0x44332211, not_signed))); + ExpectScalar(64, 0x44332211, not_signed)); // Zero-extend to 64 bits. EXPECT_THAT_EXPECTED( t.Eval({DW_OP_const4u, 0x11, 0x22, 0x33, 0x44, // DW_OP_convert, offs_uint64_t, DW_OP_stack_value}), - llvm::HasValue(GetScalar(64, 0x44332211, not_signed))); + ExpectScalar(64, 0x44332211, not_signed)); // Sign-extend to 64 bits. EXPECT_THAT_EXPECTED( t.Eval({DW_OP_const4s, 0xcc, 0xdd, 0xee, 0xff, // DW_OP_convert, offs_sint64_t, DW_OP_stack_value}), - llvm::HasValue(GetScalar(64, 0xffffffffffeeddcc, is_signed))); + ExpectScalar(64, 0xffffffffffeeddcc, is_signed)); // Sign-extend, then truncate. EXPECT_THAT_EXPECTED( t.Eval({DW_OP_const4s, 0xcc, 0xdd, 0xee, 0xff, // DW_OP_convert, offs_sint64_t, // DW_OP_convert, offs_uint32_t, DW_OP_stack_value}), - llvm::HasValue(GetScalar(32, 0xffeeddcc, not_signed))); + ExpectScalar(32, 0xffeeddcc, not_signed)); // Truncate to default unspecified (pointer-sized) type. EXPECT_THAT_EXPECTED(t.Eval({DW_OP_const4s, 0xcc, 0xdd, 0xee, 0xff, // DW_OP_convert, offs_sint64_t, // DW_OP_convert, 0x00, DW_OP_stack_value}), - llvm::HasValue(GetScalar(32, 0xffeeddcc, not_signed))); + ExpectScalar(32, 0xffeeddcc, not_signed)); // Truncate to 8 bits. 
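Before the 8-bit truncation cases below, the extension rules checked above can be cross-checked with plain integer conversions: unsigned DW_OP_const*u forms zero-extend to the 4-byte address size, signed DW_OP_const*s forms sign-extend, and 8-byte forms truncate. A standalone sketch (assumes two's-complement hosts; nothing LLDB-specific):

#include <cassert>
#include <cstdint>

int main() {
  // DW_OP_const1u 0x88 zero-extends to the 32-bit address size.
  assert(static_cast<uint32_t>(uint8_t{0x88}) == 0x88u);
  // DW_OP_const1s 0x88 sign-extends.
  assert(static_cast<uint32_t>(int8_t(0x88)) == 0xffffff88u);
  // DW_OP_const2s 0x8847 sign-extends.
  assert(static_cast<uint32_t>(int16_t(0x8847)) == 0xffff8847u);
  // DW_OP_const8u truncates to the 32-bit address size.
  assert(static_cast<uint32_t>(UINT64_C(0x8847424433221100)) == 0x33221100u);
  return 0;
}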
EXPECT_THAT_EXPECTED(t.Eval({DW_OP_const4s, 'A', 'B', 'C', 'D', DW_OP_convert, offs_uchar, DW_OP_stack_value}), - llvm::HasValue(GetScalar(8, 'A', not_signed))); + ExpectScalar(8, 'A', not_signed)); // Also truncate to 8 bits. EXPECT_THAT_EXPECTED(t.Eval({DW_OP_const4s, 'A', 'B', 'C', 'D', DW_OP_convert, offs_schar, DW_OP_stack_value}), - llvm::HasValue(GetScalar(8, 'A', is_signed))); + ExpectScalar(8, 'A', is_signed)); // // Errors. @@ -479,33 +450,21 @@ TEST(DWARFExpression, DW_OP_stack_value) { TEST(DWARFExpression, DW_OP_piece) { EXPECT_THAT_EXPECTED(Evaluate({DW_OP_const2u, 0x11, 0x22, DW_OP_piece, 2, DW_OP_const2u, 0x33, 0x44, DW_OP_piece, 2}), - llvm::HasValue(GetScalar(32, 0x44332211, true))); + ExpectHostAddress({0x11, 0x22, 0x33, 0x44})); EXPECT_THAT_EXPECTED( Evaluate({DW_OP_piece, 1, DW_OP_const1u, 0xff, DW_OP_piece, 1}), // Note that the "00" should really be "undef", but we can't // represent that yet. - llvm::HasValue(GetScalar(16, 0xff00, true))); -} - -TEST(DWARFExpression, DW_OP_piece_host_address) { - static const uint8_t expr_data[] = {DW_OP_lit2, DW_OP_stack_value, - DW_OP_piece, 40}; - llvm::ArrayRef expr(expr_data, sizeof(expr_data)); - DataExtractor extractor(expr.data(), expr.size(), lldb::eByteOrderLittle, 4); + ExpectHostAddress({0x00, 0xff})); // This tests if ap_int is extended to the right width. // expect 40*8 = 320 bits size. - llvm::Expected result = - DWARFExpression::Evaluate(nullptr, nullptr, nullptr, extractor, nullptr, - lldb::eRegisterKindDWARF, nullptr, nullptr); - ASSERT_THAT_EXPECTED(result, llvm::Succeeded()); - ASSERT_EQ(result->GetValueType(), Value::ValueType::HostAddress); - ASSERT_EQ(result->GetBuffer().GetByteSize(), 40ul); - const uint8_t *data = result->GetBuffer().GetBytes(); - ASSERT_EQ(data[0], 2); - for (int i = 1; i < 40; i++) { - ASSERT_EQ(data[i], 0); - } + std::vector expected_host_buffer(40, 0); + expected_host_buffer[0] = 2; + + EXPECT_THAT_EXPECTED( + Evaluate({{DW_OP_lit2, DW_OP_stack_value, DW_OP_piece, 40}}), + ExpectHostAddress(expected_host_buffer)); } TEST(DWARFExpression, DW_OP_implicit_value) { @@ -513,7 +472,7 @@ TEST(DWARFExpression, DW_OP_implicit_value) { EXPECT_THAT_EXPECTED( Evaluate({DW_OP_implicit_value, bytes, 0x11, 0x22, 0x33, 0x44}), - llvm::HasValue(GetScalar(8 * bytes, 0x44332211, true))); + ExpectHostAddress({0x11, 0x22, 0x33, 0x44})); } TEST(DWARFExpression, DW_OP_unknown) { @@ -548,20 +507,13 @@ TEST_F(DWARFExpressionMockProcessTest, DW_OP_deref) { // Implicit location: *0x4. EXPECT_THAT_EXPECTED( Evaluate({DW_OP_lit4, DW_OP_deref, DW_OP_stack_value}, {}, {}, &exe_ctx), - llvm::HasValue(GetScalar(32, 0x07060504, false))); + ExpectScalar(32, 0x07060504, false)); // Memory location: *(*0x4). - // Evaluate returns LLDB_INVALID_ADDRESS for all load addresses. EXPECT_THAT_EXPECTED(Evaluate({DW_OP_lit4, DW_OP_deref}, {}, {}, &exe_ctx), - llvm::HasValue(Scalar(LLDB_INVALID_ADDRESS))); + ExpectLoadAddress(0x07060504)); // Memory location: *0x4. - // Evaluate returns LLDB_INVALID_ADDRESS for all load addresses. EXPECT_THAT_EXPECTED(Evaluate({DW_OP_lit4}, {}, {}, &exe_ctx), - llvm::HasValue(Scalar(4))); - // Implicit location: *0x4. - // Evaluate returns LLDB_INVALID_ADDRESS for all load addresses. 
- EXPECT_THAT_EXPECTED( - Evaluate({DW_OP_lit4, DW_OP_deref, DW_OP_stack_value}, {}, {}, &exe_ctx), - llvm::HasValue(GetScalar(32, 0x07060504, false))); + ExpectScalar(Scalar(4))); } TEST_F(DWARFExpressionMockProcessTest, WASM_DW_OP_addr) { @@ -581,18 +533,9 @@ TEST_F(DWARFExpressionMockProcessTest, WASM_DW_OP_addr) { ExecutionContext exe_ctx(target_sp, false); // DW_OP_addr takes a single operand of address size width: - uint8_t expr[] = {DW_OP_addr, 0x40, 0x0, 0x0, 0x0}; - DataExtractor extractor(expr, sizeof(expr), lldb::eByteOrderLittle, - /*addr_size*/ 4); - - llvm::Expected result = DWARFExpression::Evaluate( - &exe_ctx, /*reg_ctx*/ nullptr, /*module_sp*/ {}, extractor, - /*unit*/ nullptr, lldb::eRegisterKindLLDB, - /*initial_value_ptr*/ nullptr, - /*object_address_ptr*/ nullptr); - - ASSERT_THAT_EXPECTED(result, llvm::Succeeded()); - ASSERT_EQ(result->GetValueType(), Value::ValueType::LoadAddress); + EXPECT_THAT_EXPECTED( + Evaluate({DW_OP_addr, 0x40, 0x0, 0x0, 0x0}, {}, {}, &exe_ctx), + ExpectLoadAddress(0x40)); } TEST_F(DWARFExpressionMockProcessTest, WASM_DW_OP_addr_index) { @@ -676,15 +619,11 @@ TEST_F(DWARFExpressionMockProcessTest, WASM_DW_OP_addr_index) { DWARFExpression expr(extractor); llvm::Expected result = evaluate(expr); - ASSERT_THAT_EXPECTED(result, llvm::Succeeded()); - ASSERT_EQ(result->GetValueType(), Value::ValueType::LoadAddress); - ASSERT_EQ(result->GetScalar().UInt(), 0x5678u); + EXPECT_THAT_EXPECTED(result, ExpectLoadAddress(0x5678u)); ASSERT_TRUE(expr.Update_DW_OP_addr(dwarf_cu, 0xdeadbeef)); result = evaluate(expr); - ASSERT_THAT_EXPECTED(result, llvm::Succeeded()); - ASSERT_EQ(result->GetValueType(), Value::ValueType::LoadAddress); - ASSERT_EQ(result->GetScalar().UInt(), 0xdeadbeefu); + EXPECT_THAT_EXPECTED(result, ExpectLoadAddress(0xdeadbeefu)); } class CustomSymbolFileDWARF : public SymbolFileDWARF { @@ -778,11 +717,12 @@ static auto testExpressionVendorExtensions(lldb::ModuleSP module_sp, RegisterContext *reg_ctx) { // Test that expression extensions can be evaluated, for example // DW_OP_WASM_location which is not currently handled by DWARFExpression: - EXPECT_THAT_EXPECTED(Evaluate({DW_OP_WASM_location, 0x03, // WASM_GLOBAL:0x03 - 0x04, 0x00, 0x00, // index:u32 - 0x00, DW_OP_stack_value}, - module_sp, &dwarf_unit, nullptr, reg_ctx), - llvm::HasValue(GetScalar(32, 42, false))); + EXPECT_THAT_EXPECTED( + Evaluate({DW_OP_WASM_location, 0x03, // WASM_GLOBAL:0x03 + 0x04, 0x00, 0x00, // index:u32 + 0x00, DW_OP_stack_value}, + module_sp, &dwarf_unit, nullptr, reg_ctx), + ExpectScalar(32, 42, false, Value::ContextType::RegisterInfo)); // Test that searches for opcodes work in the presence of extensions: uint8_t expr[] = {DW_OP_WASM_location, 0x03, 0x04, 0x00, 0x00, 0x00, @@ -1148,17 +1088,8 @@ TEST_F(DWARFExpressionMockProcessTest, DW_OP_piece_file_addr) { uint8_t expr[] = {DW_OP_addr, 0x40, 0x0, 0x0, 0x0, DW_OP_piece, 1, DW_OP_addr, 0x50, 0x0, 0x0, 0x0, DW_OP_piece, 1}; - DataExtractor extractor(expr, sizeof(expr), lldb::eByteOrderLittle, - /*addr_size=*/4); - llvm::Expected result = DWARFExpression::Evaluate( - &exe_ctx, /*reg_ctx=*/nullptr, /*module_sp=*/{}, extractor, - /*unit=*/nullptr, lldb::eRegisterKindLLDB, - /*initial_value_ptr=*/nullptr, - /*object_address_ptr=*/nullptr); - - ASSERT_THAT_EXPECTED(result, llvm::Succeeded()); - ASSERT_EQ(result->GetValueType(), Value::ValueType::HostAddress); - ASSERT_THAT(result->GetBuffer().GetData(), ElementsAre(0x11, 0x22)); + EXPECT_THAT_EXPECTED(Evaluate(expr, {}, {}, &exe_ctx), + ExpectHostAddress({0x11, 
0x22})); } /// A Process whose `ReadMemory` override queries a DenseMap. @@ -1228,28 +1159,15 @@ TEST_F(DWARFExpressionMockProcessTestWithAArch, DW_op_deref_no_ptr_fixing) { process_sp->GetThreadList().AddThread(thread); auto evaluate_expr = [&](auto &expr_data) { - DataExtractor extractor(expr_data, sizeof(expr_data), - lldb::eByteOrderLittle, - /*addr_size*/ 8); - DWARFExpression expr(extractor); - ExecutionContext exe_ctx(process_sp); - llvm::Expected result = DWARFExpression::Evaluate( - &exe_ctx, reg_ctx_sp.get(), /*module_sp*/ nullptr, extractor, - /*unit*/ nullptr, lldb::eRegisterKindLLDB, - /*initial_value_ptr=*/nullptr, - /*object_address_ptr=*/nullptr); - return result; + return Evaluate(expr_data, {}, {}, &exe_ctx, reg_ctx_sp.get()); }; uint8_t expr_reg[] = {DW_OP_breg22, 0}; llvm::Expected result_reg = evaluate_expr(expr_reg); - ASSERT_THAT_EXPECTED(result_reg, llvm::Succeeded()); - ASSERT_EQ(result_reg->GetValueType(), Value::ValueType::LoadAddress); - ASSERT_EQ(result_reg->GetScalar().ULongLong(), addr); + EXPECT_THAT_EXPECTED(result_reg, ExpectLoadAddress(addr)); uint8_t expr_deref[] = {DW_OP_breg22, 0, DW_OP_deref}; llvm::Expected result_deref = evaluate_expr(expr_deref); - ASSERT_THAT_EXPECTED(result_deref, llvm::Succeeded()); - ASSERT_EQ(result_deref->GetScalar().ULongLong(), expected_value); + EXPECT_THAT_EXPECTED(result_deref, ExpectLoadAddress(expected_value)); } diff --git a/lldb/unittests/Expression/ValueMatcher.cpp b/lldb/unittests/Expression/ValueMatcher.cpp new file mode 100644 index 0000000000000..ee7ccaebabd64 --- /dev/null +++ b/lldb/unittests/Expression/ValueMatcher.cpp @@ -0,0 +1,205 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ValueMatcher.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/InterleavedRange.h" +#include "llvm/Support/raw_os_ostream.h" +#include "llvm/Support/raw_ostream.h" + +using namespace lldb_private; + +static void FormatValueDetails(llvm::raw_ostream &os, + Value::ValueType value_type, + Value::ContextType context_type, + const Scalar &scalar, + llvm::ArrayRef buffer_data) { + os << "Value("; + os << "value_type=" << Value::GetValueTypeAsCString(value_type); + os << ", context_type=" << Value::GetContextTypeAsCString(context_type); + + if (value_type == Value::ValueType::HostAddress) { + auto bytes_to_print = buffer_data.take_front(16); + os << ", buffer=["; + llvm::interleave( + bytes_to_print, + [&](uint8_t byte) { + os << llvm::format("%02x", static_cast(byte)); + }, + [&]() { os << " "; }); + if (buffer_data.size() > 16) + os << " ..."; + os << "] (" << buffer_data.size() << " bytes)"; + } else { + os << ", value=" << scalar; + } + os << ")"; +} + +void lldb_private::PrintTo(const Value &val, std::ostream *os) { + if (!os) + return; + + llvm::raw_os_ostream raw_os(*os); + FormatValueDetails(raw_os, val.GetValueType(), val.GetContextType(), + val.GetScalar(), val.GetBuffer().GetData()); +} + +bool ValueMatcher::MatchAndExplain(const Value &val, + std::ostream *stream) const { + if (stream) { + llvm::raw_os_ostream os(*stream); + return MatchAndExplainImpl(val, os); + } + + llvm::raw_null_ostream os; + return MatchAndExplainImpl(val, os); +} + +// Match the provided value and explain any mismatches using +// the raw_ostream. We use the llvm::raw_ostream here to simplify the formatting +// of Scalar values which already know how to print themselves to that stream. +bool ValueMatcher::MatchAndExplainImpl(const Value &val, + llvm::raw_ostream &os) const { + if (val.GetValueType() != m_value_type) { + os << "value_type mismatch: expected " + << Value::GetValueTypeAsCString(m_value_type) << ", got " + << Value::GetValueTypeAsCString(val.GetValueType()) << " "; + return false; + } + + if (val.GetContextType() != m_context_type) { + os << "context_type mismatch: expected " + << Value::GetContextTypeAsCString(m_context_type) << ", got " + << Value::GetContextTypeAsCString(val.GetContextType()) << " "; + return false; + } + + if (m_value_type == Value::ValueType::HostAddress) { + const DataBufferHeap &buffer = val.GetBuffer(); + const size_t buffer_size = buffer.GetByteSize(); + if (buffer_size != m_expected_bytes.size()) { + os << "buffer size mismatch: expected " << m_expected_bytes.size() + << ", got " << buffer_size << " "; + return false; + } + + const uint8_t *data = buffer.GetBytes(); + for (size_t i = 0; i < buffer_size; ++i) { + if (data[i] != m_expected_bytes[i]) { + os << "byte mismatch at index " << i << ": expected " + << llvm::format("0x%02x", static_cast(m_expected_bytes[i])) + << ", got " << llvm::format("0x%02x", static_cast(data[i])) + << " "; + return false; + } + } + } else { + // For Scalar, FileAddress, and LoadAddress compare m_value. 
+ const Scalar &actual_scalar = val.GetScalar(); + if (actual_scalar != m_expected_scalar) { + os << "scalar value mismatch: expected " << m_expected_scalar << ", got " + << actual_scalar; + return false; + } + } + + return true; +} + +void ValueMatcher::DescribeTo(std::ostream *os) const { + if (!os) + return; + llvm::raw_os_ostream raw_os(*os); + FormatValueDetails(raw_os, m_value_type, m_context_type, m_expected_scalar, + m_expected_bytes); +} + +void ValueMatcher::DescribeNegationTo(std::ostream *os) const { + if (!os) + return; + *os << "value does not match"; +} + +testing::Matcher +lldb_private::MatchScalarValue(Value::ValueType value_type, + const Scalar &expected_scalar, + Value::ContextType context_type) { + return ValueMatcher(value_type, expected_scalar, context_type); +} + +testing::Matcher +lldb_private::MatchHostValue(Value::ValueType value_type, + const std::vector &expected_bytes, + Value::ContextType context_type) { + return ValueMatcher(value_type, expected_bytes, context_type); +} + +testing::Matcher +lldb_private::IsScalar(const Scalar &expected_scalar, + Value::ContextType context_type) { + return MatchScalarValue(Value::ValueType::Scalar, expected_scalar, + context_type); +} + +testing::Matcher +lldb_private::IsLoadAddress(const Scalar &expected_address, + Value::ContextType context_type) { + return MatchScalarValue(Value::ValueType::LoadAddress, expected_address, + context_type); +} + +testing::Matcher +lldb_private::IsFileAddress(const Scalar &expected_address, + Value::ContextType context_type) { + return MatchScalarValue(Value::ValueType::FileAddress, expected_address, + context_type); +} + +testing::Matcher +lldb_private::IsHostValue(const std::vector &expected_bytes, + Value::ContextType context_type) { + return MatchHostValue(Value::ValueType::HostAddress, expected_bytes, + context_type); +} + +Scalar lldb_private::GetScalar(unsigned bits, uint64_t value, bool sign) { + Scalar scalar(value); + scalar.TruncOrExtendTo(bits, sign); + return scalar; +} + +llvm::detail::ValueMatchesPoly> +lldb_private::ExpectScalar(const Scalar &expected_scalar, + Value::ContextType context_type) { + return llvm::HasValue(IsScalar(expected_scalar, context_type)); +} + +llvm::detail::ValueMatchesPoly> +lldb_private::ExpectScalar(unsigned bits, uint64_t value, bool sign, + Value::ContextType context_type) { + return ExpectScalar(GetScalar(bits, value, sign), context_type); +} + +llvm::detail::ValueMatchesPoly> +lldb_private::ExpectLoadAddress(const Scalar &expected_address, + Value::ContextType context_type) { + return llvm::HasValue(IsLoadAddress(expected_address, context_type)); +} + +llvm::detail::ValueMatchesPoly> +lldb_private::ExpectFileAddress(const Scalar &expected_address, + Value::ContextType context_type) { + return llvm::HasValue(IsFileAddress(expected_address, context_type)); +} + +llvm::detail::ValueMatchesPoly> +lldb_private::ExpectHostAddress(const std::vector &expected_bytes, + Value::ContextType context_type) { + return llvm::HasValue(IsHostValue(expected_bytes, context_type)); +} diff --git a/lldb/unittests/Expression/ValueMatcher.h b/lldb/unittests/Expression/ValueMatcher.h new file mode 100644 index 0000000000000..3ca7b15e1d3c8 --- /dev/null +++ b/lldb/unittests/Expression/ValueMatcher.h @@ -0,0 +1,155 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file contains the definition of the ValueMatcher class, which is used
+/// to match lldb_private::Value in gtest assert/expect macros. It also contains
+/// several helper functions to create matchers for common Value types.
+///
+/// The ValueMatcher class was created using the gtest guide found here:
+/// https://google.github.io/googletest/gmock_cook_book.html#writing-new-monomorphic-matchers
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_UNITTESTS_EXPRESSION_VALUEMATCHER_H
+#define LLDB_UNITTESTS_EXPRESSION_VALUEMATCHER_H
+
+#include "lldb/Core/Value.h"
+#include "lldb/Utility/Scalar.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Testing/Support/Error.h"
+#include "gtest/gtest.h"
+#include <cstdint>
+#include <vector>
+
+namespace lldb_private {
+
+/// Custom printer for Value objects to make test failures more readable.
+void PrintTo(const Value &val, std::ostream *os);
+
+/// Custom matcher for Value.
+///
+/// It matches against an expected value_type and context_type.
+/// For HostAddress value types it will match the expected contents of
+/// the host buffer. For other value types it matches against an expected
+/// scalar value.
+class ValueMatcher {
+public:
+  ValueMatcher(Value::ValueType value_type, const Scalar &expected_scalar,
+               Value::ContextType context_type)
+      : m_value_type(value_type), m_context_type(context_type),
+        m_expected_scalar(expected_scalar) {
+    assert(value_type == Value::ValueType::Scalar ||
+           value_type == Value::ValueType::FileAddress ||
+           value_type == Value::ValueType::LoadAddress);
+  }
+
+  ValueMatcher(Value::ValueType value_type,
+               const std::vector<uint8_t> &expected_bytes,
+               Value::ContextType context_type)
+      : m_value_type(value_type), m_context_type(context_type),
+        m_expected_bytes(expected_bytes) {
+    assert(value_type == Value::ValueType::HostAddress);
+  }
+
+  // Typedef to hook into the gtest matcher machinery.
+  using is_gtest_matcher = void;
+
+  bool MatchAndExplain(const Value &val, std::ostream *os) const;
+
+  void DescribeTo(std::ostream *os) const;
+
+  void DescribeNegationTo(std::ostream *os) const;
+
+private:
+  Value::ValueType m_value_type = Value::ValueType::Invalid;
+  Value::ContextType m_context_type = Value::ContextType::Invalid;
+  Scalar m_expected_scalar;
+  std::vector<uint8_t> m_expected_bytes;
+
+  bool MatchAndExplainImpl(const Value &val, llvm::raw_ostream &os) const;
+};
+
+/// Matcher for Value with Scalar, FileAddress, or LoadAddress types.
+/// Use with llvm::HasValue() to match Expected<Value>:
+/// EXPECT_THAT_EXPECTED(result, llvm::HasValue(MatchScalarValue(...)));
+testing::Matcher<Value> MatchScalarValue(Value::ValueType value_type,
+                                         const Scalar &expected_scalar,
+                                         Value::ContextType context_type);
+
+/// Matcher for Value with HostAddress type.
+/// Use with llvm::HasValue() to match Expected<Value>:
+/// EXPECT_THAT_EXPECTED(result, llvm::HasValue(MatchHostValue(...)));
+testing::Matcher<Value>
+MatchHostValue(Value::ValueType value_type,
+               const std::vector<uint8_t> &expected_bytes,
+               Value::ContextType context_type);
+
+/// Helper to match a Scalar value and context type.
+/// Use with llvm::HasValue() to match Expected<Value>:
+/// EXPECT_THAT_EXPECTED(result, llvm::HasValue(IsScalar(42)));
+testing::Matcher<Value> IsScalar(const Scalar &expected_scalar,
+                                 Value::ContextType context_type);
+
+/// Helper to match a LoadAddress value and context type. 
+/// Use with llvm::HasValue() to match Expected: +/// EXPECT_THAT_EXPECTED(result, llvm::HasValue(IsLoadAddress(0x1000))); +testing::Matcher IsLoadAddress(const Scalar &expected_address, + Value::ContextType context_type); + +/// Helper to match a FileAddress value and context type. +/// Use with llvm::HasValue() to match Expected: +/// EXPECT_THAT_EXPECTED(result, llvm::HasValue(IsFileAddress(Scalar(0x1000)))); +testing::Matcher IsFileAddress(const Scalar &expected_address, + Value::ContextType context_type); + +/// Helper to match a HostAddress value and context type. +/// Use with llvm::HasValue() to match Expected: +/// EXPECT_THAT_EXPECTED(result, llvm::HasValue(IsHostValue({0x11, 0x22}))); +testing::Matcher IsHostValue(const std::vector &expected_bytes, + Value::ContextType context_type); + +/// Helper to create a scalar because Scalar's operator==() is really picky. +Scalar GetScalar(unsigned bits, uint64_t value, bool sign); + +/// Helper that combines IsScalar with llvm::HasValue for Expected. +/// Use it on an Expected like this: +/// EXPECT_THAT_EXPECTED(result, ExpectScalar(42)); +llvm::detail::ValueMatchesPoly> +ExpectScalar(const Scalar &expected_scalar, + Value::ContextType context_type = Value::ContextType::Invalid); + +/// Helper that combines GetScalar with ExpectScalar to get a precise scalar. +/// Use it on an Expected like this: +/// EXPECT_THAT_EXPECTED(result, ExpectScalar(8, 42, true)); +llvm::detail::ValueMatchesPoly> +ExpectScalar(unsigned bits, uint64_t value, bool sign, + Value::ContextType context_type = Value::ContextType::Invalid); + +/// Helper that combines IsLoadAddress with llvm::HasValue for Expected. +/// Use it on an Expected like this: +/// EXPECT_THAT_EXPECTED(result, ExpectLoadAddress(0x1000)); +llvm::detail::ValueMatchesPoly> ExpectLoadAddress( + const Scalar &expected_address, + Value::ContextType context_type = Value::ContextType::Invalid); + +/// Helper that combines IsFileAddress with llvm::HasValue for Expected. +/// Use it on an Expected like this: +/// EXPECT_THAT_EXPECTED(result, ExpectFileAddress(Scalar(0x2000))); +llvm::detail::ValueMatchesPoly> ExpectFileAddress( + const Scalar &expected_address, + Value::ContextType context_type = Value::ContextType::Invalid); + +/// Helper that combines IsHostValue with llvm::HasValue for Expected. +/// Use it on an Expected like this: +/// EXPECT_THAT_EXPECTED(result, ExpectHostAddress({0x11, 0x22})); +llvm::detail::ValueMatchesPoly> ExpectHostAddress( + const std::vector &expected_bytes, + Value::ContextType context_type = Value::ContextType::Invalid); + +} // namespace lldb_private + +#endif // LLDB_UNITTESTS_EXPRESSION_VALUEMATCHER_H diff --git a/llvm/include/llvm/ADT/DenseMap.h b/llvm/include/llvm/ADT/DenseMap.h index d5b13e7731550..9d61a91631fab 100644 --- a/llvm/include/llvm/ADT/DenseMap.h +++ b/llvm/include/llvm/ADT/DenseMap.h @@ -413,9 +413,7 @@ class DenseMapBase : public DebugEpochBase { return NextPowerOf2(NumEntries * 4 / 3 + 1); } - void moveFromOldBuckets(iterator_range OldBuckets) { - initEmpty(); - + void moveFromImpl(iterator_range OldBuckets) { // Insert all the old elements. const KeyT EmptyKey = KeyInfoT::getEmptyKey(); const KeyT TombstoneKey = KeyInfoT::getTombstoneKey(); @@ -438,6 +436,14 @@ class DenseMapBase : public DebugEpochBase { } } + void moveFromOldBuckets(iterator_range OldBuckets) { + initEmpty(); + moveFromImpl(OldBuckets); + } + + // Move key/value from Other to *this. Other will be in a zombie state. 
+ void moveFrom(DerivedT &Other) { moveFromImpl(Other.buckets()); } + void copyFrom(const DerivedT &other) { this->destroyAll(); derived().deallocateBuckets(); @@ -889,6 +895,12 @@ class SmallDenseMap /// a large bucket. This union will be discriminated by the 'Small' bit. AlignedCharArrayUnion storage; + struct ExactBucketCount {}; + SmallDenseMap(unsigned NumBuckets, ExactBucketCount) { + allocateBuckets(NumBuckets); + this->BaseT::initEmpty(); + } + public: explicit SmallDenseMap(unsigned NumElementsToReserve = 0) { init(NumElementsToReserve); @@ -1065,7 +1077,10 @@ class SmallDenseMap } void deallocateBuckets() { - if (Small) + // Fast path to deallocateBuckets in case getLargeRep()->NumBuckets == 0, + // just like destroyAll. This path is used to destruct zombie instances + // after moves. + if (Small || getLargeRep()->NumBuckets == 0) return; deallocate_buffer(getLargeRep()->Buckets, @@ -1096,46 +1111,22 @@ class SmallDenseMap if (AtLeast > InlineBuckets) AtLeast = std::max(64, NextPowerOf2(AtLeast - 1)); - if (Small) { - // First move the inline buckets into a temporary storage. - AlignedCharArrayUnion TmpStorage; - BucketT *TmpBegin = reinterpret_cast(&TmpStorage); - BucketT *TmpEnd = TmpBegin; + SmallDenseMap Tmp(AtLeast, ExactBucketCount{}); + Tmp.moveFrom(*this); - // Loop over the buckets, moving non-empty, non-tombstones into the - // temporary storage. Have the loop move the TmpEnd forward as it goes. - const KeyT EmptyKey = KeyInfoT::getEmptyKey(); - const KeyT TombstoneKey = KeyInfoT::getTombstoneKey(); - for (BucketT &B : inlineBuckets()) { - if (!KeyInfoT::isEqual(B.getFirst(), EmptyKey) && - !KeyInfoT::isEqual(B.getFirst(), TombstoneKey)) { - assert(size_t(TmpEnd - TmpBegin) < InlineBuckets && - "Too many inline buckets!"); - ::new (&TmpEnd->getFirst()) KeyT(std::move(B.getFirst())); - ::new (&TmpEnd->getSecond()) ValueT(std::move(B.getSecond())); - ++TmpEnd; - B.getSecond().~ValueT(); - } - B.getFirst().~KeyT(); - } - - // AtLeast == InlineBuckets can happen if there are many tombstones, - // and grow() is used to remove them. Usually we always switch to the - // large rep here. - allocateBuckets(AtLeast); - this->moveFromOldBuckets(llvm::make_range(TmpBegin, TmpEnd)); - return; + if (Tmp.Small) { + // Use moveFrom in those rare cases where we stay in the small mode. This + // can happen when we have many tombstones. + this->BaseT::initEmpty(); + this->moveFrom(Tmp); + Tmp.Small = false; + Tmp.getLargeRep()->NumBuckets = 0; + } else { + Small = false; + NumTombstones = 0; + *getLargeRep() = std::move(*Tmp.getLargeRep()); + Tmp.getLargeRep()->NumBuckets = 0; } - - LargeRep OldRep = std::move(*getLargeRep()); - getLargeRep()->~LargeRep(); - allocateBuckets(AtLeast); - - this->moveFromOldBuckets(OldRep.buckets()); - - // Free the old table. - deallocate_buffer(OldRep.Buckets, sizeof(BucketT) * OldRep.NumBuckets, - alignof(BucketT)); } // Plan how to shrink the bucket table. Return: diff --git a/llvm/include/llvm/CodeGen/MachineTraceMetrics.h b/llvm/include/llvm/CodeGen/MachineTraceMetrics.h index d51de24d64e8d..74b051d0cddc6 100644 --- a/llvm/include/llvm/CodeGen/MachineTraceMetrics.h +++ b/llvm/include/llvm/CodeGen/MachineTraceMetrics.h @@ -83,6 +83,8 @@ struct LiveRegUnit { LiveRegUnit(unsigned RU) : RegUnit(RU) {} }; +using LiveRegUnitSet = SparseSet; + /// Strategies for selecting traces. enum class MachineTraceStrategy { /// Select the trace through a block that has the fewest instructions. 
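The LiveRegUnitSet alias above is a SparseSet keyed by register unit number, which is why every user of it below calls setUniverse(TRI->getNumRegUnits()) before inserting. A reduced sketch of the idiom against LLVM's ADT headers (LiveRegUnitStub keeps only the key and a cycle; the real LiveRegUnit also records the defining instruction and operand):

#include "llvm/ADT/SparseSet.h"

// SparseSet keys elements via getSparseSetIndex(); here that is the
// register unit number, as in MachineTraceMetrics::LiveRegUnit.
struct LiveRegUnitStub {
  unsigned RegUnit;
  unsigned Cycle = 0;
  unsigned getSparseSetIndex() const { return RegUnit; }
  LiveRegUnitStub(unsigned RU) : RegUnit(RU) {}
};
using LiveRegUnitSetStub = llvm::SparseSet<LiveRegUnitStub>;

int main() {
  LiveRegUnitSetStub RegUnits;
  RegUnits.setUniverse(128);           // stand-in for getNumRegUnits()
  RegUnits.insert(LiveRegUnitStub(5)); // regunit 5 has a live def
  LiveRegUnitSetStub::iterator I = RegUnits.find(5);
  if (I != RegUnits.end()) {
    I->Cycle = 3;      // update the tracked def in place
    RegUnits.erase(I); // drop it once the unit is clobbered
  }
  return 0;
}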
@@ -380,16 +382,15 @@ class MachineTraceMetrics { Trace getTrace(const MachineBasicBlock *MBB); /// Updates the depth of an machine instruction, given RegUnits. - void updateDepth(TraceBlockInfo &TBI, const MachineInstr&, - SparseSet &RegUnits); - void updateDepth(const MachineBasicBlock *, const MachineInstr&, - SparseSet &RegUnits); + void updateDepth(TraceBlockInfo &TBI, const MachineInstr &, + LiveRegUnitSet &RegUnits); + void updateDepth(const MachineBasicBlock *, const MachineInstr &, + LiveRegUnitSet &RegUnits); /// Updates the depth of the instructions from Start to End. void updateDepths(MachineBasicBlock::iterator Start, MachineBasicBlock::iterator End, - SparseSet &RegUnits); - + LiveRegUnitSet &RegUnits); }; /// Get the trace ensemble representing the given trace selection strategy. diff --git a/llvm/include/llvm/MC/MCRegisterInfo.h b/llvm/include/llvm/MC/MCRegisterInfo.h index e6dbb38dfee67..f1caa077a6d7b 100644 --- a/llvm/include/llvm/MC/MCRegisterInfo.h +++ b/llvm/include/llvm/MC/MCRegisterInfo.h @@ -16,6 +16,7 @@ #define LLVM_MC_MCREGISTERINFO_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Sequence.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" #include "llvm/MC/LaneBitmask.h" @@ -259,6 +260,9 @@ class LLVM_ABI MCRegisterInfo { iterator_range> sub_and_superregs_inclusive(MCRegister Reg) const; + /// Returns an iterator range over all regunits. + iota_range regunits() const; + /// Returns an iterator range over all regunits for \p Reg. iterator_range regunits(MCRegister Reg) const; @@ -798,6 +802,10 @@ MCRegisterInfo::sub_and_superregs_inclusive(MCRegister Reg) const { return concat(subregs_inclusive(Reg), superregs(Reg)); } +inline iota_range MCRegisterInfo::regunits() const { + return seq(getNumRegUnits()); +} + inline iterator_range MCRegisterInfo::regunits(MCRegister Reg) const { return make_range({Reg, this}, MCRegUnitIterator()); diff --git a/llvm/include/llvm/TableGen/CodeGenHelpers.h b/llvm/include/llvm/TableGen/CodeGenHelpers.h index 95866e306b5ff..e357b2670be15 100644 --- a/llvm/include/llvm/TableGen/CodeGenHelpers.h +++ b/llvm/include/llvm/TableGen/CodeGenHelpers.h @@ -21,18 +21,37 @@ namespace llvm { -// Simple RAII helper for emitting ifdef-undef-endif scope. +// Simple RAII helper for emitting ifdef-undef-endif scope. `LateUndef` controls +// whether the undef is emitted at the start of the scope (false) or at the end +// of the scope (true). class IfDefEmitter { public: - IfDefEmitter(raw_ostream &OS, StringRef Name) : Name(Name.str()), OS(OS) { - OS << "#ifdef " << Name << "\n" - << "#undef " << Name << "\n\n"; + IfDefEmitter(raw_ostream &OS, StringRef Name, bool LateUndef = false) + : Name(Name.str()), OS(OS), LateUndef(LateUndef) { + OS << "#ifdef " << Name << "\n"; + if (!LateUndef) + OS << "#undef " << Name << "\n"; + OS << "\n"; + } + ~IfDefEmitter() { close(); } + + // Explicit function to close the ifdef scopes. + void close() { + if (Closed) + return; + + OS << "\n"; + if (LateUndef) + OS << "#undef " << Name << "\n"; + OS << "#endif // " << Name << "\n\n"; + Closed = true; } - ~IfDefEmitter() { OS << "\n#endif // " << Name << "\n\n"; } private: std::string Name; raw_ostream &OS; + bool LateUndef; + bool Closed = false; }; // Simple RAII helper for emitting header include guard (ifndef-define-endif). 
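What LateUndef changes is only where the #undef lands in the emitted scaffolding. A freestanding sketch of the same RAII pattern over std::ostream (simplified: the real emitter writes to llvm::raw_ostream and also allows an early explicit close(); the guard name is made up):

#include <iostream>
#include <string>
#include <utility>

class IfDefScope {
  std::string Name;
  std::ostream &OS;
  bool LateUndef;

public:
  IfDefScope(std::ostream &OS, std::string Name, bool LateUndef = false)
      : Name(std::move(Name)), OS(OS), LateUndef(LateUndef) {
    OS << "#ifdef " << this->Name << "\n";
    if (!LateUndef)
      OS << "#undef " << this->Name << "\n";
  }
  ~IfDefScope() {
    if (LateUndef)
      OS << "#undef " << Name << "\n";
    OS << "#endif // " << Name << "\n";
  }
};

int main() {
  // With LateUndef the guard macro stays defined for the whole body and
  // is undefined at the end of the scope instead of the beginning.
  IfDefScope Scope(std::cout, "GET_SOME_TABLE", /*LateUndef=*/true);
  std::cout << "/* generated body */\n";
  return 0;
}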
@@ -43,11 +62,20 @@ class IncludeGuardEmitter { OS << "#ifndef " << Name << "\n" << "#define " << Name << "\n\n"; } - ~IncludeGuardEmitter() { OS << "\n#endif // " << Name << "\n"; } + ~IncludeGuardEmitter() { close(); } + + // Explicit function to close the ifdef scopes. + void close() { + if (Closed) + return; + OS << "\n#endif // " << Name << "\n\n"; + Closed = true; + } private: std::string Name; raw_ostream &OS; + bool Closed = false; }; // Simple RAII helper for emitting namespace scope. Name can be a single @@ -65,7 +93,9 @@ class NamespaceEmitter { // Explicit function to close the namespace scopes. void close() { - if (!Closed && !Name.empty()) + if (Closed) + return; + if (!Name.empty()) OS << "\n} // namespace " << Name << "\n"; Closed = true; } diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp index e4dd25214edce..acbf7d845f6d8 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -1616,8 +1616,8 @@ std::optional InstrRefBasedLDV::getValueForInstrRef( unsigned MainRegSize = TRI->getRegSizeInBits(*TRC); if (Size != MainRegSize || Offset) { // Enumerate all subregisters, searching. - Register NewReg = 0; - for (MCPhysReg SR : TRI->subregs(Reg)) { + Register NewReg = Register(); + for (MCRegister SR : TRI->subregs(Reg)) { unsigned Subreg = TRI->getSubRegIndex(Reg, SR); unsigned SubregSize = TRI->getSubRegIdxSize(Subreg); unsigned SubregOffset = TRI->getSubRegIdxOffset(Subreg); @@ -1633,7 +1633,8 @@ std::optional InstrRefBasedLDV::getValueForInstrRef( } else { // Re-state the value as being defined within the subregister // that we found. - LocIdx NewLoc = MTracker->lookupOrTrackRegister(NewReg); + LocIdx NewLoc = + MTracker->lookupOrTrackRegister(MTracker->getLocID(NewReg)); NewID = ValueIDNum(NewID->getBlock(), NewID->getInst(), NewLoc); } } @@ -1832,12 +1833,13 @@ bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) { Register Reg = MO.getReg(); ValueIDNum Num = MTracker->readReg(Reg); auto PHIRec = DebugPHIRecord( - {InstrNum, MI.getParent(), Num, MTracker->lookupOrTrackRegister(Reg)}); + {InstrNum, MI.getParent(), Num, + MTracker->lookupOrTrackRegister(MTracker->getLocID(Reg))}); DebugPHINumToValue.push_back(PHIRec); // Ensure this register is tracked. for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI) - MTracker->lookupOrTrackRegister(*RAI); + MTracker->lookupOrTrackRegister(MTracker->getLocID(*RAI)); } else if (MO.isFI()) { // The value is whatever's in this stack slot. unsigned FI = MO.getIndex(); @@ -1963,8 +1965,8 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) { // different location. // Inform TTracker about any direct clobbers. - for (uint32_t DeadReg : DeadRegs) { - LocIdx Loc = MTracker->lookupOrTrackRegister(DeadReg); + for (MCRegister DeadReg : DeadRegs) { + LocIdx Loc = MTracker->lookupOrTrackRegister(MTracker->getLocID(DeadReg)); TTracker->clobberMloc(Loc, MI.getIterator(), false); } @@ -2009,9 +2011,9 @@ void InstrRefBasedLDV::performCopy(Register SrcRegNum, Register DstRegNum) { // Copy subregisters from one location to another. 
for (MCSubRegIndexIterator SRI(SrcRegNum, TRI); SRI.isValid(); ++SRI) { - unsigned SrcSubReg = SRI.getSubReg(); + MCRegister SrcSubReg = SRI.getSubReg(); unsigned SubRegIdx = SRI.getSubRegIndex(); - unsigned DstSubReg = TRI->getSubReg(DstRegNum, SubRegIdx); + MCRegister DstSubReg = TRI->getSubReg(DstRegNum, SubRegIdx); if (!DstSubReg) continue; @@ -2020,8 +2022,10 @@ void InstrRefBasedLDV::performCopy(Register SrcRegNum, Register DstRegNum) { // yet. // This will force SrcSubReg to be tracked, if it isn't yet. Will read // mphi values if it wasn't tracked. - LocIdx SrcL = MTracker->lookupOrTrackRegister(SrcSubReg); - LocIdx DstL = MTracker->lookupOrTrackRegister(DstSubReg); + LocIdx SrcL = + MTracker->lookupOrTrackRegister(MTracker->getLocID(SrcSubReg)); + LocIdx DstL = + MTracker->lookupOrTrackRegister(MTracker->getLocID(DstSubReg)); (void)SrcL; (void)DstL; ValueIDNum CpyValue = MTracker->readReg(SrcSubReg); @@ -2144,7 +2148,7 @@ bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) { // Then, transfer subreg bits. for (MCPhysReg SR : TRI->subregs(Reg)) { // Ensure this reg is tracked, - (void)MTracker->lookupOrTrackRegister(SR); + (void)MTracker->lookupOrTrackRegister(MTracker->getLocID(SR)); unsigned SubregIdx = TRI->getSubRegIndex(Reg, SR); unsigned SpillID = MTracker->getLocID(Loc, SubregIdx); DoTransfer(SR, SpillID); @@ -2676,7 +2680,7 @@ void InstrRefBasedLDV::placeMLocPHIs( // For reg units, place PHIs, and then place them for any aliasing registers. for (Register R : RegUnitsToPHIUp) { - LocIdx L = MTracker->lookupOrTrackRegister(R); + LocIdx L = MTracker->lookupOrTrackRegister(MTracker->getLocID(R)); CollectPHIsForLoc(L); // Install those PHI values into the live-in value array. @@ -2689,7 +2693,8 @@ void InstrRefBasedLDV::placeMLocPHIs( if (!MTracker->isRegisterTracked(*RAI)) continue; - LocIdx AliasLoc = MTracker->lookupOrTrackRegister(*RAI); + LocIdx AliasLoc = + MTracker->lookupOrTrackRegister(MTracker->getLocID(*RAI)); InstallPHIsAtLoc(AliasLoc); } } diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp index 27c5addffa4ab..b600e0411bc48 100644 --- a/llvm/lib/CodeGen/LiveIntervals.cpp +++ b/llvm/lib/CodeGen/LiveIntervals.cpp @@ -173,8 +173,8 @@ void LiveIntervals::analyze(MachineFunction &fn) { if (EnablePrecomputePhysRegs) { // For stress testing, precompute live ranges of all physical register // units, including reserved registers. 
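The loop rewrites here and just below (LiveIntervals, LiveRegUnits) swap manual index loops for the regunits() range added to MCRegisterInfo above, which per that header is nothing more than a seq over [0, getNumRegUnits()). The same idiom in isolation (NumRegUnits is a made-up stand-in):

#include "llvm/ADT/Sequence.h"
#include <cstdio>

int main() {
  unsigned NumRegUnits = 4; // stand-in for TRI->getNumRegUnits()
  // Equivalent to: for (unsigned U = 0, E = NumRegUnits; U != E; ++U)
  for (unsigned Unit : llvm::seq(0u, NumRegUnits))
    std::printf("regunit %u\n", Unit);
  return 0;
}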
- for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i) - getRegUnit(i); + for (MCRegUnit Unit : TRI->regunits()) + getRegUnit(Unit); } } diff --git a/llvm/lib/CodeGen/LiveRegUnits.cpp b/llvm/lib/CodeGen/LiveRegUnits.cpp index 0d87062169585..3e7052a9b6245 100644 --- a/llvm/lib/CodeGen/LiveRegUnits.cpp +++ b/llvm/lib/CodeGen/LiveRegUnits.cpp @@ -20,7 +20,7 @@ using namespace llvm; void LiveRegUnits::removeRegsNotPreserved(const uint32_t *RegMask) { - for (unsigned U = 0, E = TRI->getNumRegUnits(); U != E; ++U) { + for (MCRegUnit U : TRI->regunits()) { for (MCRegUnitRootIterator RootReg(U, TRI); RootReg.isValid(); ++RootReg) { if (MachineOperand::clobbersPhysReg(RegMask, *RootReg)) { Units.reset(U); @@ -31,7 +31,7 @@ void LiveRegUnits::removeRegsNotPreserved(const uint32_t *RegMask) { } void LiveRegUnits::addRegsInMask(const uint32_t *RegMask) { - for (unsigned U = 0, E = TRI->getNumRegUnits(); U != E; ++U) { + for (MCRegUnit U : TRI->regunits()) { for (MCRegUnitRootIterator RootReg(U, TRI); RootReg.isValid(); ++RootReg) { if (MachineOperand::clobbersPhysReg(RegMask, *RootReg)) { Units.set(U); diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp index 54e2a009b464d..205c79e71854f 100644 --- a/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/llvm/lib/CodeGen/MachineCombiner.cpp @@ -482,9 +482,8 @@ insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI, SmallVectorImpl &InsInstrs, SmallVectorImpl &DelInstrs, MachineTraceMetrics::Ensemble *TraceEnsemble, - SparseSet &RegUnits, - const TargetInstrInfo *TII, unsigned Pattern, - bool IncrementalUpdate) { + LiveRegUnitSet &RegUnits, const TargetInstrInfo *TII, + unsigned Pattern, bool IncrementalUpdate) { // If we want to fix up some placeholder for some target, do it now. // We need this because in genAlternativeCodeSequence, we have not decided the // better pattern InsInstrs or DelInstrs, so we don't want generate some @@ -565,7 +564,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { if (!TraceEnsemble) TraceEnsemble = Traces->getEnsemble(TII->getMachineCombinerTraceStrategy()); - SparseSet RegUnits; + LiveRegUnitSet RegUnits; RegUnits.setUniverse(TRI->getNumRegUnits()); bool OptForSize = llvm::shouldOptimizeForSize(MBB, PSI, MBFI); diff --git a/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/llvm/lib/CodeGen/MachineTraceMetrics.cpp index c40bd1c83f34a..0312a8e33d669 100644 --- a/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -737,7 +737,7 @@ static void getPHIDeps(const MachineInstr &UseMI, // tracking set when scanning instructions downwards. 
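The MachineCombiner and MachineTraceMetrics hunks around here replace the spelled-out sparse-set-of-live-reg-units type with a single `LiveRegUnitSet` alias; `updatePhysDepsDownwards`, whose header comment closes above, is the first signature converted below. A toy illustration of the alias pattern, with `std::set` standing in for LLVM's SparseSet:

```cpp
// Toy illustration: one named alias instead of re-spelling a container
// type in every signature. std::set stands in for llvm::SparseSet here.
#include <set>

struct LiveRegUnit {
  unsigned RegUnit; // in LLVM this also carries MI/Op/Cycle fields
  bool operator<(const LiveRegUnit &O) const { return RegUnit < O.RegUnit; }
};

using LiveRegUnitSet = std::set<LiveRegUnit>;

// Call sites stay short and consistent.
static bool isLive(const LiveRegUnitSet &S, unsigned Unit) {
  return S.count(LiveRegUnit{Unit}) != 0;
}

int main() {
  LiveRegUnitSet S{{1}, {7}};
  return isLive(S, 7) ? 0 : 1;
}
```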
static void updatePhysDepsDownwards(const MachineInstr *UseMI, SmallVectorImpl &Deps, - SparseSet &RegUnits, + LiveRegUnitSet &RegUnits, const TargetRegisterInfo *TRI) { SmallVector Kills; SmallVector LiveDefOps; @@ -758,7 +758,7 @@ static void updatePhysDepsDownwards(const MachineInstr *UseMI, if (!MO.readsReg()) continue; for (MCRegUnit Unit : TRI->regunits(Reg)) { - SparseSet::iterator I = RegUnits.find(Unit); + LiveRegUnitSet::iterator I = RegUnits.find(Unit); if (I == RegUnits.end()) continue; Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo())); @@ -813,9 +813,9 @@ computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) { return MaxLen; } -void MachineTraceMetrics::Ensemble:: -updateDepth(MachineTraceMetrics::TraceBlockInfo &TBI, const MachineInstr &UseMI, - SparseSet &RegUnits) { +void MachineTraceMetrics::Ensemble::updateDepth(TraceBlockInfo &TBI, + const MachineInstr &UseMI, + LiveRegUnitSet &RegUnits) { SmallVector Deps; // Collect all data dependencies. if (UseMI.isPHI()) @@ -852,18 +852,17 @@ updateDepth(MachineTraceMetrics::TraceBlockInfo &TBI, const MachineInstr &UseMI, } } -void MachineTraceMetrics::Ensemble:: -updateDepth(const MachineBasicBlock *MBB, const MachineInstr &UseMI, - SparseSet &RegUnits) { +void MachineTraceMetrics::Ensemble::updateDepth(const MachineBasicBlock *MBB, + const MachineInstr &UseMI, + LiveRegUnitSet &RegUnits) { updateDepth(BlockInfo[MBB->getNumber()], UseMI, RegUnits); } -void MachineTraceMetrics::Ensemble:: -updateDepths(MachineBasicBlock::iterator Start, - MachineBasicBlock::iterator End, - SparseSet &RegUnits) { - for (; Start != End; Start++) - updateDepth(Start->getParent(), *Start, RegUnits); +void MachineTraceMetrics::Ensemble::updateDepths( + MachineBasicBlock::iterator Start, MachineBasicBlock::iterator End, + LiveRegUnitSet &RegUnits) { + for (; Start != End; Start++) + updateDepth(Start->getParent(), *Start, RegUnits); } /// Compute instruction depths for all instructions above or in MBB in its @@ -887,7 +886,7 @@ computeInstrDepths(const MachineBasicBlock *MBB) { // in the trace. We should track any live-out physregs that were defined in // the trace. This is quite rare in SSA form, typically created by CSE // hoisting a compare. - SparseSet RegUnits; + LiveRegUnitSet RegUnits; RegUnits.setUniverse(MTM.TRI->getNumRegUnits()); // Go through trace blocks in top-down order, stopping after the center block. @@ -925,7 +924,7 @@ computeInstrDepths(const MachineBasicBlock *MBB) { // Return the issue height of MI after considering any live regunits. // Height is the issue height computed from virtual register dependencies alone. static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height, - SparseSet &RegUnits, + LiveRegUnitSet &RegUnits, const TargetSchedModel &SchedModel, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { @@ -944,7 +943,7 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height, // This is a def of Reg. Remove corresponding entries from RegUnits, and // update MI Height to consider the physreg dependencies. for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg())) { - SparseSet::iterator I = RegUnits.find(Unit); + LiveRegUnitSet::iterator I = RegUnits.find(Unit); if (I == RegUnits.end()) continue; unsigned DepHeight = I->Cycle; @@ -1048,7 +1047,7 @@ computeInstrHeights(const MachineBasicBlock *MBB) { // For physregs, the def isn't known when we see the use. // Instead, keep track of the highest use of each regunit. 
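A standalone sketch of the bookkeeping the comment above describes: scanning a block bottom-up, each use records the tallest consumer per register unit, and a later def reads that height back as its dependency. A plain map stands in for the `LiveRegUnitSet` declared just below:

```cpp
// Toy model of the upward height computation: uses record the highest
// consumer per register unit; a def later picks that height up.
#include <algorithm>
#include <cstdio>
#include <unordered_map>

int main() {
  std::unordered_map<unsigned, unsigned> HighestUse; // unit -> height

  auto RecordUse = [&](unsigned Unit, unsigned Height) {
    auto [It, New] = HighestUse.try_emplace(Unit, Height);
    if (!New)
      It->second = std::max(It->second, Height);
  };
  auto HeightOfDef = [&](unsigned Unit, unsigned OwnHeight) {
    auto It = HighestUse.find(Unit);
    return It == HighestUse.end() ? OwnHeight
                                  : std::max(OwnHeight, It->second);
  };

  RecordUse(/*Unit=*/3, /*Height=*/5);
  RecordUse(3, 9);
  std::printf("def height: %u\n", HeightOfDef(3, 4)); // prints 9
  return 0;
}
```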
- SparseSet RegUnits; + LiveRegUnitSet RegUnits; RegUnits.setUniverse(MTM.TRI->getNumRegUnits()); // If the bottom of the trace was already precomputed, initialize heights diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index 013f52938b65c..a2a66d6128348 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -3564,9 +3564,9 @@ void MachineVerifier::verifyLiveIntervals() { } // Verify all the cached regunit intervals. - for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i) - if (const LiveRange *LR = LiveInts->getCachedRegUnit(i)) - verifyLiveRange(*LR, VirtRegOrUnit(i)); + for (MCRegUnit Unit : TRI->regunits()) + if (const LiveRange *LR = LiveInts->getCachedRegUnit(Unit)) + verifyLiveRange(*LR, VirtRegOrUnit(Unit)); } void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, diff --git a/llvm/lib/CodeGen/RDFRegisters.cpp b/llvm/lib/CodeGen/RDFRegisters.cpp index 1400699a607ff..e4b63a3a40805 100644 --- a/llvm/lib/CodeGen/RDFRegisters.cpp +++ b/llvm/lib/CodeGen/RDFRegisters.cpp @@ -46,7 +46,7 @@ PhysicalRegisterInfo::PhysicalRegisterInfo(const TargetRegisterInfo &tri, UnitInfos.resize(TRI.getNumRegUnits()); - for (uint32_t U = 0, NU = TRI.getNumRegUnits(); U != NU; ++U) { + for (MCRegUnit U : TRI.regunits()) { if (UnitInfos[U].Reg != 0) continue; MCRegUnitRootIterator R(U, &TRI); @@ -88,7 +88,7 @@ PhysicalRegisterInfo::PhysicalRegisterInfo(const TargetRegisterInfo &tri, } AliasInfos.resize(TRI.getNumRegUnits()); - for (uint32_t U = 0, NU = TRI.getNumRegUnits(); U != NU; ++U) { + for (MCRegUnit U : TRI.regunits()) { BitVector AS(TRI.getNumRegs()); for (MCRegUnitRootIterator R(U, &TRI); R.isValid(); ++R) for (MCPhysReg S : TRI.superregs_inclusive(*R)) diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index 3b72bd2f0cd91..7f37868f3043b 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -1292,8 +1292,7 @@ bool RegAllocFastImpl::setPhysReg(MachineInstr &MI, MachineOperand &MO, #ifndef NDEBUG void RegAllocFastImpl::dumpState() const { - for (unsigned Unit = 1, UnitE = TRI->getNumRegUnits(); Unit != UnitE; - ++Unit) { + for (MCRegUnit Unit : TRI->regunits()) { switch (unsigned VirtReg = RegUnitStates[Unit]) { case regFree: break; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index e7d4c4b88191b..5bed32db528d6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -2783,7 +2783,7 @@ void SelectionDAGISel::UpdateChains( /// be used as the input node chain for the generated nodes. static SDValue HandleMergeInputChains(const SmallVectorImpl &ChainNodesMatched, - SelectionDAG *CurDAG) { + SDValue InputGlue, SelectionDAG *CurDAG) { SmallPtrSet Visited; SmallVector Worklist; @@ -2826,8 +2826,16 @@ HandleMergeInputChains(const SmallVectorImpl &ChainNodesMatched, // node that is both the predecessor and successor of the // to-be-merged nodes. Fail. Visited.clear(); - for (SDValue V : InputChains) + for (SDValue V : InputChains) { + // If we need to create a TokenFactor, and any of the input chain nodes will + // also be glued to the output, we cannot merge the chains. The TokenFactor + // would prevent the glue from being honored. 
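The guard implementing the comment above opens the loop body just below. As a self-contained illustration with a toy node type (not SelectionDAG's real classes): merging more than one input chain requires a TokenFactor, and if one of those chain nodes is also the glue producer, the TokenFactor would sit between the glued pair, so the merge has to be rejected:

```cpp
// Toy model (not SelectionDAG) of the bail-out described above: a
// TokenFactor created to merge several chains must not be interposed
// between a glue-producing node and its glued consumer.
#include <cstdio>
#include <vector>

struct Node { int Id; };

bool canMergeChains(const std::vector<Node> &InputChains,
                    const Node *InputGlue) {
  if (InputChains.size() == 1)
    return true; // no TokenFactor needed; glue adjacency is preserved
  for (const Node &N : InputChains)
    if (InputGlue && N.Id == InputGlue->Id)
      return false; // TokenFactor would separate the glued pair
  return true;
}

int main() {
  Node A{0}, B{1};
  std::vector<Node> Chains{A, B};
  std::printf("%s\n", canMergeChains(Chains, &B) ? "merge" : "bail");
  return 0;
}
```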
+ if (InputChains.size() != 1 && + V->getValueType(V->getNumValues() - 1) == MVT::Glue && + InputGlue.getNode() == V.getNode()) + return SDValue(); Worklist.push_back(V.getNode()); + } for (auto *N : ChainNodesMatched) if (SDNode::hasPredecessorHelper(N, Visited, Worklist, Max, true)) @@ -3989,7 +3997,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, } // Merge the input chains if they are not intra-pattern references. - InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); + InputChain = HandleMergeInputChains(ChainNodesMatched, InputGlue, CurDAG); if (!InputChain.getNode()) break; // Failed to merge. @@ -4033,7 +4041,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, break; // Merge the input chains if they are not intra-pattern references. - InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); + InputChain = HandleMergeInputChains(ChainNodesMatched, InputGlue, CurDAG); if (!InputChain.getNode()) break; // Failed to merge. diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 7b51f453b4974..13339818b0b68 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -445,6 +445,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass); addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass); + // Add sve predicate as counter type + addRegisterClass(MVT::aarch64svcount, &AArch64::PPRRegClass); + // Add legal sve data types addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass); addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass); @@ -473,15 +476,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, } } - if (Subtarget->hasSVE2p1() || Subtarget->hasSME2()) { - addRegisterClass(MVT::aarch64svcount, &AArch64::PPRRegClass); - setOperationPromotedToType(ISD::LOAD, MVT::aarch64svcount, MVT::nxv16i1); - setOperationPromotedToType(ISD::STORE, MVT::aarch64svcount, MVT::nxv16i1); - - setOperationAction(ISD::SELECT, MVT::aarch64svcount, Custom); - setOperationAction(ISD::SELECT_CC, MVT::aarch64svcount, Expand); - } - // Compute derived properties from the register classes computeRegisterProperties(Subtarget->getRegisterInfo()); @@ -1609,6 +1603,14 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 }) setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal); + // Promote predicate as counter load/stores to standard predicates. + setOperationPromotedToType(ISD::LOAD, MVT::aarch64svcount, MVT::nxv16i1); + setOperationPromotedToType(ISD::STORE, MVT::aarch64svcount, MVT::nxv16i1); + + // Predicate as counter legalization actions. 
+ setOperationAction(ISD::SELECT, MVT::aarch64svcount, Custom); + setOperationAction(ISD::SELECT_CC, MVT::aarch64svcount, Expand); + for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) { setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 7293b7fdb0d20..2730833ba06d9 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -5923,21 +5923,15 @@ bool AArch64AsmParser::validateInstruction(MCInst &Inst, SMLoc &IDLoc, case AArch64::CPYETWN: case AArch64::CPYETRN: case AArch64::CPYETN: { - MCRegister Xd_wb = Inst.getOperand(0).getReg(); - MCRegister Xs_wb = Inst.getOperand(1).getReg(); - MCRegister Xn_wb = Inst.getOperand(2).getReg(); + // Xd_wb == op0, Xs_wb == op1, Xn_wb == op2 MCRegister Xd = Inst.getOperand(3).getReg(); MCRegister Xs = Inst.getOperand(4).getReg(); MCRegister Xn = Inst.getOperand(5).getReg(); - if (Xd_wb != Xd) - return Error(Loc[0], - "invalid CPY instruction, Xd_wb and Xd do not match"); - if (Xs_wb != Xs) - return Error(Loc[0], - "invalid CPY instruction, Xs_wb and Xs do not match"); - if (Xn_wb != Xn) - return Error(Loc[0], - "invalid CPY instruction, Xn_wb and Xn do not match"); + + assert(Xd == Inst.getOperand(0).getReg() && "Xd_wb and Xd do not match"); + assert(Xs == Inst.getOperand(1).getReg() && "Xs_wb and Xs do not match"); + assert(Xn == Inst.getOperand(2).getReg() && "Xn_wb and Xn do not match"); + if (Xd == Xs) return Error(Loc[0], "invalid CPY instruction, destination and source" " registers are the same"); @@ -5973,17 +5967,14 @@ bool AArch64AsmParser::validateInstruction(MCInst &Inst, SMLoc &IDLoc, case AArch64::MOPSSETGET: case AArch64::MOPSSETGEN: case AArch64::MOPSSETGETN: { - MCRegister Xd_wb = Inst.getOperand(0).getReg(); - MCRegister Xn_wb = Inst.getOperand(1).getReg(); + // Xd_wb == op0, Xn_wb == op1 MCRegister Xd = Inst.getOperand(2).getReg(); MCRegister Xn = Inst.getOperand(3).getReg(); MCRegister Xm = Inst.getOperand(4).getReg(); - if (Xd_wb != Xd) - return Error(Loc[0], - "invalid SET instruction, Xd_wb and Xd do not match"); - if (Xn_wb != Xn) - return Error(Loc[0], - "invalid SET instruction, Xn_wb and Xn do not match"); + + assert(Xd == Inst.getOperand(0).getReg() && "Xd_wb and Xd do not match"); + assert(Xn == Inst.getOperand(1).getReg() && "Xn_wb and Xn do not match"); + if (Xd == Xn) return Error(Loc[0], "invalid SET instruction, destination and size" " registers are the same"); @@ -6007,16 +5998,13 @@ bool AArch64AsmParser::validateInstruction(MCInst &Inst, SMLoc &IDLoc, case AArch64::SETGOET: case AArch64::SETGOEN: case AArch64::SETGOETN: { - MCRegister Xd_wb = Inst.getOperand(0).getReg(); - MCRegister Xn_wb = Inst.getOperand(1).getReg(); + // Xd_wb == op0, Xn_wb == op1 MCRegister Xd = Inst.getOperand(2).getReg(); MCRegister Xn = Inst.getOperand(3).getReg(); - if (Xd_wb != Xd) - return Error(Loc[0], - "invalid SET instruction, Xd_wb and Xd do not match"); - if (Xn_wb != Xn) - return Error(Loc[0], - "invalid SET instruction, Xn_wb and Xn do not match"); + + assert(Xd == Inst.getOperand(0).getReg() && "Xd_wb and Xd do not match"); + assert(Xn == Inst.getOperand(1).getReg() && "Xn_wb and Xn do not match"); + if (Xd == Xn) return Error(Loc[0], "invalid SET instruction, destination and size" " registers are the same"); diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp 
b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 394024693194c..64db3765c433f 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -310,6 +310,8 @@ class AArch64InstructionSelector : public InstructionSelector { MachineIRBuilder &MIRBuilder) const; MachineInstr *emitSBCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; + MachineInstr *emitCMP(MachineOperand &LHS, MachineOperand &RHS, + MachineIRBuilder &MIRBuilder) const; MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS, @@ -4412,6 +4414,15 @@ AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS, return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder); } +MachineInstr * +AArch64InstructionSelector::emitCMP(MachineOperand &LHS, MachineOperand &RHS, + MachineIRBuilder &MIRBuilder) const { + MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); + bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32; + auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass; + return emitSUBS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder); +} + MachineInstr * AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const { @@ -4464,8 +4475,7 @@ MachineInstr *AArch64InstructionSelector::emitIntegerCompare( // Fold the compare into a cmn or tst if possible. if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder)) return FoldCmp; - auto Dst = MRI.cloneVirtualRegister(LHS.getReg()); - return emitSUBS(Dst, LHS, RHS, MIRBuilder); + return emitCMP(LHS, RHS, MIRBuilder); } MachineInstr *AArch64InstructionSelector::emitCSetForFCmp( @@ -4870,9 +4880,8 @@ MachineInstr *AArch64InstructionSelector::emitConjunctionRec( // Produce a normal comparison if we are first in the chain if (!CCOp) { - auto Dst = MRI.cloneVirtualRegister(LHS); if (isa(Cmp)) - return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB); + return emitCMP(Cmp->getOperand(2), Cmp->getOperand(3), MIB); return emitFPCompare(Cmp->getOperand(2).getReg(), Cmp->getOperand(3).getReg(), MIB); } diff --git a/llvm/lib/Target/BPF/BPF.td b/llvm/lib/Target/BPF/BPF.td index dff76ca07af51..a7aa6274f5ac1 100644 --- a/llvm/lib/Target/BPF/BPF.td +++ b/llvm/lib/Target/BPF/BPF.td @@ -27,6 +27,10 @@ def ALU32 : SubtargetFeature<"alu32", "HasAlu32", "true", def DwarfRIS: SubtargetFeature<"dwarfris", "UseDwarfRIS", "true", "Disable MCAsmInfo DwarfUsesRelocationsAcrossSections">; +def MisalignedMemAccess : SubtargetFeature<"allows-misaligned-mem-access", + "AllowsMisalignedMemAccess", "true", + "Allows misaligned memory access">; + def : Proc<"generic", []>; def : Proc<"v1", []>; def : Proc<"v2", []>; diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp index 3c61216cd9327..ecefd2379356a 100644 --- a/llvm/lib/Target/BPF/BPFISelLowering.cpp +++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp @@ -206,6 +206,26 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM, HasJmp32 = STI.getHasJmp32(); HasJmpExt = STI.getHasJmpExt(); HasMovsx = STI.hasMovsx(); + + AllowsMisalignedMemAccess = STI.getAllowsMisalignedMemAccess(); +} + +bool BPFTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align, + MachineMemOperand::Flags, + unsigned *Fast) const { + // 
allows-misaligned-mem-access is disabled + if (!AllowsMisalignedMemAccess) + return false; + + // only allow misalignment for simple value types + if (!VT.isSimple()) + return false; + + // always assume fast mode when misalignment is allowed + if (Fast) + *Fast = true; + + return true; } bool BPFTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h index 3d6e7c70df28b..8607e4f8c9e69 100644 --- a/llvm/lib/Target/BPF/BPFISelLowering.h +++ b/llvm/lib/Target/BPF/BPFISelLowering.h @@ -32,6 +32,10 @@ class BPFTargetLowering : public TargetLowering { // with the given GlobalAddress is legal. bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; + bool allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align, + MachineMemOperand::Flags, + unsigned *) const override; + BPFTargetLowering::ConstraintType getConstraintType(StringRef Constraint) const override; @@ -61,6 +65,9 @@ class BPFTargetLowering : public TargetLowering { bool HasJmpExt; bool HasMovsx; + // whether misaligned memory accesses are allowed + bool AllowsMisalignedMemAccess; + SDValue LowerSDIVSREM(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/BPF/BPFSubtarget.cpp b/llvm/lib/Target/BPF/BPFSubtarget.cpp index 8f16fe5bfdb51..726f8f4b39827 100644 --- a/llvm/lib/Target/BPF/BPFSubtarget.cpp +++ b/llvm/lib/Target/BPF/BPFSubtarget.cpp @@ -69,6 +69,7 @@ void BPFSubtarget::initializeEnvironment() { HasStoreImm = false; HasLoadAcqStoreRel = false; HasGotox = false; + AllowsMisalignedMemAccess = false; } void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { diff --git a/llvm/lib/Target/BPF/BPFSubtarget.h b/llvm/lib/Target/BPF/BPFSubtarget.h index e870dfdc85ec9..24eff862224b0 100644 --- a/llvm/lib/Target/BPF/BPFSubtarget.h +++ b/llvm/lib/Target/BPF/BPFSubtarget.h @@ -63,6 +63,9 @@ class BPFSubtarget : public BPFGenSubtargetInfo { // whether we should enable MCAsmInfo DwarfUsesRelocationsAcrossSections bool UseDwarfRIS; + // whether we allow misaligned memory access + bool AllowsMisalignedMemAccess; + // whether cpu v4 insns are enabled.
bool HasLdsx, HasMovsx, HasBswap, HasSdivSmod, HasGotol, HasStoreImm, HasLoadAcqStoreRel, HasGotox; @@ -87,6 +90,9 @@ class BPFSubtarget : public BPFGenSubtargetInfo { bool getHasJmp32() const { return HasJmp32; } bool getHasAlu32() const { return HasAlu32; } bool getUseDwarfRIS() const { return UseDwarfRIS; } + bool getAllowsMisalignedMemAccess() const { + return AllowsMisalignedMemAccess; + } bool hasLdsx() const { return HasLdsx; } bool hasMovsx() const { return HasMovsx; } bool hasBswap() const { return HasBswap; } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 28fe76bb35b0c..38cce26e44af4 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1264,11 +1264,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, Custom); setOperationAction(ISD::SELECT_CC, VT, Expand); setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom); - setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::CONCAT_VECTORS, - ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR, - ISD::VECTOR_DEINTERLEAVE, ISD::VECTOR_INTERLEAVE, - ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE, - ISD::VECTOR_COMPRESS}, + setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT, + ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, + ISD::EXTRACT_SUBVECTOR, ISD::VECTOR_DEINTERLEAVE, + ISD::VECTOR_INTERLEAVE, ISD::VECTOR_REVERSE, + ISD::VECTOR_SPLICE, ISD::VECTOR_COMPRESS}, VT, Custom); setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom); setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom); @@ -1278,9 +1278,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, MVT EltVT = VT.getVectorElementType(); if (isTypeLegal(EltVT)) - setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT, - ISD::EXTRACT_VECTOR_ELT}, - VT, Custom); + setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT}, VT, + Custom); else setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT}, EltVT, Custom); @@ -10356,7 +10355,7 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, } if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) || - ValVT == MVT::bf16) { + (ValVT == MVT::bf16 && !Subtarget.hasVInstructionsBF16())) { // If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first. MVT IntVT = VecVT.changeTypeToInteger(); SDValue IntInsert = DAG.getNode( @@ -10593,7 +10592,7 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, } if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) || - EltVT == MVT::bf16) { + (EltVT == MVT::bf16 && !Subtarget.hasVInstructionsBF16())) { // If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x MVT IntVT = VecVT.changeTypeToInteger(); SDValue IntVec = DAG.getBitcast(IntVT, Vec); @@ -16792,22 +16791,35 @@ static SDValue expandMulToNAFSequence(SDNode *N, SelectionDAG &DAG, static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG, uint64_t MulAmt) { uint64_t MulAmtLowBit = MulAmt & (-MulAmt); + SDValue X = N->getOperand(0); ISD::NodeType Op; uint64_t ShiftAmt1; - if (isPowerOf2_64(MulAmt + MulAmtLowBit)) { - Op = ISD::SUB; - ShiftAmt1 = MulAmt + MulAmtLowBit; - } else if (isPowerOf2_64(MulAmt - MulAmtLowBit)) { + bool CanSub = isPowerOf2_64(MulAmt + MulAmtLowBit); + auto PreferSub = [X, MulAmtLowBit]() { + // For MulAmt == 3 << M both (X << M + 2) - (X << M) + // and (X << M + 1) + (X << M) are valid expansions. 
+ // Prefer SUB if we can get (X << M + 2) for free, + // because X is an exact right shift (Y >> M + 2), so the shl folds back to Y. + uint64_t ShAmt = Log2_64(MulAmtLowBit) + 2; + using namespace SDPatternMatch; + return sd_match(X, m_AnyOf(m_Sra(m_Value(), m_SpecificInt(ShAmt)), + m_Srl(m_Value(), m_SpecificInt(ShAmt)))) && + X->getFlags().hasExact(); + }; + if (isPowerOf2_64(MulAmt - MulAmtLowBit) && !(CanSub && PreferSub())) { Op = ISD::ADD; ShiftAmt1 = MulAmt - MulAmtLowBit; + } else if (CanSub) { + Op = ISD::SUB; + ShiftAmt1 = MulAmt + MulAmtLowBit; } else { return SDValue(); } EVT VT = N->getValueType(0); SDLoc DL(N); - SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(Log2_64(ShiftAmt1), DL, VT)); - SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT)); return DAG.getNode(Op, DL, VT, Shift1, Shift2); } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 835b0995cc4fc..f4629d22002dc 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4018,6 +4018,9 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks( .Case([](const VPInterleaveRecipe *R) { return R->getStoredValues().empty() ? Instruction::Load : Instruction::Store; + }) + .Case([](const auto *R) { + return RecurrenceDescriptor::getOpcode(R->getRecurrenceKind()); }); // If the next recipe is different, or if there are no other pairs, diff --git a/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll b/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll index aee705f0be9b9..ecbf0bdb06d15 100644 --- a/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll +++ b/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -O0 -mtriple=aarch64 -mattr=+sve2p1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-O0 -; RUN: llc -O3 -mtriple=aarch64 -mattr=+sve2p1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-O3 +; RUN: llc -O0 -mtriple=aarch64 -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-O0 +; RUN: llc -O3 -mtriple=aarch64 -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-O3 ; ; Test simple loads, stores and return. diff --git a/llvm/test/CodeGen/BPF/unaligned_load_store.ll b/llvm/test/CodeGen/BPF/unaligned_load_store.ll new file mode 100644 index 0000000000000..b302a80d6cd4f --- /dev/null +++ b/llvm/test/CodeGen/BPF/unaligned_load_store.ll @@ -0,0 +1,196 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 + +; RUN: llc -mtriple=bpfel -mattr=+allows-misaligned-mem-access -verify-machineinstrs %s -o - \ +; RUN: | FileCheck --check-prefixes=ALL,MISALIGN %s +; RUN: llc -mtriple=bpfeb -mattr=+allows-misaligned-mem-access -verify-machineinstrs %s -o - \ +; RUN: | FileCheck --check-prefixes=ALL,MISALIGN %s + +; RUN: llc -mtriple=bpfel -verify-machineinstrs %s -o - \ +; RUN: | FileCheck --check-prefixes=ALL,ALIGN %s +; RUN: llc -mtriple=bpfeb -verify-machineinstrs %s -o - \ +; RUN: | FileCheck --check-prefixes=ALL,ALIGN %s +; NOTE: +; This test verifies that the new +allows-misaligned-mem-access +; feature allows the BPF backend to emit direct unaligned load/store +; instructions instead of byte-by-byte emulation sequences.
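For context on the hook this new test exercises: `allowsMisalignedMemoryAccesses` is the TargetLowering query that legalization consults before splitting an unaligned load or store into byte operations. A simplified, self-contained sketch of the BPF gating logic added earlier in this patch (toy types, not the real LLVM interfaces; the actual signature lives in BPFISelLowering.h above):

```cpp
// Standalone sketch: misaligned accesses are only reported as supported,
// and as fast, when the subtarget feature bit enables them.
#include <cstdio>

struct EVTLike { bool Simple; bool isSimple() const { return Simple; } };

struct BPFLoweringSketch {
  bool AllowsMisalignedMemAccess; // set from the subtarget feature bit

  bool allowsMisalignedMemoryAccesses(EVTLike VT, unsigned *Fast) const {
    if (!AllowsMisalignedMemAccess)
      return false;   // feature off: the legalizer splits the access
    if (!VT.isSimple())
      return false;   // only simple value types are handled
    if (Fast)
      *Fast = 1;      // treat allowed misalignment as fast
    return true;
  }
};

int main() {
  BPFLoweringSketch TL{true};
  unsigned Fast = 0;
  bool OK = TL.allowsMisalignedMemoryAccesses({true}, &Fast);
  std::printf("allowed=%d fast=%u\n", OK, Fast);
  return 0;
}
```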
+ +; --------------------------------------------------------------------- +; i8 load +; --------------------------------------------------------------------- +define i8 @test_load_i8(i8* %p) { +; ALL-LABEL: test_load_i8: +; ALL: # %bb.0: +; ALL-NEXT: w{{[0-9]+}} = *(u8 *)(r1 + 0) +; ALL-NEXT: exit + %v = load i8, i8* %p, align 1 + ret i8 %v +} + +; --------------------------------------------------------------------- +; i8 store +; --------------------------------------------------------------------- +define void @test_store_i8(i8* %p, i8 %v) { +; ALL-LABEL: test_store_i8: +; ALL: # %bb.0: +; ALL-NEXT: *(u8 *)(r1 + 0) = w{{[0-9]+}} +; ALL-NEXT: exit + store i8 %v, i8* %p, align 1 + ret void +} + +; --------------------------------------------------------------------- +; i16 load +; --------------------------------------------------------------------- +define i16 @test_load_i16(i16* %p) { +; MISALIGN-LABEL: test_load_i16: +; MISALIGN: # %bb.0: +; MISALIGN: w{{[0-9]+}} = *(u16 *)(r1 + 0) +; MISALIGN: exit +; +; ALIGN-LABEL: test_load_i16: +; ALIGN: # %bb.0: +; ALIGN-DAG: w{{[0-9]+}} = *(u8 *)(r1 + 0) +; ALIGN-DAG: w{{[0-9]+}} = *(u8 *)(r1 + 1) +; ALIGN-DAG: w{{[0-9]+}} <<= 8 +; ALIGN-DAG: w{{[0-9]+}} |= w{{[0-9]+}} +; ALIGN: exit + %v = load i16, i16* %p, align 1 + ret i16 %v +} + +; --------------------------------------------------------------------- +; i16 store +; --------------------------------------------------------------------- +define void @test_store_i16(i16* %p, i16 %v) { +; MISALIGN-LABEL: test_store_i16: +; MISALIGN: # %bb.0: +; MISALIGN: *(u16 *)(r1 + 0) = w{{[0-9]+}} +; MISALIGN: exit +; +; ALIGN-LABEL: test_store_i16: +; ALIGN: # %bb.0: +; ALIGN-DAG: *(u8 *)(r1 + 0) = w{{[0-9]+}} +; ALIGN-DAG: w{{[0-9]+}} >>= 8 +; ALIGN-DAG: *(u8 *)(r1 + 1) = w{{[0-9]+}} +; ALIGN: exit + store i16 %v, i16* %p, align 1 + ret void +} + +; --------------------------------------------------------------------- +; i32 load +; --------------------------------------------------------------------- + +define i32 @test_load_i32(i32* %p) { +; MISALIGN-LABEL: test_load_i32: +; MISALIGN: # %bb.0: +; MISALIGN: w{{[0-9]+}} = *(u32 *)(r1 + 0) +; MISALIGN: exit +; +; ALIGN-LABEL: test_load_i32: +; ALIGN: # %bb.0: +; ALIGN-DAG: w{{[0-9]+}} = *(u8 *)(r1 + 0) +; ALIGN-DAG: w{{[0-9]+}} <<= 8 +; ALIGN-DAG: w{{[0-9]+}} = *(u8 *)(r1 + 1) +; ALIGN-DAG: w{{[0-9]+}} |= w{{[0-9]+}} +; ALIGN-DAG: w{{[0-9]+}} = *(u8 *)(r1 + 2) +; ALIGN-DAG: w{{[0-9]+}} <<= 16 +; ALIGN-DAG: w{{[0-9]+}} = *(u8 *)(r1 + 3) +; ALIGN-DAG: w{{[0-9]+}} <<= 24 +; ALIGN: exit + %v = load i32, i32* %p, align 1 + ret i32 %v +} + +; --------------------------------------------------------------------- +; i32 store +; --------------------------------------------------------------------- + +define void @test_store_i32(i32* %p, i32 %v) { +; MISALIGN-LABEL: test_store_i32: +; MISALIGN: # %bb.0: +; MISALIGN: *(u32 *)(r1 + 0) = w{{[0-9]+}} +; MISALIGN: exit +; +; ALIGN-LABEL: test_store_i32: +; ALIGN: # %bb.0: +; ALIGN-DAG: w{{[0-9]+}} = w{{[0-9]+}} +; ALIGN-DAG: w{{[0-9]+}} >>= 24 +; ALIGN-DAG: *(u8 *)(r1 + 0) = w{{[0-9]+}} +; ALIGN-DAG: w{{[0-9]+}} = w{{[0-9]+}} +; ALIGN-DAG: w{{[0-9]+}} >>= 16 +; ALIGN-DAG: *(u8 *)(r1 + 1) = w{{[0-9]+}} +; ALIGN-DAG: *(u8 *)(r1 + 2) = w{{[0-9]+}} +; ALIGN-DAG: w{{[0-9]+}} >>= 8 +; ALIGN-DAG: *(u8 *)(r1 + 3) = w{{[0-9]+}} +; ALIGN: exit + store i32 %v, i32* %p, align 1 + ret void +} + +; --------------------------------------------------------------------- +; i64 load +; 
--------------------------------------------------------------------- + +define i64 @test_load_i64(i64* %p) { +; MISALIGN-LABEL: test_load_i64: +; MISALIGN: # %bb.0: +; MISALIGN: r0 = *(u64 *)(r1 + 0) +; MISALIGN: exit +; +; ALIGN-LABEL: test_load_i64: +; ALIGN: # %bb.0: +; ALIGN-DAG: w{{[0-9]+}} = *(u8 *)(r1 + 0) +; ALIGN-DAG: w{{[0-9]+}} = *(u8 *)(r1 + 1) +; ALIGN-DAG: r{{[0-9]+}} <<= 8 +; ALIGN-DAG: r{{[0-9]+}} |= r{{[0-9]+}} +; ALIGN-DAG: w{{[0-9]+}} = *(u8 *)(r1 + 2) +; ALIGN-DAG: r{{[0-9]+}} <<= 16 +; ALIGN-DAG: w{{[0-9]+}} = *(u8 *)(r1 + 3) +; ALIGN-DAG: r{{[0-9]+}} <<= 24 +; ALIGN-DAG: w{{[0-9]+}} = *(u8 *)(r1 + 4) +; ALIGN-DAG: w{{[0-9]+}} <<= 8 +; ALIGN-DAG: w{{[0-9]+}} = *(u8 *)(r1 + 5) +; ALIGN-DAG: w{{[0-9]+}} |= w{{[0-9]+}} +; ALIGN-DAG: w{{[0-9]+}} = *(u8 *)(r1 + 6) +; ALIGN-DAG: w{{[0-9]+}} <<= 16 +; ALIGN-DAG: w{{[0-9]+}} = *(u8 *)(r1 + 7) +; ALIGN-DAG: w{{[0-9]+}} <<= 24 +; ALIGN-DAG: r{{[0-9]+}} <<= 32 +; ALIGN: exit + %v = load i64, i64* %p, align 1 + ret i64 %v +} + +; --------------------------------------------------------------------- +; i64 store +; --------------------------------------------------------------------- + +define void @test_store_i64(i64* %p, i64 %v) { +; MISALIGN-LABEL: test_store_i64: +; MISALIGN: # %bb.0: +; MISALIGN: *(u64 *)(r1 + 0) = r2 +; MISALIGN: exit +; +; ALIGN-LABEL: test_store_i64: +; ALIGN: # %bb.0: +; ALIGN-DAG: *(u8 *)(r1 + 0) = w{{[0-9]+}} +; ALIGN-DAG: r{{[0-9]+}} = r{{[0-9]+}} +; ALIGN-DAG: r{{[0-9]+}} >>= 56 +; ALIGN-DAG: *(u8 *)(r1 + 1) = w{{[0-9]+}} +; ALIGN-DAG: r{{[0-9]+}} >>= 48 +; ALIGN-DAG: *(u8 *)(r1 + 2) = w{{[0-9]+}} +; ALIGN-DAG: r{{[0-9]+}} >>= 40 +; ALIGN-DAG: *(u8 *)(r1 + 3) = w{{[0-9]+}} +; ALIGN-DAG: r{{[0-9]+}} >>= 32 +; ALIGN-DAG: *(u8 *)(r1 + 4) = w{{[0-9]+}} +; ALIGN-DAG: r{{[0-9]+}} >>= 24 +; ALIGN-DAG: *(u8 *)(r1 + 5) = w{{[0-9]+}} +; ALIGN-DAG: r{{[0-9]+}} >>= 16 +; ALIGN-DAG: *(u8 *)(r1 + 6) = w{{[0-9]+}} +; ALIGN-DAG: r{{[0-9]+}} >>= 8 +; ALIGN-DAG: *(u8 *)(r1 + 7) = w{{[0-9]+}} +; ALIGN: exit + store i64 %v, i64* %p, align 1 + ret void +} diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll index 4c9a98cabb15f..4533e14c672e7 100644 --- a/llvm/test/CodeGen/RISCV/mul.ll +++ b/llvm/test/CodeGen/RISCV/mul.ll @@ -1185,29 +1185,29 @@ define i32 @muli32_p384(i32 %a) nounwind { ; RV32I-LABEL: muli32_p384: ; RV32I: # %bb.0: ; RV32I-NEXT: slli a1, a0, 7 -; RV32I-NEXT: slli a0, a0, 9 -; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli32_p384: ; RV32IM: # %bb.0: ; RV32IM-NEXT: slli a1, a0, 7 -; RV32IM-NEXT: slli a0, a0, 9 -; RV32IM-NEXT: sub a0, a0, a1 +; RV32IM-NEXT: slli a0, a0, 8 +; RV32IM-NEXT: add a0, a0, a1 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: muli32_p384: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a1, a0, 7 -; RV64I-NEXT: slli a0, a0, 9 -; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: muli32_p384: ; RV64IM: # %bb.0: ; RV64IM-NEXT: slli a1, a0, 7 -; RV64IM-NEXT: slli a0, a0, 9 -; RV64IM-NEXT: subw a0, a0, a1 +; RV64IM-NEXT: slli a0, a0, 8 +; RV64IM-NEXT: addw a0, a0, a1 ; RV64IM-NEXT: ret %1 = mul i32 %a, 384 ret i32 %1 @@ -1217,29 +1217,29 @@ define i32 @muli32_p12288(i32 %a) nounwind { ; RV32I-LABEL: muli32_p12288: ; RV32I: # %bb.0: ; RV32I-NEXT: slli a1, a0, 12 -; RV32I-NEXT: slli a0, a0, 14 -; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: slli a0, a0, 13 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli32_p12288: ; 
RV32IM: # %bb.0: ; RV32IM-NEXT: slli a1, a0, 12 -; RV32IM-NEXT: slli a0, a0, 14 -; RV32IM-NEXT: sub a0, a0, a1 +; RV32IM-NEXT: slli a0, a0, 13 +; RV32IM-NEXT: add a0, a0, a1 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: muli32_p12288: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a1, a0, 12 -; RV64I-NEXT: slli a0, a0, 14 -; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: slli a0, a0, 13 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: muli32_p12288: ; RV64IM: # %bb.0: ; RV64IM-NEXT: slli a1, a0, 12 -; RV64IM-NEXT: slli a0, a0, 14 -; RV64IM-NEXT: subw a0, a0, a1 +; RV64IM-NEXT: slli a0, a0, 13 +; RV64IM-NEXT: addw a0, a0, a1 ; RV64IM-NEXT: ret %1 = mul i32 %a, 12288 ret i32 %1 @@ -2117,14 +2117,14 @@ define i64 @muland_demand(i64 %x) nounwind { ; RV32IM: # %bb.0: ; RV32IM-NEXT: andi a0, a0, -8 ; RV32IM-NEXT: slli a2, a1, 2 -; RV32IM-NEXT: slli a1, a1, 4 -; RV32IM-NEXT: sub a1, a1, a2 +; RV32IM-NEXT: slli a1, a1, 3 +; RV32IM-NEXT: add a1, a1, a2 ; RV32IM-NEXT: li a2, 12 ; RV32IM-NEXT: mulhu a2, a0, a2 ; RV32IM-NEXT: add a1, a2, a1 ; RV32IM-NEXT: slli a2, a0, 2 -; RV32IM-NEXT: slli a0, a0, 4 -; RV32IM-NEXT: sub a0, a0, a2 +; RV32IM-NEXT: slli a0, a0, 3 +; RV32IM-NEXT: add a0, a0, a2 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: muland_demand: @@ -2133,16 +2133,16 @@ define i64 @muland_demand(i64 %x) nounwind { ; RV64I-NEXT: srli a1, a1, 2 ; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: slli a0, a0, 4 -; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: slli a0, a0, 3 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: muland_demand: ; RV64IM: # %bb.0: ; RV64IM-NEXT: andi a0, a0, -8 ; RV64IM-NEXT: slli a1, a0, 2 -; RV64IM-NEXT: slli a0, a0, 4 -; RV64IM-NEXT: sub a0, a0, a1 +; RV64IM-NEXT: slli a0, a0, 3 +; RV64IM-NEXT: add a0, a0, a1 ; RV64IM-NEXT: ret %and = and i64 %x, 4611686018427387896 %mul = mul i64 %and, 12 @@ -2171,15 +2171,15 @@ define i64 @mulzext_demand(i32 signext %x) nounwind { ; RV64I-LABEL: mulzext_demand: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a1, a0, 32 -; RV64I-NEXT: slli a0, a0, 34 -; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: slli a0, a0, 33 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: mulzext_demand: ; RV64IM: # %bb.0: ; RV64IM-NEXT: slli a1, a0, 32 -; RV64IM-NEXT: slli a0, a0, 34 -; RV64IM-NEXT: sub a0, a0, a1 +; RV64IM-NEXT: slli a0, a0, 33 +; RV64IM-NEXT: add a0, a0, a1 ; RV64IM-NEXT: ret %ext = zext i32 %x to i64 %mul = mul i64 %ext, 12884901888 diff --git a/llvm/test/CodeGen/RISCV/pr145360.ll b/llvm/test/CodeGen/RISCV/pr145360.ll index 1c77fadbd4b7d..013bab4ce6292 100644 --- a/llvm/test/CodeGen/RISCV/pr145360.ll +++ b/llvm/test/CodeGen/RISCV/pr145360.ll @@ -27,11 +27,11 @@ define i32 @unsigned(i32 %0, ptr %1) { ; CHECK-NEXT: slli a4, a3, 32 ; CHECK-NEXT: mulhu a2, a2, a4 ; CHECK-NEXT: srli a2, a2, 36 -; CHECK-NEXT: slli a4, a2, 5 -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: sub a2, a2, a4 +; CHECK-NEXT: slli a4, a2, 3 +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, a2, a4 ; CHECK-NEXT: srliw a4, a0, 3 -; CHECK-NEXT: add a2, a0, a2 +; CHECK-NEXT: sub a2, a0, a2 ; CHECK-NEXT: mulw a0, a4, a3 ; CHECK-NEXT: sw a2, 0(a1) ; CHECK-NEXT: ret @@ -68,10 +68,10 @@ define i32 @unsigned_div_first(i32 %0, ptr %1) { ; CHECK-NEXT: slli a3, a3, 32 ; CHECK-NEXT: mulhu a2, a2, a3 ; CHECK-NEXT: srli a2, a2, 36 -; CHECK-NEXT: slli a3, a2, 5 -; CHECK-NEXT: slli a4, a2, 3 -; CHECK-NEXT: sub a4, a4, a3 -; CHECK-NEXT: add a0, a0, a4 +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: slli a4, a2, 4 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: sub a0, a0, a3 ; 
CHECK-NEXT: sw a0, 0(a1) ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rv32xtheadba.ll b/llvm/test/CodeGen/RISCV/rv32xtheadba.ll index 0e4a5c07020ee..fd341da86599f 100644 --- a/llvm/test/CodeGen/RISCV/rv32xtheadba.ll +++ b/llvm/test/CodeGen/RISCV/rv32xtheadba.ll @@ -98,8 +98,8 @@ define i32 @addmul6(i32 %a, i32 %b) { ; RV32I-LABEL: addmul6: ; RV32I: # %bb.0: ; RV32I-NEXT: slli a2, a0, 1 -; RV32I-NEXT: slli a0, a0, 3 -; RV32I-NEXT: sub a0, a0, a2 +; RV32I-NEXT: slli a0, a0, 2 +; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; @@ -136,8 +136,8 @@ define i32 @addmul12(i32 %a, i32 %b) { ; RV32I-LABEL: addmul12: ; RV32I: # %bb.0: ; RV32I-NEXT: slli a2, a0, 2 -; RV32I-NEXT: slli a0, a0, 4 -; RV32I-NEXT: sub a0, a0, a2 +; RV32I-NEXT: slli a0, a0, 3 +; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; @@ -193,8 +193,8 @@ define i32 @addmul24(i32 %a, i32 %b) { ; RV32I-LABEL: addmul24: ; RV32I: # %bb.0: ; RV32I-NEXT: slli a2, a0, 3 -; RV32I-NEXT: slli a0, a0, 5 -; RV32I-NEXT: sub a0, a0, a2 +; RV32I-NEXT: slli a0, a0, 4 +; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; @@ -269,8 +269,8 @@ define i32 @mul96(i32 %a) { ; RV32I-LABEL: mul96: ; RV32I: # %bb.0: ; RV32I-NEXT: slli a1, a0, 5 -; RV32I-NEXT: slli a0, a0, 7 -; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: slli a0, a0, 6 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; ; RV32XTHEADBA-LABEL: mul96: diff --git a/llvm/test/CodeGen/RISCV/rv32zba.ll b/llvm/test/CodeGen/RISCV/rv32zba.ll index a6dbd94caad4f..ea9d117f2e2e3 100644 --- a/llvm/test/CodeGen/RISCV/rv32zba.ll +++ b/llvm/test/CodeGen/RISCV/rv32zba.ll @@ -85,8 +85,8 @@ define i32 @addmul6(i32 %a, i32 %b) { ; RV32I-LABEL: addmul6: ; RV32I: # %bb.0: ; RV32I-NEXT: slli a2, a0, 1 -; RV32I-NEXT: slli a0, a0, 3 -; RV32I-NEXT: sub a0, a0, a2 +; RV32I-NEXT: slli a0, a0, 2 +; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; @@ -135,8 +135,8 @@ define i32 @addmul12(i32 %a, i32 %b) { ; RV32I-LABEL: addmul12: ; RV32I: # %bb.0: ; RV32I-NEXT: slli a2, a0, 2 -; RV32I-NEXT: slli a0, a0, 4 -; RV32I-NEXT: sub a0, a0, a2 +; RV32I-NEXT: slli a0, a0, 3 +; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; @@ -210,8 +210,8 @@ define i32 @addmul24(i32 %a, i32 %b) { ; RV32I-LABEL: addmul24: ; RV32I: # %bb.0: ; RV32I-NEXT: slli a2, a0, 3 -; RV32I-NEXT: slli a0, a0, 5 -; RV32I-NEXT: sub a0, a0, a2 +; RV32I-NEXT: slli a0, a0, 4 +; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; @@ -310,8 +310,8 @@ define i32 @mul96(i32 %a) { ; RV32I-LABEL: mul96: ; RV32I: # %bb.0: ; RV32I-NEXT: slli a1, a0, 5 -; RV32I-NEXT: slli a0, a0, 7 -; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: slli a0, a0, 6 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; ; RV32ZBA-LABEL: mul96: @@ -1272,8 +1272,8 @@ define ptr @shl_add_knownbits(ptr %p, i32 %i) { ; RV32I-NEXT: slli a1, a1, 18 ; RV32I-NEXT: srli a1, a1, 18 ; RV32I-NEXT: slli a2, a1, 1 -; RV32I-NEXT: slli a1, a1, 3 -; RV32I-NEXT: sub a1, a1, a2 +; RV32I-NEXT: slli a1, a1, 2 +; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: srli a1, a1, 3 ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadba.ll b/llvm/test/CodeGen/RISCV/rv64xtheadba.ll index f4964288e3541..c57dfca1389b6 100644 --- a/llvm/test/CodeGen/RISCV/rv64xtheadba.ll +++ b/llvm/test/CodeGen/RISCV/rv64xtheadba.ll @@ -94,8 +94,8 @@ define i64 @addmul6(i64 %a, i64 %b) { ; RV64I-LABEL: addmul6: ; RV64I: # %bb.0: ; 
RV64I-NEXT: slli a2, a0, 1 -; RV64I-NEXT: slli a0, a0, 3 -; RV64I-NEXT: sub a0, a0, a2 +; RV64I-NEXT: slli a0, a0, 2 +; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; @@ -113,8 +113,8 @@ define i64 @disjointormul6(i64 %a, i64 %b) { ; RV64I-LABEL: disjointormul6: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a2, a0, 1 -; RV64I-NEXT: slli a0, a0, 3 -; RV64I-NEXT: sub a0, a0, a2 +; RV64I-NEXT: slli a0, a0, 2 +; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret ; @@ -151,8 +151,8 @@ define i64 @addmul12(i64 %a, i64 %b) { ; RV64I-LABEL: addmul12: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a2, a0, 2 -; RV64I-NEXT: slli a0, a0, 4 -; RV64I-NEXT: sub a0, a0, a2 +; RV64I-NEXT: slli a0, a0, 3 +; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; @@ -227,8 +227,8 @@ define i64 @addmul24(i64 %a, i64 %b) { ; RV64I-LABEL: addmul24: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a2, a0, 3 -; RV64I-NEXT: slli a0, a0, 5 -; RV64I-NEXT: sub a0, a0, a2 +; RV64I-NEXT: slli a0, a0, 4 +; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; @@ -527,8 +527,8 @@ define i64 @mul96(i64 %a) { ; RV64I-LABEL: mul96: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a1, a0, 5 -; RV64I-NEXT: slli a0, a0, 7 -; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: slli a0, a0, 6 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64XTHEADBA-LABEL: mul96: @@ -990,8 +990,8 @@ define signext i32 @mulw192(i32 signext %a) { ; RV64I-LABEL: mulw192: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a1, a0, 6 -; RV64I-NEXT: slli a0, a0, 8 -; RV64I-NEXT: subw a0, a0, a1 +; RV64I-NEXT: slli a0, a0, 7 +; RV64I-NEXT: addw a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64XTHEADBA-LABEL: mulw192: diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll index 156599fb72877..4ab4ff84dac57 100644 --- a/llvm/test/CodeGen/RISCV/rv64zba.ll +++ b/llvm/test/CodeGen/RISCV/rv64zba.ll @@ -489,8 +489,8 @@ define i64 @addmul6(i64 %a, i64 %b) { ; RV64I-LABEL: addmul6: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a2, a0, 1 -; RV64I-NEXT: slli a0, a0, 3 -; RV64I-NEXT: sub a0, a0, a2 +; RV64I-NEXT: slli a0, a0, 2 +; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; @@ -514,8 +514,8 @@ define i64 @disjointormul6(i64 %a, i64 %b) { ; RV64I-LABEL: disjointormul6: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a2, a0, 1 -; RV64I-NEXT: slli a0, a0, 3 -; RV64I-NEXT: sub a0, a0, a2 +; RV64I-NEXT: slli a0, a0, 2 +; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret ; @@ -564,8 +564,8 @@ define i64 @addmul12(i64 %a, i64 %b) { ; RV64I-LABEL: addmul12: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a2, a0, 2 -; RV64I-NEXT: slli a0, a0, 4 -; RV64I-NEXT: sub a0, a0, a2 +; RV64I-NEXT: slli a0, a0, 3 +; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; @@ -692,8 +692,8 @@ define i64 @addmul24(i64 %a, i64 %b) { ; RV64I-LABEL: addmul24: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a2, a0, 3 -; RV64I-NEXT: slli a0, a0, 5 -; RV64I-NEXT: sub a0, a0, a2 +; RV64I-NEXT: slli a0, a0, 4 +; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; @@ -1350,8 +1350,8 @@ define i64 @mul96(i64 %a) { ; RV64I-LABEL: mul96: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a1, a0, 5 -; RV64I-NEXT: slli a0, a0, 7 -; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: slli a0, a0, 6 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64ZBA-LABEL: mul96: @@ -1618,8 +1618,8 @@ define i64 @zext_mul96(i32 signext %a) { ; RV64I: # %bb.0: ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: srli a1, a0, 27 -; 
RV64I-NEXT: srli a0, a0, 25 -; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: srli a0, a0, 26 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64ZBA-LABEL: zext_mul96: @@ -1724,8 +1724,8 @@ define i64 @zext_mul12884901888(i32 signext %a) { ; RV64I-LABEL: zext_mul12884901888: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a1, a0, 32 -; RV64I-NEXT: slli a0, a0, 34 -; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: slli a0, a0, 33 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64ZBA-LABEL: zext_mul12884901888: @@ -2336,8 +2336,8 @@ define signext i32 @mulw192(i32 signext %a) { ; RV64I-LABEL: mulw192: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a1, a0, 6 -; RV64I-NEXT: slli a0, a0, 8 -; RV64I-NEXT: subw a0, a0, a1 +; RV64I-NEXT: slli a0, a0, 7 +; RV64I-NEXT: addw a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64ZBA-LABEL: mulw192: @@ -4055,8 +4055,8 @@ define i64 @regression(i32 signext %x, i32 signext %y) { ; RV64I-NEXT: sub a0, a0, a1 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: srli a1, a0, 29 -; RV64I-NEXT: srli a0, a0, 27 -; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: srli a0, a0, 28 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64ZBA-LABEL: regression: @@ -4190,8 +4190,8 @@ define i64 @bext_mul12(i32 %1, i32 %2) { ; RV64I-NEXT: srlw a0, a0, a1 ; RV64I-NEXT: andi a0, a0, 1 ; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: slli a0, a0, 4 -; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: slli a0, a0, 3 +; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64ZBANOZBB-LABEL: bext_mul12: @@ -4988,8 +4988,8 @@ define ptr @shl_add_knownbits(ptr %p, i64 %i) { ; RV64I-NEXT: slli a1, a1, 50 ; RV64I-NEXT: srli a1, a1, 50 ; RV64I-NEXT: slli a2, a1, 1 -; RV64I-NEXT: slli a1, a1, 3 -; RV64I-NEXT: sub a1, a1, a2 +; RV64I-NEXT: slli a1, a1, 2 +; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: srli a1, a1, 3 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll index bd912193c4fed..39732602cc85e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll @@ -72,9 +72,8 @@ define fastcc @ret_split_nxv64i32(ptr %x) { ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: vl8re32.v v8, (a1) ; CHECK-NEXT: slli a3, a2, 3 -; CHECK-NEXT: slli a4, a2, 5 ; CHECK-NEXT: slli a2, a2, 4 -; CHECK-NEXT: sub a4, a4, a3 +; CHECK-NEXT: add a4, a2, a3 ; CHECK-NEXT: add a5, a1, a2 ; CHECK-NEXT: vl8re32.v v16, (a5) ; CHECK-NEXT: add a5, a1, a3 @@ -112,16 +111,16 @@ define fastcc @ret_split_nxv128i32(ptr %x) { ; CHECK-NEXT: addi a3, a3, 16 ; CHECK-NEXT: vs8r.v v8, (a3) # vscale x 64-byte Folded Spill ; CHECK-NEXT: slli a3, a2, 3 -; CHECK-NEXT: slli a4, a2, 5 -; CHECK-NEXT: slli a5, a2, 4 +; CHECK-NEXT: slli a4, a2, 4 +; CHECK-NEXT: slli a5, a2, 5 ; CHECK-NEXT: slli a2, a2, 6 -; CHECK-NEXT: sub a6, a4, a3 -; CHECK-NEXT: add a7, a4, a3 -; CHECK-NEXT: sub t0, a2, a5 +; CHECK-NEXT: add a6, a4, a3 +; CHECK-NEXT: add a7, a5, a3 +; CHECK-NEXT: add t0, a5, a4 ; CHECK-NEXT: sub a2, a2, a3 ; CHECK-NEXT: add t1, a1, a3 -; CHECK-NEXT: add t2, a1, a5 -; CHECK-NEXT: add t3, a1, a4 +; CHECK-NEXT: add t2, a1, a4 +; CHECK-NEXT: add t3, a1, a5 ; CHECK-NEXT: vl8re32.v v8, (t1) ; CHECK-NEXT: csrr t1, vlenb ; CHECK-NEXT: slli t1, t1, 4 @@ -157,12 +156,12 @@ define fastcc @ret_split_nxv128i32(ptr %x) { ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vl8r.v v0, (a1) # vscale x 64-byte Folded Reload ; CHECK-NEXT: vs8r.v v0, (a0) -; CHECK-NEXT: add a4, a0, a4 -; CHECK-NEXT: vs8r.v v16, (a4) ; CHECK-NEXT: add a5, a0, a5 +; CHECK-NEXT: vs8r.v 
v16, (a5) +; CHECK-NEXT: add a4, a0, a4 ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vs8r.v v16, (a5) +; CHECK-NEXT: vs8r.v v16, (a4) ; CHECK-NEXT: add a3, a0, a3 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 4 diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll index 692a7ce0b20e8..903c0dcaba2d8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll @@ -5,6 +5,8 @@ ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,NOZFMIN,ZVFHMIN ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfhmin,+zfbfmin,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZFMIN ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfhmin,+zfbfmin,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZFMIN +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfhmin,+zvfhmin,+experimental-zvfbfa -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZVFBFA +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfhmin,+zvfhmin,+experimental-zvfbfa -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZVFBFA define bfloat @extractelt_nxv1bf16_0( %v) { ; NOZFMIN-LABEL: extractelt_nxv1bf16_0: @@ -22,6 +24,12 @@ define bfloat @extractelt_nxv1bf16_0( %v) { ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv1bf16_0: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfmv.f.s fa0, v8 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 0 ret bfloat %r } @@ -44,6 +52,13 @@ define bfloat @extractelt_nxv1bf16_imm( %v) { ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv1bf16_imm: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vslidedown.vi v8, v8, 2 +; ZVFBFA-NEXT: vfmv.f.s fa0, v8 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 2 ret bfloat %r } @@ -66,6 +81,13 @@ define bfloat @extractelt_nxv1bf16_idx( %v, i32 zeroext %id ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv1bf16_idx: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vslidedown.vx v8, v8, a0 +; ZVFBFA-NEXT: vfmv.f.s fa0, v8 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 %idx ret bfloat %r } @@ -86,6 +108,12 @@ define bfloat @extractelt_nxv2bf16_0( %v) { ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv2bf16_0: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfmv.f.s fa0, v8 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 0 ret bfloat %r } @@ -108,6 +136,13 @@ define bfloat @extractelt_nxv2bf16_imm( %v) { ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv2bf16_imm: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vslidedown.vi v8, v8, 2 +; ZVFBFA-NEXT: vfmv.f.s fa0, v8 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 2 ret bfloat %r } @@ -130,6 +165,13 @@ define bfloat @extractelt_nxv2bf16_idx( %v, i32 zeroext %id ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv2bf16_idx: +; ZVFBFA: # %bb.0: 
+; ZVFBFA-NEXT: vsetivli zero, 1, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vslidedown.vx v8, v8, a0 +; ZVFBFA-NEXT: vfmv.f.s fa0, v8 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 %idx ret bfloat %r } @@ -150,6 +192,12 @@ define bfloat @extractelt_nxv4bf16_0( %v) { ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv4bf16_0: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfmv.f.s fa0, v8 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 0 ret bfloat %r } @@ -172,6 +220,13 @@ define bfloat @extractelt_nxv4bf16_imm( %v) { ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv4bf16_imm: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vslidedown.vi v8, v8, 2 +; ZVFBFA-NEXT: vfmv.f.s fa0, v8 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 2 ret bfloat %r } @@ -194,6 +249,13 @@ define bfloat @extractelt_nxv4bf16_idx( %v, i32 zeroext %id ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv4bf16_idx: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vslidedown.vx v8, v8, a0 +; ZVFBFA-NEXT: vfmv.f.s fa0, v8 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 %idx ret bfloat %r } @@ -214,6 +276,12 @@ define bfloat @extractelt_nxv8bf16_0( %v) { ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv8bf16_0: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfmv.f.s fa0, v8 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 0 ret bfloat %r } @@ -236,6 +304,13 @@ define bfloat @extractelt_nxv8bf16_imm( %v) { ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv8bf16_imm: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vslidedown.vi v8, v8, 2 +; ZVFBFA-NEXT: vfmv.f.s fa0, v8 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 2 ret bfloat %r } @@ -258,6 +333,14 @@ define bfloat @extractelt_nxv8bf16_idx( %v, i32 zeroext %id ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv8bf16_idx: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16, m2, ta, ma +; ZVFBFA-NEXT: vslidedown.vx v8, v8, a0 +; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfmv.f.s fa0, v8 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 %idx ret bfloat %r } @@ -278,6 +361,12 @@ define bfloat @extractelt_nxv16bf16_0( %v) { ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv16bf16_0: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfmv.f.s fa0, v8 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 0 ret bfloat %r } @@ -300,6 +389,13 @@ define bfloat @extractelt_nxv16bf16_imm( %v) { ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv16bf16_imm: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vslidedown.vi v8, v8, 2 +; ZVFBFA-NEXT: vfmv.f.s fa0, v8 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 2 ret bfloat %r } @@ -322,6 +418,14 @@ define bfloat @extractelt_nxv16bf16_idx( %v, i32 zeroext % ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: 
extractelt_nxv16bf16_idx: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16, m4, ta, ma +; ZVFBFA-NEXT: vslidedown.vx v8, v8, a0 +; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfmv.f.s fa0, v8 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 %idx ret bfloat %r } @@ -342,6 +446,12 @@ define bfloat @extractelt_nxv32bf16_0( %v) { ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv32bf16_0: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfmv.f.s fa0, v8 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 0 ret bfloat %r } @@ -364,6 +474,13 @@ define bfloat @extractelt_nxv32bf16_imm( %v) { ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv32bf16_imm: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vslidedown.vi v8, v8, 2 +; ZVFBFA-NEXT: vfmv.f.s fa0, v8 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 2 ret bfloat %r } @@ -386,6 +503,14 @@ define bfloat @extractelt_nxv32bf16_idx( %v, i32 zeroext % ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv32bf16_idx: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16, m8, ta, ma +; ZVFBFA-NEXT: vslidedown.vx v8, v8, a0 +; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfmv.f.s fa0, v8 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 %idx ret bfloat %r } @@ -412,6 +537,13 @@ define half @extractelt_nxv1f16_0( %v) { ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv1f16_0: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFBFA-NEXT: vmv.x.s a0, v8 +; ZVFBFA-NEXT: fmv.h.x fa0, a0 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 0 ret half %r } @@ -441,6 +573,14 @@ define half @extractelt_nxv1f16_imm( %v) { ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv1f16_imm: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; ZVFBFA-NEXT: vslidedown.vi v8, v8, 2 +; ZVFBFA-NEXT: vmv.x.s a0, v8 +; ZVFBFA-NEXT: fmv.h.x fa0, a0 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 2 ret half %r } @@ -470,6 +610,14 @@ define half @extractelt_nxv1f16_idx( %v, i32 zeroext %idx) { ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv1f16_idx: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; ZVFBFA-NEXT: vslidedown.vx v8, v8, a0 +; ZVFBFA-NEXT: vmv.x.s a0, v8 +; ZVFBFA-NEXT: fmv.h.x fa0, a0 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 %idx ret half %r } @@ -496,6 +644,13 @@ define half @extractelt_nxv2f16_0( %v) { ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv2f16_0: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFBFA-NEXT: vmv.x.s a0, v8 +; ZVFBFA-NEXT: fmv.h.x fa0, a0 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 0 ret half %r } @@ -525,6 +680,14 @@ define half @extractelt_nxv2f16_imm( %v) { ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv2f16_imm: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; ZVFBFA-NEXT: vslidedown.vi v8, v8, 2 +; ZVFBFA-NEXT: vmv.x.s a0, v8 +; ZVFBFA-NEXT: fmv.h.x fa0, a0 +; ZVFBFA-NEXT: ret %r = extractelement %v, 
i32 2 ret half %r } @@ -554,6 +717,14 @@ define half @extractelt_nxv2f16_idx( %v, i32 zeroext %idx) { ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv2f16_idx: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; ZVFBFA-NEXT: vslidedown.vx v8, v8, a0 +; ZVFBFA-NEXT: vmv.x.s a0, v8 +; ZVFBFA-NEXT: fmv.h.x fa0, a0 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 %idx ret half %r } @@ -580,6 +751,13 @@ define half @extractelt_nxv4f16_0( %v) { ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv4f16_0: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFBFA-NEXT: vmv.x.s a0, v8 +; ZVFBFA-NEXT: fmv.h.x fa0, a0 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 0 ret half %r } @@ -609,6 +787,14 @@ define half @extractelt_nxv4f16_imm( %v) { ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv4f16_imm: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFBFA-NEXT: vslidedown.vi v8, v8, 2 +; ZVFBFA-NEXT: vmv.x.s a0, v8 +; ZVFBFA-NEXT: fmv.h.x fa0, a0 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 2 ret half %r } @@ -638,6 +824,14 @@ define half @extractelt_nxv4f16_idx( %v, i32 zeroext %idx) { ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv4f16_idx: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFBFA-NEXT: vslidedown.vx v8, v8, a0 +; ZVFBFA-NEXT: vmv.x.s a0, v8 +; ZVFBFA-NEXT: fmv.h.x fa0, a0 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 %idx ret half %r } @@ -664,6 +858,13 @@ define half @extractelt_nxv8f16_0( %v) { ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv8f16_0: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFBFA-NEXT: vmv.x.s a0, v8 +; ZVFBFA-NEXT: fmv.h.x fa0, a0 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 0 ret half %r } @@ -693,6 +894,14 @@ define half @extractelt_nxv8f16_imm( %v) { ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv8f16_imm: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFBFA-NEXT: vslidedown.vi v8, v8, 2 +; ZVFBFA-NEXT: vmv.x.s a0, v8 +; ZVFBFA-NEXT: fmv.h.x fa0, a0 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 2 ret half %r } @@ -722,6 +931,14 @@ define half @extractelt_nxv8f16_idx( %v, i32 zeroext %idx) { ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv8f16_idx: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16, m2, ta, ma +; ZVFBFA-NEXT: vslidedown.vx v8, v8, a0 +; ZVFBFA-NEXT: vmv.x.s a0, v8 +; ZVFBFA-NEXT: fmv.h.x fa0, a0 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 %idx ret half %r } @@ -748,6 +965,13 @@ define half @extractelt_nxv16f16_0( %v) { ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv16f16_0: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFBFA-NEXT: vmv.x.s a0, v8 +; ZVFBFA-NEXT: fmv.h.x fa0, a0 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 0 ret half %r } @@ -777,6 +1001,14 @@ define half @extractelt_nxv16f16_imm( %v) { ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv16f16_imm: +; ZVFBFA: # %bb.0: +; 
ZVFBFA-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFBFA-NEXT: vslidedown.vi v8, v8, 2 +; ZVFBFA-NEXT: vmv.x.s a0, v8 +; ZVFBFA-NEXT: fmv.h.x fa0, a0 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 2 ret half %r } @@ -806,6 +1038,14 @@ define half @extractelt_nxv16f16_idx( %v, i32 zeroext %idx) ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv16f16_idx: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16, m4, ta, ma +; ZVFBFA-NEXT: vslidedown.vx v8, v8, a0 +; ZVFBFA-NEXT: vmv.x.s a0, v8 +; ZVFBFA-NEXT: fmv.h.x fa0, a0 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 %idx ret half %r } @@ -832,6 +1072,13 @@ define half @extractelt_nxv32f16_0( %v) { ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv32f16_0: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFBFA-NEXT: vmv.x.s a0, v8 +; ZVFBFA-NEXT: fmv.h.x fa0, a0 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 0 ret half %r } @@ -861,6 +1108,14 @@ define half @extractelt_nxv32f16_imm( %v) { ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv32f16_imm: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFBFA-NEXT: vslidedown.vi v8, v8, 2 +; ZVFBFA-NEXT: vmv.x.s a0, v8 +; ZVFBFA-NEXT: fmv.h.x fa0, a0 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 2 ret half %r } @@ -890,6 +1145,14 @@ define half @extractelt_nxv32f16_idx( %v, i32 zeroext %idx) ; ZFMIN-NEXT: vmv.x.s a0, v8 ; ZFMIN-NEXT: fmv.h.x fa0, a0 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: extractelt_nxv32f16_idx: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 1, e16, m8, ta, ma +; ZVFBFA-NEXT: vslidedown.vx v8, v8, a0 +; ZVFBFA-NEXT: vmv.x.s a0, v8 +; ZVFBFA-NEXT: fmv.h.x fa0, a0 +; ZVFBFA-NEXT: ret %r = extractelement %v, i32 %idx ret half %r } diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll index 607e0085c3f46..7c6e0cea706d7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll @@ -7,225 +7,511 @@ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zfbfmin,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+experimental-zvfbfa,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+experimental-zvfbfa,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA define @insertelt_nxv1bf16_0( %v, bfloat %elt) { -; CHECK-LABEL: insertelt_nxv1bf16_0: -; CHECK: # %bb.0: -; CHECK-NEXT: fmv.x.h a0, fa0 -; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: insertelt_nxv1bf16_0: +; ZVFH: # %bb.0: +; ZVFH-NEXT: fmv.x.h a0, fa0 +; ZVFH-NEXT: vsetvli a1, zero, e16, m1, tu, ma +; ZVFH-NEXT: vmv.s.x v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: insertelt_nxv1bf16_0: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.h a0, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, tu, ma +; ZVFHMIN-NEXT: vmv.s.x v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv1bf16_0: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, tu, ma +; ZVFBFA-NEXT: vfmv.s.f v8, fa0 +; 
ZVFBFA-NEXT: ret %r = insertelement %v, bfloat %elt, i32 0 ret %r } define @insertelt_nxv1bf16_imm( %v, bfloat %elt) { -; CHECK-LABEL: insertelt_nxv1bf16_imm: -; CHECK: # %bb.0: -; CHECK-NEXT: fmv.x.h a0, fa0 -; CHECK-NEXT: vsetivli zero, 4, e16, mf4, tu, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vslideup.vi v8, v9, 3 -; CHECK-NEXT: ret +; ZVFH-LABEL: insertelt_nxv1bf16_imm: +; ZVFH: # %bb.0: +; ZVFH-NEXT: fmv.x.h a0, fa0 +; ZVFH-NEXT: vsetivli zero, 4, e16, mf4, tu, ma +; ZVFH-NEXT: vmv.s.x v9, a0 +; ZVFH-NEXT: vslideup.vi v8, v9, 3 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: insertelt_nxv1bf16_imm: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.h a0, fa0 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf4, tu, ma +; ZVFHMIN-NEXT: vmv.s.x v9, a0 +; ZVFHMIN-NEXT: vslideup.vi v8, v9, 3 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv1bf16_imm: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf4, tu, ma +; ZVFBFA-NEXT: vfmv.s.f v9, fa0 +; ZVFBFA-NEXT: vslideup.vi v8, v9, 3 +; ZVFBFA-NEXT: ret %r = insertelement %v, bfloat %elt, i32 3 ret %r } define @insertelt_nxv1bf16_idx( %v, bfloat %elt, i32 zeroext %idx) { -; CHECK-LABEL: insertelt_nxv1bf16_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, a0, 1 -; CHECK-NEXT: fmv.x.h a2, fa0 -; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a2 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: insertelt_nxv1bf16_idx: +; ZVFH: # %bb.0: +; ZVFH-NEXT: addi a1, a0, 1 +; ZVFH-NEXT: fmv.x.h a2, fa0 +; ZVFH-NEXT: vsetvli a3, zero, e16, m1, ta, ma +; ZVFH-NEXT: vmv.s.x v9, a2 +; ZVFH-NEXT: vsetvli zero, a1, e16, mf4, tu, ma +; ZVFH-NEXT: vslideup.vx v8, v9, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: insertelt_nxv1bf16_idx: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi a1, a0, 1 +; ZVFHMIN-NEXT: fmv.x.h a2, fa0 +; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv.s.x v9, a2 +; ZVFHMIN-NEXT: vsetvli zero, a1, e16, mf4, tu, ma +; ZVFHMIN-NEXT: vslideup.vx v8, v9, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv1bf16_idx: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: addi a1, a0, 1 +; ZVFBFA-NEXT: vsetvli a2, zero, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfmv.s.f v9, fa0 +; ZVFBFA-NEXT: vsetvli zero, a1, e16alt, mf4, tu, ma +; ZVFBFA-NEXT: vslideup.vx v8, v9, a0 +; ZVFBFA-NEXT: ret %r = insertelement %v, bfloat %elt, i32 %idx ret %r } define @insertelt_nxv2bf16_0( %v, bfloat %elt) { -; CHECK-LABEL: insertelt_nxv2bf16_0: -; CHECK: # %bb.0: -; CHECK-NEXT: fmv.x.h a0, fa0 -; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: insertelt_nxv2bf16_0: +; ZVFH: # %bb.0: +; ZVFH-NEXT: fmv.x.h a0, fa0 +; ZVFH-NEXT: vsetvli a1, zero, e16, m1, tu, ma +; ZVFH-NEXT: vmv.s.x v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: insertelt_nxv2bf16_0: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.h a0, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, tu, ma +; ZVFHMIN-NEXT: vmv.s.x v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv2bf16_0: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, tu, ma +; ZVFBFA-NEXT: vfmv.s.f v8, fa0 +; ZVFBFA-NEXT: ret %r = insertelement %v, bfloat %elt, i32 0 ret %r } define @insertelt_nxv2bf16_imm( %v, bfloat %elt) { -; CHECK-LABEL: insertelt_nxv2bf16_imm: -; CHECK: # %bb.0: -; CHECK-NEXT: fmv.x.h a0, fa0 -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, tu, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vslideup.vi v8, v9, 3 -; CHECK-NEXT: ret +; ZVFH-LABEL: 
insertelt_nxv2bf16_imm: +; ZVFH: # %bb.0: +; ZVFH-NEXT: fmv.x.h a0, fa0 +; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, tu, ma +; ZVFH-NEXT: vmv.s.x v9, a0 +; ZVFH-NEXT: vslideup.vi v8, v9, 3 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: insertelt_nxv2bf16_imm: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.h a0, fa0 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, tu, ma +; ZVFHMIN-NEXT: vmv.s.x v9, a0 +; ZVFHMIN-NEXT: vslideup.vi v8, v9, 3 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv2bf16_imm: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, tu, ma +; ZVFBFA-NEXT: vfmv.s.f v9, fa0 +; ZVFBFA-NEXT: vslideup.vi v8, v9, 3 +; ZVFBFA-NEXT: ret %r = insertelement %v, bfloat %elt, i32 3 ret %r } define @insertelt_nxv2bf16_idx( %v, bfloat %elt, i32 zeroext %idx) { -; CHECK-LABEL: insertelt_nxv2bf16_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, a0, 1 -; CHECK-NEXT: fmv.x.h a2, fa0 -; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a2 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: insertelt_nxv2bf16_idx: +; ZVFH: # %bb.0: +; ZVFH-NEXT: addi a1, a0, 1 +; ZVFH-NEXT: fmv.x.h a2, fa0 +; ZVFH-NEXT: vsetvli a3, zero, e16, m1, ta, ma +; ZVFH-NEXT: vmv.s.x v9, a2 +; ZVFH-NEXT: vsetvli zero, a1, e16, mf2, tu, ma +; ZVFH-NEXT: vslideup.vx v8, v9, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: insertelt_nxv2bf16_idx: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi a1, a0, 1 +; ZVFHMIN-NEXT: fmv.x.h a2, fa0 +; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv.s.x v9, a2 +; ZVFHMIN-NEXT: vsetvli zero, a1, e16, mf2, tu, ma +; ZVFHMIN-NEXT: vslideup.vx v8, v9, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv2bf16_idx: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: addi a1, a0, 1 +; ZVFBFA-NEXT: vsetvli a2, zero, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfmv.s.f v9, fa0 +; ZVFBFA-NEXT: vsetvli zero, a1, e16alt, mf2, tu, ma +; ZVFBFA-NEXT: vslideup.vx v8, v9, a0 +; ZVFBFA-NEXT: ret %r = insertelement %v, bfloat %elt, i32 %idx ret %r } define @insertelt_nxv4bf16_0( %v, bfloat %elt) { -; CHECK-LABEL: insertelt_nxv4bf16_0: -; CHECK: # %bb.0: -; CHECK-NEXT: fmv.x.h a0, fa0 -; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: insertelt_nxv4bf16_0: +; ZVFH: # %bb.0: +; ZVFH-NEXT: fmv.x.h a0, fa0 +; ZVFH-NEXT: vsetvli a1, zero, e16, m1, tu, ma +; ZVFH-NEXT: vmv.s.x v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: insertelt_nxv4bf16_0: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.h a0, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, tu, ma +; ZVFHMIN-NEXT: vmv.s.x v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv4bf16_0: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, tu, ma +; ZVFBFA-NEXT: vfmv.s.f v8, fa0 +; ZVFBFA-NEXT: ret %r = insertelement %v, bfloat %elt, i32 0 ret %r } define @insertelt_nxv4bf16_imm( %v, bfloat %elt) { -; CHECK-LABEL: insertelt_nxv4bf16_imm: -; CHECK: # %bb.0: -; CHECK-NEXT: fmv.x.h a0, fa0 -; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vslideup.vi v8, v9, 3 -; CHECK-NEXT: ret +; ZVFH-LABEL: insertelt_nxv4bf16_imm: +; ZVFH: # %bb.0: +; ZVFH-NEXT: fmv.x.h a0, fa0 +; ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; ZVFH-NEXT: vmv.s.x v9, a0 +; ZVFH-NEXT: vslideup.vi v8, v9, 3 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: insertelt_nxv4bf16_imm: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.h a0, fa0 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; ZVFHMIN-NEXT: 
vmv.s.x v9, a0 +; ZVFHMIN-NEXT: vslideup.vi v8, v9, 3 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv4bf16_imm: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, m1, tu, ma +; ZVFBFA-NEXT: vfmv.s.f v9, fa0 +; ZVFBFA-NEXT: vslideup.vi v8, v9, 3 +; ZVFBFA-NEXT: ret %r = insertelement %v, bfloat %elt, i32 3 ret %r } define @insertelt_nxv4bf16_idx( %v, bfloat %elt, i32 zeroext %idx) { -; CHECK-LABEL: insertelt_nxv4bf16_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, a0, 1 -; CHECK-NEXT: fmv.x.h a2, fa0 -; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a2 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: insertelt_nxv4bf16_idx: +; ZVFH: # %bb.0: +; ZVFH-NEXT: addi a1, a0, 1 +; ZVFH-NEXT: fmv.x.h a2, fa0 +; ZVFH-NEXT: vsetvli a3, zero, e16, m1, ta, ma +; ZVFH-NEXT: vmv.s.x v9, a2 +; ZVFH-NEXT: vsetvli zero, a1, e16, m1, tu, ma +; ZVFH-NEXT: vslideup.vx v8, v9, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: insertelt_nxv4bf16_idx: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi a1, a0, 1 +; ZVFHMIN-NEXT: fmv.x.h a2, fa0 +; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv.s.x v9, a2 +; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m1, tu, ma +; ZVFHMIN-NEXT: vslideup.vx v8, v9, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv4bf16_idx: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: addi a1, a0, 1 +; ZVFBFA-NEXT: vsetvli a2, zero, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfmv.s.f v9, fa0 +; ZVFBFA-NEXT: vsetvli zero, a1, e16alt, m1, tu, ma +; ZVFBFA-NEXT: vslideup.vx v8, v9, a0 +; ZVFBFA-NEXT: ret %r = insertelement %v, bfloat %elt, i32 %idx ret %r } define @insertelt_nxv8bf16_0( %v, bfloat %elt) { -; CHECK-LABEL: insertelt_nxv8bf16_0: -; CHECK: # %bb.0: -; CHECK-NEXT: fmv.x.h a0, fa0 -; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: insertelt_nxv8bf16_0: +; ZVFH: # %bb.0: +; ZVFH-NEXT: fmv.x.h a0, fa0 +; ZVFH-NEXT: vsetvli a1, zero, e16, m1, tu, ma +; ZVFH-NEXT: vmv.s.x v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: insertelt_nxv8bf16_0: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.h a0, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, tu, ma +; ZVFHMIN-NEXT: vmv.s.x v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv8bf16_0: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, tu, ma +; ZVFBFA-NEXT: vfmv.s.f v8, fa0 +; ZVFBFA-NEXT: ret %r = insertelement %v, bfloat %elt, i32 0 ret %r } define @insertelt_nxv8bf16_imm( %v, bfloat %elt) { -; CHECK-LABEL: insertelt_nxv8bf16_imm: -; CHECK: # %bb.0: -; CHECK-NEXT: fmv.x.h a0, fa0 -; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vslideup.vi v8, v10, 3 -; CHECK-NEXT: ret +; ZVFH-LABEL: insertelt_nxv8bf16_imm: +; ZVFH: # %bb.0: +; ZVFH-NEXT: fmv.x.h a0, fa0 +; ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; ZVFH-NEXT: vmv.s.x v10, a0 +; ZVFH-NEXT: vslideup.vi v8, v10, 3 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: insertelt_nxv8bf16_imm: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.h a0, fa0 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; ZVFHMIN-NEXT: vmv.s.x v10, a0 +; ZVFHMIN-NEXT: vslideup.vi v8, v10, 3 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv8bf16_imm: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, m1, tu, ma +; ZVFBFA-NEXT: vfmv.s.f v10, fa0 +; ZVFBFA-NEXT: vslideup.vi v8, v10, 3 +; ZVFBFA-NEXT: ret %r = insertelement %v, bfloat %elt, i32 3 ret %r } define @insertelt_nxv8bf16_idx( %v, 
bfloat %elt, i32 zeroext %idx) { -; CHECK-LABEL: insertelt_nxv8bf16_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: fmv.x.h a1, fa0 -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, a1 -; CHECK-NEXT: addi a1, a0, 1 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, ma -; CHECK-NEXT: vslideup.vx v8, v10, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: insertelt_nxv8bf16_idx: +; ZVFH: # %bb.0: +; ZVFH-NEXT: fmv.x.h a1, fa0 +; ZVFH-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; ZVFH-NEXT: vmv.s.x v10, a1 +; ZVFH-NEXT: addi a1, a0, 1 +; ZVFH-NEXT: vsetvli zero, a1, e16, m2, tu, ma +; ZVFH-NEXT: vslideup.vx v8, v10, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: insertelt_nxv8bf16_idx: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv.s.x v10, a1 +; ZVFHMIN-NEXT: addi a1, a0, 1 +; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m2, tu, ma +; ZVFHMIN-NEXT: vslideup.vx v8, v10, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv8bf16_idx: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a1, zero, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfmv.s.f v10, fa0 +; ZVFBFA-NEXT: addi a1, a0, 1 +; ZVFBFA-NEXT: vsetvli zero, a1, e16alt, m2, tu, ma +; ZVFBFA-NEXT: vslideup.vx v8, v10, a0 +; ZVFBFA-NEXT: ret %r = insertelement %v, bfloat %elt, i32 %idx ret %r } define @insertelt_nxv16bf16_0( %v, bfloat %elt) { -; CHECK-LABEL: insertelt_nxv16bf16_0: -; CHECK: # %bb.0: -; CHECK-NEXT: fmv.x.h a0, fa0 -; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: insertelt_nxv16bf16_0: +; ZVFH: # %bb.0: +; ZVFH-NEXT: fmv.x.h a0, fa0 +; ZVFH-NEXT: vsetvli a1, zero, e16, m1, tu, ma +; ZVFH-NEXT: vmv.s.x v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: insertelt_nxv16bf16_0: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.h a0, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, tu, ma +; ZVFHMIN-NEXT: vmv.s.x v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv16bf16_0: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, tu, ma +; ZVFBFA-NEXT: vfmv.s.f v8, fa0 +; ZVFBFA-NEXT: ret %r = insertelement %v, bfloat %elt, i32 0 ret %r } define @insertelt_nxv16bf16_imm( %v, bfloat %elt) { -; CHECK-LABEL: insertelt_nxv16bf16_imm: -; CHECK: # %bb.0: -; CHECK-NEXT: fmv.x.h a0, fa0 -; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; CHECK-NEXT: vmv.s.x v12, a0 -; CHECK-NEXT: vslideup.vi v8, v12, 3 -; CHECK-NEXT: ret +; ZVFH-LABEL: insertelt_nxv16bf16_imm: +; ZVFH: # %bb.0: +; ZVFH-NEXT: fmv.x.h a0, fa0 +; ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; ZVFH-NEXT: vmv.s.x v12, a0 +; ZVFH-NEXT: vslideup.vi v8, v12, 3 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: insertelt_nxv16bf16_imm: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.h a0, fa0 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; ZVFHMIN-NEXT: vmv.s.x v12, a0 +; ZVFHMIN-NEXT: vslideup.vi v8, v12, 3 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv16bf16_imm: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, m1, tu, ma +; ZVFBFA-NEXT: vfmv.s.f v12, fa0 +; ZVFBFA-NEXT: vslideup.vi v8, v12, 3 +; ZVFBFA-NEXT: ret %r = insertelement %v, bfloat %elt, i32 3 ret %r } define @insertelt_nxv16bf16_idx( %v, bfloat %elt, i32 zeroext %idx) { -; CHECK-LABEL: insertelt_nxv16bf16_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: fmv.x.h a1, fa0 -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v12, a1 -; CHECK-NEXT: addi a1, a0, 1 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, ma -; CHECK-NEXT: vslideup.vx v8, v12, a0 -; CHECK-NEXT: ret +; 
ZVFH-LABEL: insertelt_nxv16bf16_idx: +; ZVFH: # %bb.0: +; ZVFH-NEXT: fmv.x.h a1, fa0 +; ZVFH-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; ZVFH-NEXT: vmv.s.x v12, a1 +; ZVFH-NEXT: addi a1, a0, 1 +; ZVFH-NEXT: vsetvli zero, a1, e16, m4, tu, ma +; ZVFH-NEXT: vslideup.vx v8, v12, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: insertelt_nxv16bf16_idx: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv.s.x v12, a1 +; ZVFHMIN-NEXT: addi a1, a0, 1 +; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m4, tu, ma +; ZVFHMIN-NEXT: vslideup.vx v8, v12, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv16bf16_idx: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a1, zero, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfmv.s.f v12, fa0 +; ZVFBFA-NEXT: addi a1, a0, 1 +; ZVFBFA-NEXT: vsetvli zero, a1, e16alt, m4, tu, ma +; ZVFBFA-NEXT: vslideup.vx v8, v12, a0 +; ZVFBFA-NEXT: ret %r = insertelement %v, bfloat %elt, i32 %idx ret %r } define @insertelt_nxv32bf16_0( %v, bfloat %elt) { -; CHECK-LABEL: insertelt_nxv32bf16_0: -; CHECK: # %bb.0: -; CHECK-NEXT: fmv.x.h a0, fa0 -; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: insertelt_nxv32bf16_0: +; ZVFH: # %bb.0: +; ZVFH-NEXT: fmv.x.h a0, fa0 +; ZVFH-NEXT: vsetvli a1, zero, e16, m1, tu, ma +; ZVFH-NEXT: vmv.s.x v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: insertelt_nxv32bf16_0: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.h a0, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, tu, ma +; ZVFHMIN-NEXT: vmv.s.x v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv32bf16_0: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, tu, ma +; ZVFBFA-NEXT: vfmv.s.f v8, fa0 +; ZVFBFA-NEXT: ret %r = insertelement %v, bfloat %elt, i32 0 ret %r } define @insertelt_nxv32bf16_imm( %v, bfloat %elt) { -; CHECK-LABEL: insertelt_nxv32bf16_imm: -; CHECK: # %bb.0: -; CHECK-NEXT: fmv.x.h a0, fa0 -; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vslideup.vi v8, v16, 3 -; CHECK-NEXT: ret +; ZVFH-LABEL: insertelt_nxv32bf16_imm: +; ZVFH: # %bb.0: +; ZVFH-NEXT: fmv.x.h a0, fa0 +; ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; ZVFH-NEXT: vmv.s.x v16, a0 +; ZVFH-NEXT: vslideup.vi v8, v16, 3 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: insertelt_nxv32bf16_imm: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.h a0, fa0 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; ZVFHMIN-NEXT: vmv.s.x v16, a0 +; ZVFHMIN-NEXT: vslideup.vi v8, v16, 3 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv32bf16_imm: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, m1, tu, ma +; ZVFBFA-NEXT: vfmv.s.f v16, fa0 +; ZVFBFA-NEXT: vslideup.vi v8, v16, 3 +; ZVFBFA-NEXT: ret %r = insertelement %v, bfloat %elt, i32 3 ret %r } define @insertelt_nxv32bf16_idx( %v, bfloat %elt, i32 zeroext %idx) { -; CHECK-LABEL: insertelt_nxv32bf16_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: fmv.x.h a1, fa0 -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, a1 -; CHECK-NEXT: addi a1, a0, 1 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma -; CHECK-NEXT: vslideup.vx v8, v16, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: insertelt_nxv32bf16_idx: +; ZVFH: # %bb.0: +; ZVFH-NEXT: fmv.x.h a1, fa0 +; ZVFH-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; ZVFH-NEXT: vmv.s.x v16, a1 +; ZVFH-NEXT: addi a1, a0, 1 +; ZVFH-NEXT: vsetvli zero, a1, e16, m8, tu, ma +; ZVFH-NEXT: vslideup.vx v8, v16, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: insertelt_nxv32bf16_idx: +; 
ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv.s.x v16, a1 +; ZVFHMIN-NEXT: addi a1, a0, 1 +; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, tu, ma +; ZVFHMIN-NEXT: vslideup.vx v8, v16, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv32bf16_idx: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a1, zero, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfmv.s.f v16, fa0 +; ZVFBFA-NEXT: addi a1, a0, 1 +; ZVFBFA-NEXT: vsetvli zero, a1, e16alt, m8, tu, ma +; ZVFBFA-NEXT: vslideup.vx v8, v16, a0 +; ZVFBFA-NEXT: ret %r = insertelement %v, bfloat %elt, i32 %idx ret %r } @@ -243,6 +529,13 @@ define @insertelt_nxv1f16_0( %v, half %el ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, tu, ma ; ZVFHMIN-NEXT: vmv.s.x v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv1f16_0: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m1, tu, ma +; ZVFBFA-NEXT: vmv.s.x v8, a0 +; ZVFBFA-NEXT: ret %r = insertelement %v, half %elt, i32 0 ret %r } @@ -262,6 +555,14 @@ define @insertelt_nxv1f16_imm( %v, half % ; ZVFHMIN-NEXT: vmv.s.x v9, a0 ; ZVFHMIN-NEXT: vslideup.vi v8, v9, 3 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv1f16_imm: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: vsetivli zero, 4, e16, mf4, tu, ma +; ZVFBFA-NEXT: vmv.s.x v9, a0 +; ZVFBFA-NEXT: vslideup.vi v8, v9, 3 +; ZVFBFA-NEXT: ret %r = insertelement %v, half %elt, i32 3 ret %r } @@ -285,6 +586,16 @@ define @insertelt_nxv1f16_idx( %v, half % ; ZVFHMIN-NEXT: vsetvli zero, a1, e16, mf4, tu, ma ; ZVFHMIN-NEXT: vslideup.vx v8, v9, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv1f16_idx: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: addi a1, a0, 1 +; ZVFBFA-NEXT: fmv.x.h a2, fa0 +; ZVFBFA-NEXT: vsetvli a3, zero, e16, m1, ta, ma +; ZVFBFA-NEXT: vmv.s.x v9, a2 +; ZVFBFA-NEXT: vsetvli zero, a1, e16, mf4, tu, ma +; ZVFBFA-NEXT: vslideup.vx v8, v9, a0 +; ZVFBFA-NEXT: ret %r = insertelement %v, half %elt, i32 %idx ret %r } @@ -302,6 +613,13 @@ define @insertelt_nxv2f16_0( %v, half %el ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, tu, ma ; ZVFHMIN-NEXT: vmv.s.x v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv2f16_0: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m1, tu, ma +; ZVFBFA-NEXT: vmv.s.x v8, a0 +; ZVFBFA-NEXT: ret %r = insertelement %v, half %elt, i32 0 ret %r } @@ -321,6 +639,14 @@ define @insertelt_nxv2f16_imm( %v, half % ; ZVFHMIN-NEXT: vmv.s.x v9, a0 ; ZVFHMIN-NEXT: vslideup.vi v8, v9, 3 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv2f16_imm: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: vsetivli zero, 4, e16, mf2, tu, ma +; ZVFBFA-NEXT: vmv.s.x v9, a0 +; ZVFBFA-NEXT: vslideup.vi v8, v9, 3 +; ZVFBFA-NEXT: ret %r = insertelement %v, half %elt, i32 3 ret %r } @@ -344,6 +670,16 @@ define @insertelt_nxv2f16_idx( %v, half % ; ZVFHMIN-NEXT: vsetvli zero, a1, e16, mf2, tu, ma ; ZVFHMIN-NEXT: vslideup.vx v8, v9, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv2f16_idx: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: addi a1, a0, 1 +; ZVFBFA-NEXT: fmv.x.h a2, fa0 +; ZVFBFA-NEXT: vsetvli a3, zero, e16, m1, ta, ma +; ZVFBFA-NEXT: vmv.s.x v9, a2 +; ZVFBFA-NEXT: vsetvli zero, a1, e16, mf2, tu, ma +; ZVFBFA-NEXT: vslideup.vx v8, v9, a0 +; ZVFBFA-NEXT: ret %r = insertelement %v, half %elt, i32 %idx ret %r } @@ -361,6 +697,13 @@ define @insertelt_nxv4f16_0( %v, half %el ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, tu, ma ; ZVFHMIN-NEXT: vmv.s.x v8, a0 ; 
ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv4f16_0: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m1, tu, ma +; ZVFBFA-NEXT: vmv.s.x v8, a0 +; ZVFBFA-NEXT: ret %r = insertelement %v, half %elt, i32 0 ret %r } @@ -380,6 +723,14 @@ define @insertelt_nxv4f16_imm( %v, half % ; ZVFHMIN-NEXT: vmv.s.x v9, a0 ; ZVFHMIN-NEXT: vslideup.vi v8, v9, 3 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv4f16_imm: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; ZVFBFA-NEXT: vmv.s.x v9, a0 +; ZVFBFA-NEXT: vslideup.vi v8, v9, 3 +; ZVFBFA-NEXT: ret %r = insertelement %v, half %elt, i32 3 ret %r } @@ -403,6 +754,16 @@ define @insertelt_nxv4f16_idx( %v, half % ; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m1, tu, ma ; ZVFHMIN-NEXT: vslideup.vx v8, v9, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv4f16_idx: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: addi a1, a0, 1 +; ZVFBFA-NEXT: fmv.x.h a2, fa0 +; ZVFBFA-NEXT: vsetvli a3, zero, e16, m1, ta, ma +; ZVFBFA-NEXT: vmv.s.x v9, a2 +; ZVFBFA-NEXT: vsetvli zero, a1, e16, m1, tu, ma +; ZVFBFA-NEXT: vslideup.vx v8, v9, a0 +; ZVFBFA-NEXT: ret %r = insertelement %v, half %elt, i32 %idx ret %r } @@ -420,6 +781,13 @@ define @insertelt_nxv8f16_0( %v, half %el ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, tu, ma ; ZVFHMIN-NEXT: vmv.s.x v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv8f16_0: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m1, tu, ma +; ZVFBFA-NEXT: vmv.s.x v8, a0 +; ZVFBFA-NEXT: ret %r = insertelement %v, half %elt, i32 0 ret %r } @@ -439,6 +807,14 @@ define @insertelt_nxv8f16_imm( %v, half % ; ZVFHMIN-NEXT: vmv.s.x v10, a0 ; ZVFHMIN-NEXT: vslideup.vi v8, v10, 3 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv8f16_imm: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; ZVFBFA-NEXT: vmv.s.x v10, a0 +; ZVFBFA-NEXT: vslideup.vi v8, v10, 3 +; ZVFBFA-NEXT: ret %r = insertelement %v, half %elt, i32 3 ret %r } @@ -462,6 +838,16 @@ define @insertelt_nxv8f16_idx( %v, half % ; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m2, tu, ma ; ZVFHMIN-NEXT: vslideup.vx v8, v10, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv8f16_idx: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a1, fa0 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; ZVFBFA-NEXT: vmv.s.x v10, a1 +; ZVFBFA-NEXT: addi a1, a0, 1 +; ZVFBFA-NEXT: vsetvli zero, a1, e16, m2, tu, ma +; ZVFBFA-NEXT: vslideup.vx v8, v10, a0 +; ZVFBFA-NEXT: ret %r = insertelement %v, half %elt, i32 %idx ret %r } @@ -479,6 +865,13 @@ define @insertelt_nxv16f16_0( %v, half ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, tu, ma ; ZVFHMIN-NEXT: vmv.s.x v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv16f16_0: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m1, tu, ma +; ZVFBFA-NEXT: vmv.s.x v8, a0 +; ZVFBFA-NEXT: ret %r = insertelement %v, half %elt, i32 0 ret %r } @@ -498,6 +891,14 @@ define @insertelt_nxv16f16_imm( %v, hal ; ZVFHMIN-NEXT: vmv.s.x v12, a0 ; ZVFHMIN-NEXT: vslideup.vi v8, v12, 3 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv16f16_imm: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; ZVFBFA-NEXT: vmv.s.x v12, a0 +; ZVFBFA-NEXT: vslideup.vi v8, v12, 3 +; ZVFBFA-NEXT: ret %r = insertelement %v, half %elt, i32 3 ret %r } @@ -521,6 +922,16 @@ define @insertelt_nxv16f16_idx( %v, hal ; ZVFHMIN-NEXT: vsetvli 
zero, a1, e16, m4, tu, ma ; ZVFHMIN-NEXT: vslideup.vx v8, v12, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv16f16_idx: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a1, fa0 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; ZVFBFA-NEXT: vmv.s.x v12, a1 +; ZVFBFA-NEXT: addi a1, a0, 1 +; ZVFBFA-NEXT: vsetvli zero, a1, e16, m4, tu, ma +; ZVFBFA-NEXT: vslideup.vx v8, v12, a0 +; ZVFBFA-NEXT: ret %r = insertelement %v, half %elt, i32 %idx ret %r } @@ -538,6 +949,13 @@ define @insertelt_nxv32f16_0( %v, half ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, tu, ma ; ZVFHMIN-NEXT: vmv.s.x v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv32f16_0: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m1, tu, ma +; ZVFBFA-NEXT: vmv.s.x v8, a0 +; ZVFBFA-NEXT: ret %r = insertelement %v, half %elt, i32 0 ret %r } @@ -557,6 +975,14 @@ define @insertelt_nxv32f16_imm( %v, hal ; ZVFHMIN-NEXT: vmv.s.x v16, a0 ; ZVFHMIN-NEXT: vslideup.vi v8, v16, 3 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv32f16_imm: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; ZVFBFA-NEXT: vmv.s.x v16, a0 +; ZVFBFA-NEXT: vslideup.vi v8, v16, 3 +; ZVFBFA-NEXT: ret %r = insertelement %v, half %elt, i32 3 ret %r } @@ -580,6 +1006,16 @@ define @insertelt_nxv32f16_idx( %v, hal ; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, tu, ma ; ZVFHMIN-NEXT: vslideup.vx v8, v16, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: insertelt_nxv32f16_idx: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a1, fa0 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; ZVFBFA-NEXT: vmv.s.x v16, a1 +; ZVFBFA-NEXT: addi a1, a0, 1 +; ZVFBFA-NEXT: vsetvli zero, a1, e16, m8, tu, ma +; ZVFBFA-NEXT: vslideup.vx v8, v16, a0 +; ZVFBFA-NEXT: ret %r = insertelement %v, half %elt, i32 %idx ret %r } diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll index ee38257f09cd5..0577fb1ff67bb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
[The hunks for this file are garbled beyond recovery in this copy: the vector element types and everything between them were stripped, leaving only repeated hunk-header function names. The lost hunks updated the check lines of @vector_interleave_nxv112i8_nxv16i8, @vector_interleave_nxv56i16_nxv8i16, @vector_interleave_nxv28i32_nxv4i32, @vector_interleave_nxv14i64_nxv2i64, @vector_interleave_nxv56f16_nxv8f16, @vector_interleave_nxv56bf16_nxv8bf16, @vector_interleave_nxv28f32_nxv4f32, and @vector_interleave_nxv14f64_nxv2f64. The diff header and first hunk of the next file (apparently llvm/test/CodeGen/RISCV/rvv/vfmv.f.s.ll, judging by its intrinsics) were lost the same way.]
define bfloat @intrinsic_vfmv.f.s_s_nxv1bf16( %0) nounwind { ; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv1bf16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: fmv.h.x fa0, a0 +; CHECK-NEXT: vsetivli zero, 1, e16alt, m1, ta, ma +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret entry: %a = call bfloat @llvm.riscv.vfmv.f.s.nxv1bf16( %0) @@ -21,9 +20,8 @@ declare bfloat @llvm.riscv.vfmv.f.s.nxv2bf16() define bfloat @intrinsic_vfmv.f.s_s_nxv2bf16( %0) nounwind { ; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv2bf16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: fmv.h.x fa0, a0 +; CHECK-NEXT: vsetivli zero, 1, e16alt, m1, ta, ma +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret entry: %a = call bfloat @llvm.riscv.vfmv.f.s.nxv2bf16( %0) @@ -35,9 +33,8 @@ declare bfloat @llvm.riscv.vfmv.f.s.nxv4bf16() define bfloat @intrinsic_vfmv.f.s_s_nxv4bf16( %0) nounwind { ; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv4bf16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: fmv.h.x fa0, a0 +; CHECK-NEXT: vsetivli zero, 1, e16alt, m1, ta, ma +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret entry: %a = call bfloat @llvm.riscv.vfmv.f.s.nxv4bf16( %0) @@ -49,9 +46,8 @@ declare bfloat @llvm.riscv.vfmv.f.s.nxv8bf16() define bfloat @intrinsic_vfmv.f.s_s_nxv8bf16( %0) nounwind { ; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv8bf16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: fmv.h.x fa0, a0 +; CHECK-NEXT: vsetivli zero, 1, e16alt, m1, ta, ma +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret entry: %a = call bfloat @llvm.riscv.vfmv.f.s.nxv8bf16( %0) @@ -63,9 +59,8 @@ declare bfloat @llvm.riscv.vfmv.f.s.nxv16bf16() define bfloat @intrinsic_vfmv.f.s_s_nxv16bf16( %0) nounwind { ; CHECK-LABEL: 
intrinsic_vfmv.f.s_s_nxv16bf16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: fmv.h.x fa0, a0 +; CHECK-NEXT: vsetivli zero, 1, e16alt, m1, ta, ma +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret entry: %a = call bfloat @llvm.riscv.vfmv.f.s.nxv16bf16( %0) @@ -77,9 +72,8 @@ declare bfloat @llvm.riscv.vfmv.f.s.nxv32bf16() define bfloat @intrinsic_vfmv.f.s_s_nxv32bf16( %0) nounwind { ; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv32bf16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: fmv.h.x fa0, a0 +; CHECK-NEXT: vsetivli zero, 1, e16alt, m1, ta, ma +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret entry: %a = call bfloat @llvm.riscv.vfmv.f.s.nxv32bf16( %0) diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll index 861998a2ba51a..274ac18deb273 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll @@ -943,8 +943,7 @@ define half @vreduce_ord_fadd_nxv12f16( %v, half %s) { ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a1, a0, 1 -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: sub a0, a0, a1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma @@ -1020,8 +1019,7 @@ define half @vreduce_fmax_nxv12f16( %v) { ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a1, a0, 1 -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: sub a0, a0, a1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: li a1, -512 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, a1 diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll index bc23388315de7..06bbe5209df35 100644 --- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll @@ -169,10 +169,10 @@ define i1 @test_srem_even(i4 %X) nounwind { ; RV32M-NEXT: srli a2, a1, 31 ; RV32M-NEXT: srli a1, a1, 4 ; RV32M-NEXT: add a1, a1, a2 -; RV32M-NEXT: slli a2, a1, 3 -; RV32M-NEXT: slli a1, a1, 1 -; RV32M-NEXT: sub a1, a1, a2 -; RV32M-NEXT: add a0, a0, a1 +; RV32M-NEXT: slli a2, a1, 1 +; RV32M-NEXT: slli a1, a1, 2 +; RV32M-NEXT: add a1, a1, a2 +; RV32M-NEXT: sub a0, a0, a1 ; RV32M-NEXT: andi a0, a0, 15 ; RV32M-NEXT: addi a0, a0, -1 ; RV32M-NEXT: seqz a0, a0 @@ -187,10 +187,10 @@ define i1 @test_srem_even(i4 %X) nounwind { ; RV64M-NEXT: srli a2, a1, 63 ; RV64M-NEXT: srli a1, a1, 4 ; RV64M-NEXT: add a1, a1, a2 -; RV64M-NEXT: slli a2, a1, 3 -; RV64M-NEXT: slli a1, a1, 1 -; RV64M-NEXT: sub a1, a1, a2 -; RV64M-NEXT: add a0, a0, a1 +; RV64M-NEXT: slli a2, a1, 1 +; RV64M-NEXT: slli a1, a1, 2 +; RV64M-NEXT: add a1, a1, a2 +; RV64M-NEXT: sub a0, a0, a1 ; RV64M-NEXT: andi a0, a0, 15 ; RV64M-NEXT: addi a0, a0, -1 ; RV64M-NEXT: seqz a0, a0 @@ -205,10 +205,10 @@ define i1 @test_srem_even(i4 %X) nounwind { ; RV32MV-NEXT: srli a2, a1, 31 ; RV32MV-NEXT: srli a1, a1, 4 ; RV32MV-NEXT: add a1, a1, a2 -; RV32MV-NEXT: slli a2, a1, 3 -; RV32MV-NEXT: slli a1, a1, 1 -; RV32MV-NEXT: sub a1, a1, a2 -; RV32MV-NEXT: add a0, a0, a1 +; RV32MV-NEXT: slli a2, a1, 1 +; RV32MV-NEXT: slli a1, a1, 2 +; RV32MV-NEXT: add a1, a1, a2 +; RV32MV-NEXT: sub a0, a0, a1 ; RV32MV-NEXT: andi a0, a0, 15 ; RV32MV-NEXT: addi a0, a0, -1 ; RV32MV-NEXT: seqz a0, a0 @@ -223,10 +223,10 @@ define i1 
@test_srem_even(i4 %X) nounwind { ; RV64MV-NEXT: srli a2, a1, 63 ; RV64MV-NEXT: srli a1, a1, 4 ; RV64MV-NEXT: add a1, a1, a2 -; RV64MV-NEXT: slli a2, a1, 3 -; RV64MV-NEXT: slli a1, a1, 1 -; RV64MV-NEXT: sub a1, a1, a2 -; RV64MV-NEXT: add a0, a0, a1 +; RV64MV-NEXT: slli a2, a1, 1 +; RV64MV-NEXT: slli a1, a1, 2 +; RV64MV-NEXT: add a1, a1, a2 +; RV64MV-NEXT: sub a0, a0, a1 ; RV64MV-NEXT: andi a0, a0, 15 ; RV64MV-NEXT: addi a0, a0, -1 ; RV64MV-NEXT: seqz a0, a0 @@ -823,16 +823,16 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV64MV-NEXT: srai a4, a4, 1 ; RV64MV-NEXT: mulh a6, a3, a6 ; RV64MV-NEXT: add a4, a4, a7 -; RV64MV-NEXT: slli a7, a5, 3 -; RV64MV-NEXT: slli a5, a5, 1 -; RV64MV-NEXT: sub a5, a5, a7 +; RV64MV-NEXT: slli a7, a5, 1 +; RV64MV-NEXT: slli a5, a5, 2 +; RV64MV-NEXT: add a5, a5, a7 ; RV64MV-NEXT: srli a7, a6, 63 ; RV64MV-NEXT: srai a6, a6, 1 ; RV64MV-NEXT: add a6, a6, a7 ; RV64MV-NEXT: add a2, a2, a4 ; RV64MV-NEXT: slli a4, a4, 3 ; RV64MV-NEXT: sub a2, a2, a4 -; RV64MV-NEXT: add a1, a1, a5 +; RV64MV-NEXT: sub a1, a1, a5 ; RV64MV-NEXT: li a4, -1 ; RV64MV-NEXT: srli a4, a4, 31 ; RV64MV-NEXT: vsext.vf8 v8, v10 diff --git a/llvm/test/CodeGen/RISCV/xqciac.ll b/llvm/test/CodeGen/RISCV/xqciac.ll index 918468bdf03d3..92be4c977dd82 100644 --- a/llvm/test/CodeGen/RISCV/xqciac.ll +++ b/llvm/test/CodeGen/RISCV/xqciac.ll @@ -172,8 +172,8 @@ define dso_local i32 @pow2minuspow2(i32 %a, i32 %b) local_unnamed_addr #0 { ; RV32IM-LABEL: pow2minuspow2: ; RV32IM: # %bb.0: # %entry ; RV32IM-NEXT: slli a2, a1, 7 -; RV32IM-NEXT: slli a1, a1, 9 -; RV32IM-NEXT: sub a1, a1, a2 +; RV32IM-NEXT: slli a1, a1, 8 +; RV32IM-NEXT: add a1, a1, a2 ; RV32IM-NEXT: add a0, a1, a0 ; RV32IM-NEXT: ret ; diff --git a/llvm/test/CodeGen/X86/pr63790.ll b/llvm/test/CodeGen/X86/pr63790.ll new file mode 100644 index 0000000000000..e4e7a3c536d07 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr63790.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s -mtriple=x86_64 | FileCheck %s + +define void @f(ptr %0, i64 %1) { +; CHECK-LABEL: f: +; CHECK: # %bb.0: # %BB +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: andl $1, %esi +; CHECK-NEXT: movaps (%rdi), %xmm0 +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movl $42, %edi +; CHECK-NEXT: callq *16(%rsp,%rsi,8) +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps %xmm0, (%rax) +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +BB: + %fps = load <2 x ptr>, ptr %0 + %fp = extractelement <2 x ptr> %fps, i64 %1 + %p = call ptr %fp(i32 42) + store <2 x ptr> %fps, ptr %p + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/bug165359.ll b/llvm/test/Transforms/LoopVectorize/AArch64/bug165359.ll new file mode 100644 index 0000000000000..87320c547a757 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/bug165359.ll @@ -0,0 +1,25 @@ +; RUN: opt < %s -passes=loop-vectorize -S -pass-remarks-analysis=loop-vectorize -disable-output &> %t +; RUN: cat %t | FileCheck --check-prefix=CHECK-REMARKS %s + +; CHECK-REMARKS: remark: :0:0: Recipe with invalid costs prevented vectorization at VF=(vscale x 1): fadd + +target triple = "aarch64-unknown-linux-gnu" + +define double @reduce_fail(i64 %loop_count, double %d0, ptr %ptr1) #0 { +entry: + %d1 = load double, ptr %ptr1 + br label %loop + +loop: + %acc0 = phi double [ %fadd0, %loop ], [ %d0, 
%entry ] + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %fadd0 = fadd double %acc0, %d1 + %iv.next = add nsw nuw i64 %iv, 1 + %exit_cond = icmp eq i64 %iv.next, %loop_count + br i1 %exit_cond, label %loopexit, label %loop + +loopexit: + ret double %fadd0 +} + +attributes #0 = { "target-features"="+sve" } diff --git a/llvm/unittests/CodeGen/InstrRefLDVTest.cpp b/llvm/unittests/CodeGen/InstrRefLDVTest.cpp index 235a53dcc156e..5211a6c8ef416 100644 --- a/llvm/unittests/CodeGen/InstrRefLDVTest.cpp +++ b/llvm/unittests/CodeGen/InstrRefLDVTest.cpp @@ -955,7 +955,7 @@ TEST_F(InstrRefLDVTest, MLocSingleBlock) { // Add a new register to be tracked, and insert it into the transfer function // as a copy. The output of $rax should be the live-in value of $rsp. Register RAX = getRegByName("RAX"); - LocIdx RaxLoc = MTracker->lookupOrTrackRegister(RAX); + LocIdx RaxLoc = MTracker->lookupOrTrackRegister(MTracker->getLocID(RAX)); TransferFunc[0].insert({RspLoc, ValueIDNum(0, 1, RspLoc)}); TransferFunc[0].insert({RaxLoc, ValueIDNum(0, 0, RspLoc)}); initValueArray(MInLocs, 1, 2); @@ -980,7 +980,7 @@ TEST_F(InstrRefLDVTest, MLocDiamondBlocks) { ASSERT_TRUE(MTracker->getNumLocs() == 1); LocIdx RspLoc(0); Register RAX = getRegByName("RAX"); - LocIdx RaxLoc = MTracker->lookupOrTrackRegister(RAX); + LocIdx RaxLoc = MTracker->lookupOrTrackRegister(MTracker->getLocID(RAX)); auto [MInLocs, MOutLocs] = allocValueTables(4, 2); @@ -1194,7 +1194,7 @@ TEST_F(InstrRefLDVTest, MLocSimpleLoop) { ASSERT_TRUE(MTracker->getNumLocs() == 1); LocIdx RspLoc(0); Register RAX = getRegByName("RAX"); - LocIdx RaxLoc = MTracker->lookupOrTrackRegister(RAX); + LocIdx RaxLoc = MTracker->lookupOrTrackRegister(MTracker->getLocID(RAX)); auto [MInLocs, MOutLocs] = allocValueTables(3, 2); @@ -1292,7 +1292,7 @@ TEST_F(InstrRefLDVTest, MLocNestedLoop) { ASSERT_TRUE(MTracker->getNumLocs() == 1); LocIdx RspLoc(0); Register RAX = getRegByName("RAX"); - LocIdx RaxLoc = MTracker->lookupOrTrackRegister(RAX); + LocIdx RaxLoc = MTracker->lookupOrTrackRegister(MTracker->getLocID(RAX)); auto [MInLocs, MOutLocs] = allocValueTables(5, 2); @@ -1493,7 +1493,7 @@ TEST_F(InstrRefLDVTest, MLocNoDominatingLoop) { ASSERT_TRUE(MTracker->getNumLocs() == 1); LocIdx RspLoc(0); Register RAX = getRegByName("RAX"); - LocIdx RaxLoc = MTracker->lookupOrTrackRegister(RAX); + LocIdx RaxLoc = MTracker->lookupOrTrackRegister(MTracker->getLocID(RAX)); auto [MInLocs, MOutLocs] = allocValueTables(5, 2); @@ -1648,7 +1648,7 @@ TEST_F(InstrRefLDVTest, MLocBadlyNestedLoops) { ASSERT_TRUE(MTracker->getNumLocs() == 1); LocIdx RspLoc(0); Register RAX = getRegByName("RAX"); - LocIdx RaxLoc = MTracker->lookupOrTrackRegister(RAX); + LocIdx RaxLoc = MTracker->lookupOrTrackRegister(MTracker->getLocID(RAX)); auto [MInLocs, MOutLocs] = allocValueTables(5, 2); @@ -1780,7 +1780,7 @@ TEST_F(InstrRefLDVTest, pickVPHILocDiamond) { ASSERT_TRUE(MTracker->getNumLocs() == 1); LocIdx RspLoc(0); Register RAX = getRegByName("RAX"); - LocIdx RaxLoc = MTracker->lookupOrTrackRegister(RAX); + LocIdx RaxLoc = MTracker->lookupOrTrackRegister(MTracker->getLocID(RAX)); auto [MInLocs, MOutLocs] = allocValueTables(4, 2); @@ -1976,7 +1976,7 @@ TEST_F(InstrRefLDVTest, pickVPHILocLoops) { ASSERT_TRUE(MTracker->getNumLocs() == 1); LocIdx RspLoc(0); Register RAX = getRegByName("RAX"); - LocIdx RaxLoc = MTracker->lookupOrTrackRegister(RAX); + LocIdx RaxLoc = MTracker->lookupOrTrackRegister(MTracker->getLocID(RAX)); auto [MInLocs, MOutLocs] = allocValueTables(3, 2); @@ -2104,9 +2104,9 @@ TEST_F(InstrRefLDVTest, 
pickVPHILocBadlyNestedLoops) { ASSERT_TRUE(MTracker->getNumLocs() == 1); LocIdx RspLoc(0); Register RAX = getRegByName("RAX"); - LocIdx RaxLoc = MTracker->lookupOrTrackRegister(RAX); + LocIdx RaxLoc = MTracker->lookupOrTrackRegister(MTracker->getLocID(RAX)); Register RBX = getRegByName("RBX"); - LocIdx RbxLoc = MTracker->lookupOrTrackRegister(RBX); + LocIdx RbxLoc = MTracker->lookupOrTrackRegister(MTracker->getLocID(RBX)); auto [MInLocs, MOutLocs] = allocValueTables(5, 3); @@ -2256,7 +2256,7 @@ TEST_F(InstrRefLDVTest, vlocJoinDiamond) { ASSERT_TRUE(MTracker->getNumLocs() == 1); LocIdx RspLoc(0); Register RAX = getRegByName("RAX"); - MTracker->lookupOrTrackRegister(RAX); + MTracker->lookupOrTrackRegister(MTracker->getLocID(RAX)); DbgOpID LiveInRspID = DbgOpID(false, 0); DbgOpID LiveInRaxID = DbgOpID(false, 1); @@ -2440,7 +2440,7 @@ TEST_F(InstrRefLDVTest, vlocJoinLoops) { ASSERT_TRUE(MTracker->getNumLocs() == 1); LocIdx RspLoc(0); Register RAX = getRegByName("RAX"); - MTracker->lookupOrTrackRegister(RAX); + MTracker->lookupOrTrackRegister(MTracker->getLocID(RAX)); DbgOpID LiveInRspID = DbgOpID(false, 0); DbgOpID LiveInRaxID = DbgOpID(false, 1); @@ -2538,9 +2538,9 @@ TEST_F(InstrRefLDVTest, vlocJoinBadlyNestedLoops) { ASSERT_TRUE(MTracker->getNumLocs() == 1); LocIdx RspLoc(0); Register RAX = getRegByName("RAX"); - MTracker->lookupOrTrackRegister(RAX); + MTracker->lookupOrTrackRegister(MTracker->getLocID(RAX)); Register RBX = getRegByName("RBX"); - MTracker->lookupOrTrackRegister(RBX); + MTracker->lookupOrTrackRegister(MTracker->getLocID(RBX)); DbgOpID LiveInRspID = DbgOpID(false, 0); DbgOpID LiveInRaxID = DbgOpID(false, 1); @@ -2678,7 +2678,7 @@ TEST_F(InstrRefLDVTest, VLocDiamondBlocks) { ASSERT_TRUE(MTracker->getNumLocs() == 1); LocIdx RspLoc(0); Register RAX = getRegByName("RAX"); - LocIdx RaxLoc = MTracker->lookupOrTrackRegister(RAX); + LocIdx RaxLoc = MTracker->lookupOrTrackRegister(MTracker->getLocID(RAX)); unsigned EntryBlk = 0, RetBlk = 3; @@ -2896,7 +2896,7 @@ TEST_F(InstrRefLDVTest, VLocSimpleLoop) { ASSERT_TRUE(MTracker->getNumLocs() == 1); LocIdx RspLoc(0); Register RAX = getRegByName("RAX"); - LocIdx RaxLoc = MTracker->lookupOrTrackRegister(RAX); + LocIdx RaxLoc = MTracker->lookupOrTrackRegister(MTracker->getLocID(RAX)); unsigned EntryBlk = 0, LoopBlk = 1; @@ -3175,7 +3175,7 @@ TEST_F(InstrRefLDVTest, VLocNestedLoop) { ASSERT_TRUE(MTracker->getNumLocs() == 1); LocIdx RspLoc(0); Register RAX = getRegByName("RAX"); - LocIdx RaxLoc = MTracker->lookupOrTrackRegister(RAX); + LocIdx RaxLoc = MTracker->lookupOrTrackRegister(MTracker->getLocID(RAX)); unsigned EntryBlk = 0, Loop1Blk = 1, Loop2Blk = 2; diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp index 2f15cc8c76548..ae0431e79e1bc 100644 --- a/llvm/utils/TableGen/SubtargetEmitter.cpp +++ b/llvm/utils/TableGen/SubtargetEmitter.cpp @@ -27,6 +27,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/TableGen/CodeGenHelpers.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/StringToOffsetTable.h" @@ -75,7 +76,15 @@ class SubtargetEmitter : TargetFeaturesEmitter { CodeGenTarget TGT; CodeGenSchedModels &SchedModels; + FeatureMapTy emitEnums(raw_ostream &OS); void emitSubtargetInfoMacroCalls(raw_ostream &OS); + std::tuple + emitMCDesc(raw_ostream &OS, const FeatureMapTy &FeatureMap); + void emitTargetDesc(raw_ostream &OS); + void emitHeader(raw_ostream &OS); + void 
emitCtor(raw_ostream &OS, unsigned NumNames, unsigned NumFeatures, + unsigned NumProcs); + unsigned featureKeyValues(raw_ostream &OS, const FeatureMapTy &FeatureMap); unsigned cpuKeyValues(raw_ostream &OS, const FeatureMapTy &FeatureMap); unsigned cpuNames(raw_ostream &OS); @@ -141,7 +150,9 @@ class SubtargetEmitter : TargetFeaturesEmitter { /// Emit some information about the SubtargetFeature as calls to a macro so /// that they can be used from C++. void SubtargetEmitter::emitSubtargetInfoMacroCalls(raw_ostream &OS) { - OS << "\n#ifdef GET_SUBTARGETINFO_MACRO\n"; + // Undef the GET_SUBTARGETINFO_MACRO macro at the end of the scope since it's + // used within the scope. + IfDefEmitter IfDefMacro(OS, "GET_SUBTARGETINFO_MACRO", /*LateUndef=*/true); std::vector FeatureList = Records.getAllDerivedDefinitions("SubtargetFeature"); @@ -167,14 +178,6 @@ void SubtargetEmitter::emitSubtargetInfoMacroCalls(raw_ostream &OS) { OS << "GET_SUBTARGETINFO_MACRO(" << FieldName << ", " << Default << ", " << Getter << ")\n"; } - OS << "#undef GET_SUBTARGETINFO_MACRO\n"; - OS << "#endif // GET_SUBTARGETINFO_MACRO\n\n"; - - OS << "\n#ifdef GET_SUBTARGETINFO_MC_DESC\n"; - OS << "#undef GET_SUBTARGETINFO_MC_DESC\n\n"; - - if (Target == "AArch64") - OS << "#include \"llvm/TargetParser/AArch64TargetParser.h\"\n\n"; } // @@ -440,26 +443,24 @@ void SubtargetEmitter::emitStageAndOperandCycleData( continue; StringRef Name = ProcModel.ItinsDef->getName(); - OS << "\n// Functional units for \"" << Name << "\"\n" - << "namespace " << Name << "FU {\n"; - - for (const auto &[Idx, FU] : enumerate(FUs)) - OS << " const InstrStage::FuncUnits " << FU->getName() << " = 1ULL << " - << Idx << ";\n"; + { + OS << "\n// Functional units for \"" << Name << "\"\n"; + NamespaceEmitter FUNamespace(OS, (Name + Twine("FU")).str()); - OS << "} // end namespace " << Name << "FU\n"; + for (const auto &[Idx, FU] : enumerate(FUs)) + OS << " const InstrStage::FuncUnits " << FU->getName() << " = 1ULL << " + << Idx << ";\n"; + } ConstRecVec BPs = ProcModel.ItinsDef->getValueAsListOfDefs("BP"); if (BPs.empty()) continue; - OS << "\n// Pipeline forwarding paths for itineraries \"" << Name << "\"\n" - << "namespace " << Name << "Bypass {\n"; + OS << "\n// Pipeline forwarding paths for itineraries \"" << Name << "\"\n"; + NamespaceEmitter BypassNamespace(OS, (Name + Twine("Bypass")).str()); OS << " const unsigned NoBypass = 0;\n"; for (const auto &[Idx, BP] : enumerate(BPs)) OS << " const unsigned " << BP->getName() << " = 1 << " << Idx << ";\n"; - - OS << "} // end namespace " << Name << "Bypass\n"; } // Begin stages table @@ -1940,13 +1941,14 @@ void SubtargetEmitter::parseFeaturesFunction(raw_ostream &OS) { } void SubtargetEmitter::emitGenMCSubtargetInfo(raw_ostream &OS) { - OS << "namespace " << Target << "_MC {\n" - << "unsigned resolveVariantSchedClassImpl(unsigned SchedClass,\n" - << " const MCInst *MI, const MCInstrInfo *MCII, " - << "const MCSubtargetInfo &STI, unsigned CPUID) {\n"; - emitSchedModelHelpersImpl(OS, /* OnlyExpandMCPredicates */ true); - OS << "}\n"; - OS << "} // end namespace " << Target << "_MC\n\n"; + { + NamespaceEmitter NS(OS, (Target + Twine("_MC")).str()); + OS << "unsigned resolveVariantSchedClassImpl(unsigned SchedClass,\n" + << " const MCInst *MI, const MCInstrInfo *MCII, " + << "const MCSubtargetInfo &STI, unsigned CPUID) {\n"; + emitSchedModelHelpersImpl(OS, /* OnlyExpandMCPredicates */ true); + OS << "}\n"; + } OS << "struct " << Target << "GenMCSubtargetInfo : public MCSubtargetInfo {\n"; @@ -1982,46 +1984,37 @@ 
 }
 
 void SubtargetEmitter::emitMcInstrAnalysisPredicateFunctions(raw_ostream &OS) {
-  OS << "\n#ifdef GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS\n";
-  OS << "#undef GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS\n\n";
-
   STIPredicateExpander PE(Target, /*Indent=*/0);
-  PE.setExpandForMC(true);
-  PE.setByRef(true);
-  for (const STIPredicateFunction &Fn : SchedModels.getSTIPredicates())
-    PE.expandSTIPredicate(OS, Fn);
-
-  OS << "#endif // GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS\n\n";
-
-  OS << "\n#ifdef GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS\n";
-  OS << "#undef GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS\n\n";
+  {
+    IfDefEmitter IfDefDecls(OS, "GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS");
+    PE.setExpandForMC(true);
+    PE.setByRef(true);
+    for (const STIPredicateFunction &Fn : SchedModels.getSTIPredicates())
+      PE.expandSTIPredicate(OS, Fn);
+  }
 
+  IfDefEmitter IfDefDefs(OS, "GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS");
   std::string ClassPrefix = Target + "MCInstrAnalysis";
   PE.setExpandDefinition(true);
   PE.setClassPrefix(ClassPrefix);
   for (const STIPredicateFunction &Fn : SchedModels.getSTIPredicates())
     PE.expandSTIPredicate(OS, Fn);
-
-  OS << "#endif // GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS\n\n";
 }
 
-//
-// SubtargetEmitter::run - Main subtarget enumeration emitter.
-//
-void SubtargetEmitter::run(raw_ostream &OS) {
-  emitSourceFileHeader("Subtarget Enumeration Source Fragment", OS);
-
-  OS << "\n#ifdef GET_SUBTARGETINFO_ENUM\n";
-  OS << "#undef GET_SUBTARGETINFO_ENUM\n\n";
-
-  OS << "namespace llvm {\n";
-  auto FeatureMap = enumeration(OS);
-  OS << "} // end namespace llvm\n\n";
-  OS << "#endif // GET_SUBTARGETINFO_ENUM\n\n";
+FeatureMapTy SubtargetEmitter::emitEnums(raw_ostream &OS) {
+  IfDefEmitter IfDef(OS, "GET_SUBTARGETINFO_ENUM");
+  NamespaceEmitter NS(OS, "llvm");
+  return enumeration(OS);
+}
 
-  emitSubtargetInfoMacroCalls(OS);
+std::tuple<unsigned, unsigned, unsigned>
+SubtargetEmitter::emitMCDesc(raw_ostream &OS, const FeatureMapTy &FeatureMap) {
+  IfDefEmitter IfDef(OS, "GET_SUBTARGETINFO_MC_DESC");
+  if (Target == "AArch64")
+    OS << "#include \"llvm/TargetParser/AArch64TargetParser.h\"\n\n";
+  NamespaceEmitter LlvmNS(OS, "llvm");
 
-  OS << "namespace llvm {\n";
   unsigned NumFeatures = featureKeyValues(OS, FeatureMap);
   OS << "\n";
   emitSchedModel(OS);
@@ -2067,13 +2060,11 @@ void SubtargetEmitter::run(raw_ostream &OS) {
     OS << "nullptr, nullptr, nullptr";
   }
   OS << ");\n}\n\n";
+  return {NumNames, NumFeatures, NumProcs};
+}
 
-  OS << "} // end namespace llvm\n\n";
-
-  OS << "#endif // GET_SUBTARGETINFO_MC_DESC\n\n";
-
-  OS << "\n#ifdef GET_SUBTARGETINFO_TARGET_DESC\n";
-  OS << "#undef GET_SUBTARGETINFO_TARGET_DESC\n\n";
+void SubtargetEmitter::emitTargetDesc(raw_ostream &OS) {
+  IfDefEmitter IfDef(OS, "GET_SUBTARGETINFO_TARGET_DESC");
 
   OS << "#include \"llvm/ADT/BitmaskEnum.h\"\n";
   OS << "#include \"llvm/Support/Debug.h\"\n";
@@ -2081,21 +2072,21 @@ void SubtargetEmitter::run(raw_ostream &OS) {
   if (Target == "AArch64")
     OS << "#include \"llvm/TargetParser/AArch64TargetParser.h\"\n\n";
   parseFeaturesFunction(OS);
+}
 
-  OS << "#endif // GET_SUBTARGETINFO_TARGET_DESC\n\n";
-
+void SubtargetEmitter::emitHeader(raw_ostream &OS) {
   // Create a TargetSubtargetInfo subclass to hide the MC layer initialization.
- OS << "\n#ifdef GET_SUBTARGETINFO_HEADER\n"; - OS << "#undef GET_SUBTARGETINFO_HEADER\n\n"; + IfDefEmitter IfDef(OS, "GET_SUBTARGETINFO_HEADER"); + NamespaceEmitter LLVMNS(OS, "llvm"); std::string ClassName = Target + "GenSubtargetInfo"; - OS << "namespace llvm {\n"; OS << "class DFAPacketizer;\n"; - OS << "namespace " << Target << "_MC {\n" - << "unsigned resolveVariantSchedClassImpl(unsigned SchedClass," - << " const MCInst *MI, const MCInstrInfo *MCII, " - << "const MCSubtargetInfo &STI, unsigned CPUID);\n" - << "} // end namespace " << Target << "_MC\n\n"; + { + NamespaceEmitter MCNS(OS, (Target + Twine("_MC")).str()); + OS << "unsigned resolveVariantSchedClassImpl(unsigned SchedClass," + << " const MCInst *MI, const MCInstrInfo *MCII, " + << "const MCSubtargetInfo &STI, unsigned CPUID);\n"; + } OS << "struct " << ClassName << " : public TargetSubtargetInfo {\n" << " explicit " << ClassName << "(const Triple &TT, StringRef CPU, " << "StringRef TuneCPU, StringRef FS);\n" @@ -2140,17 +2131,15 @@ void SubtargetEmitter::run(raw_ostream &OS) { PE.setByRef(false); for (const STIPredicateFunction &Fn : SchedModels.getSTIPredicates()) PE.expandSTIPredicate(OS, Fn); + OS << "};\n"; +} - OS << "};\n" - << "} // end namespace llvm\n\n"; - - OS << "#endif // GET_SUBTARGETINFO_HEADER\n\n"; - - OS << "\n#ifdef GET_SUBTARGETINFO_CTOR\n"; - OS << "#undef GET_SUBTARGETINFO_CTOR\n\n"; - +void SubtargetEmitter::emitCtor(raw_ostream &OS, unsigned NumNames, + unsigned NumFeatures, unsigned NumProcs) { + IfDefEmitter IfDef(OS, "GET_SUBTARGETINFO_CTOR"); OS << "#include \"llvm/CodeGen/TargetSchedule.h\"\n\n"; - OS << "namespace llvm {\n"; + + NamespaceEmitter LLVMNS(OS, "llvm"); OS << "extern const llvm::StringRef " << Target << "Names[];\n"; OS << "extern const llvm::SubtargetFeatureKV " << Target << "FeatureKV[];\n"; OS << "extern const llvm::SubtargetSubTypeKV " << Target << "SubTypeKV[];\n"; @@ -2167,6 +2156,7 @@ void SubtargetEmitter::run(raw_ostream &OS) { OS << "extern const unsigned " << Target << "ForwardingPaths[];\n"; } + std::string ClassName = Target + "GenSubtargetInfo"; OS << ClassName << "::" << ClassName << "(const Triple &TT, StringRef CPU, " << "StringRef TuneCPU, StringRef FS)\n"; @@ -2204,11 +2194,20 @@ void SubtargetEmitter::run(raw_ostream &OS) { emitSchedModelHelpers(ClassName, OS); emitHwModeCheck(ClassName, OS, /*IsMC=*/false); emitGetMacroFusions(ClassName, OS); +} - OS << "} // end namespace llvm\n\n"; - - OS << "#endif // GET_SUBTARGETINFO_CTOR\n\n"; +// +// SubtargetEmitter::run - Main subtarget enumeration emitter. 
+//
+void SubtargetEmitter::run(raw_ostream &OS) {
+  emitSourceFileHeader("Subtarget Enumeration Source Fragment", OS);
+  auto FeatureMap = emitEnums(OS);
+  emitSubtargetInfoMacroCalls(OS);
+  auto [NumNames, NumFeatures, NumProcs] = emitMCDesc(OS, FeatureMap);
+  emitTargetDesc(OS);
+  emitHeader(OS);
+  emitCtor(OS, NumNames, NumFeatures, NumProcs);
 
   emitMcInstrAnalysisPredicateFunctions(OS);
 }
diff --git a/offload/tools/offload-tblgen/EntryPointGen.cpp b/offload/tools/offload-tblgen/EntryPointGen.cpp
index 4e42e4905b993..4f76100ed2dc3 100644
--- a/offload/tools/offload-tblgen/EntryPointGen.cpp
+++ b/offload/tools/offload-tblgen/EntryPointGen.cpp
@@ -83,13 +83,15 @@ static void EmitEntryPointFunc(const FunctionRec &F, raw_ostream &OS) {
   OS << ") {\n";
 
   // Check offload is initialized
-  if (F.getName() != "olInit")
+  if (F.getName() != "olInit") {
     OS << "if (!llvm::offload::isOffloadInitialized()) return &UninitError;";
 
-  // Emit pre-call prints
-  OS << TAB_1 "if (llvm::offload::isTracingEnabled()) {\n";
-  OS << formatv(TAB_2 "llvm::errs() << \"---> {0}\";\n", F.getName());
-  OS << TAB_1 "}\n\n";
+    // Emit pre-call prints. For olInit the pre-call print is postponed,
+    // since tracing requires liboffload to be initialized first.
+    OS << TAB_1 "if (llvm::offload::isTracingEnabled()) {\n";
+    OS << formatv(TAB_2 "llvm::errs() << \"---> {0}\";\n", F.getName());
+    OS << TAB_1 "}\n\n";
+  }
 
   // Perform actual function call to the validation wrapper
   ParamNameList = ParamNameList.substr(0, ParamNameList.size() - 2);
@@ -99,6 +101,10 @@ static void EmitEntryPointFunc(const FunctionRec &F, raw_ostream &OS) {
 
   // Emit post-call prints
   OS << TAB_1 "if (llvm::offload::isTracingEnabled()) {\n";
+  // Postponed pre-call print for olInit.
+  if (F.getName() == "olInit")
+    OS << formatv(TAB_2 "llvm::errs() << \"---> {0}\";\n", F.getName());
+
   if (F.getParams().size() > 0) {
     OS << formatv(TAB_2 "{0} Params = {{", F.getParamStructName());
     for (const auto &Param : F.getParams()) {
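
Note on the pattern used throughout the SubtargetEmitter changes above: IfDefEmitter and NamespaceEmitter are RAII printers, so each `#undef`, `#endif`, and closing `} // end namespace` is emitted from a destructor when the guard object goes out of scope. That is what lets the refactored functions simply return (as emitEnums does) and why some emitters are wrapped in extra braces, which force the closing text to appear at a specific point. The sketch below illustrates the idea only; the class names match the helpers used in the diff, but the exact constructor signatures and the header they live in are assumptions here, not the authoritative interface.

// Minimal sketch (assumed semantics, simplified): print the opening text in
// the constructor and the matching closing text in the destructor, so every
// early return or scope exit still produces balanced output.
#include "llvm/Support/raw_ostream.h"
#include <string>
#include <utility>

class IfDefEmitter {
  llvm::raw_ostream &OS;
  std::string Macro;
  bool LateUndef;

public:
  // With LateUndef=true, the #undef is emitted at the end of the guarded
  // region instead of immediately after the #ifdef, for macros (such as
  // GET_SUBTARGETINFO_MACRO) that are referenced inside the region.
  IfDefEmitter(llvm::raw_ostream &OS, std::string Macro, bool LateUndef = false)
      : OS(OS), Macro(std::move(Macro)), LateUndef(LateUndef) {
    OS << "\n#ifdef " << this->Macro << "\n";
    if (!LateUndef)
      OS << "#undef " << this->Macro << "\n\n";
  }
  ~IfDefEmitter() {
    if (LateUndef)
      OS << "#undef " << Macro << "\n";
    OS << "#endif // " << Macro << "\n\n";
  }
};

class NamespaceEmitter {
  llvm::raw_ostream &OS;
  std::string Name;

public:
  NamespaceEmitter(llvm::raw_ostream &OS, std::string Name)
      : OS(OS), Name(std::move(Name)) {
    OS << "namespace " << this->Name << " {\n";
  }
  ~NamespaceEmitter() { OS << "} // end namespace " << Name << "\n\n"; }
};

Declaration order matters because destructors run in reverse: in emitMCDesc above, IfDef is constructed before LlvmNS, so the generated namespace closes before the #endif, matching the nesting of the emitted output.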