diff --git a/clang-tools-extra/clang-doc/JSONGenerator.cpp b/clang-tools-extra/clang-doc/JSONGenerator.cpp index b17cc80bdba34..93ec90e9048ea 100644 --- a/clang-tools-extra/clang-doc/JSONGenerator.cpp +++ b/clang-tools-extra/clang-doc/JSONGenerator.cpp @@ -468,7 +468,6 @@ static void insertArray(Object &Obj, json::Value &Array, StringRef Key) { static void serializeInfo(const RecordInfo &I, json::Object &Obj, const std::optional &RepositoryUrl) { serializeCommonAttributes(I, Obj, RepositoryUrl); - Obj["FullName"] = I.FullName; Obj["TagType"] = getTagType(I.TagType); Obj["IsTypedef"] = I.IsTypeDef; Obj["MangledName"] = I.MangledName; diff --git a/clang-tools-extra/clang-doc/Representation.h b/clang-tools-extra/clang-doc/Representation.h index d8c2b9c0a5842..79e9bfc291c3a 100644 --- a/clang-tools-extra/clang-doc/Representation.h +++ b/clang-tools-extra/clang-doc/Representation.h @@ -437,10 +437,6 @@ struct FunctionInfo : public SymbolInfo { // (AS_public = 0, AS_protected = 1, AS_private = 2, AS_none = 3) AccessSpecifier Access = AccessSpecifier::AS_public; - // Full qualified name of this function, including namespaces and template - // specializations. - SmallString<16> FullName; - // Function Prototype SmallString<256> Prototype; @@ -460,10 +456,6 @@ struct RecordInfo : public SymbolInfo { // Type of this record (struct, class, union, interface). TagTypeKind TagType = TagTypeKind::Struct; - // Full qualified name of this record, including namespaces and template - // specializations. - SmallString<16> FullName; - // When present, this record is a template or specialization. 
std::optional Template; diff --git a/clang-tools-extra/clang-doc/Serialize.cpp b/clang-tools-extra/clang-doc/Serialize.cpp index 186f634dd892a..7f8691d63622f 100644 --- a/clang-tools-extra/clang-doc/Serialize.cpp +++ b/clang-tools-extra/clang-doc/Serialize.cpp @@ -178,55 +178,6 @@ static llvm::SmallString<16> getTypeAlias(const TypeAliasDecl *Alias) { return Result; } -// extract full syntax for record declaration -static llvm::SmallString<16> getRecordPrototype(const CXXRecordDecl *CXXRD) { - llvm::SmallString<16> Result; - LangOptions LangOpts; - PrintingPolicy Policy(LangOpts); - Policy.SuppressTagKeyword = false; - Policy.FullyQualifiedName = true; - Policy.IncludeNewlines = false; - llvm::raw_svector_ostream OS(Result); - if (const auto *TD = CXXRD->getDescribedClassTemplate()) { - OS << "template <"; - bool FirstParam = true; - for (const auto *Param : *TD->getTemplateParameters()) { - if (!FirstParam) - OS << ", "; - Param->print(OS, Policy); - FirstParam = false; - } - OS << ">\n"; - } - - if (CXXRD->isStruct()) - OS << "struct "; - else if (CXXRD->isClass()) - OS << "class "; - else if (CXXRD->isUnion()) - OS << "union "; - - OS << CXXRD->getNameAsString(); - - // We need to make sure we have a good enough declaration to check. In the - // case where the class is a forward declaration, we'll fail assertions in - // DeclCXX. - if (CXXRD->isCompleteDefinition() && CXXRD->getNumBases() > 0) { - OS << " : "; - bool FirstBase = true; - for (const auto &Base : CXXRD->bases()) { - if (!FirstBase) - OS << ", "; - if (Base.isVirtual()) - OS << "virtual "; - OS << getAccessSpelling(Base.getAccessSpecifier()) << " "; - OS << Base.getType().getAsString(Policy); - FirstBase = false; - } - } - return Result; -} - // A function to extract the appropriate relative path for a given info's // documentation. The path returned is a composite of the parent namespaces. 
// @@ -1033,7 +984,6 @@ emitInfo(const RecordDecl *D, const FullComment *FC, Location Loc, parseFields(*RI, D, PublicOnly); if (const auto *C = dyn_cast(D)) { - RI->FullName = getRecordPrototype(C); if (const TypedefNameDecl *TD = C->getTypedefNameForAnonDecl()) { RI->Name = TD->getNameAsString(); RI->IsTypeDef = true; diff --git a/clang-tools-extra/test/clang-doc/json/class.cpp b/clang-tools-extra/test/clang-doc/json/class.cpp index 20a9f218b3d79..adb1ed7511c3b 100644 --- a/clang-tools-extra/test/clang-doc/json/class.cpp +++ b/clang-tools-extra/test/clang-doc/json/class.cpp @@ -124,8 +124,6 @@ struct MyClass { // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: ], -// COM: FIXME: FullName is not emitted correctly. -// CHECK-NEXT: "FullName": "", // CHECK-NEXT: "HasEnums": true, // CHECK-NEXT: "HasPublicFunctions": true, // CHECK-NEXT: "HasPublicMembers": true, diff --git a/clang-tools-extra/unittests/clang-doc/JSONGeneratorTest.cpp b/clang-tools-extra/unittests/clang-doc/JSONGeneratorTest.cpp index 07c761fcd0685..2706a5145ebfd 100644 --- a/clang-tools-extra/unittests/clang-doc/JSONGeneratorTest.cpp +++ b/clang-tools-extra/unittests/clang-doc/JSONGeneratorTest.cpp @@ -16,8 +16,6 @@ static std::unique_ptr getJSONGenerator() { TEST(JSONGeneratorTest, emitRecordJSON) { RecordInfo I; I.Name = "Foo"; - // FIXME: FullName is not emitted correctly. 
- I.FullName = ""; I.IsTypeDef = false; I.Namespace.emplace_back(EmptySID, "GlobalNamespace", InfoType::IT_namespace); I.Path = "GlobalNamespace"; @@ -64,7 +62,6 @@ TEST(JSONGeneratorTest, emitRecordJSON) { { "Access": "public", "End": true, - "FullName": "", "HasPublicFunctions": true, "HasPublicMembers": true, "InfoType": "record", @@ -115,7 +112,6 @@ TEST(JSONGeneratorTest, emitRecordJSON) { "USR": "0000000000000000000000000000000000000000" } ], - "FullName": "", "HasEnums": true, "HasPublicFunctions": true, "HasRecords": true, diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 5fb56c64cf68f..63fc39ed70069 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -802,6 +802,8 @@ of different sizes and signs is forbidden in binary and ternary builtins. T __builtin_elementwise_exp(T x) returns the base-e exponential, e^x, of the specified value floating point types T __builtin_elementwise_exp2(T x) returns the base-2 exponential, 2^x, of the specified value floating point types T __builtin_elementwise_exp10(T x) returns the base-10 exponential, 10^x, of the specified value floating point types + T __builtin_elementwise_ldexp(T x, IntT y) returns the product of x and 2 raised to the power y. T: floating point types, + y must be an integer type matching the shape of x. IntT: integer types T __builtin_elementwise_sqrt(T x) return the square root of a floating-point number floating point types T __builtin_elementwise_roundeven(T x) round x to the nearest integer value in floating point format, floating point types diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 2b315031979bb..872d9f5e64c96 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -209,6 +209,8 @@ C23 Feature Support Non-comprehensive list of changes in this release ------------------------------------------------- +- Added ``__builtin_elementwise_ldexp``. 
+ - Added ``__builtin_elementwise_fshl`` and ``__builtin_elementwise_fshr``. - ``__builtin_elementwise_abs`` can now be used in constant expression. diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 0275447e1090a..d4a3e34a43c53 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -1418,6 +1418,12 @@ def ElementwiseExp10 : Builtin { let Prototype = "void(...)"; } +def ElementwiseLdexp : Builtin { + let Spellings = ["__builtin_elementwise_ldexp"]; + let Attributes = [NoThrow, Const, CustomTypeChecking]; + let Prototype = "void(...)"; +} + def ElementwiseFloor : Builtin { let Spellings = ["__builtin_elementwise_floor"]; let Attributes = [NoThrow, Const, CustomTypeChecking]; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 0b01e5bfac759..d1069e8b7df2b 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -3992,6 +3992,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_elementwise_exp10: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, Intrinsic::exp10, "elt.exp10")); + case Builtin::BI__builtin_elementwise_ldexp: { + Value *Src = EmitScalarExpr(E->getArg(0)); + Value *Exp = EmitScalarExpr(E->getArg(1)); + Value *Result = Builder.CreateLdexp(Src, Exp, {}, "elt.ldexp"); + return RValue::get(Result); + } case Builtin::BI__builtin_elementwise_log: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, Intrinsic::log, "elt.log")); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 7ed61febd16d0..8b3c879a7f1b6 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2609,6 +2609,18 @@ static ExprResult BuiltinInvoke(Sema &S, CallExpr *TheCall) { Args.drop_front(), TheCall->getRParenLoc()); } +// Performs a similar job to Sema::UsualUnaryConversions, but without any 
+// implicit promotion of integral/enumeration types. +static ExprResult BuiltinVectorMathConversions(Sema &S, Expr *E) { + // First, convert to an r-value. + ExprResult Res = S.DefaultFunctionArrayLvalueConversion(E); + if (Res.isInvalid()) + return ExprError(); + + // Promote floating-point types. + return S.UsualUnaryFPConversions(Res.get()); +} + ExprResult Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, CallExpr *TheCall) { @@ -3273,6 +3285,46 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, return ExprError(); break; + case Builtin::BI__builtin_elementwise_ldexp: { + if (checkArgCount(TheCall, 2)) + return ExprError(); + + ExprResult A = BuiltinVectorMathConversions(*this, TheCall->getArg(0)); + if (A.isInvalid()) + return ExprError(); + QualType TyA = A.get()->getType(); + if (checkMathBuiltinElementType(*this, A.get()->getBeginLoc(), TyA, + EltwiseBuiltinArgTyRestriction::FloatTy, 1)) + return ExprError(); + + ExprResult Exp = UsualUnaryConversions(TheCall->getArg(1)); + if (Exp.isInvalid()) + return ExprError(); + QualType TyExp = Exp.get()->getType(); + if (checkMathBuiltinElementType(*this, Exp.get()->getBeginLoc(), TyExp, + EltwiseBuiltinArgTyRestriction::IntegerTy, + 2)) + return ExprError(); + + // Check the two arguments are either scalars or vectors of equal length. + const auto *Vec0 = TyA->getAs(); + const auto *Vec1 = TyExp->getAs(); + unsigned Arg0Length = Vec0 ? Vec0->getNumElements() : 0; + unsigned Arg1Length = Vec1 ? Vec1->getNumElements() : 0; + if (Arg0Length != Arg1Length) { + Diag(Exp.get()->getBeginLoc(), + diag::err_typecheck_vector_lengths_not_equal) + << TyA << TyExp << A.get()->getSourceRange() + << Exp.get()->getSourceRange(); + return ExprError(); + } + + TheCall->setArg(0, A.get()); + TheCall->setArg(1, Exp.get()); + TheCall->setType(TyA); + break; + } + // These builtins restrict the element type to floating point // types only, and take in two arguments. 
case Builtin::BI__builtin_elementwise_minnum: @@ -15994,18 +16046,6 @@ void Sema::CheckAddressOfPackedMember(Expr *rhs) { _2, _3, _4)); } -// Performs a similar job to Sema::UsualUnaryConversions, but without any -// implicit promotion of integral/enumeration types. -static ExprResult BuiltinVectorMathConversions(Sema &S, Expr *E) { - // First, convert to an r-value. - ExprResult Res = S.DefaultFunctionArrayLvalueConversion(E); - if (Res.isInvalid()) - return ExprError(); - - // Promote floating-point types. - return S.UsualUnaryFPConversions(Res.get()); -} - bool Sema::PrepareBuiltinElementwiseMathOneArgCall( CallExpr *TheCall, EltwiseBuiltinArgTyRestriction ArgTyRestr) { if (checkArgCount(TheCall, 1)) diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp index d3d1f13ab1c78..5cd894af1fd65 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp @@ -578,6 +578,10 @@ class TrivialFunctionAnalysisVisitor return WithCachedResult(CS, [&]() { return VisitChildren(CS); }); } + bool VisitCoroutineBodyStmt(const CoroutineBodyStmt *CBS) { + return WithCachedResult(CBS, [&]() { return VisitChildren(CBS); }); + } + bool VisitReturnStmt(const ReturnStmt *RS) { // A return statement is allowed as long as the return value is trivial. 
if (auto *RV = RS->getRetValue()) diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures-co_await-assertion-failure.cpp b/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures-co_await-assertion-failure.cpp new file mode 100644 index 0000000000000..a67f45700cd10 --- /dev/null +++ b/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures-co_await-assertion-failure.cpp @@ -0,0 +1,11 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=webkit.UncountedLambdaCapturesChecker -std=c++20 -verify %s +// expected-no-diagnostics + +template +void foo(Arg&& arg) +{ + [&]{ + co_await [&](auto&&... args) { + }(arg); + }(); +} diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c index e9344d8fe0b8b..2df485f0155c3 100644 --- a/clang/test/CodeGen/builtins-elementwise-math.c +++ b/clang/test/CodeGen/builtins-elementwise-math.c @@ -6,6 +6,7 @@ typedef half half2 __attribute__((ext_vector_type(2))); typedef float float2 __attribute__((ext_vector_type(2))); typedef float float4 __attribute__((ext_vector_type(4))); typedef short int si8 __attribute__((ext_vector_type(8))); +typedef int int4 __attribute__((ext_vector_type(4))); typedef unsigned int u4 __attribute__((ext_vector_type(4))); typedef double double2 __attribute__((ext_vector_type(2))); typedef double double3 __attribute__((ext_vector_type(3))); @@ -729,6 +730,36 @@ void test_builtin_elementwise_exp10(float f1, float f2, double d1, double d2, vf2 = __builtin_elementwise_exp10(vf1); } +void test_builtin_elementwise_ldexp(float f1, float f2, double d1, double d2, + float4 vf1, float4 vf2, int i1, int4 vi1, short s1, long l1) { + // CHECK-LABEL: define void @test_builtin_elementwise_ldexp( + // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4 + // CHECK: [[I1:%.+]] = load i32, ptr %i1.addr, align 4 + // CHECK-NEXT: call float @llvm.ldexp.f32.i32(float [[F1]], i32 [[I1]]) + f2 = __builtin_elementwise_ldexp(f1, i1); + + // CHECK: [[F2:%.+]] = 
load float, ptr %f1.addr, align 4 + // CHECK: [[S1:%.+]] = load i16, ptr %s1.addr, align 2 + // CHECK: [[Ext1:%.+]] = sext i16 [[S1]] to i32 + // CHECK-NEXT: call float @llvm.ldexp.f32.i32(float [[F2]], i32 [[Ext1]]) + f2 = __builtin_elementwise_ldexp(f1, s1); + + // CHECK: [[F3:%.+]] = load float, ptr %f1.addr, align 4 + // CHECK: [[L1:%.+]] = load i64, ptr %l1.addr, align 8 + // CHECK-NEXT: call float @llvm.ldexp.f32.i64(float [[F3]], i64 [[L1]]) + f2 = __builtin_elementwise_ldexp(f1, l1); + + // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8 + // CHECK: [[I2:%.+]] = load i32, ptr %i1.addr, align 4 + // CHECK-NEXT: call double @llvm.ldexp.f64.i32(double [[D1]], i32 [[I2]]) + d2 = __builtin_elementwise_ldexp(d1, i1); + + // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16 + // CHECK: [[VI1:%.+]] = load <4 x i32>, ptr %vi1.addr, align 16 + // CHECK-NEXT: call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> [[VF1]], <4 x i32> [[VI1]]) + vf2 = __builtin_elementwise_ldexp(vf1, vi1); +} + void test_builtin_elementwise_floor(float f1, float f2, double d1, double d2, float4 vf1, float4 vf2) { // CHECK-LABEL: define void @test_builtin_elementwise_floor( diff --git a/clang/test/Sema/builtins-elementwise-math.c b/clang/test/Sema/builtins-elementwise-math.c index f9df4a6f93e05..37be0e4ebbd28 100644 --- a/clang/test/Sema/builtins-elementwise-math.c +++ b/clang/test/Sema/builtins-elementwise-math.c @@ -645,6 +645,42 @@ void test_builtin_elementwise_exp10(int i, float f, double d, float4 v, int3 iv, // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'unsigned4' (vector of 4 'unsigned int' values))}} } +void test_builtin_elementwise_ldexp(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) { + + struct Foo s = __builtin_elementwise_ldexp(f, i); + // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'float'}} + + f = __builtin_elementwise_ldexp(); + // 
expected-error@-1 {{too few arguments to function call, expected 2, have 0}} + + f = __builtin_elementwise_ldexp(f); + // expected-error@-1 {{too few arguments to function call, expected 2, have 1}} + + f = __builtin_elementwise_ldexp(f, i, i); + // expected-error@-1 {{too many arguments to function call, expected 2, have 3}} + + f = __builtin_elementwise_ldexp(i, i); + // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'int')}} + + f = __builtin_elementwise_ldexp(f, f); + // expected-error@-1 {{2nd argument must be a scalar or vector of integer types (was 'float')}} + + f = __builtin_elementwise_ldexp(v, iv); + // expected-error@-1 {{vector operands do not have the same number of elements ('float4' (vector of 4 'float' values) and 'int3' (vector of 3 'int' values))}} + + v = __builtin_elementwise_ldexp(v, i); + // expected-error@-1 {{vector operands do not have the same number of elements ('float4' (vector of 4 'float' values) and 'int')}} + + v = __builtin_elementwise_ldexp(f, iv); + // expected-error@-1 {{vector operands do not have the same number of elements ('float' and 'int3' (vector of 3 'int' values))}} + + f = __builtin_elementwise_ldexp(u, i); + // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'unsigned int')}} + + f = __builtin_elementwise_ldexp(uv, i); + // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'unsigned4' (vector of 4 'unsigned int' values))}} +} + void test_builtin_elementwise_floor(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) { struct Foo s = __builtin_elementwise_floor(f); diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index d7861ac6463c8..1f75ed1d8e6a1 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -3106,8 +3106,8 @@ static Op createComputeOp( genDataOperandOperationsWithModifier( copyoutClause, converter, semanticsContext, stmtCtx, - 
Fortran::parser::AccDataModifier::Modifier::ReadOnly, - dataClauseOperands, mlir::acc::DataClause::acc_copyout, + Fortran::parser::AccDataModifier::Modifier::Zero, dataClauseOperands, + mlir::acc::DataClause::acc_copyout, mlir::acc::DataClause::acc_copyout_zero, async, asyncDeviceTypes, asyncOnlyDeviceTypes, /*setDeclareAttr=*/false, &dataOperandSymbolPairs); diff --git a/flang/test/Driver/gcc-triple.f90 b/flang/test/Driver/gcc-triple.f90 index 324311febf157..3aacb84bfd227 100644 --- a/flang/test/Driver/gcc-triple.f90 +++ b/flang/test/Driver/gcc-triple.f90 @@ -1,4 +1,4 @@ -!! UNSUPPORTED: system-windows +!! UNSUPPORTED: system-windows, system-aix !! Test that --gcc-triple option is working as expected. diff --git a/flang/test/Lower/OpenACC/acc-kernels-loop.f90 b/flang/test/Lower/OpenACC/acc-kernels-loop.f90 index 8d95f35b186ee..ef8dcd34807e0 100644 --- a/flang/test/Lower/OpenACC/acc-kernels-loop.f90 +++ b/flang/test/Lower/OpenACC/acc-kernels-loop.f90 @@ -360,7 +360,7 @@ subroutine acc_kernels_loop END DO ! CHECK: %[[CREATE_A:.*]] = acc.create varPtr(%[[DECLA]]#0 : !fir.ref>) -> !fir.ref> {dataClause = #acc, name = "a"} -! CHECK: %[[CREATE_B:.*]] = acc.create varPtr(%[[DECLB]]#0 : !fir.ref>) -> !fir.ref> {dataClause = #acc, name = "b"} +! CHECK: %[[CREATE_B:.*]] = acc.create varPtr(%[[DECLB]]#0 : !fir.ref>) -> !fir.ref> {dataClause = #acc, name = "b"} ! CHECK: acc.kernels {{.*}} dataOperands(%[[CREATE_A]], %[[CREATE_B]] : !fir.ref>, !fir.ref>) { ! CHECK: acc.loop {{.*}} { ! CHECK: acc.yield @@ -368,7 +368,7 @@ subroutine acc_kernels_loop ! CHECK: acc.terminator ! CHECK-NEXT: }{{$}} ! CHECK: acc.copyout accPtr(%[[CREATE_A]] : !fir.ref>) to varPtr(%[[DECLA]]#0 : !fir.ref>) {name = "a"} -! CHECK: acc.copyout accPtr(%[[CREATE_B]] : !fir.ref>) to varPtr(%[[DECLB]]#0 : !fir.ref>) {name = "b"} +! 
CHECK: acc.copyout accPtr(%[[CREATE_B]] : !fir.ref>) to varPtr(%[[DECLB]]#0 : !fir.ref>) {dataClause = #acc, name = "b"} !$acc kernels loop create(b) create(zero: a) DO i = 1, n diff --git a/flang/test/Lower/OpenACC/acc-kernels.f90 b/flang/test/Lower/OpenACC/acc-kernels.f90 index b90870db25095..65079e693c74b 100644 --- a/flang/test/Lower/OpenACC/acc-kernels.f90 +++ b/flang/test/Lower/OpenACC/acc-kernels.f90 @@ -222,13 +222,13 @@ subroutine acc_kernels !$acc end kernels ! CHECK: %[[CREATE_A:.*]] = acc.create varPtr(%[[DECLA]]#0 : !fir.ref>) -> !fir.ref> {dataClause = #acc, name = "a"} -! CHECK: %[[CREATE_B:.*]] = acc.create varPtr(%[[DECLB]]#0 : !fir.ref>) -> !fir.ref> {dataClause = #acc, name = "b"} +! CHECK: %[[CREATE_B:.*]] = acc.create varPtr(%[[DECLB]]#0 : !fir.ref>) -> !fir.ref> {dataClause = #acc, name = "b"} ! CHECK: %[[CREATE_C:.*]] = acc.create varPtr(%[[DECLC]]#0 : !fir.ref>) -> !fir.ref> {dataClause = #acc, name = "c"} ! CHECK: acc.kernels dataOperands(%[[CREATE_A]], %[[CREATE_B]], %[[CREATE_C]] : !fir.ref>, !fir.ref>, !fir.ref>) { ! CHECK: acc.terminator ! CHECK-NEXT: }{{$}} ! CHECK: acc.copyout accPtr(%[[CREATE_A]] : !fir.ref>) to varPtr(%[[DECLA]]#0 : !fir.ref>) {name = "a"} -! CHECK: acc.copyout accPtr(%[[CREATE_B]] : !fir.ref>) to varPtr(%[[DECLB]]#0 : !fir.ref>) {name = "b"} +! CHECK: acc.copyout accPtr(%[[CREATE_B]] : !fir.ref>) to varPtr(%[[DECLB]]#0 : !fir.ref>) {dataClause = #acc, name = "b"} ! CHECK: acc.copyout accPtr(%[[CREATE_C]] : !fir.ref>) to varPtr(%[[DECLC]]#0 : !fir.ref>) {name = "c"} !$acc kernels create(a, b) create(zero: c) diff --git a/flang/test/Lower/OpenACC/acc-parallel-loop.f90 b/flang/test/Lower/OpenACC/acc-parallel-loop.f90 index 8086080bd3797..648b8298f0965 100644 --- a/flang/test/Lower/OpenACC/acc-parallel-loop.f90 +++ b/flang/test/Lower/OpenACC/acc-parallel-loop.f90 @@ -360,7 +360,7 @@ subroutine acc_parallel_loop END DO ! 
CHECK: %[[CREATE_A:.*]] = acc.create varPtr(%[[DECLA]]#0 : !fir.ref>) -> !fir.ref> {dataClause = #acc, name = "a"} -! CHECK: %[[CREATE_B:.*]] = acc.create varPtr(%[[DECLB]]#0 : !fir.ref>) -> !fir.ref> {dataClause = #acc, name = "b"} +! CHECK: %[[CREATE_B:.*]] = acc.create varPtr(%[[DECLB]]#0 : !fir.ref>) -> !fir.ref> {dataClause = #acc, name = "b"} ! CHECK: acc.parallel {{.*}} dataOperands(%[[CREATE_A]], %[[CREATE_B]] : !fir.ref>, !fir.ref>) { ! CHECK: acc.loop {{.*}} { ! CHECK: acc.yield @@ -368,7 +368,7 @@ subroutine acc_parallel_loop ! CHECK: acc.yield ! CHECK-NEXT: }{{$}} ! CHECK: acc.copyout accPtr(%[[CREATE_A]] : !fir.ref>) to varPtr(%[[DECLA]]#0 : !fir.ref>) {name = "a"} -! CHECK: acc.copyout accPtr(%[[CREATE_B]] : !fir.ref>) to varPtr(%[[DECLB]]#0 : !fir.ref>) {name = "b"} +! CHECK: acc.copyout accPtr(%[[CREATE_B]] : !fir.ref>) to varPtr(%[[DECLB]]#0 : !fir.ref>) {dataClause = #acc, name = "b"} !$acc parallel loop create(b) create(zero: a) DO i = 1, n diff --git a/flang/test/Lower/OpenACC/acc-parallel.f90 b/flang/test/Lower/OpenACC/acc-parallel.f90 index 1eae106ba61b2..fa98fb1255f1a 100644 --- a/flang/test/Lower/OpenACC/acc-parallel.f90 +++ b/flang/test/Lower/OpenACC/acc-parallel.f90 @@ -252,13 +252,13 @@ subroutine acc_parallel !$acc end parallel ! CHECK: %[[CREATE_A:.*]] = acc.create varPtr(%[[DECLA]]#0 : !fir.ref>) -> !fir.ref> {dataClause = #acc, name = "a"} -! CHECK: %[[CREATE_B:.*]] = acc.create varPtr(%[[DECLB]]#0 : !fir.ref>) -> !fir.ref> {dataClause = #acc, name = "b"} +! CHECK: %[[CREATE_B:.*]] = acc.create varPtr(%[[DECLB]]#0 : !fir.ref>) -> !fir.ref> {dataClause = #acc, name = "b"} ! CHECK: %[[CREATE_C:.*]] = acc.create varPtr(%[[DECLC]]#0 : !fir.ref>) -> !fir.ref> {dataClause = #acc, name = "c"} ! CHECK: acc.parallel dataOperands(%[[CREATE_A]], %[[CREATE_B]], %[[CREATE_C]] : !fir.ref>, !fir.ref>, !fir.ref>) { ! CHECK: acc.yield ! CHECK-NEXT: }{{$}} ! 
CHECK: acc.copyout accPtr(%[[CREATE_A]] : !fir.ref>) to varPtr(%[[DECLA]]#0 : !fir.ref>) {name = "a"} -! CHECK: acc.copyout accPtr(%[[CREATE_B]] : !fir.ref>) to varPtr(%[[DECLB]]#0 : !fir.ref>) {name = "b"} +! CHECK: acc.copyout accPtr(%[[CREATE_B]] : !fir.ref>) to varPtr(%[[DECLB]]#0 : !fir.ref>) {dataClause = #acc, name = "b"} ! CHECK: acc.copyout accPtr(%[[CREATE_C]] : !fir.ref>) to varPtr(%[[DECLC]]#0 : !fir.ref>) {name = "c"} !$acc parallel create(a, b) create(zero: c) diff --git a/flang/test/Lower/OpenACC/acc-serial-loop.f90 b/flang/test/Lower/OpenACC/acc-serial-loop.f90 index cad0ee73f6cc5..15ae69ab86965 100644 --- a/flang/test/Lower/OpenACC/acc-serial-loop.f90 +++ b/flang/test/Lower/OpenACC/acc-serial-loop.f90 @@ -301,7 +301,7 @@ subroutine acc_serial_loop END DO ! CHECK: %[[CREATE_A:.*]] = acc.create varPtr(%[[DECLA]]#0 : !fir.ref>) -> !fir.ref> {dataClause = #acc, name = "a"} -! CHECK: %[[CREATE_B:.*]] = acc.create varPtr(%[[DECLB]]#0 : !fir.ref>) -> !fir.ref> {dataClause = #acc, name = "b"} +! CHECK: %[[CREATE_B:.*]] = acc.create varPtr(%[[DECLB]]#0 : !fir.ref>) -> !fir.ref> {dataClause = #acc, name = "b"} ! CHECK: acc.serial {{.*}} dataOperands(%[[CREATE_A]], %[[CREATE_B]] : !fir.ref>, !fir.ref>) { ! CHECK: acc.loop {{.*}} { ! CHECK: acc.yield @@ -309,7 +309,7 @@ subroutine acc_serial_loop ! CHECK: acc.yield ! CHECK-NEXT: }{{$}} ! CHECK: acc.copyout accPtr(%[[CREATE_A]] : !fir.ref>) to varPtr(%[[DECLA]]#0 : !fir.ref>) {name = "a"} -! CHECK: acc.copyout accPtr(%[[CREATE_B]] : !fir.ref>) to varPtr(%[[DECLB]]#0 : !fir.ref>) {name = "b"} +! 
CHECK: acc.copyout accPtr(%[[CREATE_B]] : !fir.ref>) to varPtr(%[[DECLB]]#0 : !fir.ref>) {dataClause = #acc, name = "b"} !$acc serial loop create(b) create(zero: a) DO i = 1, n diff --git a/flang/test/Lower/OpenACC/acc-serial.f90 b/flang/test/Lower/OpenACC/acc-serial.f90 index 1e4f32fd209ef..1eaa0e4994b05 100644 --- a/flang/test/Lower/OpenACC/acc-serial.f90 +++ b/flang/test/Lower/OpenACC/acc-serial.f90 @@ -201,13 +201,13 @@ subroutine acc_serial !$acc end serial ! CHECK: %[[CREATE_A:.*]] = acc.create varPtr(%[[DECLA]]#0 : !fir.ref>) -> !fir.ref> {dataClause = #acc, name = "a"} -! CHECK: %[[CREATE_B:.*]] = acc.create varPtr(%[[DECLB]]#0 : !fir.ref>) -> !fir.ref> {dataClause = #acc, name = "b"} +! CHECK: %[[CREATE_B:.*]] = acc.create varPtr(%[[DECLB]]#0 : !fir.ref>) -> !fir.ref> {dataClause = #acc, name = "b"} ! CHECK: %[[CREATE_C:.*]] = acc.create varPtr(%[[DECLC]]#0 : !fir.ref>) -> !fir.ref> {dataClause = #acc, name = "c"} ! CHECK: acc.serial dataOperands(%[[CREATE_A]], %[[CREATE_B]], %[[CREATE_C]] : !fir.ref>, !fir.ref>, !fir.ref>) { ! CHECK: acc.yield ! CHECK-NEXT: }{{$}} ! CHECK: acc.copyout accPtr(%[[CREATE_A]] : !fir.ref>) to varPtr(%[[DECLA]]#0 : !fir.ref>) {name = "a"} -! CHECK: acc.copyout accPtr(%[[CREATE_B]] : !fir.ref>) to varPtr(%[[DECLB]]#0 : !fir.ref>) {name = "b"} +! CHECK: acc.copyout accPtr(%[[CREATE_B]] : !fir.ref>) to varPtr(%[[DECLB]]#0 : !fir.ref>) {dataClause = #acc, name = "b"} ! 
CHECK: acc.copyout accPtr(%[[CREATE_C]] : !fir.ref>) to varPtr(%[[DECLC]]#0 : !fir.ref>) {name = "c"} !$acc serial create(a, b) create(zero: c) diff --git a/libc/src/fcntl/linux/creat.cpp b/libc/src/fcntl/linux/creat.cpp index 71412a8e68c53..e74cef299b59f 100644 --- a/libc/src/fcntl/linux/creat.cpp +++ b/libc/src/fcntl/linux/creat.cpp @@ -27,11 +27,11 @@ LLVM_LIBC_FUNCTION(int, creat, (const char *path, int mode_flags)) { SYS_openat, AT_FDCWD, path, O_CREAT | O_WRONLY | O_TRUNC, mode_flags); #endif - if (fd > 0) - return fd; - - libc_errno = -fd; - return -1; + if (fd < 0) { + libc_errno = -fd; + return -1; + } + return fd; } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/fcntl/linux/openat.cpp b/libc/src/fcntl/linux/openat.cpp index b47ad1fb3bb0f..b80abe532e51c 100644 --- a/libc/src/fcntl/linux/openat.cpp +++ b/libc/src/fcntl/linux/openat.cpp @@ -32,11 +32,11 @@ LLVM_LIBC_FUNCTION(int, openat, (int dfd, const char *path, int flags, ...)) { int fd = LIBC_NAMESPACE::syscall_impl(SYS_openat, dfd, path, flags, mode_flags); - if (fd > 0) - return fd; - - libc_errno = -fd; - return -1; + if (fd < 0) { + libc_errno = -fd; + return -1; + } + return fd; } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/CMakeLists.txt b/libc/src/stdio/CMakeLists.txt index b0a6ef1e291b5..c75c8b11be2b5 100644 --- a/libc/src/stdio/CMakeLists.txt +++ b/libc/src/stdio/CMakeLists.txt @@ -125,6 +125,10 @@ add_entrypoint_object( DEPENDS libc.src.stdio.printf_core.printf_main libc.src.stdio.printf_core.writer + libc.src.stdio.printf_core.core_structs + libc.src.stdio.printf_core.error_mapper + libc.src.__support.libc_errno + libc.src.__support.CPP.limits ) add_entrypoint_object( @@ -136,6 +140,10 @@ add_entrypoint_object( DEPENDS libc.src.stdio.printf_core.printf_main libc.src.stdio.printf_core.writer + libc.src.stdio.printf_core.core_structs + libc.src.stdio.printf_core.error_mapper + libc.src.__support.libc_errno + libc.src.__support.CPP.limits ) add_entrypoint_object( @@ 
-146,6 +154,10 @@ add_entrypoint_object( asprintf.h DEPENDS libc.src.stdio.printf_core.vasprintf_internal + libc.src.stdio.printf_core.core_structs + libc.src.stdio.printf_core.error_mapper + libc.src.__support.libc_errno + libc.src.__support.CPP.limits ) add_entrypoint_object( @@ -157,6 +169,10 @@ add_entrypoint_object( DEPENDS libc.src.stdio.printf_core.printf_main libc.src.stdio.printf_core.writer + libc.src.stdio.printf_core.core_structs + libc.src.stdio.printf_core.error_mapper + libc.src.__support.libc_errno + libc.src.__support.CPP.limits ) add_entrypoint_object( @@ -168,6 +184,10 @@ add_entrypoint_object( DEPENDS libc.src.stdio.printf_core.printf_main libc.src.stdio.printf_core.writer + libc.src.stdio.printf_core.core_structs + libc.src.stdio.printf_core.error_mapper + libc.src.__support.libc_errno + libc.src.__support.CPP.limits ) add_entrypoint_object( @@ -178,6 +198,10 @@ add_entrypoint_object( vasprintf.h DEPENDS libc.src.stdio.printf_core.vasprintf_internal + libc.src.stdio.printf_core.core_structs + libc.src.stdio.printf_core.error_mapper + libc.src.__support.libc_errno + libc.src.__support.CPP.limits ) add_subdirectory(printf_core) diff --git a/libc/src/stdio/asprintf.cpp b/libc/src/stdio/asprintf.cpp index f8cfb74ce48ea..0991dfca6a059 100644 --- a/libc/src/stdio/asprintf.cpp +++ b/libc/src/stdio/asprintf.cpp @@ -7,8 +7,12 @@ //===----------------------------------------------------------------------===// #include "src/stdio/asprintf.h" +#include "src/__support/CPP/limits.h" #include "src/__support/arg_list.h" +#include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" +#include "src/stdio/printf_core/core_structs.h" +#include "src/stdio/printf_core/error_mapper.h" #include "src/stdio/printf_core/vasprintf_internal.h" namespace LIBC_NAMESPACE_DECL { @@ -22,8 +26,18 @@ LLVM_LIBC_FUNCTION(int, asprintf, // and pointer semantics, as well as handling // destruction automatically. 
va_end(vlist); - int ret = printf_core::vasprintf_internal(buffer, format, args); - return ret; + auto ret_val = printf_core::vasprintf_internal(buffer, format, args); + if (!ret_val.has_value()) { + libc_errno = printf_core::internal_error_to_errno(ret_val.error()); + return -1; + } + if (ret_val.value() > static_cast(cpp::numeric_limits::max())) { + libc_errno = + printf_core::internal_error_to_errno(-printf_core::OVERFLOW_ERROR); + return -1; + } + + return static_cast(ret_val.value()); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/baremetal/CMakeLists.txt b/libc/src/stdio/baremetal/CMakeLists.txt index 548938f885c94..bfeff0e2b5880 100644 --- a/libc/src/stdio/baremetal/CMakeLists.txt +++ b/libc/src/stdio/baremetal/CMakeLists.txt @@ -29,8 +29,12 @@ add_entrypoint_object( DEPENDS libc.src.stdio.printf_core.printf_main libc.src.stdio.printf_core.writer + libc.src.stdio.printf_core.error_mapper + libc.src.stdio.printf_core.core_structs libc.src.__support.arg_list libc.src.__support.OSUtil.osutil + libc.src.__support.libc_errno + libc.src.__support.CPP.limits ) add_entrypoint_object( @@ -87,8 +91,12 @@ add_entrypoint_object( DEPENDS libc.src.stdio.printf_core.printf_main libc.src.stdio.printf_core.writer + libc.src.stdio.printf_core.error_mapper + libc.src.stdio.printf_core.core_structs libc.src.__support.arg_list libc.src.__support.OSUtil.osutil + libc.src.__support.libc_errno + libc.src.__support.CPP.limits ) add_entrypoint_object( diff --git a/libc/src/stdio/baremetal/printf.cpp b/libc/src/stdio/baremetal/printf.cpp index 7253c6549a4e4..5a9b19ff20471 100644 --- a/libc/src/stdio/baremetal/printf.cpp +++ b/libc/src/stdio/baremetal/printf.cpp @@ -7,10 +7,13 @@ //===----------------------------------------------------------------------===// #include "src/stdio/printf.h" +#include "src/__support/CPP/limits.h" #include "src/__support/OSUtil/io.h" #include "src/__support/arg_list.h" +#include "src/__support/libc_errno.h" #include 
"src/__support/macros/config.h" #include "src/stdio/printf_core/core_structs.h" +#include "src/stdio/printf_core/error_mapper.h" #include "src/stdio/printf_core/printf_main.h" #include "src/stdio/printf_core/writer.h" @@ -42,13 +45,25 @@ LLVM_LIBC_FUNCTION(int, printf, (const char *__restrict format, ...)) { buffer, BUFF_SIZE, &stdout_write_hook, nullptr); printf_core::Writer writer(wb); - int retval = printf_core::printf_main(&writer, format, args); + auto retval = printf_core::printf_main(&writer, format, args); + if (!retval.has_value()) { + libc_errno = printf_core::internal_error_to_errno(retval.error()); + return -1; + } int flushval = wb.overflow_write(""); - if (flushval != printf_core::WRITE_OK) - retval = flushval; + if (flushval != printf_core::WRITE_OK) { + libc_errno = printf_core::internal_error_to_errno(-flushval); + return -1; + } - return retval; + if (retval.value() > static_cast(cpp::numeric_limits::max())) { + libc_errno = + printf_core::internal_error_to_errno(-printf_core::OVERFLOW_ERROR); + return -1; + } + + return static_cast(retval.value()); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/baremetal/vprintf.cpp b/libc/src/stdio/baremetal/vprintf.cpp index ab02533f14911..c172b368d15f3 100644 --- a/libc/src/stdio/baremetal/vprintf.cpp +++ b/libc/src/stdio/baremetal/vprintf.cpp @@ -7,10 +7,13 @@ //===----------------------------------------------------------------------===// #include "src/stdio/vprintf.h" +#include "src/__support/CPP/limits.h" #include "src/__support/OSUtil/io.h" #include "src/__support/arg_list.h" +#include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" #include "src/stdio/printf_core/core_structs.h" +#include "src/stdio/printf_core/error_mapper.h" #include "src/stdio/printf_core/printf_main.h" #include "src/stdio/printf_core/writer.h" @@ -40,13 +43,25 @@ LLVM_LIBC_FUNCTION(int, vprintf, buffer, BUFF_SIZE, &stdout_write_hook, nullptr); printf_core::Writer writer(wb); - int retval = 
printf_core::printf_main(&writer, format, args); + auto retval = printf_core::printf_main(&writer, format, args); + if (!retval.has_value()) { + libc_errno = printf_core::internal_error_to_errno(retval.error()); + return -1; + } int flushval = wb.overflow_write(""); - if (flushval != printf_core::WRITE_OK) - retval = flushval; + if (flushval != printf_core::WRITE_OK) { + libc_errno = printf_core::internal_error_to_errno(-flushval); + return -1; + } - return retval; + if (retval.value() > static_cast(cpp::numeric_limits::max())) { + libc_errno = + printf_core::internal_error_to_errno(-printf_core::OVERFLOW_ERROR); + return -1; + } + + return static_cast(retval.value()); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/generic/CMakeLists.txt b/libc/src/stdio/generic/CMakeLists.txt index 6361822b61999..71055edea3d9e 100644 --- a/libc/src/stdio/generic/CMakeLists.txt +++ b/libc/src/stdio/generic/CMakeLists.txt @@ -393,7 +393,11 @@ add_generic_entrypoint_object( list(APPEND fprintf_deps libc.hdr.types.FILE libc.src.__support.arg_list + libc.src.__support.CPP.limits + libc.src.__support.libc_errno libc.src.stdio.printf_core.vfprintf_internal + libc.src.stdio.printf_core.core_structs + libc.src.stdio.printf_core.error_mapper ) if(LLVM_LIBC_FULL_BUILD) diff --git a/libc/src/stdio/generic/fprintf.cpp b/libc/src/stdio/generic/fprintf.cpp index 087aeadfc52c5..b2033901557a0 100644 --- a/libc/src/stdio/generic/fprintf.cpp +++ b/libc/src/stdio/generic/fprintf.cpp @@ -8,9 +8,12 @@ #include "src/stdio/fprintf.h" +#include "src/__support/CPP/limits.h" #include "src/__support/File/file.h" #include "src/__support/arg_list.h" #include "src/__support/macros/config.h" +#include "src/stdio/printf_core/core_structs.h" +#include "src/stdio/printf_core/error_mapper.h" #include "src/stdio/printf_core/vfprintf_internal.h" #include "hdr/types/FILE.h" @@ -27,8 +30,18 @@ LLVM_LIBC_FUNCTION(int, fprintf, // and pointer semantics, as well as handling // destruction automatically. 
va_end(vlist); - int ret_val = printf_core::vfprintf_internal(stream, format, args); - return ret_val; + auto ret_val = printf_core::vfprintf_internal(stream, format, args); + if (!ret_val.has_value()) { + libc_errno = printf_core::internal_error_to_errno(ret_val.error()); + return -1; + } + if (ret_val.value() > static_cast(cpp::numeric_limits::max())) { + libc_errno = + printf_core::internal_error_to_errno(-printf_core::OVERFLOW_ERROR); + return -1; + } + + return static_cast(ret_val.value()); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/generic/printf.cpp b/libc/src/stdio/generic/printf.cpp index bb7c7c86f843f..8d159d5c70870 100644 --- a/libc/src/stdio/generic/printf.cpp +++ b/libc/src/stdio/generic/printf.cpp @@ -8,9 +8,12 @@ #include "src/stdio/printf.h" +#include "src/__support/CPP/limits.h" #include "src/__support/File/file.h" #include "src/__support/arg_list.h" #include "src/__support/macros/config.h" +#include "src/stdio/printf_core/core_structs.h" +#include "src/stdio/printf_core/error_mapper.h" #include "src/stdio/printf_core/vfprintf_internal.h" #include "hdr/types/FILE.h" @@ -31,9 +34,19 @@ LLVM_LIBC_FUNCTION(int, printf, (const char *__restrict format, ...)) { // and pointer semantics, as well as handling // destruction automatically. 
va_end(vlist); - int ret_val = printf_core::vfprintf_internal( + auto ret_val = printf_core::vfprintf_internal( reinterpret_cast<::FILE *>(PRINTF_STDOUT), format, args); - return ret_val; + if (!ret_val.has_value()) { + libc_errno = printf_core::internal_error_to_errno(ret_val.error()); + return -1; + } + if (ret_val.value() > static_cast(cpp::numeric_limits::max())) { + libc_errno = + printf_core::internal_error_to_errno(-printf_core::OVERFLOW_ERROR); + return -1; + } + + return static_cast(ret_val.value()); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/generic/vfprintf.cpp b/libc/src/stdio/generic/vfprintf.cpp index 01f4265f118a6..a26f082ed9347 100644 --- a/libc/src/stdio/generic/vfprintf.cpp +++ b/libc/src/stdio/generic/vfprintf.cpp @@ -8,9 +8,12 @@ #include "src/stdio/vfprintf.h" +#include "src/__support/CPP/limits.h" #include "src/__support/File/file.h" #include "src/__support/arg_list.h" #include "src/__support/macros/config.h" +#include "src/stdio/printf_core/core_structs.h" +#include "src/stdio/printf_core/error_mapper.h" #include "src/stdio/printf_core/vfprintf_internal.h" #include "hdr/types/FILE.h" @@ -24,8 +27,18 @@ LLVM_LIBC_FUNCTION(int, vfprintf, internal::ArgList args(vlist); // This holder class allows for easier copying // and pointer semantics, as well as handling // destruction automatically. 
- int ret_val = printf_core::vfprintf_internal(stream, format, args); - return ret_val; + auto ret_val = printf_core::vfprintf_internal(stream, format, args); + if (!ret_val.has_value()) { + libc_errno = printf_core::internal_error_to_errno(ret_val.error()); + return -1; + } + if (ret_val.value() > static_cast(cpp::numeric_limits::max())) { + libc_errno = + printf_core::internal_error_to_errno(-printf_core::OVERFLOW_ERROR); + return -1; + } + + return static_cast(ret_val.value()); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/generic/vprintf.cpp b/libc/src/stdio/generic/vprintf.cpp index 08d71515646ed..ae2160219f2bb 100644 --- a/libc/src/stdio/generic/vprintf.cpp +++ b/libc/src/stdio/generic/vprintf.cpp @@ -8,9 +8,12 @@ #include "src/stdio/vprintf.h" +#include "src/__support/CPP/limits.h" #include "src/__support/File/file.h" #include "src/__support/arg_list.h" #include "src/__support/macros/config.h" +#include "src/stdio/printf_core/core_structs.h" +#include "src/stdio/printf_core/error_mapper.h" #include "src/stdio/printf_core/vfprintf_internal.h" #include "hdr/types/FILE.h" @@ -29,9 +32,19 @@ LLVM_LIBC_FUNCTION(int, vprintf, internal::ArgList args(vlist); // This holder class allows for easier copying // and pointer semantics, as well as handling // destruction automatically. 
- int ret_val = printf_core::vfprintf_internal( + auto ret_val = printf_core::vfprintf_internal( reinterpret_cast<::FILE *>(PRINTF_STDOUT), format, args); - return ret_val; + if (!ret_val.has_value()) { + libc_errno = printf_core::internal_error_to_errno(ret_val.error()); + return -1; + } + if (ret_val.value() > static_cast(cpp::numeric_limits::max())) { + libc_errno = + printf_core::internal_error_to_errno(-printf_core::OVERFLOW_ERROR); + return -1; + } + + return static_cast(ret_val.value()); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt index ee66145e60156..624129b2b36e7 100644 --- a/libc/src/stdio/printf_core/CMakeLists.txt +++ b/libc/src/stdio/printf_core/CMakeLists.txt @@ -32,6 +32,17 @@ if(printf_config_copts) list(PREPEND printf_config_copts "COMPILE_OPTIONS") endif() +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS}) + add_subdirectory(${LIBC_TARGET_OS}) +else() + add_subdirectory(generic) +endif() + +set(target_error_mapper libc.src.stdio.printf_core.${LIBC_TARGET_OS}.error_mapper) +if(NOT TARGET ${target_error_mapper}) + set(target_error_mapper libc.src.stdio.printf_core.generic.error_mapper) +endif() + add_header_library( printf_config HDRS @@ -47,6 +58,7 @@ add_header_library( libc.include.inttypes libc.src.__support.CPP.string_view libc.src.__support.FPUtil.fp_bits + libc.hdr.errno_macros ) add_header_library( @@ -125,6 +137,7 @@ add_header_library( .writer .core_structs libc.src.__support.arg_list + libc.src.__support.error_or ) add_header_library( @@ -136,10 +149,20 @@ add_header_library( libc.hdr.func.free libc.hdr.func.realloc libc.src.__support.arg_list + libc.src.__support.error_or libc.src.stdio.printf_core.printf_main libc.src.stdio.printf_core.writer ) +add_header_library( + error_mapper + HDRS + error_mapper.h + DEPENDS + ${target_error_mapper} + libc.src.__support.macros.properties.architectures +) + if(NOT (TARGET libc.src.__support.File.file) 
AND LLVM_LIBC_FULL_BUILD) # Not all platforms have a file implementation. If file is unavailable, and a # full build is requested, then we must skip all file based printf sections. @@ -152,8 +175,10 @@ add_header_library( vfprintf_internal.h DEPENDS libc.src.__support.File.file + libc.src.__support.error_or libc.src.__support.arg_list libc.src.stdio.printf_core.printf_main libc.src.stdio.printf_core.writer ${use_system_file} ) + diff --git a/libc/src/stdio/printf_core/core_structs.h b/libc/src/stdio/printf_core/core_structs.h index e27f77b6b594a..0d41f2244d8da 100644 --- a/libc/src/stdio/printf_core/core_structs.h +++ b/libc/src/stdio/printf_core/core_structs.h @@ -132,14 +132,17 @@ template LIBC_INLINE constexpr TypeDesc type_desc_from_type() { // This is the value to be returned by conversions when no error has occurred. constexpr int WRITE_OK = 0; -// These are the printf return values for when an error has occurred. They are -// all negative, and should be distinct. -constexpr int FILE_WRITE_ERROR = -1; -constexpr int FILE_STATUS_ERROR = -2; -constexpr int NULLPTR_WRITE_ERROR = -3; -constexpr int INT_CONVERSION_ERROR = -4; -constexpr int FIXED_POINT_CONVERSION_ERROR = -5; -constexpr int ALLOCATION_ERROR = -6; +// These are the error return values used by the printf engine when an +// error has occurred. They are all large negative, distinct values starting +// from -1001 to not overlap with system errors.
+constexpr int FILE_WRITE_ERROR = -1001; +constexpr int FILE_STATUS_ERROR = -1002; +constexpr int NULLPTR_WRITE_ERROR = -1003; +constexpr int INT_CONVERSION_ERROR = -1004; +constexpr int FIXED_POINT_CONVERSION_ERROR = -1005; +constexpr int ALLOCATION_ERROR = -1006; +constexpr int OVERFLOW_ERROR = -1007; + } // namespace printf_core } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/printf_core/error_mapper.h b/libc/src/stdio/printf_core/error_mapper.h new file mode 100644 index 0000000000000..23030930133a1 --- /dev/null +++ b/libc/src/stdio/printf_core/error_mapper.h @@ -0,0 +1,21 @@ +//===-- Error mapper for printf ---------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_ERROR_MAPPER_H +#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_ERROR_MAPPER_H + +#include "src/__support/macros/properties/architectures.h" + +// Maps internal errors to the available errnos on the platform. 
+#if defined(__linux__) +#include "linux/error_mapper.h" +#else +#include "generic/error_mapper.h" +#endif + +#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_ERROR_MAPPER_H diff --git a/libc/src/stdio/printf_core/generic/CMakeLists.txt b/libc/src/stdio/printf_core/generic/CMakeLists.txt new file mode 100644 index 0000000000000..2f0143d992e31 --- /dev/null +++ b/libc/src/stdio/printf_core/generic/CMakeLists.txt @@ -0,0 +1,8 @@ +add_header_library( + error_mapper + HDRS + error_mapper.h + DEPENDS + libc.src.stdio.printf_core.core_structs + libc.hdr.errno_macros +) diff --git a/libc/src/stdio/printf_core/generic/error_mapper.h b/libc/src/stdio/printf_core/generic/error_mapper.h new file mode 100644 index 0000000000000..d8cdd2cc2dbaa --- /dev/null +++ b/libc/src/stdio/printf_core/generic/error_mapper.h @@ -0,0 +1,49 @@ +//===-- Generic implementation of error mapper ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_GENERIC_ERROR_MAPPER_H +#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_GENERIC_ERROR_MAPPER_H + +#include "hdr/errno_macros.h" +#include "src/stdio/printf_core/core_structs.h" +#include "src/stdio/printf_core/error_mapper.h" + +namespace LIBC_NAMESPACE_DECL { +namespace printf_core { + +LIBC_INLINE static int internal_error_to_errno(int internal_error) { + // System error occurred, return error as is. + if (internal_error < 1001 && internal_error > 0) { + return internal_error; + } + + // Map internal error to the available C standard errnos.
+ switch (-internal_error) { + case WRITE_OK: + return 0; + case FILE_WRITE_ERROR: + case FILE_STATUS_ERROR: + case NULLPTR_WRITE_ERROR: + case ALLOCATION_ERROR: + return EDOM; + case INT_CONVERSION_ERROR: + case FIXED_POINT_CONVERSION_ERROR: + case OVERFLOW_ERROR: + return ERANGE; + default: + LIBC_ASSERT( + false && + "Invalid internal printf error code passed to internal_error_to_errno"); + return EDOM; + } +} + +} // namespace printf_core +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_GENERIC_ERROR_MAPPER_H diff --git a/libc/src/stdio/printf_core/linux/CMakeLists.txt b/libc/src/stdio/printf_core/linux/CMakeLists.txt new file mode 100644 index 0000000000000..2f0143d992e31 --- /dev/null +++ b/libc/src/stdio/printf_core/linux/CMakeLists.txt @@ -0,0 +1,8 @@ +add_header_library( + error_mapper + HDRS + error_mapper.h + DEPENDS + libc.src.stdio.printf_core.core_structs + libc.hdr.errno_macros +) diff --git a/libc/src/stdio/printf_core/linux/error_mapper.h b/libc/src/stdio/printf_core/linux/error_mapper.h new file mode 100644 index 0000000000000..3c2fe663072d0 --- /dev/null +++ b/libc/src/stdio/printf_core/linux/error_mapper.h @@ -0,0 +1,54 @@ +//===-- Linux implementation of error mapper --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_LINUX_ERROR_MAPPER_H +#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_LINUX_ERROR_MAPPER_H + +#include "hdr/errno_macros.h" +#include "src/stdio/printf_core/core_structs.h" +#include "src/stdio/printf_core/error_mapper.h" + +namespace LIBC_NAMESPACE_DECL { +namespace printf_core { + +LIBC_INLINE static int internal_error_to_errno(int internal_error) { + // System error occurred, return error as is. + if (internal_error < 1001 && internal_error > 0) { + return internal_error; + } + + // Map internal error to POSIX errnos. + switch (-internal_error) { + case WRITE_OK: + return 0; + case FILE_WRITE_ERROR: + return EIO; + case FILE_STATUS_ERROR: + return EIO; + case NULLPTR_WRITE_ERROR: + return EINVAL; + case INT_CONVERSION_ERROR: + return ERANGE; + case FIXED_POINT_CONVERSION_ERROR: + return EINVAL; + case ALLOCATION_ERROR: + return ENOMEM; + case OVERFLOW_ERROR: + return EOVERFLOW; + default: + LIBC_ASSERT( + false && + "Invalid internal printf error code passed to internal_error_to_errno"); + return EINVAL; + } +} + +} // namespace printf_core +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_LINUX_ERROR_MAPPER_H diff --git a/libc/src/stdio/printf_core/printf_main.h b/libc/src/stdio/printf_core/printf_main.h index 57f29858d5298..1c7a7237c097d 100644 --- a/libc/src/stdio/printf_core/printf_main.h +++ b/libc/src/stdio/printf_core/printf_main.h @@ -10,6 +10,7 @@ #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PRINTF_MAIN_H #include "src/__support/arg_list.h" +#include "src/__support/error_or.h" #include "src/__support/macros/config.h" #include "src/stdio/printf_core/converter.h" #include "src/stdio/printf_core/core_structs.h" @@ -22,8 +23,9 @@ namespace LIBC_NAMESPACE_DECL { namespace printf_core { template -int printf_main(Writer *writer, const char *__restrict str, -
internal::ArgList &args) { +ErrorOr printf_main(Writer *writer, + const char *__restrict str, + internal::ArgList &args) { Parser parser(str, args); int result = 0; for (FormatSection cur_section = parser.get_next_section(); @@ -33,9 +35,8 @@ int printf_main(Writer *writer, const char *__restrict str, result = convert(writer, cur_section); else result = writer->write(cur_section.raw_string); - if (result < 0) - return result; + return Error(-result); } return writer->get_chars_written(); diff --git a/libc/src/stdio/printf_core/vasprintf_internal.h b/libc/src/stdio/printf_core/vasprintf_internal.h index 283d8df2810fb..41df17b67f35b 100644 --- a/libc/src/stdio/printf_core/vasprintf_internal.h +++ b/libc/src/stdio/printf_core/vasprintf_internal.h @@ -10,6 +10,7 @@ #include "hdr/func/malloc.h" #include "hdr/func/realloc.h" #include "src/__support/arg_list.h" +#include "src/__support/error_or.h" #include "src/stdio/printf_core/core_structs.h" #include "src/stdio/printf_core/printf_main.h" #include "src/stdio/printf_core/writer.h" @@ -29,7 +30,7 @@ LIBC_INLINE int resize_overflow_hook(cpp::string_view new_str, void *target) { if (new_buff == nullptr) { if (wb->buff != wb->init_buff) free(wb->buff); - return printf_core::ALLOCATION_ERROR; + return ALLOCATION_ERROR; } if (isBuffOnStack) inline_memcpy(new_buff, wb->buff, wb->buff_cur); @@ -42,27 +43,28 @@ LIBC_INLINE int resize_overflow_hook(cpp::string_view new_str, void *target) { constexpr size_t DEFAULT_BUFFER_SIZE = 200; -LIBC_INLINE int vasprintf_internal(char **ret, const char *__restrict format, - internal::ArgList args) { +LIBC_INLINE ErrorOr vasprintf_internal(char **ret, + const char *__restrict format, + internal::ArgList args) { char init_buff_on_stack[DEFAULT_BUFFER_SIZE]; printf_core::WriteBuffer::value> wb( init_buff_on_stack, DEFAULT_BUFFER_SIZE, resize_overflow_hook); printf_core::Writer writer(wb); auto ret_val = printf_core::printf_main(&writer, format, args); - if (ret_val < 0) { + if 
(!ret_val.has_value()) { *ret = nullptr; - return -1; + return ret_val; } if (wb.buff == init_buff_on_stack) { - *ret = static_cast(malloc(ret_val + 1)); + *ret = static_cast(malloc(ret_val.value() + 1)); if (ret == nullptr) - return printf_core::ALLOCATION_ERROR; - inline_memcpy(*ret, wb.buff, ret_val); + return Error(ALLOCATION_ERROR); + inline_memcpy(*ret, wb.buff, ret_val.value()); } else { *ret = wb.buff; } - (*ret)[ret_val] = '\0'; + (*ret)[ret_val.value()] = '\0'; return ret_val; } } // namespace printf_core diff --git a/libc/src/stdio/printf_core/vfprintf_internal.h b/libc/src/stdio/printf_core/vfprintf_internal.h index 630de9d9d43dd..564441d3bf51a 100644 --- a/libc/src/stdio/printf_core/vfprintf_internal.h +++ b/libc/src/stdio/printf_core/vfprintf_internal.h @@ -11,6 +11,7 @@ #include "src/__support/File/file.h" #include "src/__support/arg_list.h" +#include "src/__support/error_or.h" #include "src/__support/macros/attributes.h" // For LIBC_INLINE #include "src/__support/macros/config.h" #include "src/stdio/printf_core/core_structs.h" @@ -35,8 +36,8 @@ LIBC_INLINE void funlockfile(FILE *f) { reinterpret_cast(f)->unlock(); } -LIBC_INLINE size_t fwrite_unlocked(const void *ptr, size_t size, size_t nmemb, - FILE *f) { +LIBC_INLINE FileIOResult fwrite_unlocked(const void *ptr, size_t size, + size_t nmemb, FILE *f) { return reinterpret_cast(f)->write_unlocked( ptr, size * nmemb); } @@ -47,9 +48,11 @@ LIBC_INLINE void flockfile(::FILE *f) { ::flockfile(f); } LIBC_INLINE void funlockfile(::FILE *f) { ::funlockfile(f); } -LIBC_INLINE size_t fwrite_unlocked(const void *ptr, size_t size, size_t nmemb, - ::FILE *f) { - return ::fwrite_unlocked(ptr, size, nmemb, f); +LIBC_INLINE FileIOResult fwrite_unlocked(const void *ptr, size_t size, + size_t nmemb, ::FILE *f) { + // Need to use system errno in this case, as system write will set this errno + // which we need to propagate back into our code. 
+ return {::fwrite_unlocked(ptr, size, nmemb, f), errno}; } #endif // LIBC_COPT_STDIO_USE_SYSTEM_FILE } // namespace internal @@ -60,26 +63,38 @@ LIBC_INLINE int file_write_hook(cpp::string_view new_str, void *fp) { ::FILE *target_file = reinterpret_cast<::FILE *>(fp); // Write new_str to the target file. The logic preventing a zero-length write // is in the writer, so we don't check here. - size_t written = internal::fwrite_unlocked(new_str.data(), sizeof(char), - new_str.size(), target_file); - if (written != new_str.size() || internal::ferror_unlocked(target_file)) + auto write_result = internal::fwrite_unlocked(new_str.data(), sizeof(char), + new_str.size(), target_file); + // Propagate actual system error in FileIOResult. + if (write_result.has_error()) + return -write_result.error; + + // In case a short write occurred or error was not set on FileIOResult for some + // reason. + if (write_result.value != new_str.size() || + internal::ferror_unlocked(target_file)) return FILE_WRITE_ERROR; + return WRITE_OK; } -LIBC_INLINE int vfprintf_internal(::FILE *__restrict stream, - const char *__restrict format, - internal::ArgList &args) { +LIBC_INLINE ErrorOr vfprintf_internal(::FILE *__restrict stream, + const char *__restrict format, + internal::ArgList &args) { constexpr size_t BUFF_SIZE = 1024; char buffer[BUFF_SIZE]; printf_core::WriteBuffer::value> wb( buffer, BUFF_SIZE, &file_write_hook, reinterpret_cast(stream)); Writer writer(wb); internal::flockfile(stream); - int retval = printf_main(&writer, format, args); + auto retval = printf_main(&writer, format, args); + if (!retval.has_value()) { + internal::funlockfile(stream); + return retval; + } int flushval = wb.overflow_write(""); if (flushval != WRITE_OK) - retval = flushval; + retval = Error(-flushval); internal::funlockfile(stream); return retval; } diff --git a/libc/src/stdio/printf_core/write_int_converter.h b/libc/src/stdio/printf_core/write_int_converter.h index efcff278bd284..04b2bef05bc7b 100644 ---
a/libc/src/stdio/printf_core/write_int_converter.h +++ b/libc/src/stdio/printf_core/write_int_converter.h @@ -29,11 +29,11 @@ LIBC_INLINE int convert_write_int(Writer *writer, return NULLPTR_WRITE_ERROR; #endif // LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS - int written = writer->get_chars_written(); + size_t written = writer->get_chars_written(); switch (to_conv.length_modifier) { case LengthModifier::none: - *reinterpret_cast(to_conv.conv_val_ptr) = written; + *reinterpret_cast(to_conv.conv_val_ptr) = static_cast(written); break; case LengthModifier::l: *reinterpret_cast(to_conv.conv_val_ptr) = written; diff --git a/libc/src/stdio/printf_core/writer.h b/libc/src/stdio/printf_core/writer.h index 1d4734a51b9b8..9de108ece510f 100644 --- a/libc/src/stdio/printf_core/writer.h +++ b/libc/src/stdio/printf_core/writer.h @@ -127,7 +127,7 @@ template struct WriteBuffer { template class Writer final { WriteBuffer &wb; - int chars_written = 0; + size_t chars_written = 0; LIBC_INLINE int pad(char new_char, size_t length) { // First, fill as much of the buffer as possible with the padding char. @@ -161,7 +161,7 @@ template class Writer final { // Takes a string, copies it into the buffer if there is space, else passes it // to the overflow mechanism to be handled separately. LIBC_INLINE int write(cpp::string_view new_string) { - chars_written += static_cast(new_string.size()); + chars_written += new_string.size(); if (LIBC_LIKELY(wb.buff_cur + new_string.size() <= wb.buff_len)) { inline_memcpy(wb.buff + wb.buff_cur, new_string.data(), new_string.size()); @@ -175,7 +175,7 @@ template class Writer final { // if there is space, else calls pad which will loop and call the overflow // mechanism on a secondary buffer. 
LIBC_INLINE int write(char new_char, size_t length) { - chars_written += static_cast(length); + chars_written += length; if (LIBC_LIKELY(wb.buff_cur + length <= wb.buff_len)) { inline_memset(wb.buff + wb.buff_cur, static_cast(new_char), @@ -199,7 +199,7 @@ template class Writer final { return wb.overflow_write(char_string_view); } - LIBC_INLINE int get_chars_written() { return chars_written; } + LIBC_INLINE size_t get_chars_written() { return chars_written; } }; // Class-template auto deduction helpers. diff --git a/libc/src/stdio/snprintf.cpp b/libc/src/stdio/snprintf.cpp index c8940862f711f..d95195f6f485f 100644 --- a/libc/src/stdio/snprintf.cpp +++ b/libc/src/stdio/snprintf.cpp @@ -8,8 +8,12 @@ #include "src/stdio/snprintf.h" +#include "src/__support/CPP/limits.h" #include "src/__support/arg_list.h" +#include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" +#include "src/stdio/printf_core/core_structs.h" +#include "src/stdio/printf_core/error_mapper.h" #include "src/stdio/printf_core/printf_main.h" #include "src/stdio/printf_core/writer.h" @@ -32,10 +36,21 @@ LLVM_LIBC_FUNCTION(int, snprintf, wb(buffer, (buffsz > 0 ? buffsz - 1 : 0)); printf_core::Writer writer(wb); - int ret_val = printf_core::printf_main(&writer, format, args); + auto ret_val = printf_core::printf_main(&writer, format, args); + if (!ret_val.has_value()) { + libc_errno = printf_core::internal_error_to_errno(ret_val.error()); + return -1; + } if (buffsz > 0) // if the buffsz is 0 the buffer may be a null pointer. 
wb.buff[wb.buff_cur] = '\0'; - return ret_val; + + if (ret_val.value() > static_cast(cpp::numeric_limits::max())) { + libc_errno = + printf_core::internal_error_to_errno(-printf_core::OVERFLOW_ERROR); + return -1; + } + + return static_cast(ret_val.value()); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/sprintf.cpp b/libc/src/stdio/sprintf.cpp index 7be97d3591aaf..2a9b6ea7c5e50 100644 --- a/libc/src/stdio/sprintf.cpp +++ b/libc/src/stdio/sprintf.cpp @@ -10,7 +10,10 @@ #include "src/__support/CPP/limits.h" #include "src/__support/arg_list.h" +#include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" +#include "src/stdio/printf_core/core_structs.h" +#include "src/stdio/printf_core/error_mapper.h" #include "src/stdio/printf_core/printf_main.h" #include "src/stdio/printf_core/writer.h" @@ -33,9 +36,20 @@ LLVM_LIBC_FUNCTION(int, sprintf, wb(buffer, cpp::numeric_limits::max()); printf_core::Writer writer(wb); - int ret_val = printf_core::printf_main(&writer, format, args); + auto ret_val = printf_core::printf_main(&writer, format, args); + if (!ret_val.has_value()) { + libc_errno = printf_core::internal_error_to_errno(ret_val.error()); + return -1; + } wb.buff[wb.buff_cur] = '\0'; - return ret_val; + + if (ret_val.value() > static_cast(cpp::numeric_limits::max())) { + libc_errno = + printf_core::internal_error_to_errno(-printf_core::OVERFLOW_ERROR); + return -1; + } + + return static_cast(ret_val.value()); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/vasprintf.cpp b/libc/src/stdio/vasprintf.cpp index 4a44d4a0f8842..bd77cd8864312 100644 --- a/libc/src/stdio/vasprintf.cpp +++ b/libc/src/stdio/vasprintf.cpp @@ -7,7 +7,11 @@ //===----------------------------------------------------------------------===// #include "src/stdio/vasprintf.h" +#include "src/__support/CPP/limits.h" #include "src/__support/arg_list.h" +#include "src/__support/libc_errno.h" +#include "src/stdio/printf_core/core_structs.h" +#include 
"src/stdio/printf_core/error_mapper.h" #include "src/stdio/printf_core/vasprintf_internal.h" namespace LIBC_NAMESPACE_DECL { @@ -18,7 +22,17 @@ LLVM_LIBC_FUNCTION(int, vasprintf, internal::ArgList args(vlist); // This holder class allows for easier copying // and pointer semantics, as well as handling // destruction automatically. - return printf_core::vasprintf_internal(ret, format, args); + auto ret_val = printf_core::vasprintf_internal(ret, format, args); + if (!ret_val.has_value()) { + libc_errno = printf_core::internal_error_to_errno(ret_val.error()); + return -1; + } + if (ret_val.value() > static_cast(cpp::numeric_limits::max())) { + libc_errno = + printf_core::internal_error_to_errno(-printf_core::OVERFLOW_ERROR); + return -1; + } + return static_cast(ret_val.value()); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/vsnprintf.cpp b/libc/src/stdio/vsnprintf.cpp index b07a2499a0dd3..5d936360c0857 100644 --- a/libc/src/stdio/vsnprintf.cpp +++ b/libc/src/stdio/vsnprintf.cpp @@ -8,8 +8,12 @@ #include "src/stdio/vsnprintf.h" +#include "src/__support/CPP/limits.h" #include "src/__support/arg_list.h" +#include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" +#include "src/stdio/printf_core/core_structs.h" +#include "src/stdio/printf_core/error_mapper.h" #include "src/stdio/printf_core/printf_main.h" #include "src/stdio/printf_core/writer.h" @@ -29,10 +33,21 @@ LLVM_LIBC_FUNCTION(int, vsnprintf, wb(buffer, (buffsz > 0 ? buffsz - 1 : 0)); printf_core::Writer writer(wb); - int ret_val = printf_core::printf_main(&writer, format, args); + auto ret_val = printf_core::printf_main(&writer, format, args); + if (!ret_val.has_value()) { + libc_errno = printf_core::internal_error_to_errno(ret_val.error()); + return -1; + } if (buffsz > 0) // if the buffsz is 0 the buffer may be a null pointer. 
wb.buff[wb.buff_cur] = '\0'; - return ret_val; + + if (ret_val.value() > static_cast(cpp::numeric_limits::max())) { + libc_errno = + printf_core::internal_error_to_errno(-printf_core::OVERFLOW_ERROR); + return -1; + } + + return static_cast(ret_val.value()); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/vsprintf.cpp b/libc/src/stdio/vsprintf.cpp index 26d497be42125..f9cf8118534f6 100644 --- a/libc/src/stdio/vsprintf.cpp +++ b/libc/src/stdio/vsprintf.cpp @@ -10,7 +10,10 @@ #include "src/__support/CPP/limits.h" #include "src/__support/arg_list.h" +#include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" +#include "src/stdio/printf_core/core_structs.h" +#include "src/stdio/printf_core/error_mapper.h" #include "src/stdio/printf_core/printf_main.h" #include "src/stdio/printf_core/writer.h" @@ -30,9 +33,19 @@ LLVM_LIBC_FUNCTION(int, vsprintf, wb(buffer, cpp::numeric_limits::max()); printf_core::Writer writer(wb); - int ret_val = printf_core::printf_main(&writer, format, args); + auto ret_val = printf_core::printf_main(&writer, format, args); + if (!ret_val.has_value()) { + libc_errno = printf_core::internal_error_to_errno(ret_val.error()); + return -1; + } wb.buff[wb.buff_cur] = '\0'; - return ret_val; + + if (ret_val.value() > static_cast(cpp::numeric_limits::max())) { + libc_errno = + printf_core::internal_error_to_errno(-printf_core::OVERFLOW_ERROR); + return -1; + } + return static_cast(ret_val.value()); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdlib/CMakeLists.txt b/libc/src/stdlib/CMakeLists.txt index c464f82dcbda7..1ccdcc8bec148 100644 --- a/libc/src/stdlib/CMakeLists.txt +++ b/libc/src/stdlib/CMakeLists.txt @@ -73,6 +73,8 @@ add_entrypoint_object( strfromf.h DEPENDS .str_from_util + libc.src.__support.CPP.limits + libc.src.stdio.printf_core.error_mapper ) add_entrypoint_object( @@ -83,6 +85,8 @@ add_entrypoint_object( strfromd.h DEPENDS .str_from_util + libc.src.__support.CPP.limits + 
libc.src.stdio.printf_core.error_mapper ) add_entrypoint_object( @@ -93,6 +97,8 @@ add_entrypoint_object( strfroml.h DEPENDS .str_from_util + libc.src.__support.CPP.limits + libc.src.stdio.printf_core.error_mapper ) add_header_library( diff --git a/libc/src/stdlib/strfromd.cpp b/libc/src/stdlib/strfromd.cpp index f51e6d4c7f1df..71e257f08645b 100644 --- a/libc/src/stdlib/strfromd.cpp +++ b/libc/src/stdlib/strfromd.cpp @@ -7,7 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/stdlib/strfromd.h" +#include "src/__support/CPP/limits.h" #include "src/__support/macros/config.h" +#include "src/stdio/printf_core/core_structs.h" +#include "src/stdio/printf_core/error_mapper.h" #include "src/stdlib/str_from_util.h" namespace LIBC_NAMESPACE_DECL { @@ -36,7 +39,13 @@ LLVM_LIBC_FUNCTION(int, strfromd, if (n > 0) wb.buff[wb.buff_cur] = '\0'; - return writer.get_chars_written(); + if (writer.get_chars_written() > + static_cast(cpp::numeric_limits::max())) { + libc_errno = + printf_core::internal_error_to_errno(-printf_core::OVERFLOW_ERROR); + return -1; + } + return static_cast(writer.get_chars_written()); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdlib/strfromf.cpp b/libc/src/stdlib/strfromf.cpp index 14dbfdb25bab6..65f242b200f18 100644 --- a/libc/src/stdlib/strfromf.cpp +++ b/libc/src/stdlib/strfromf.cpp @@ -7,7 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/stdlib/strfromf.h" +#include "src/__support/CPP/limits.h" #include "src/__support/macros/config.h" +#include "src/stdio/printf_core/core_structs.h" +#include "src/stdio/printf_core/error_mapper.h" #include "src/stdlib/str_from_util.h" namespace LIBC_NAMESPACE_DECL { @@ -36,7 +39,13 @@ LLVM_LIBC_FUNCTION(int, strfromf, if (n > 0) wb.buff[wb.buff_cur] = '\0'; - return writer.get_chars_written(); + if (writer.get_chars_written() > + static_cast(cpp::numeric_limits::max())) { + libc_errno = + 
printf_core::internal_error_to_errno(-printf_core::OVERFLOW_ERROR); + return -1; + } + return static_cast(writer.get_chars_written()); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdlib/strfroml.cpp b/libc/src/stdlib/strfroml.cpp index 12f22a8a2fb65..31668a0323c93 100644 --- a/libc/src/stdlib/strfroml.cpp +++ b/libc/src/stdlib/strfroml.cpp @@ -7,7 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/stdlib/strfroml.h" +#include "src/__support/CPP/limits.h" #include "src/__support/macros/config.h" +#include "src/stdio/printf_core/core_structs.h" +#include "src/stdio/printf_core/error_mapper.h" #include "src/stdlib/str_from_util.h" namespace LIBC_NAMESPACE_DECL { @@ -41,7 +44,13 @@ LLVM_LIBC_FUNCTION(int, strfroml, if (n > 0) wb.buff[wb.buff_cur] = '\0'; - return writer.get_chars_written(); + if (writer.get_chars_written() > + static_cast(cpp::numeric_limits::max())) { + libc_errno = + printf_core::internal_error_to_errno(-printf_core::OVERFLOW_ERROR); + return -1; + } + return static_cast(writer.get_chars_written()); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/time/strftime_core/strftime_main.h b/libc/src/time/strftime_core/strftime_main.h index c7e590627094a..2b136d83234cd 100644 --- a/libc/src/time/strftime_core/strftime_main.h +++ b/libc/src/time/strftime_core/strftime_main.h @@ -36,7 +36,8 @@ int strftime_main(printf_core::Writer *writer, return result; } - return writer->get_chars_written(); + // TODO: Use ErrorOr + return static_cast(writer->get_chars_written()); } } // namespace strftime_core diff --git a/libc/test/src/stdio/CMakeLists.txt b/libc/test/src/stdio/CMakeLists.txt index eec108bc12ca5..a39428fb8d16c 100644 --- a/libc/test/src/stdio/CMakeLists.txt +++ b/libc/test/src/stdio/CMakeLists.txt @@ -186,6 +186,9 @@ add_libc_test( fprintf_test.cpp DEPENDS libc.src.stdio.fprintf + libc.test.UnitTest.ErrnoCheckingTest + libc.test.UnitTest.ErrnoSetterMatcher + 
libc.src.__support.macros.properties.architectures ${fprintf_test_deps} COMPILE_OPTIONS ${use_system_file} diff --git a/libc/test/src/stdio/fprintf_test.cpp b/libc/test/src/stdio/fprintf_test.cpp index 6799323cc6ad9..b035b6d9bd45d 100644 --- a/libc/test/src/stdio/fprintf_test.cpp +++ b/libc/test/src/stdio/fprintf_test.cpp @@ -15,6 +15,10 @@ #include "src/stdio/fprintf.h" +#include "src/__support/CPP/limits.h" +#include "src/__support/macros/properties/architectures.h" +#include "test/UnitTest/ErrnoCheckingTest.h" +#include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" namespace printf_test { @@ -31,6 +35,8 @@ using ::fread; #endif // LIBC_COPT_STDIO_USE_SYSTEM_FILE } // namespace printf_test +using LlvmLibcFPrintfTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest; + TEST(LlvmLibcFPrintfTest, WriteToFile) { const char *FILENAME = APPEND_LIBC_TEST("fprintf_output.test"); auto FILE_PATH = libc_make_test_file_path(FILENAME); @@ -78,6 +84,26 @@ TEST(LlvmLibcFPrintfTest, WriteToFile) { written = LIBC_NAMESPACE::fprintf(file, "Writing to a read only file should fail."); EXPECT_LT(written, 0); + ASSERT_ERRNO_FAILURE(); + + ASSERT_EQ(printf_test::fclose(file), 0); +} + +#if !defined(LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS) && \ + !defined(LIBC_COPT_PRINTF_DISABLE_WRITE_INT) && \ + !defined(LIBC_TARGET_ARCH_IS_GPU) +TEST(LlvmLibcFPrintfTest, NullPtrCheck) { + const char *FILENAME = APPEND_LIBC_TEST("fprintf_nullptr.test"); + auto FILE_PATH = libc_make_test_file_path(FILENAME); + + ::FILE *file = printf_test::fopen(FILE_PATH, "w"); + ASSERT_FALSE(file == nullptr); + + int ret = + LIBC_NAMESPACE::fprintf(file, "hello %n", static_cast(nullptr)); + EXPECT_LT(ret, 0); + ASSERT_ERRNO_FAILURE(); ASSERT_EQ(printf_test::fclose(file), 0); } +#endif // LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS diff --git a/libc/test/src/stdio/printf_core/converter_test.cpp b/libc/test/src/stdio/printf_core/converter_test.cpp index bf088937e4104..2dae2a22c864c 100644 --- 
a/libc/test/src/stdio/printf_core/converter_test.cpp +++ b/libc/test/src/stdio/printf_core/converter_test.cpp @@ -38,7 +38,7 @@ TEST_F(LlvmLibcPrintfConverterTest, SimpleRawConversion) { wb.buff[wb.buff_cur] = '\0'; ASSERT_STREQ(str, "abc"); - ASSERT_EQ(writer.get_chars_written(), 3); + ASSERT_EQ(writer.get_chars_written(), size_t{3}); } TEST_F(LlvmLibcPrintfConverterTest, PercentConversion) { @@ -52,7 +52,7 @@ TEST_F(LlvmLibcPrintfConverterTest, PercentConversion) { wb.buff[wb.buff_cur] = '\0'; ASSERT_STREQ(str, "%"); - ASSERT_EQ(writer.get_chars_written(), 1); + ASSERT_EQ(writer.get_chars_written(), size_t{1}); } TEST_F(LlvmLibcPrintfConverterTest, CharConversionSimple) { @@ -70,7 +70,7 @@ TEST_F(LlvmLibcPrintfConverterTest, CharConversionSimple) { wb.buff[wb.buff_cur] = '\0'; ASSERT_STREQ(str, "D"); - ASSERT_EQ(writer.get_chars_written(), 1); + ASSERT_EQ(writer.get_chars_written(), size_t{1}); } TEST_F(LlvmLibcPrintfConverterTest, CharConversionRightJustified) { @@ -85,7 +85,7 @@ TEST_F(LlvmLibcPrintfConverterTest, CharConversionRightJustified) { wb.buff[wb.buff_cur] = '\0'; ASSERT_STREQ(str, " E"); - ASSERT_EQ(writer.get_chars_written(), 4); + ASSERT_EQ(writer.get_chars_written(), size_t{4}); } TEST_F(LlvmLibcPrintfConverterTest, CharConversionLeftJustified) { @@ -102,7 +102,7 @@ TEST_F(LlvmLibcPrintfConverterTest, CharConversionLeftJustified) { wb.buff[wb.buff_cur] = '\0'; ASSERT_STREQ(str, "F "); - ASSERT_EQ(writer.get_chars_written(), 4); + ASSERT_EQ(writer.get_chars_written(), size_t{4}); } TEST_F(LlvmLibcPrintfConverterTest, StringConversionSimple) { @@ -118,7 +118,7 @@ TEST_F(LlvmLibcPrintfConverterTest, StringConversionSimple) { wb.buff[wb.buff_cur] = '\0'; ASSERT_STREQ(str, "DEF"); - ASSERT_EQ(writer.get_chars_written(), 3); + ASSERT_EQ(writer.get_chars_written(), size_t{3}); } TEST_F(LlvmLibcPrintfConverterTest, StringConversionPrecisionHigh) { @@ -133,7 +133,7 @@ TEST_F(LlvmLibcPrintfConverterTest, StringConversionPrecisionHigh) { wb.buff[wb.buff_cur] 
= '\0'; ASSERT_STREQ(str, "456"); - ASSERT_EQ(writer.get_chars_written(), 3); + ASSERT_EQ(writer.get_chars_written(), size_t{3}); } TEST_F(LlvmLibcPrintfConverterTest, StringConversionPrecisionLow) { @@ -148,7 +148,7 @@ TEST_F(LlvmLibcPrintfConverterTest, StringConversionPrecisionLow) { wb.buff[wb.buff_cur] = '\0'; ASSERT_STREQ(str, "xy"); - ASSERT_EQ(writer.get_chars_written(), 2); + ASSERT_EQ(writer.get_chars_written(), size_t{2}); } TEST_F(LlvmLibcPrintfConverterTest, StringConversionRightJustified) { @@ -163,7 +163,7 @@ TEST_F(LlvmLibcPrintfConverterTest, StringConversionRightJustified) { wb.buff[wb.buff_cur] = '\0'; ASSERT_STREQ(str, " 789"); - ASSERT_EQ(writer.get_chars_written(), 4); + ASSERT_EQ(writer.get_chars_written(), size_t{4}); } TEST_F(LlvmLibcPrintfConverterTest, StringConversionLeftJustified) { @@ -180,7 +180,7 @@ TEST_F(LlvmLibcPrintfConverterTest, StringConversionLeftJustified) { wb.buff[wb.buff_cur] = '\0'; ASSERT_STREQ(str, "ghi "); - ASSERT_EQ(writer.get_chars_written(), 4); + ASSERT_EQ(writer.get_chars_written(), size_t{4}); } TEST_F(LlvmLibcPrintfConverterTest, IntConversionSimple) { @@ -194,7 +194,7 @@ TEST_F(LlvmLibcPrintfConverterTest, IntConversionSimple) { wb.buff[wb.buff_cur] = '\0'; ASSERT_STREQ(str, "12345"); - ASSERT_EQ(writer.get_chars_written(), 5); + ASSERT_EQ(writer.get_chars_written(), size_t{5}); } TEST_F(LlvmLibcPrintfConverterTest, HexConversion) { @@ -211,7 +211,7 @@ TEST_F(LlvmLibcPrintfConverterTest, HexConversion) { wb.buff[wb.buff_cur] = '\0'; ASSERT_STREQ(str, "0x00000000123456ab"); - ASSERT_EQ(writer.get_chars_written(), 18); + ASSERT_EQ(writer.get_chars_written(), size_t{18}); } TEST_F(LlvmLibcPrintfConverterTest, BinaryConversion) { @@ -225,7 +225,7 @@ TEST_F(LlvmLibcPrintfConverterTest, BinaryConversion) { wb.buff[wb.buff_cur] = '\0'; ASSERT_STREQ(str, "101010"); - ASSERT_EQ(writer.get_chars_written(), 6); + ASSERT_EQ(writer.get_chars_written(), size_t{6}); } TEST_F(LlvmLibcPrintfConverterTest, PointerConversion) { 
@@ -239,7 +239,7 @@ TEST_F(LlvmLibcPrintfConverterTest, PointerConversion) { wb.buff[wb.buff_cur] = '\0'; ASSERT_STREQ(str, "0x123456ab"); - ASSERT_EQ(writer.get_chars_written(), 10); + ASSERT_EQ(writer.get_chars_written(), size_t{10}); } TEST_F(LlvmLibcPrintfConverterTest, OctConversion) { @@ -253,5 +253,5 @@ TEST_F(LlvmLibcPrintfConverterTest, OctConversion) { wb.buff[wb.buff_cur] = '\0'; ASSERT_STREQ(str, "1234"); - ASSERT_EQ(writer.get_chars_written(), 4); + ASSERT_EQ(writer.get_chars_written(), size_t{4}); } diff --git a/libc/test/src/stdio/printf_core/writer_test.cpp b/libc/test/src/stdio/printf_core/writer_test.cpp index d036341be7981..d263cf55aa474 100644 --- a/libc/test/src/stdio/printf_core/writer_test.cpp +++ b/libc/test/src/stdio/printf_core/writer_test.cpp @@ -39,7 +39,7 @@ TEST(LlvmLibcPrintfWriterTest, Write) { wb.buff[wb.buff_cur] = '\0'; ASSERT_STREQ("abc", str); - ASSERT_EQ(writer.get_chars_written(), 3); + ASSERT_EQ(writer.get_chars_written(), size_t{3}); } TEST(LlvmLibcPrintfWriterTest, WriteMultipleTimes) { @@ -53,7 +53,7 @@ TEST(LlvmLibcPrintfWriterTest, WriteMultipleTimes) { wb.buff[wb.buff_cur] = '\0'; ASSERT_STREQ("abcDEF123", str); - ASSERT_EQ(writer.get_chars_written(), 9); + ASSERT_EQ(writer.get_chars_written(), size_t{9}); } TEST(LlvmLibcPrintfWriterTest, WriteChars) { @@ -66,7 +66,7 @@ TEST(LlvmLibcPrintfWriterTest, WriteChars) { wb.buff[wb.buff_cur] = '\0'; ASSERT_STREQ("aaa", str); - ASSERT_EQ(writer.get_chars_written(), 3); + ASSERT_EQ(writer.get_chars_written(), size_t{3}); } TEST(LlvmLibcPrintfWriterTest, WriteCharsMultipleTimes) { @@ -80,7 +80,7 @@ TEST(LlvmLibcPrintfWriterTest, WriteCharsMultipleTimes) { wb.buff[wb.buff_cur] = '\0'; ASSERT_STREQ("aaaDDD111", str); - ASSERT_EQ(writer.get_chars_written(), 9); + ASSERT_EQ(writer.get_chars_written(), size_t{9}); } TEST(LlvmLibcPrintfWriterTest, WriteManyChars) { @@ -102,7 +102,7 @@ TEST(LlvmLibcPrintfWriterTest, WriteManyChars) { "ZZZZZZZZZZ" "ZZZZZZZZZ", str); - 
ASSERT_EQ(writer.get_chars_written(), 99); + ASSERT_EQ(writer.get_chars_written(), size_t{99}); } TEST(LlvmLibcPrintfWriterTest, MixedWrites) { @@ -117,7 +117,7 @@ TEST(LlvmLibcPrintfWriterTest, MixedWrites) { wb.buff[wb.buff_cur] = '\0'; ASSERT_STREQ("aaaDEF111456", str); - ASSERT_EQ(writer.get_chars_written(), 12); + ASSERT_EQ(writer.get_chars_written(), size_t{12}); } TEST(LlvmLibcPrintfWriterTest, WriteWithMaxLength) { @@ -129,7 +129,7 @@ TEST(LlvmLibcPrintfWriterTest, WriteWithMaxLength) { wb.buff[wb.buff_cur] = '\0'; ASSERT_STREQ("abcDEF1234", str); - ASSERT_EQ(writer.get_chars_written(), 12); + ASSERT_EQ(writer.get_chars_written(), size_t{12}); } TEST(LlvmLibcPrintfWriterTest, WriteCharsWithMaxLength) { @@ -141,7 +141,7 @@ TEST(LlvmLibcPrintfWriterTest, WriteCharsWithMaxLength) { wb.buff[wb.buff_cur] = '\0'; ASSERT_STREQ("1111111111", str); - ASSERT_EQ(writer.get_chars_written(), 15); + ASSERT_EQ(writer.get_chars_written(), size_t{15}); } TEST(LlvmLibcPrintfWriterTest, MixedWriteWithMaxLength) { @@ -157,7 +157,7 @@ TEST(LlvmLibcPrintfWriterTest, MixedWriteWithMaxLength) { wb.buff[wb.buff_cur] = '\0'; ASSERT_STREQ("aaaDEF1114", str); - ASSERT_EQ(writer.get_chars_written(), 12); + ASSERT_EQ(writer.get_chars_written(), size_t{12}); } TEST(LlvmLibcPrintfWriterTest, StringWithMaxLengthOne) { @@ -175,7 +175,7 @@ TEST(LlvmLibcPrintfWriterTest, StringWithMaxLengthOne) { wb.buff[wb.buff_cur] = '\0'; ASSERT_STREQ("", str); - ASSERT_EQ(writer.get_chars_written(), 12); + ASSERT_EQ(writer.get_chars_written(), size_t{12}); } TEST(LlvmLibcPrintfWriterTest, NullStringWithZeroMaxLength) { @@ -187,7 +187,7 @@ TEST(LlvmLibcPrintfWriterTest, NullStringWithZeroMaxLength) { writer.write('1', 3); writer.write({"456", 3}); - ASSERT_EQ(writer.get_chars_written(), 12); + ASSERT_EQ(writer.get_chars_written(), size_t{12}); } struct OutBuff { @@ -226,7 +226,7 @@ TEST(LlvmLibcPrintfWriterTest, WriteWithMaxLengthWithCallback) { str[out_buff.cur_pos] = '\0'; ASSERT_STREQ("abcDEF123456", 
str); - ASSERT_EQ(writer.get_chars_written(), 12); + ASSERT_EQ(writer.get_chars_written(), size_t{12}); } TEST(LlvmLibcPrintfWriterTest, WriteCharsWithMaxLengthWithCallback) { @@ -246,7 +246,7 @@ TEST(LlvmLibcPrintfWriterTest, WriteCharsWithMaxLengthWithCallback) { str[out_buff.cur_pos] = '\0'; ASSERT_STREQ("111111111111111", str); - ASSERT_EQ(writer.get_chars_written(), 15); + ASSERT_EQ(writer.get_chars_written(), size_t{15}); } TEST(LlvmLibcPrintfWriterTest, MixedWriteWithMaxLengthWithCallback) { @@ -269,7 +269,7 @@ TEST(LlvmLibcPrintfWriterTest, MixedWriteWithMaxLengthWithCallback) { str[out_buff.cur_pos] = '\0'; ASSERT_STREQ("aaaDEF111456", str); - ASSERT_EQ(writer.get_chars_written(), 12); + ASSERT_EQ(writer.get_chars_written(), size_t{12}); } TEST(LlvmLibcPrintfWriterTest, ZeroLengthBufferWithCallback) { @@ -292,7 +292,7 @@ TEST(LlvmLibcPrintfWriterTest, ZeroLengthBufferWithCallback) { str[out_buff.cur_pos] = '\0'; ASSERT_STREQ("aaaDEF111456", str); - ASSERT_EQ(writer.get_chars_written(), 12); + ASSERT_EQ(writer.get_chars_written(), size_t{12}); } TEST(LlvmLibcPrintfWriterTest, NullStringWithZeroMaxLengthWithCallback) { @@ -312,7 +312,7 @@ TEST(LlvmLibcPrintfWriterTest, NullStringWithZeroMaxLengthWithCallback) { wb.overflow_write(""); str[out_buff.cur_pos] = '\0'; - ASSERT_EQ(writer.get_chars_written(), 12); + ASSERT_EQ(writer.get_chars_written(), size_t{12}); ASSERT_STREQ("aaaDEF111456", str); } diff --git a/libc/test/src/stdio/snprintf_test.cpp b/libc/test/src/stdio/snprintf_test.cpp index baaa664cdc9ee..95507e0885dbf 100644 --- a/libc/test/src/stdio/snprintf_test.cpp +++ b/libc/test/src/stdio/snprintf_test.cpp @@ -8,8 +8,12 @@ #include "src/stdio/snprintf.h" +#include "test/UnitTest/ErrnoCheckingTest.h" +#include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" +using LlvmLibcSNPrintfTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest; + // The sprintf test cases cover testing the shared printf functionality, so // these tests will focus 
on snprintf exclusive features. @@ -59,3 +63,14 @@ TEST(LlvmLibcSNPrintfTest, NoCutOff) { EXPECT_EQ(written, 10); ASSERT_STREQ(buff, "1234567890"); } + +TEST(LlvmLibcSNPrintfTest, CharsWrittenOverflow) { + char buff[0]; + + // Trigger an overflow in the return value of snprintf by writing more than + // INT_MAX bytes. + int int_max = LIBC_NAMESPACE::cpp::numeric_limits::max(); + int written = LIBC_NAMESPACE::snprintf(buff, 0, "%*stest", int_max, ""); + EXPECT_LT(written, 0); + ASSERT_ERRNO_FAILURE(); +} diff --git a/libc/test/src/stdio/vfprintf_test.cpp b/libc/test/src/stdio/vfprintf_test.cpp index f50565a0f68ca..0e003f5de5bee 100644 --- a/libc/test/src/stdio/vfprintf_test.cpp +++ b/libc/test/src/stdio/vfprintf_test.cpp @@ -19,6 +19,8 @@ #include "src/stdio/vfprintf.h" +#include "test/UnitTest/ErrnoCheckingTest.h" +#include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" namespace printf_test { @@ -44,6 +46,8 @@ int call_vfprintf(::FILE *__restrict stream, const char *__restrict format, return ret; } +using LlvmLibcVFPrintfTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest; + TEST(LlvmLibcVFPrintfTest, WriteToFile) { const char *FILENAME = APPEND_LIBC_TEST("vfprintf_output.test"); auto FILE_PATH = libc_make_test_file_path(FILENAME); @@ -90,6 +94,7 @@ TEST(LlvmLibcVFPrintfTest, WriteToFile) { written = call_vfprintf(file, "Writing to a read only file should fail."); EXPECT_LT(written, 0); + ASSERT_ERRNO_FAILURE(); ASSERT_EQ(printf_test::fclose(file), 0); } diff --git a/libc/test/src/stdlib/StrfromTest.h b/libc/test/src/stdlib/StrfromTest.h index e82c94499aa11..fd2e0f120e90e 100644 --- a/libc/test/src/stdlib/StrfromTest.h +++ b/libc/test/src/stdlib/StrfromTest.h @@ -8,6 +8,8 @@ #include "src/__support/CPP/type_traits.h" #include "src/__support/FPUtil/FPBits.h" +#include "test/UnitTest/ErrnoCheckingTest.h" +#include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" #define ASSERT_STREQ_LEN(actual_written, actual_str, 
expected_str) \ @@ -15,7 +17,7 @@ EXPECT_STREQ(actual_str, expected_str); template -class StrfromTest : public LIBC_NAMESPACE::testing::Test { +class StrfromTest : public LIBC_NAMESPACE::testing::ErrnoCheckingTest { static constexpr bool is_single_prec = LIBC_NAMESPACE::cpp::is_same::value; @@ -481,6 +483,16 @@ class StrfromTest : public LIBC_NAMESPACE::testing::Test { written = func(buff, 10, "%A", -ld_nan); ASSERT_STREQ_LEN(written, buff, "-NAN"); } + + void charsWrittenOverflow(FunctionT func) { + char buff[100]; + // Trigger an overflow in the return value of strfrom by writing more than + // INT_MAX bytes. + int result = func(buff, sizeof(buff), "%.2147483647f", 1.0f); + + EXPECT_LT(result, 0); + ASSERT_ERRNO_FAILURE(); + } }; #define STRFROM_TEST(InputType, name, func) \ @@ -501,4 +513,7 @@ class StrfromTest : public LIBC_NAMESPACE::testing::Test { TEST_F(LlvmLibc##name##Test, InsufficientBufferSize) { \ insufficentBufsize(func); \ } \ - TEST_F(LlvmLibc##name##Test, InfAndNanValues) { infNanValues(func); } + TEST_F(LlvmLibc##name##Test, InfAndNanValues) { infNanValues(func); } \ + TEST_F(LlvmLibc##name##Test, CharsWrittenOverflow) { \ + charsWrittenOverflow(func); \ + } diff --git a/lldb/bindings/interface/SBFrameListExtensions.i b/lldb/bindings/interface/SBFrameListExtensions.i new file mode 100644 index 0000000000000..1c6ac8d50a54c --- /dev/null +++ b/lldb/bindings/interface/SBFrameListExtensions.i @@ -0,0 +1,41 @@ +%extend lldb::SBFrameList { + +#ifdef SWIGPYTHON + %nothreadallow; +#endif + std::string lldb::SBFrameList::__str__ (){ + lldb::SBStream description; + if (!$self->GetDescription(description)) + return std::string(" lldb.SBFrameList()"); + const char *desc = description.GetData(); + size_t desc_len = description.GetSize(); + if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) + --desc_len; + return std::string(desc, desc_len); + } +#ifdef SWIGPYTHON + %clearnothreadallow; +#endif + +#ifdef SWIGPYTHON + %pythoncode %{ + 
def __iter__(self): + '''Iterate over all frames in a lldb.SBFrameList object.''' + return lldb_iter(self, 'GetSize', 'GetFrameAtIndex') + + def __len__(self): + return int(self.GetSize()) + + def __getitem__(self, key): + if type(key) is not int: + return None + if key < 0: + count = len(self) + if -count <= key < count: + key %= count + + frame = self.GetFrameAtIndex(key) + return frame if frame.IsValid() else None + %} +#endif +} diff --git a/lldb/bindings/interface/SBThreadExtensions.i b/lldb/bindings/interface/SBThreadExtensions.i index 4ec9f10b1a256..c9ae4103d7b60 100644 --- a/lldb/bindings/interface/SBThreadExtensions.i +++ b/lldb/bindings/interface/SBThreadExtensions.i @@ -41,7 +41,8 @@ STRING_EXTENSION_OUTSIDE(SBThread) def get_thread_frames(self): '''An accessor function that returns a list() that contains all frames in a lldb.SBThread object.''' frames = [] - for frame in self: + frame_list = self.GetFrames() + for frame in frame_list: frames.append(frame) return frames diff --git a/lldb/bindings/interfaces.swig b/lldb/bindings/interfaces.swig index b3d44979c916c..fddbedf02e835 100644 --- a/lldb/bindings/interfaces.swig +++ b/lldb/bindings/interfaces.swig @@ -119,6 +119,7 @@ %include "lldb/API/SBFileSpecList.h" %include "lldb/API/SBFormat.h" %include "lldb/API/SBFrame.h" +%include "lldb/API/SBFrameList.h" %include "lldb/API/SBFunction.h" %include "lldb/API/SBHostOS.h" %include "lldb/API/SBInstruction.h" @@ -193,6 +194,7 @@ %include "./interface/SBFileSpecExtensions.i" %include "./interface/SBFileSpecListExtensions.i" %include "./interface/SBFrameExtensions.i" +%include "./interface/SBFrameListExtensions.i" %include "./interface/SBFunctionExtensions.i" %include "./interface/SBInstructionExtensions.i" %include "./interface/SBInstructionListExtensions.i" diff --git a/lldb/include/lldb/API/LLDB.h b/lldb/include/lldb/API/LLDB.h index 6485f35302a1c..6ac35bb4a364b 100644 --- a/lldb/include/lldb/API/LLDB.h +++ b/lldb/include/lldb/API/LLDB.h @@ -37,6 +37,7 @@ 
#include "lldb/API/SBFileSpecList.h" #include "lldb/API/SBFormat.h" #include "lldb/API/SBFrame.h" +#include "lldb/API/SBFrameList.h" #include "lldb/API/SBFunction.h" #include "lldb/API/SBHostOS.h" #include "lldb/API/SBInstruction.h" diff --git a/lldb/include/lldb/API/SBDefines.h b/lldb/include/lldb/API/SBDefines.h index 85f6bbeea5bf9..5fcc685050c0b 100644 --- a/lldb/include/lldb/API/SBDefines.h +++ b/lldb/include/lldb/API/SBDefines.h @@ -76,6 +76,7 @@ class LLDB_API SBFileSpec; class LLDB_API SBFileSpecList; class LLDB_API SBFormat; class LLDB_API SBFrame; +class LLDB_API SBFrameList; class LLDB_API SBFunction; class LLDB_API SBHostOS; class LLDB_API SBInstruction; diff --git a/lldb/include/lldb/API/SBFrame.h b/lldb/include/lldb/API/SBFrame.h index 92917e57fc125..5283cdfe53faa 100644 --- a/lldb/include/lldb/API/SBFrame.h +++ b/lldb/include/lldb/API/SBFrame.h @@ -222,6 +222,7 @@ class LLDB_API SBFrame { protected: friend class SBBlock; friend class SBExecutionContext; + friend class SBFrameList; friend class SBInstruction; friend class SBThread; friend class SBValue; diff --git a/lldb/include/lldb/API/SBFrameList.h b/lldb/include/lldb/API/SBFrameList.h new file mode 100644 index 0000000000000..dba1c1de5d191 --- /dev/null +++ b/lldb/include/lldb/API/SBFrameList.h @@ -0,0 +1,82 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_API_SBFRAMELIST_H +#define LLDB_API_SBFRAMELIST_H + +#include "lldb/API/SBDefines.h" + +namespace lldb { + +/// Represents a list of SBFrame objects. +/// +/// SBFrameList provides a way to iterate over stack frames lazily, +/// materializing frames on-demand as they are accessed. 
This is more +/// efficient than eagerly creating all frames upfront. +class LLDB_API SBFrameList { +public: + SBFrameList(); + + SBFrameList(const lldb::SBFrameList &rhs); + + ~SBFrameList(); + + const lldb::SBFrameList &operator=(const lldb::SBFrameList &rhs); + + explicit operator bool() const; + + bool IsValid() const; + + /// Returns the number of frames in the list. + uint32_t GetSize() const; + + /// Returns the frame at the given index. + /// + /// \param[in] idx + /// The index of the frame to retrieve (0-based). + /// + /// \return + /// An SBFrame object for the frame at the specified index. + /// Returns an invalid SBFrame if idx is out of range. + lldb::SBFrame GetFrameAtIndex(uint32_t idx) const; + + /// Get the thread associated with this frame list. + /// + /// \return + /// An SBThread object representing the thread. + lldb::SBThread GetThread() const; + + /// Clear all frames from this list. + void Clear(); + + /// Get a description of this frame list. + /// + /// \param[in] description + /// The stream to write the description to. + /// + /// \return + /// True if the description was successfully written. + bool GetDescription(lldb::SBStream &description) const; + +protected: + friend class SBThread; + +private: + SBFrameList(const lldb::StackFrameListSP &frame_list_sp); + + void SetFrameList(const lldb::StackFrameListSP &frame_list_sp); + + // This needs to be a shared_ptr since an SBFrameList can be passed to + // scripting affordances like ScriptedFrameProviders but also out of + // convenience because Thread::GetStackFrameList returns a StackFrameListSP. 
+ lldb::StackFrameListSP m_opaque_sp; +}; + +} // namespace lldb + +#endif // LLDB_API_SBFRAMELIST_H diff --git a/lldb/include/lldb/API/SBStream.h b/lldb/include/lldb/API/SBStream.h index d230da6123fb3..21f9d21e0e717 100644 --- a/lldb/include/lldb/API/SBStream.h +++ b/lldb/include/lldb/API/SBStream.h @@ -81,6 +81,7 @@ class LLDB_API SBStream { friend class SBFileSpec; friend class SBFileSpecList; friend class SBFrame; + friend class SBFrameList; friend class SBFunction; friend class SBInstruction; friend class SBInstructionList; diff --git a/lldb/include/lldb/API/SBThread.h b/lldb/include/lldb/API/SBThread.h index 2411dfd376519..f6a6d19935b83 100644 --- a/lldb/include/lldb/API/SBThread.h +++ b/lldb/include/lldb/API/SBThread.h @@ -186,6 +186,8 @@ class LLDB_API SBThread { lldb::SBFrame GetFrameAtIndex(uint32_t idx); + lldb::SBFrameList GetFrames(); + lldb::SBFrame GetSelectedFrame(); lldb::SBFrame SetSelectedFrame(uint32_t frame_idx); @@ -244,6 +246,7 @@ class LLDB_API SBThread { friend class SBSaveCoreOptions; friend class SBExecutionContext; friend class SBFrame; + friend class SBFrameList; friend class SBProcess; friend class SBDebugger; friend class SBValue; diff --git a/lldb/include/lldb/Target/StackFrameList.h b/lldb/include/lldb/Target/StackFrameList.h index ea9aab86b8ea1..5b0df0ddb3e29 100644 --- a/lldb/include/lldb/Target/StackFrameList.h +++ b/lldb/include/lldb/Target/StackFrameList.h @@ -101,6 +101,9 @@ class StackFrameList { /// Returns whether we have currently fetched all the frames of a stack. bool WereAllFramesFetched() const; + /// Get the thread associated with this frame list. 
+ Thread &GetThread() const { return m_thread; } + protected: friend class Thread; friend class ScriptedThread; diff --git a/lldb/include/lldb/Target/Thread.h b/lldb/include/lldb/Target/Thread.h index 688c056da2633..841f80cd1b1eb 100644 --- a/lldb/include/lldb/Target/Thread.h +++ b/lldb/include/lldb/Target/Thread.h @@ -1295,6 +1295,8 @@ class Thread : public std::enable_shared_from_this, /// an empty std::optional is returned in that case. std::optional GetPreviousFrameZeroPC(); + lldb::StackFrameListSP GetStackFrameList(); + protected: friend class ThreadPlan; friend class ThreadList; @@ -1336,8 +1338,6 @@ class Thread : public std::enable_shared_from_this, return StructuredData::ObjectSP(); } - lldb::StackFrameListSP GetStackFrameList(); - void SetTemporaryResumeState(lldb::StateType new_state) { m_temporary_resume_state = new_state; } diff --git a/lldb/packages/Python/lldbsuite/test/make/Makefile.rules b/lldb/packages/Python/lldbsuite/test/make/Makefile.rules index b3822db162a0b..63a35224b0435 100644 --- a/lldb/packages/Python/lldbsuite/test/make/Makefile.rules +++ b/lldb/packages/Python/lldbsuite/test/make/Makefile.rules @@ -334,7 +334,7 @@ endif # library to make ASAN tests work for most users, including the bots. 
ifeq "$(OS)" "Darwin" ifneq "$(ASAN_OPTIONS)" "" -LD_FLAGS += -Wl,-lto_library -Wl,$(shell dirname $(shell xcrun -find clang))/../lib/libLTO.dylib +LDFLAGS += -Wl,-lto_library -Wl,$(shell dirname $(shell xcrun -find clang))/../lib/libLTO.dylib endif endif OBJECTS = diff --git a/lldb/source/API/CMakeLists.txt b/lldb/source/API/CMakeLists.txt index ce59ee505cd3d..ac47580d60840 100644 --- a/lldb/source/API/CMakeLists.txt +++ b/lldb/source/API/CMakeLists.txt @@ -69,6 +69,7 @@ add_lldb_library(liblldb SHARED ${option_framework} SBFileSpecList.cpp SBFormat.cpp SBFrame.cpp + SBFrameList.cpp SBFunction.cpp SBHostOS.cpp SBInstruction.cpp diff --git a/lldb/source/API/SBFrameList.cpp b/lldb/source/API/SBFrameList.cpp new file mode 100644 index 0000000000000..d5fa955c10f70 --- /dev/null +++ b/lldb/source/API/SBFrameList.cpp @@ -0,0 +1,97 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception. 
+// +//===----------------------------------------------------------------------===// + +#include "lldb/API/SBFrameList.h" +#include "lldb/API/SBFrame.h" +#include "lldb/API/SBStream.h" +#include "lldb/API/SBThread.h" +#include "lldb/Target/StackFrameList.h" +#include "lldb/Target/Thread.h" +#include "lldb/Utility/Instrumentation.h" + +using namespace lldb; +using namespace lldb_private; + +SBFrameList::SBFrameList() : m_opaque_sp() { LLDB_INSTRUMENT_VA(this); } + +SBFrameList::SBFrameList(const SBFrameList &rhs) + : m_opaque_sp(rhs.m_opaque_sp) { + LLDB_INSTRUMENT_VA(this, rhs); +} + +SBFrameList::~SBFrameList() = default; + +const SBFrameList &SBFrameList::operator=(const SBFrameList &rhs) { + LLDB_INSTRUMENT_VA(this, rhs); + + if (this != &rhs) + m_opaque_sp = rhs.m_opaque_sp; + return *this; +} + +SBFrameList::SBFrameList(const lldb::StackFrameListSP &frame_list_sp) + : m_opaque_sp(frame_list_sp) {} + +void SBFrameList::SetFrameList(const lldb::StackFrameListSP &frame_list_sp) { + m_opaque_sp = frame_list_sp; +} + +SBFrameList::operator bool() const { + LLDB_INSTRUMENT_VA(this); + + return m_opaque_sp.get() != nullptr; +} + +bool SBFrameList::IsValid() const { + LLDB_INSTRUMENT_VA(this); + return this->operator bool(); +} + +uint32_t SBFrameList::GetSize() const { + LLDB_INSTRUMENT_VA(this); + + if (m_opaque_sp) + return m_opaque_sp->GetNumFrames(); + return 0; +} + +SBFrame SBFrameList::GetFrameAtIndex(uint32_t idx) const { + LLDB_INSTRUMENT_VA(this, idx); + + SBFrame sb_frame; + if (m_opaque_sp) + sb_frame.SetFrameSP(m_opaque_sp->GetFrameAtIndex(idx)); + return sb_frame; +} + +SBThread SBFrameList::GetThread() const { + LLDB_INSTRUMENT_VA(this); + + SBThread sb_thread; + if (m_opaque_sp) + sb_thread.SetThread(m_opaque_sp->GetThread().shared_from_this()); + return sb_thread; +} + +void SBFrameList::Clear() { + LLDB_INSTRUMENT_VA(this); + + if (m_opaque_sp) + m_opaque_sp->Clear(); +} + +bool SBFrameList::GetDescription(SBStream &description) const { + 
LLDB_INSTRUMENT_VA(this, description); + + if (!m_opaque_sp) + return false; + + Stream &strm = description.ref(); + m_opaque_sp->Dump(&strm); + return true; +} diff --git a/lldb/source/API/SBThread.cpp b/lldb/source/API/SBThread.cpp index f58a1b52afa91..f32c5c56444cd 100644 --- a/lldb/source/API/SBThread.cpp +++ b/lldb/source/API/SBThread.cpp @@ -14,6 +14,7 @@ #include "lldb/API/SBFileSpec.h" #include "lldb/API/SBFormat.h" #include "lldb/API/SBFrame.h" +#include "lldb/API/SBFrameList.h" #include "lldb/API/SBProcess.h" #include "lldb/API/SBStream.h" #include "lldb/API/SBStructuredData.h" @@ -1102,6 +1103,26 @@ SBFrame SBThread::GetFrameAtIndex(uint32_t idx) { return sb_frame; } +lldb::SBFrameList SBThread::GetFrames() { + LLDB_INSTRUMENT_VA(this); + + SBFrameList sb_frame_list; + llvm::Expected exe_ctx = + GetStoppedExecutionContext(m_opaque_sp); + if (!exe_ctx) { + LLDB_LOG_ERROR(GetLog(LLDBLog::API), exe_ctx.takeError(), "{0}"); + return SBFrameList(); + } + + if (exe_ctx->HasThreadScope()) { + StackFrameListSP frame_list_sp = + exe_ctx->GetThreadPtr()->GetStackFrameList(); + sb_frame_list.SetFrameList(frame_list_sp); + } + + return sb_frame_list; +} + lldb::SBFrame SBThread::GetSelectedFrame() { LLDB_INSTRUMENT_VA(this); diff --git a/lldb/test/API/python_api/frame_list/Makefile b/lldb/test/API/python_api/frame_list/Makefile new file mode 100644 index 0000000000000..99998b20bcb05 --- /dev/null +++ b/lldb/test/API/python_api/frame_list/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/python_api/frame_list/TestSBFrameList.py b/lldb/test/API/python_api/frame_list/TestSBFrameList.py new file mode 100644 index 0000000000000..f348ce492e547 --- /dev/null +++ b/lldb/test/API/python_api/frame_list/TestSBFrameList.py @@ -0,0 +1,194 @@ +""" +Test SBFrameList API. 
+""" + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class FrameListAPITestCase(TestBase): + def test_frame_list_api(self): + """Test SBThread.GetFrames() returns a valid SBFrameList.""" + self.build() + self.frame_list_api() + + def test_frame_list_iterator(self): + """Test SBFrameList iterator functionality.""" + self.build() + self.frame_list_iterator() + + def test_frame_list_indexing(self): + """Test SBFrameList indexing and length.""" + self.build() + self.frame_list_indexing() + + def test_frame_list_get_thread(self): + """Test SBFrameList.GetThread() returns correct thread.""" + self.build() + self.frame_list_get_thread() + + def setUp(self): + TestBase.setUp(self) + self.main_source = "main.cpp" + + def frame_list_api(self): + """Test SBThread.GetFrames() returns a valid SBFrameList.""" + exe = self.getBuildArtifact("a.out") + + target, process, thread, bkpt = lldbutil.run_to_source_breakpoint( + self, "Set break point at this line", lldb.SBFileSpec(self.main_source) + ) + + self.assertTrue( + thread.IsValid(), "There should be a thread stopped due to breakpoint" + ) + + # Test GetFrames() returns a valid SBFrameList + frame_list = thread.GetFrames() + self.assertTrue(frame_list.IsValid(), "Frame list should be valid") + self.assertGreater( + frame_list.GetSize(), 0, "Frame list should have at least one frame" + ) + + # Verify frame list size matches thread frame count + self.assertEqual( + frame_list.GetSize(), + thread.GetNumFrames(), + "Frame list size should match thread frame count", + ) + + # Verify frames are the same + for i in range(frame_list.GetSize()): + frame_from_list = frame_list.GetFrameAtIndex(i) + frame_from_thread = thread.GetFrameAtIndex(i) + self.assertTrue( + frame_from_list.IsValid(), f"Frame {i} from list should be valid" + ) + self.assertEqual( + frame_from_list.GetPC(), + frame_from_thread.GetPC(), + f"Frame {i} PC should match", + ) + + def 
frame_list_iterator(self): + """Test SBFrameList iterator functionality.""" + exe = self.getBuildArtifact("a.out") + + target, process, thread, bkpt = lldbutil.run_to_source_breakpoint( + self, "Set break point at this line", lldb.SBFileSpec(self.main_source) + ) + + self.assertTrue( + thread.IsValid(), "There should be a thread stopped due to breakpoint" + ) + + frame_list = thread.GetFrames() + + # Test iteration + frame_count = 0 + for frame in frame_list: + self.assertTrue(frame.IsValid(), "Each frame should be valid") + frame_count += 1 + + self.assertEqual( + frame_count, + frame_list.GetSize(), + "Iterator should visit all frames", + ) + + # Test that we can iterate multiple times + second_count = 0 + for frame in frame_list: + second_count += 1 + + self.assertEqual( + frame_count, second_count, "Should be able to iterate multiple times" + ) + + def frame_list_indexing(self): + """Test SBFrameList indexing and length.""" + exe = self.getBuildArtifact("a.out") + + target, process, thread, bkpt = lldbutil.run_to_source_breakpoint( + self, "Set break point at this line", lldb.SBFileSpec(self.main_source) + ) + + self.assertTrue( + thread.IsValid(), "There should be a thread stopped due to breakpoint" + ) + + frame_list = thread.GetFrames() + + # Test len() + self.assertEqual( + len(frame_list), frame_list.GetSize(), "len() should return frame count" + ) + + # Test positive indexing + first_frame = frame_list[0] + self.assertTrue(first_frame.IsValid(), "First frame should be valid") + self.assertEqual( + first_frame.GetPC(), + thread.GetFrameAtIndex(0).GetPC(), + "Indexed frame should match", + ) + + # Test negative indexing + if len(frame_list) > 0: + last_frame = frame_list[-1] + self.assertTrue(last_frame.IsValid(), "Last frame should be valid") + self.assertEqual( + last_frame.GetPC(), + thread.GetFrameAtIndex(len(frame_list) - 1).GetPC(), + "Negative indexing should work", + ) + + # Test out of bounds returns None + out_of_bounds = frame_list[10000] + 
self.assertIsNone(out_of_bounds, "Out of bounds index should return None") + + # Test bool conversion + self.assertTrue(bool(frame_list), "Non-empty frame list should be truthy") + + # Test Clear() + frame_list.Clear() + # Note: Clear() clears the underlying StackFrameList cache, + # but the frame list object itself should still be valid + self.assertTrue( + frame_list.IsValid(), "Frame list should still be valid after Clear()" + ) + + def frame_list_get_thread(self): + """Test SBFrameList.GetThread() returns correct thread.""" + exe = self.getBuildArtifact("a.out") + + target, process, thread, bkpt = lldbutil.run_to_source_breakpoint( + self, "Set break point at this line", lldb.SBFileSpec(self.main_source) + ) + + self.assertTrue( + thread.IsValid(), "There should be a thread stopped due to breakpoint" + ) + + frame_list = thread.GetFrames() + self.assertTrue(frame_list.IsValid(), "Frame list should be valid") + + # Test GetThread() returns the correct thread + thread_from_list = frame_list.GetThread() + self.assertTrue( + thread_from_list.IsValid(), "Thread from frame list should be valid" + ) + self.assertEqual( + thread_from_list.GetThreadID(), + thread.GetThreadID(), + "Frame list should return the correct thread", + ) + + # Verify it's the same thread object + self.assertEqual( + thread_from_list.GetProcess().GetProcessID(), + thread.GetProcess().GetProcessID(), + "Thread should belong to same process", + ) diff --git a/lldb/test/API/python_api/frame_list/main.cpp b/lldb/test/API/python_api/frame_list/main.cpp new file mode 100644 index 0000000000000..e39944654a23e --- /dev/null +++ b/lldb/test/API/python_api/frame_list/main.cpp @@ -0,0 +1,22 @@ +#include + +int c(int val) { + // Set break point at this line + return val + 3; +} + +int b(int val) { + int result = c(val); + return result; +} + +int a(int val) { + int result = b(val); + return result; +} + +int main() { + int result = a(1); + printf("Result: %d\n", result); + return 0; +} diff --git 
a/lldb/tools/debugserver/source/MacOSX/MachTask.h b/lldb/tools/debugserver/source/MacOSX/MachTask.h index c4a20b80fda95..915f65a8160ee 100644 --- a/lldb/tools/debugserver/source/MacOSX/MachTask.h +++ b/lldb/tools/debugserver/source/MacOSX/MachTask.h @@ -81,9 +81,7 @@ class MachTask { void TaskPortChanged(task_t task); task_t TaskPort() const { return m_task; } task_t TaskPortForProcessID(DNBError &err, bool force = false); - static task_t TaskPortForProcessID(pid_t pid, DNBError &err, - uint32_t num_retries = 10, - uint32_t usec_interval = 10000); + static task_t TaskPortForProcessID(pid_t pid, DNBError &err); MachProcess *Process() { return m_process; } const MachProcess *Process() const { return m_process; } diff --git a/lldb/tools/debugserver/source/MacOSX/MachTask.mm b/lldb/tools/debugserver/source/MacOSX/MachTask.mm index 21156feecba2c..e5bbab830b187 100644 --- a/lldb/tools/debugserver/source/MacOSX/MachTask.mm +++ b/lldb/tools/debugserver/source/MacOSX/MachTask.mm @@ -523,14 +523,15 @@ static void get_threads_profile_data(DNBProfileDataScanType scanType, //---------------------------------------------------------------------- // MachTask::TaskPortForProcessID //---------------------------------------------------------------------- -task_t MachTask::TaskPortForProcessID(pid_t pid, DNBError &err, - uint32_t num_retries, - uint32_t usec_interval) { +task_t MachTask::TaskPortForProcessID(pid_t pid, DNBError &err) { + static constexpr uint32_t k_num_retries = 10; + static constexpr uint32_t k_usec_delay = 10000; + if (pid != INVALID_NUB_PROCESS) { DNBError err; mach_port_t task_self = mach_task_self(); task_t task = TASK_NULL; - for (uint32_t i = 0; i < num_retries; i++) { + for (uint32_t i = 0; i < k_num_retries; i++) { DNBLog("[LaunchAttach] (%d) about to task_for_pid(%d)", getpid(), pid); err = ::task_for_pid(task_self, pid, &task); @@ -557,7 +558,7 @@ static void get_threads_profile_data(DNBProfileDataScanType scanType, } // Sleep a bit and try again - 
::usleep(usec_interval); + ::usleep(k_usec_delay); } } return TASK_NULL; diff --git a/llvm/include/llvm/Analysis/RuntimeLibcallInfo.h b/llvm/include/llvm/Analysis/RuntimeLibcallInfo.h new file mode 100644 index 0000000000000..a3e1014b417e5 --- /dev/null +++ b/llvm/include/llvm/Analysis/RuntimeLibcallInfo.h @@ -0,0 +1,60 @@ +//===-- RuntimeLibcallInfo.h - Runtime library information ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_RUNTIMELIBCALLINFO_H +#define LLVM_ANALYSIS_RUNTIMELIBCALLINFO_H + +#include "llvm/IR/RuntimeLibcalls.h" +#include "llvm/Pass.h" + +namespace llvm { + +class LLVM_ABI RuntimeLibraryAnalysis + : public AnalysisInfoMixin { +public: + using Result = RTLIB::RuntimeLibcallsInfo; + + RuntimeLibraryAnalysis() = default; + RuntimeLibraryAnalysis(RTLIB::RuntimeLibcallsInfo &&BaselineInfoImpl) + : LibcallsInfo(std::move(BaselineInfoImpl)) {} + explicit RuntimeLibraryAnalysis(const Triple &T) : LibcallsInfo(T) {} + + LLVM_ABI RTLIB::RuntimeLibcallsInfo run(const Module &M, + ModuleAnalysisManager &); + +private: + friend AnalysisInfoMixin; + LLVM_ABI static AnalysisKey Key; + + RTLIB::RuntimeLibcallsInfo LibcallsInfo; +}; + +class LLVM_ABI RuntimeLibraryInfoWrapper : public ImmutablePass { + RuntimeLibraryAnalysis RTLA; + std::optional RTLCI; + +public: + static char ID; + RuntimeLibraryInfoWrapper(); + explicit RuntimeLibraryInfoWrapper(const Triple &T); + explicit RuntimeLibraryInfoWrapper(const RTLIB::RuntimeLibcallsInfo &RTLCI); + + const RTLIB::RuntimeLibcallsInfo &getRTLCI(const Module &M) { + ModuleAnalysisManager DummyMAM; + RTLCI = RTLA.run(M, DummyMAM); + return *RTLCI; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +LLVM_ABI 
ModulePass *createRuntimeLibraryInfoWrapperPass(); + +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def index 014988299d37f..76b89dcb3f25d 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def @@ -1951,6 +1951,36 @@ TLI_DEFINE_ENUM_INTERNAL(nearbyintl) TLI_DEFINE_STRING_INTERNAL("nearbyintl") TLI_DEFINE_SIG_INTERNAL(LDbl, LDbl) +/// double nextafter(double x, double y); +TLI_DEFINE_ENUM_INTERNAL(nextafter) +TLI_DEFINE_STRING_INTERNAL("nextafter") +TLI_DEFINE_SIG_INTERNAL(Dbl, Dbl, Dbl) + +/// float nextafterf(float x, float y); +TLI_DEFINE_ENUM_INTERNAL(nextafterf) +TLI_DEFINE_STRING_INTERNAL("nextafterf") +TLI_DEFINE_SIG_INTERNAL(Flt, Flt, Flt) + +/// long double nextafterl(long double x, long double y); +TLI_DEFINE_ENUM_INTERNAL(nextafterl) +TLI_DEFINE_STRING_INTERNAL("nextafterl") +TLI_DEFINE_SIG_INTERNAL(LDbl, LDbl, LDbl) + +/// double nexttoward(double x, long double y); +TLI_DEFINE_ENUM_INTERNAL(nexttoward) +TLI_DEFINE_STRING_INTERNAL("nexttoward") +TLI_DEFINE_SIG_INTERNAL(Dbl, Dbl, LDbl) + +/// float nexttowardf(float x, long double y); +TLI_DEFINE_ENUM_INTERNAL(nexttowardf) +TLI_DEFINE_STRING_INTERNAL("nexttowardf") +TLI_DEFINE_SIG_INTERNAL(Flt, Flt, LDbl) + +/// long double nexttowardl(long double x, long double y); +TLI_DEFINE_ENUM_INTERNAL(nexttowardl) +TLI_DEFINE_STRING_INTERNAL("nexttowardl") +TLI_DEFINE_SIG_INTERNAL(LDbl, LDbl, LDbl) + /// uint32_t ntohl(uint32_t netlong); TLI_DEFINE_ENUM_INTERNAL(ntohl) TLI_DEFINE_STRING_INTERNAL("ntohl") diff --git a/llvm/include/llvm/CodeGen/Register.h b/llvm/include/llvm/CodeGen/Register.h index e462a814562dc..790db8a11e390 100644 --- a/llvm/include/llvm/CodeGen/Register.h +++ b/llvm/include/llvm/CodeGen/Register.h @@ -10,6 +10,7 @@ #define LLVM_CODEGEN_REGISTER_H #include "llvm/MC/MCRegister.h" +#include "llvm/Support/MathExtras.h" #include namespace llvm { @@ 
-35,19 +36,23 @@ class Register { // DenseMapInfo uses -1u and -2u. static_assert(std::numeric_limits::max() >= 0xFFFFFFFF, "Reg isn't large enough to hold full range."); - static constexpr unsigned FirstStackSlot = 1u << 30; - static_assert(FirstStackSlot >= MCRegister::LastPhysicalReg); + static constexpr unsigned MaxFrameIndexBitwidth = 30; + static constexpr unsigned StackSlotZero = 1u << MaxFrameIndexBitwidth; + static constexpr const unsigned StackSlotMask = StackSlotZero - 1; + static_assert(StackSlotZero >= MCRegister::LastPhysicalReg); static constexpr unsigned VirtualRegFlag = 1u << 31; /// Return true if this is a stack slot. constexpr bool isStack() const { - return Register::FirstStackSlot <= Reg && Reg < Register::VirtualRegFlag; + return Register::StackSlotZero <= Reg && Reg < Register::VirtualRegFlag; } /// Convert a non-negative frame index to a stack slot register value. static Register index2StackSlot(int FI) { - assert(FI >= 0 && "Cannot hold a negative frame index."); - return Register(FI + Register::FirstStackSlot); + assert(isInt(FI) && + "Frame index must be at most 30 bits."); + unsigned FIMasked = FI & Register::StackSlotMask; + return Register(FIMasked | Register::StackSlotZero); } /// Return true if the specified register number is in @@ -87,7 +92,7 @@ class Register { /// Compute the frame index from a register value representing a stack slot. 
int stackSlotIndex() const { assert(isStack() && "Not a stack slot"); - return static_cast(Reg - Register::FirstStackSlot); + return SignExtend32(Reg & Register::StackSlotMask); } constexpr operator unsigned() const { return Reg; } diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h index 5241a51dd8cd8..d7921c3eb3f7c 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h @@ -46,6 +46,7 @@ class SelectionDAGISel { public: TargetMachine &TM; const TargetLibraryInfo *LibInfo; + const RTLIB::RuntimeLibcallsInfo *RuntimeLibCallInfo; std::unique_ptr FuncInfo; std::unique_ptr SwiftError; MachineFunction *MF; diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.h b/llvm/include/llvm/IR/RuntimeLibcalls.h index 78e4b1723aafa..c822b6530a441 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.h +++ b/llvm/include/llvm/IR/RuntimeLibcalls.h @@ -9,6 +9,8 @@ // This file implements a common interface to work with library calls into a // runtime that may be emitted by a given backend. 
// +// FIXME: This should probably move to Analysis +// //===----------------------------------------------------------------------===// #ifndef LLVM_IR_RUNTIME_LIBCALLS_H @@ -20,6 +22,7 @@ #include "llvm/ADT/StringTable.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/InstrTypes.h" +#include "llvm/IR/PassManager.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/Compiler.h" @@ -74,6 +77,8 @@ struct RuntimeLibcallsInfo { public: friend class llvm::LibcallLoweringInfo; + RuntimeLibcallsInfo() = default; + explicit RuntimeLibcallsInfo( const Triple &TT, ExceptionHandling ExceptionModel = ExceptionHandling::None, @@ -89,6 +94,11 @@ struct RuntimeLibcallsInfo { initLibcalls(TT, ExceptionModel, FloatABI, EABIVersion, ABIName); } + explicit RuntimeLibcallsInfo(const Module &M); + + bool invalidate(Module &M, const PreservedAnalyses &PA, + ModuleAnalysisManager::Invalidator &); + /// Get the libcall routine name for the specified libcall implementation. 
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl) { if (CallImpl == RTLIB::Unsupported) diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index c8196d8a7ef48..10a4d8525a9e8 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -290,6 +290,7 @@ LLVM_ABI void initializeRemoveRedundantDebugValuesLegacyPass(PassRegistry &); LLVM_ABI void initializeRenameIndependentSubregsLegacyPass(PassRegistry &); LLVM_ABI void initializeReplaceWithVeclibLegacyPass(PassRegistry &); LLVM_ABI void initializeResetMachineFunctionPass(PassRegistry &); +LLVM_ABI void initializeRuntimeLibraryInfoWrapperPass(PassRegistry &); LLVM_ABI void initializeSCEVAAWrapperPassPass(PassRegistry &); LLVM_ABI void initializeSROALegacyPassPass(PassRegistry &); LLVM_ABI void initializeSafeStackLegacyPassPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 2f20792568e63..bd7cd39ebb743 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -20,6 +20,7 @@ #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/RuntimeLibcallInfo.h" #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" @@ -638,6 +639,8 @@ Error CodeGenPassBuilder::buildPipeline( /*Force=*/true); addIRPass(RequireAnalysisPass(), /*Force=*/true); + addIRPass(RequireAnalysisPass(), + /*Force=*/true); addISelPasses(addIRPass); } diff --git a/llvm/lib/Analysis/Analysis.cpp b/llvm/lib/Analysis/Analysis.cpp index 9f5daf32be9a0..aaac2cf187281 100644 --- a/llvm/lib/Analysis/Analysis.cpp +++ b/llvm/lib/Analysis/Analysis.cpp @@ -63,6 +63,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeRegionPrinterPass(Registry); 
initializeRegionOnlyViewerPass(Registry); initializeRegionOnlyPrinterPass(Registry); + initializeRuntimeLibraryInfoWrapperPass(Registry); initializeSCEVAAWrapperPassPass(Registry); initializeScalarEvolutionWrapperPassPass(Registry); initializeStackSafetyGlobalInfoWrapperPassPass(Registry); diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index 16dd6f8b86006..88ebd65ec46af 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -137,6 +137,7 @@ add_llvm_component_library(LLVMAnalysis RegionPass.cpp RegionPrinter.cpp ReplayInlineAdvisor.cpp + RuntimeLibcallInfo.cpp ScalarEvolution.cpp ScalarEvolutionAliasAnalysis.cpp ScalarEvolutionDivision.cpp diff --git a/llvm/lib/Analysis/RuntimeLibcallInfo.cpp b/llvm/lib/Analysis/RuntimeLibcallInfo.cpp new file mode 100644 index 0000000000000..6fb4119aa73f2 --- /dev/null +++ b/llvm/lib/Analysis/RuntimeLibcallInfo.cpp @@ -0,0 +1,43 @@ +//===- RuntimeLibcallInfo.cpp ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/RuntimeLibcallInfo.h" +#include "llvm/InitializePasses.h" + +using namespace llvm; + +AnalysisKey RuntimeLibraryAnalysis::Key; + +RTLIB::RuntimeLibcallsInfo +RuntimeLibraryAnalysis::run(const Module &M, ModuleAnalysisManager &) { + return RTLIB::RuntimeLibcallsInfo(M); +} + +INITIALIZE_PASS(RuntimeLibraryInfoWrapper, "runtime-library-info", + "Runtime Library Function Analysis", false, true) + +RuntimeLibraryInfoWrapper::RuntimeLibraryInfoWrapper() + : ImmutablePass(ID), RTLA(RTLIB::RuntimeLibcallsInfo(Triple())) {} + +char RuntimeLibraryInfoWrapper::ID = 0; + +ModulePass *llvm::createRuntimeLibraryInfoWrapperPass() { + return new RuntimeLibraryInfoWrapper(); +} + +void RuntimeLibraryInfoWrapper::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); +} + +// Assume this is stable unless explicitly invalidated. 
+bool RTLIB::RuntimeLibcallsInfo::invalidate( + Module &M, const PreservedAnalyses &PA, + ModuleAnalysisManager::Invalidator &) { + auto PAC = PA.getChecker(); + return !PAC.preservedWhenStateless(); +} diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp index 813632c375308..74f3a7d131c35 100644 --- a/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -388,6 +388,10 @@ static void initializeLibCalls(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setAvailableWithName(LibFunc_logbf, "_logbf"); else TLI.setUnavailable(LibFunc_logbf); + TLI.setUnavailable(LibFunc_nextafter); + TLI.setUnavailable(LibFunc_nextafterf); + TLI.setUnavailable(LibFunc_nexttoward); + TLI.setUnavailable(LibFunc_nexttowardf); TLI.setUnavailable(LibFunc_rint); TLI.setUnavailable(LibFunc_rintf); TLI.setUnavailable(LibFunc_round); @@ -418,6 +422,8 @@ static void initializeLibCalls(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc_logbl); TLI.setUnavailable(LibFunc_ilogbl); TLI.setUnavailable(LibFunc_nearbyintl); + TLI.setUnavailable(LibFunc_nextafterl); + TLI.setUnavailable(LibFunc_nexttowardl); TLI.setUnavailable(LibFunc_rintl); TLI.setUnavailable(LibFunc_roundl); TLI.setUnavailable(LibFunc_scalblnl); diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp index d6f23b62519fe..c1fb8b6d78ff8 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp @@ -643,6 +643,38 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known, Known.Zero.setBitsFrom(LowBits); break; } + case TargetOpcode::G_EXTRACT_VECTOR_ELT: { + GExtractVectorElement &Extract = cast(MI); + Register InVec = Extract.getVectorReg(); + Register EltNo = Extract.getIndexReg(); + + auto ConstEltNo = getIConstantVRegVal(EltNo, MRI); + + LLT VecVT = MRI.getType(InVec); + // computeKnownBits not 
yet implemented for scalable vectors. + if (VecVT.isScalableVector()) + break; + + const unsigned EltBitWidth = VecVT.getScalarSizeInBits(); + const unsigned NumSrcElts = VecVT.getNumElements(); + // A return type different from the vector's element type may lead to + // issues with pattern selection. Bail out to avoid that. + if (BitWidth > EltBitWidth) + break; + + Known.Zero.setAllBits(); + Known.One.setAllBits(); + + // If we know the element index, just demand that vector element, else for + // an unknown element index, ignore DemandedElts and demand them all. + APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts); + if (ConstEltNo && ConstEltNo->ult(NumSrcElts)) + DemandedSrcElts = + APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue()); + + computeKnownBitsImpl(InVec, Known, DemandedSrcElts, Depth + 1); + break; + } case TargetOpcode::G_SHUFFLE_VECTOR: { APInt DemandedLHS, DemandedRHS; // Collect the known bits that are shared by every vector element referenced diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp index 40a89078bcf59..61706e13b8e91 100644 --- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp +++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp @@ -193,7 +193,6 @@ void ReachingDefInfo::processDefs(MachineInstr *MI) { for (auto &MO : MI->operands()) { if (MO.isFI()) { int FrameIndex = MO.getIndex(); - assert(FrameIndex >= 0 && "Can't handle negative frame indicies yet!"); if (!isFIDef(*MI, FrameIndex, TII)) continue; MBBFrameObjsReachingDefs[{MBBNumber, FrameIndex}].push_back(CurInstr); @@ -302,8 +301,6 @@ void ReachingDefInfo::print(raw_ostream &OS) { Register Reg; if (MO.isFI()) { int FrameIndex = MO.getIndex(); - assert(FrameIndex >= 0 && - "Can't handle negative frame indicies yet!"); Reg = Register::index2StackSlot(FrameIndex); } else if (MO.isReg()) { if (MO.isDef()) diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp index 2fb01a4f95fea..f4c5c6ff35af6 100644 --- 
a/llvm/lib/IR/RuntimeLibcalls.cpp +++ b/llvm/lib/IR/RuntimeLibcalls.cpp @@ -9,7 +9,7 @@ #include "llvm/IR/RuntimeLibcalls.h" #include "llvm/ADT/FloatingPointMode.h" #include "llvm/ADT/StringTable.h" -#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/xxhash.h" #include "llvm/TargetParser/ARMTargetParser.h" @@ -25,6 +25,11 @@ using namespace RTLIB; #define DEFINE_GET_LOOKUP_LIBCALL_IMPL_NAME #include "llvm/IR/RuntimeLibcalls.inc" +RuntimeLibcallsInfo::RuntimeLibcallsInfo(const Module &M) + : RuntimeLibcallsInfo(M.getTargetTriple()) { + // TODO: Consider module flags +} + /// Set default libcall names. If a target wants to opt-out of a libcall it /// should be placed here. void RuntimeLibcallsInfo::initLibcalls(const Triple &TT, diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 3c9a27ac24015..40ceb6f6ae28f 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -67,6 +67,7 @@ #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/RegionInfo.h" +#include "llvm/Analysis/RuntimeLibcallInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/ScalarEvolutionDivision.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index dfc47626a1113..51d9c87f25290 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -35,6 +35,7 @@ MODULE_ANALYSIS("no-op-module", NoOpModuleAnalysis()) MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) MODULE_ANALYSIS("profile-summary", ProfileSummaryAnalysis()) MODULE_ANALYSIS("reg-usage", PhysicalRegisterUsageAnalysis()) +MODULE_ANALYSIS("runtime-libcall-info", RuntimeLibraryAnalysis()) MODULE_ANALYSIS("stack-safety", StackSafetyGlobalAnalysis()) MODULE_ANALYSIS("verify", VerifierAnalysis()) diff --git 
a/llvm/lib/Target/Sparc/Sparc.td b/llvm/lib/Target/Sparc/Sparc.td index 7137e5fbff4ff..38b0508885069 100644 --- a/llvm/lib/Target/Sparc/Sparc.td +++ b/llvm/lib/Target/Sparc/Sparc.td @@ -95,6 +95,9 @@ def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true", def TuneSlowRDPC : SubtargetFeature<"slow-rdpc", "HasSlowRDPC", "true", "rd %pc, %XX is slow", [FeatureV9]>; +def TuneNoPredictor : SubtargetFeature<"no-predictor", "HasNoPredictor", "true", + "Processor has no branch predictor, branches stall execution", []>; + //==== Features added predmoninantly for LEON subtarget support include "LeonFeatures.td" @@ -174,12 +177,15 @@ def : Proc<"ultrasparc3", [FeatureV9, FeatureV8Deprecated, FeatureVIS, FeatureVIS2], [TuneSlowRDPC]>; def : Proc<"niagara", [FeatureV9, FeatureV8Deprecated, FeatureVIS, - FeatureVIS2, FeatureUA2005]>; + FeatureVIS2, FeatureUA2005], + [TuneNoPredictor]>; def : Proc<"niagara2", [FeatureV9, FeatureV8Deprecated, UsePopc, - FeatureVIS, FeatureVIS2, FeatureUA2005]>; + FeatureVIS, FeatureVIS2, FeatureUA2005], + [TuneNoPredictor]>; def : Proc<"niagara3", [FeatureV9, FeatureV8Deprecated, UsePopc, FeatureVIS, FeatureVIS2, FeatureVIS3, - FeatureUA2005, FeatureUA2007]>; + FeatureUA2005, FeatureUA2007], + [TuneNoPredictor]>; def : Proc<"niagara4", [FeatureV9, FeatureV8Deprecated, UsePopc, FeatureVIS, FeatureVIS2, FeatureVIS3, FeatureUA2005, FeatureUA2007, FeatureOSA2011, diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp index cbb7db68f7e7c..ae3c32687c207 100644 --- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -2000,6 +2000,14 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM, setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + // Some processors have no branch predictor and have pipelines longer than + // what can be covered by the delay slot. 
This results in a stall, so mark + // branches to be expensive on those processors. + setJumpIsExpensive(Subtarget->hasNoPredictor()); + // The high cost of branching means that using conditional moves will + // still be profitable even if the condition is predictable. + PredictableSelectIsExpensive = !isJumpExpensive(); + setMinFunctionAlignment(Align(4)); computeRegisterProperties(Subtarget->getRegisterInfo()); diff --git a/llvm/lib/Target/Target.cpp b/llvm/lib/Target/Target.cpp index ec673ef4cda52..7387571418c8d 100644 --- a/llvm/lib/Target/Target.cpp +++ b/llvm/lib/Target/Target.cpp @@ -37,6 +37,7 @@ inline LLVMTargetLibraryInfoRef wrap(const TargetLibraryInfoImpl *P) { void llvm::initializeTarget(PassRegistry &Registry) { initializeTargetLibraryInfoWrapperPassPass(Registry); + initializeRuntimeLibraryInfoWrapperPass(Registry); initializeTargetTransformInfoWrapperPassPass(Registry); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td index fc82e5b4a61da..304c4f3fcb028 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td @@ -41,6 +41,11 @@ defm REF_TEST_FUNCREF : I<(outs I32:$res), (ins TypeIndex:$type, FUNCREF:$ref), "ref.test\t$type, $ref", "ref.test $type", 0xfb14>, Requires<[HasGC]>; +defm REF_FUNC : I<(outs FUNCREF:$res), (ins function32_op:$func), + (outs), (ins function32_op:$func), [], + "ref.func\t$func", "ref.func $func", 0xd2>, + Requires<[HasReferenceTypes]>; + defm "" : REF_I; defm "" : REF_I; defm "" : REF_I; diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index d4418c8563780..6c16fcfb282e8 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -4728,9 +4728,9 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) { auto tryPeelOuterNotWrappingLogic = [&](SDNode *Op) { if (Op->getOpcode() == ISD::XOR && Op->hasOneUse() && 
ISD::isBuildVectorAllOnes(Op->getOperand(1).getNode())) { - SDValue InnerOp = Op->getOperand(0); + SDValue InnerOp = getFoldableLogicOp(Op->getOperand(0)); - if (!getFoldableLogicOp(InnerOp)) + if (!InnerOp) return SDValue(); N0 = InnerOp.getOperand(0); diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 019536ca91ae0..9070d252ae09f 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -72,6 +72,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/ProfDataUtils.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" @@ -105,6 +106,7 @@ STATISTIC( STATISTIC(NumShiftUntilZero, "Number of uncountable loops recognized as 'shift until zero' idiom"); +namespace llvm { bool DisableLIRP::All; static cl::opt DisableLIRPAll("disable-" DEBUG_TYPE "-all", @@ -163,6 +165,10 @@ static cl::opt ForceMemsetPatternIntrinsic( cl::desc("Use memset.pattern intrinsic whenever possible"), cl::init(false), cl::Hidden); +extern cl::opt ProfcheckDisableMetadataFixes; + +} // namespace llvm + namespace { class LoopIdiomRecognize { @@ -3199,7 +3205,21 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() { // The loop trip count check. auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount, CurLoop->getName() + ".ivcheck"); - Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB); + SmallVector BranchWeights; + const bool HasBranchWeights = + !ProfcheckDisableMetadataFixes && + extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights); + + auto *BI = Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB); + if (HasBranchWeights) { + if (SuccessorBB == LoopHeaderBB->getTerminator()->getSuccessor(1)) + std::swap(BranchWeights[0], BranchWeights[1]); + // We're not changing the loop profile, so we can reuse the original loop's + // profile. 
+ setBranchWeights(*BI, BranchWeights, + /*IsExpected=*/false); + } + LoopHeaderBB->getTerminator()->eraseFromParent(); // Populate the IV PHI. @@ -3368,10 +3388,10 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, ScalarEvolution *SE, /// %start = <...> /// %extraoffset = <...> /// <...> -/// br label %for.cond +/// br label %loop /// /// loop: -/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %for.cond ] +/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ] /// %nbits = add nsw i8 %iv, %extraoffset /// %val.shifted = {{l,a}shr,shl} i8 %val, %nbits /// %val.shifted.iszero = icmp eq i8 %val.shifted, 0 @@ -3533,7 +3553,19 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() { // The loop terminator. Builder.SetInsertPoint(LoopHeaderBB->getTerminator()); - Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB); + SmallVector BranchWeights; + const bool HasBranchWeights = + !ProfcheckDisableMetadataFixes && + extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights); + + auto *BI = Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB); + if (HasBranchWeights) { + if (InvertedCond) + std::swap(BranchWeights[0], BranchWeights[1]); + // We're not changing the loop profile, so we can reuse the original loop's + // profile. + setBranchWeights(*BI, BranchWeights, /*IsExpected=*/false); + } LoopHeaderBB->getTerminator()->eraseFromParent(); // Populate the IV PHI. 
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index 573a78150ff3d..02b73e85d783f 100644 --- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -1283,6 +1283,12 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, case LibFunc_ilogbl: case LibFunc_logf: case LibFunc_logl: + case LibFunc_nextafter: + case LibFunc_nextafterf: + case LibFunc_nextafterl: + case LibFunc_nexttoward: + case LibFunc_nexttowardf: + case LibFunc_nexttowardl: case LibFunc_pow: case LibFunc_powf: case LibFunc_powl: diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 428a8f4c1348f..dd26a059d56ad 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -304,18 +304,7 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) { } bool IsSingleScalar = vputils::isSingleScalar(Def); - VPLane LastLane(IsSingleScalar ? 0 : VF.getFixedValue() - 1); - // Check if there is a scalar value for the selected lane. - if (!hasScalarValue(Def, LastLane)) { - // At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes and - // VPExpandSCEVRecipes can also be a single scalar. - assert((isa(Def->getDefiningRecipe())) && - "unexpected recipe found to be invariant"); - IsSingleScalar = true; - LastLane = 0; - } // We need to construct the vector value for a single-scalar value by // broadcasting the scalar to all lanes. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index cfe1f1e9d7528..22ea0830bcd32 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1725,7 +1725,9 @@ class VPHistogramRecipe : public VPRecipeBase { #endif }; -/// A recipe for widening select instructions. +/// A recipe for widening select instructions. 
Supports both wide vector and +/// single-scalar conditions, matching the behavior of LLVM IR's select +/// instruction. struct LLVM_ABI_FOR_TEST VPWidenSelectRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { VPWidenSelectRecipe(SelectInst &I, ArrayRef Operands) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 1ee405a62aa68..f792d0a86d50d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -659,7 +659,9 @@ Value *VPInstruction::generate(VPTransformState &State) { } case Instruction::Select: { bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this); - Value *Cond = State.get(getOperand(0), OnlyFirstLaneUsed); + Value *Cond = + State.get(getOperand(0), + OnlyFirstLaneUsed || vputils::isSingleScalar(getOperand(0))); Value *Op1 = State.get(getOperand(1), OnlyFirstLaneUsed); Value *Op2 = State.get(getOperand(2), OnlyFirstLaneUsed); return Builder.CreateSelect(Cond, Op1, Op2, Name); @@ -1968,16 +1970,13 @@ void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent, getOperand(1)->printAsOperand(O, SlotTracker); O << ", "; getOperand(2)->printAsOperand(O, SlotTracker); - O << (isInvariantCond() ? " (condition is loop invariant)" : ""); + O << (vputils::isSingleScalar(getCond()) ? " (condition is single-scalar)" + : ""); } #endif void VPWidenSelectRecipe::execute(VPTransformState &State) { - // The condition can be loop invariant but still defined inside the - // loop. This means that we can't just use the original 'cond' value. - // We have to take the 'vectorized' value and pick the first lane. - // Instcombine will make this a no-op. 
- Value *Cond = State.get(getCond(), isInvariantCond()); + Value *Cond = State.get(getCond(), vputils::isSingleScalar(getCond())); Value *Op0 = State.get(getOperand(1)); Value *Op1 = State.get(getOperand(2)); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 9e65399e75dc7..8ad772fdbf1c5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1286,6 +1286,15 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) { return; } + // Look through broadcast of single-scalar when used as select conditions; in + // that case the scalar condition can be used directly. + if (match(Def, + m_Select(m_Broadcast(m_VPValue(C)), m_VPValue(), m_VPValue())) && + vputils::isSingleScalar(C)) { + Def->setOperand(0, C); + return; + } + if (auto *Phi = dyn_cast(Def)) { if (Phi->getNumOperands() == 1) Phi->replaceAllUsesWith(Phi->getOperand(0)); @@ -4174,6 +4183,59 @@ static bool isAlreadyNarrow(VPValue *VPV) { return RepR && RepR->isSingleScalar(); } +// Convert a wide recipe defining a VPValue \p V feeding an interleave group to +// a narrow variant. +static VPValue * +narrowInterleaveGroupOp(VPValue *V, SmallPtrSetImpl &NarrowedOps) { + auto *R = V->getDefiningRecipe(); + if (!R || NarrowedOps.contains(V)) + return V; + + if (isAlreadyNarrow(V)) + return V; + + if (auto *WideMember0 = dyn_cast(R)) { + for (unsigned Idx = 0, E = WideMember0->getNumOperands(); Idx != E; ++Idx) + WideMember0->setOperand( + Idx, + narrowInterleaveGroupOp(WideMember0->getOperand(Idx), NarrowedOps)); + return V; + } + + if (auto *LoadGroup = dyn_cast(R)) { + // Narrow interleave group to wide load, as transformed VPlan will only + // process one original iteration. 
+ auto *LI = cast(LoadGroup->getInterleaveGroup()->getInsertPos()); + auto *L = new VPWidenLoadRecipe( + *LI, LoadGroup->getAddr(), LoadGroup->getMask(), /*Consecutive=*/true, + /*Reverse=*/false, LI->getAlign(), {}, LoadGroup->getDebugLoc()); + L->insertBefore(LoadGroup); + NarrowedOps.insert(L); + return L; + } + + if (auto *RepR = dyn_cast(R)) { + assert(RepR->isSingleScalar() && + isa(RepR->getUnderlyingInstr()) && + "must be a single scalar load"); + NarrowedOps.insert(RepR); + return RepR; + } + + auto *WideLoad = cast(R); + VPValue *PtrOp = WideLoad->getAddr(); + if (auto *VecPtr = dyn_cast(PtrOp)) + PtrOp = VecPtr->getOperand(0); + // Narrow wide load to uniform scalar load, as transformed VPlan will only + // process one original iteration. + auto *N = new VPReplicateRecipe(&WideLoad->getIngredient(), {PtrOp}, + /*IsUniform*/ true, + /*Mask*/ nullptr, *WideLoad); + N->insertBefore(WideLoad); + NarrowedOps.insert(N); + return N; +} + void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF, unsigned VectorRegWidth) { VPRegionBlock *VectorLoop = Plan.getVectorLoopRegion(); @@ -4275,60 +4337,10 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF, // Convert InterleaveGroup \p R to a single VPWidenLoadRecipe. SmallPtrSet NarrowedOps; - auto NarrowOp = [&NarrowedOps](VPValue *V) -> VPValue * { - auto *R = V->getDefiningRecipe(); - if (!R || NarrowedOps.contains(V)) - return V; - if (auto *LoadGroup = dyn_cast(R)) { - // Narrow interleave group to wide load, as transformed VPlan will only - // process one original iteration. 
- auto *LI = - cast(LoadGroup->getInterleaveGroup()->getInsertPos()); - auto *L = new VPWidenLoadRecipe( - *LI, LoadGroup->getAddr(), LoadGroup->getMask(), /*Consecutive=*/true, - /*Reverse=*/false, LI->getAlign(), {}, LoadGroup->getDebugLoc()); - L->insertBefore(LoadGroup); - NarrowedOps.insert(L); - return L; - } - - if (auto *RepR = dyn_cast(R)) { - assert(RepR->isSingleScalar() && - isa(RepR->getUnderlyingInstr()) && - "must be a single scalar load"); - NarrowedOps.insert(RepR); - return RepR; - } - auto *WideLoad = cast(R); - - VPValue *PtrOp = WideLoad->getAddr(); - if (auto *VecPtr = dyn_cast(PtrOp)) - PtrOp = VecPtr->getOperand(0); - // Narrow wide load to uniform scalar load, as transformed VPlan will only - // process one original iteration. - auto *N = new VPReplicateRecipe(&WideLoad->getIngredient(), {PtrOp}, - /*IsUniform*/ true, - /*Mask*/ nullptr, *WideLoad); - N->insertBefore(WideLoad); - NarrowedOps.insert(N); - return N; - }; - // Narrow operation tree rooted at store groups. 
for (auto *StoreGroup : StoreGroups) { - VPValue *Res = nullptr; - VPValue *Member0 = StoreGroup->getStoredValues()[0]; - if (isAlreadyNarrow(Member0)) { - Res = Member0; - } else if (auto *WideMember0 = - dyn_cast(Member0->getDefiningRecipe())) { - for (unsigned Idx = 0, E = WideMember0->getNumOperands(); Idx != E; ++Idx) - WideMember0->setOperand(Idx, NarrowOp(WideMember0->getOperand(Idx))); - Res = WideMember0; - } else { - Res = NarrowOp(Member0); - } - + VPValue *Res = + narrowInterleaveGroupOp(StoreGroup->getStoredValues()[0], NarrowedOps); auto *SI = cast(StoreGroup->getInterleaveGroup()->getInsertPos()); auto *S = new VPWidenStoreRecipe( diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-buildvector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-buildvector.mir index 3f2bb1eed572b..94ea12d3c66d9 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-buildvector.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-buildvector.mir @@ -22,7 +22,7 @@ body: | ; CHECK-NEXT: %1:_ KnownBits:00001010 SignBits:4 ; CHECK-NEXT: %2:_ KnownBits:0000?01? SignBits:4 ; CHECK-NEXT: %idx:_ KnownBits:0000000000000000000000000000000000000000000000000000000000000001 SignBits:63 - ; CHECK-NEXT: %4:_ KnownBits:???????? 
SignBits:1 + ; CHECK-NEXT: %4:_ KnownBits:00001010 SignBits:4 %0:_(s8) = G_CONSTANT i8 3 %1:_(s8) = G_CONSTANT i8 10 %2:_(<2 x s8>) = G_BUILD_VECTOR %0, %1 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-extract-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-extract-vector.mir new file mode 100644 index 0000000000000..ab576dfccc40c --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-extract-vector.mir @@ -0,0 +1,133 @@ +# NOTE: Assertions have been autogenerated by utils/update_givaluetracking_test_checks.py UTC_ARGS: --version 6 +# RUN: llc -mtriple aarch64 -passes="print" %s -filetype=null 2>&1 | FileCheck %s + +--- +name: all_knownbits_const_idx +body: | + bb.0: + ; CHECK-LABEL: name: @all_knownbits_const_idx + ; CHECK-NEXT: %0:_ KnownBits:00000011 SignBits:6 + ; CHECK-NEXT: %1:_ KnownBits:00001010 SignBits:4 + ; CHECK-NEXT: %2:_ KnownBits:0000?01? SignBits:4 + ; CHECK-NEXT: %idx:_ KnownBits:0000000000000000000000000000000000000000000000000000000000000001 SignBits:63 + ; CHECK-NEXT: %4:_ KnownBits:00001010 SignBits:4 + %0:_(s8) = G_CONSTANT i8 3 + %1:_(s8) = G_CONSTANT i8 10 + %2:_(<2 x s8>) = G_BUILD_VECTOR %0, %1 + %idx:_(s64) = G_CONSTANT i64 1 + %3:_(s8) = G_EXTRACT_VECTOR_ELT %2, %idx +... +--- +name: all_knownbits +body: | + bb.0: + ; CHECK-LABEL: name: @all_knownbits + ; CHECK-NEXT: %0:_ KnownBits:00000011 SignBits:6 + ; CHECK-NEXT: %1:_ KnownBits:00001010 SignBits:4 + ; CHECK-NEXT: %2:_ KnownBits:0000?01? SignBits:4 + ; CHECK-NEXT: %idx:_ KnownBits:???????????????????????????????????????????????????????????????? SignBits:1 + ; CHECK-NEXT: %4:_ KnownBits:0000?01? SignBits:4 + %0:_(s8) = G_CONSTANT i8 3 + %1:_(s8) = G_CONSTANT i8 10 + %2:_(<2 x s8>) = G_BUILD_VECTOR %0, %1 + %idx:_(s64) = COPY $d0 + %3:_(s8) = G_EXTRACT_VECTOR_ELT %2, %idx +... +--- +name: no_knownbits_const_idx +body: | + bb.0: + ; CHECK-LABEL: name: @no_knownbits_const_idx + ; CHECK-NEXT: %0:_ KnownBits:???????? 
SignBits:1 + ; CHECK-NEXT: %idx:_ KnownBits:0000000000000000000000000000000000000000000000000000000000000001 SignBits:63 + ; CHECK-NEXT: %2:_ KnownBits:???????? SignBits:1 + %0:_(<2 x s8>) = COPY $h0 + %idx:_(s64) = G_CONSTANT i64 1 + %1:_(s8) = G_EXTRACT_VECTOR_ELT %0, %idx +... +--- +name: no_knownbits +body: | + bb.0: + ; CHECK-LABEL: name: @no_knownbits + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %idx:_ KnownBits:???????????????????????????????????????????????????????????????? SignBits:1 + ; CHECK-NEXT: %2:_ KnownBits:???????? SignBits:1 + %0:_(<2 x s8>) = COPY $h0 + %idx:_(s64) = COPY $d1 + %1:_(s8) = G_EXTRACT_VECTOR_ELT %0, %idx +... +--- +name: zext_const_idx +body: | + bb.0: + ; CHECK-LABEL: name: @zext_const_idx + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %zext0:_ KnownBits:00000000???????? SignBits:8 + ; CHECK-NEXT: %idx:_ KnownBits:0000000000000000000000000000000000000000000000000000000000000001 SignBits:63 + ; CHECK-NEXT: %3:_ KnownBits:00000000???????? SignBits:8 + %0:_(<2 x s8>) = COPY $h0 + %zext0:_(<2 x s16>) = G_ZEXT %0 + %idx:_(s64) = G_CONSTANT i64 1 + %1:_(s16) = G_EXTRACT_VECTOR_ELT %zext0, %idx +... +--- +name: zext +body: | + bb.0: + + ; CHECK-LABEL: name: @zext + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %zext0:_ KnownBits:00000000???????? SignBits:8 + ; CHECK-NEXT: %idx:_ KnownBits:???????????????????????????????????????????????????????????????? SignBits:1 + ; CHECK-NEXT: %3:_ KnownBits:00000000???????? SignBits:8 + %0:_(<2 x s8>) = COPY $h0 + %zext0:_(<2 x s16>) = G_ZEXT %0 + %idx:_(s64) = COPY $d1 + %1:_(s16) = G_EXTRACT_VECTOR_ELT %zext0, %idx +... +--- +name: sext_const_idx +body: | + bb.0: + ; CHECK-LABEL: name: @sext_const_idx + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %sext0:_ KnownBits:???????????????? 
SignBits:9 + ; CHECK-NEXT: %idx:_ KnownBits:0000000000000000000000000000000000000000000000000000000000000001 SignBits:63 + ; CHECK-NEXT: %3:_ KnownBits:???????????????? SignBits:1 + %0:_(<2 x s8>) = COPY $h0 + %sext0:_(<2 x s16>) = G_SEXT %0 + %idx:_(s64) = G_CONSTANT i64 1 + %1:_(s16) = G_EXTRACT_VECTOR_ELT %sext0, %idx +... +--- +name: sext +body: | + bb.0: + ; CHECK-LABEL: name: @sext + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %sext0:_ KnownBits:???????????????? SignBits:9 + ; CHECK-NEXT: %idx:_ KnownBits:???????????????????????????????????????????????????????????????? SignBits:1 + ; CHECK-NEXT: %3:_ KnownBits:???????????????? SignBits:1 + %0:_(<2 x s8>) = COPY $h0 + %sext0:_(<2 x s16>) = G_SEXT %0 + %idx:_(s64) = COPY $d1 + %1:_(s16) = G_EXTRACT_VECTOR_ELT %sext0, %idx +... +--- +# Verifies known bit computation bails if return type differs from vector +# element type. Without bailing, the 8 lowest bits of %4 would be known. +name: bail_on_different_return_type +body: | + bb.0: + ; CHECK-LABEL: name: @bail_on_different_return_type + ; CHECK-NEXT: %0:_ KnownBits:00000011 SignBits:6 + ; CHECK-NEXT: %1:_ KnownBits:00001010 SignBits:4 + ; CHECK-NEXT: %2:_ KnownBits:0000?01? SignBits:4 + ; CHECK-NEXT: %idx:_ KnownBits:0000000000000000000000000000000000000000000000000000000000000001 SignBits:63 + ; CHECK-NEXT: %4:_ KnownBits:???????????????? 
SignBits:1 + %0:_(s8) = G_CONSTANT i8 3 + %1:_(s8) = G_CONSTANT i8 10 + %2:_(<2 x s8>) = G_BUILD_VECTOR %0, %1 + %idx:_(s64) = G_CONSTANT i64 1 + %3:_(s16) = G_EXTRACT_VECTOR_ELT %2, %idx diff --git a/llvm/test/CodeGen/AArch64/neon-extadd-extract.ll b/llvm/test/CodeGen/AArch64/neon-extadd-extract.ll index 93a50ec305e1e..64cb3603f53a1 100644 --- a/llvm/test/CodeGen/AArch64/neon-extadd-extract.ll +++ b/llvm/test/CodeGen/AArch64/neon-extadd-extract.ll @@ -734,7 +734,7 @@ define <1 x i64> @mullu_v2i32_0(<2 x i32> %s0, <2 x i32> %s1) { ; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0 ; CHECK-GI-NEXT: fmov x8, d0 ; CHECK-GI-NEXT: fmov x9, d1 -; CHECK-GI-NEXT: mul x8, x8, x9 +; CHECK-GI-NEXT: umull x8, w8, w9 ; CHECK-GI-NEXT: fmov d0, x8 ; CHECK-GI-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll index 01c601f0646b5..2843f72353db1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll @@ -2757,47 +2757,47 @@ define <2 x i64> @v_sdiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CGP-NEXT: v_and_b32_e32 v3, 0xffffff, v4 ; CGP-NEXT: v_cvt_f32_u32_e32 v1, v3 -; CGP-NEXT: v_and_b32_e32 v4, 0xffffff, v6 -; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v3 +; CGP-NEXT: v_and_b32_e32 v4, 0xffffff, v0 +; CGP-NEXT: v_and_b32_e32 v5, 0xffffff, v6 +; CGP-NEXT: v_and_b32_e32 v9, 0xffffff, v2 ; CGP-NEXT: v_rcp_f32_e32 v1, v1 -; CGP-NEXT: v_and_b32_e32 v8, 0xffffff, v0 -; CGP-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 -; CGP-NEXT: v_cvt_u32_f32_e32 v5, v1 -; CGP-NEXT: v_cvt_f32_u32_e32 v1, v4 -; CGP-NEXT: v_mul_lo_u32 v6, v6, v5 -; CGP-NEXT: v_rcp_f32_e32 v7, v1 -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v6, 0 -; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v7 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v1 ; CGP-NEXT: v_cvt_u32_f32_e32 v6, v0 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v1 -; CGP-NEXT: v_mad_u64_u32 v[0:1], 
s[4:5], v8, v5, 0 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, 0, v4 -; CGP-NEXT: v_mul_lo_u32 v5, v0, v6 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v3 +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v5 +; CGP-NEXT: v_mul_lo_u32 v7, v1, v6 +; CGP-NEXT: v_rcp_f32_e32 v8, v0 +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v7, 0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v8 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v1 +; CGP-NEXT: v_cvt_u32_f32_e32 v2, v0 +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v6, 0 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, 0, v5 +; CGP-NEXT: v_mul_lo_u32 v6, v0, v2 ; CGP-NEXT: v_mul_lo_u32 v0, v1, v3 ; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v1 -; CGP-NEXT: v_sub_i32_e32 v8, vcc, v8, v0 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v3 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v4, v0 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v3 ; CGP-NEXT: v_cndmask_b32_e32 v7, v1, v7, vcc -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v5, 0 -; CGP-NEXT: v_and_b32_e32 v5, 0xffffff, v2 -; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v8, v3 -; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v6, v1 -; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v5, v6, 0 -; CGP-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, v6, 0 +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v4, v3 +; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v2, v1 +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v9, v6, 0 +; CGP-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc ; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v7 -; CGP-NEXT: v_mul_lo_u32 v6, v2, v4 +; CGP-NEXT: v_mul_lo_u32 v4, v2, v5 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3 ; CGP-NEXT: v_cndmask_b32_e32 v0, v7, v1, vcc ; CGP-NEXT: v_ashrrev_i32_e32 v1, 31, v0 -; CGP-NEXT: v_sub_i32_e32 v3, vcc, v5, v6 -; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v2 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v4 -; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc -; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v3, v4 -; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v2 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v4 -; CGP-NEXT: 
v_cndmask_b32_e32 v2, v2, v5, vcc +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v9, v4 +; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v5 +; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v3, v5 +; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v5 +; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; CGP-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; CGP-NEXT: s_setpc_b64 s[30:31] %num.mask = and <2 x i64> %num, diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll index 704ea37117f32..8e7389ace9c5c 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll @@ -9,11 +9,11 @@ ; RUN: | FileCheck -check-prefix=GCN-O3 %s -; GCN-O0: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,unreachableblockelim,ee-instrument,scalarize-masked-mem-intrin,expand-reductions,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc))),require,cgscc(function(machine-function(reg-usage-propagation,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove
-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,si-post-ra-bundler,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function)) +; GCN-O0: require,require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,unreachableblockelim,ee-instrument,scalarize-masked-mem-intrin,expand-reductions,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc))),require,cgscc(function(machine-function(reg-usage-propagation,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,si-post-ra-bundler,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame
-layout,verify),free-machine-function)) -; GCN-O2: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt,amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,loop-mssa(licm),verify,loop-mssa(canon-freeze,loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,unreachableblockelim,consthoist,replace-with-veclib,partially-inline-libcalls,ee-instrument,scalarize-masked-mem-intrin,expand-reductions,early-cse<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments,codegenprepare,load-store-vectorizer),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require,objc-arc-contract,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions))),require,cgscc(function(machine-function(reg-usage-propagation,amdgpu-prepare-agpr-alloc,detect-dead-lanes,dead-mi-elimination,init-undef,process-imp-defs,unre
achable-mbb-elimination,require,si-opt-vgpr-liverange,require,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy,virt-reg-rewriter,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy,si-lower-wwm-copies,virt-reg-rewriter,amdgpu-reserve-wwm-regs,greedy,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,si-shrink-instructions,si-post-ra-bundler,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,amdgpu-insert-delay-alu,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function)) +; GCN-O2: 
require,require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt,amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,loop-mssa(licm),verify,loop-mssa(canon-freeze,loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,unreachableblockelim,consthoist,replace-with-veclib,partially-inline-libcalls,ee-instrument,scalarize-masked-mem-intrin,expand-reductions,early-cse<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments,codegenprepare,load-store-vectorizer),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require,objc-arc-contract,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions))),require,cgscc(function(machine-function(reg-usage-propagation,amdgpu-prepare-agpr-alloc,detect-dead-lanes,dead-mi-elimination,init-undef,process-imp-defs,unreachable-mbb-elimination,require,si-opt-vgpr
-liverange,require,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy,virt-reg-rewriter,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy,si-lower-wwm-copies,virt-reg-rewriter,amdgpu-reserve-wwm-regs,greedy,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,si-shrink-instructions,si-post-ra-bundler,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,amdgpu-insert-delay-alu,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function)) -; GCN-O3: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt,amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,gvn<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,loop-mssa(licm),verify,loop-mssa(canon-freeze,loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,unreachableblockelim,consthoist,replace-with-veclib,partially-inline-libcalls,ee-instrument,scalarize-masked-mem-intrin,expand-reductions,gvn<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments,codegenprepare,load-store-vectorizer),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require,objc-arc-contract,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions))),require,cgscc(function(machine-function(reg-usage-propagation,amdgpu-prepare-agpr-alloc,detect-dead-lanes,dead-mi-elimination,init-undef,process-imp-defs,unreachable-mbb-elimination,require,si-opt-vgpr-liverange,require,p
hi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy,virt-reg-rewriter,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy,si-lower-wwm-copies,virt-reg-rewriter,amdgpu-reserve-wwm-regs,greedy,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,si-shrink-instructions,si-post-ra-bundler,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,amdgpu-insert-delay-alu,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function)) +; GCN-O3: 
require,require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt,amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,gvn<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,loop-mssa(licm),verify,loop-mssa(canon-freeze,loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,unreachableblockelim,consthoist,replace-with-veclib,partially-inline-libcalls,ee-instrument,scalarize-masked-mem-intrin,expand-reductions,gvn<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments,codegenprepare,load-store-vectorizer),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require,objc-arc-contract,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions))),require,cgscc(function(machine-function(reg-usage-propagation,amdgpu-prepare-agpr-alloc,detect-dead-lanes,dead-mi-elimination,init-undef,process-imp-defs,unreachable-mbb-elimination,require,si-opt-vgpr-liverange,r
equire,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy,virt-reg-rewriter,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy,si-lower-wwm-copies,virt-reg-rewriter,amdgpu-reserve-wwm-regs,greedy,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,si-shrink-instructions,si-post-ra-bundler,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,amdgpu-insert-delay-alu,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function)) define void @empty() { ret void diff --git a/llvm/test/CodeGen/SPARC/predictable-select.ll b/llvm/test/CodeGen/SPARC/predictable-select.ll new file mode 100644 index 0000000000000..cf200a121d0f1 --- /dev/null +++ b/llvm/test/CodeGen/SPARC/predictable-select.ll @@ -0,0 +1,80 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O3 < %s -relocation-model=pic -mtriple=sparc -mcpu=v9 | FileCheck --check-prefix=SPARC %s +; RUN: llc -O3 < %s -relocation-model=pic -mtriple=sparcv9 | FileCheck --check-prefix=SPARC64 %s +; RUN: llc -O3 < %s -relocation-model=pic -mtriple=sparc -mcpu=v9 -mattr=+no-predictor | FileCheck --check-prefix=SPARC-NO-PREDICTOR %s +; RUN: llc -O3 < %s -relocation-model=pic -mtriple=sparcv9 
-mattr=+no-predictor | FileCheck --check-prefix=SPARC64-NO-PREDICTOR %s + +;; Normally, highly predictable selects should be turned into branches. +;; On the other hand, early Niagara processors should prefer conditional moves +;; over branches even when it's predictable. + +define i32 @cdiv(i32 %cond, i32 %num) #0 { +; SPARC-LABEL: cdiv: +; SPARC: ! %bb.0: ! %entry +; SPARC-NEXT: cmp %o0, 0 +; SPARC-NEXT: be %icc, .LBB0_2 +; SPARC-NEXT: mov %o1, %o0 +; SPARC-NEXT: ! %bb.1: ! %select.end +; SPARC-NEXT: retl +; SPARC-NEXT: nop +; SPARC-NEXT: .LBB0_2: ! %select.true.sink +; SPARC-NEXT: sethi 1398101, %o1 +; SPARC-NEXT: or %o1, 342, %o1 +; SPARC-NEXT: smul %o0, %o1, %o0 +; SPARC-NEXT: rd %y, %o0 +; SPARC-NEXT: srl %o0, 31, %o1 +; SPARC-NEXT: retl +; SPARC-NEXT: add %o0, %o1, %o0 +; +; SPARC64-LABEL: cdiv: +; SPARC64: ! %bb.0: ! %entry +; SPARC64-NEXT: cmp %o0, 0 +; SPARC64-NEXT: be %icc, .LBB0_2 +; SPARC64-NEXT: mov %o1, %o0 +; SPARC64-NEXT: ! %bb.1: ! %select.end +; SPARC64-NEXT: retl +; SPARC64-NEXT: nop +; SPARC64-NEXT: .LBB0_2: ! %select.true.sink +; SPARC64-NEXT: sra %o0, 0, %o0 +; SPARC64-NEXT: sethi 1398101, %o1 +; SPARC64-NEXT: or %o1, 342, %o1 +; SPARC64-NEXT: mulx %o0, %o1, %o0 +; SPARC64-NEXT: srlx %o0, 63, %o1 +; SPARC64-NEXT: srlx %o0, 32, %o0 +; SPARC64-NEXT: retl +; SPARC64-NEXT: add %o0, %o1, %o0 +; +; SPARC-NO-PREDICTOR-LABEL: cdiv: +; SPARC-NO-PREDICTOR: ! %bb.0: ! %entry +; SPARC-NO-PREDICTOR-NEXT: sethi 1398101, %o2 +; SPARC-NO-PREDICTOR-NEXT: or %o2, 342, %o2 +; SPARC-NO-PREDICTOR-NEXT: smul %o1, %o2, %o2 +; SPARC-NO-PREDICTOR-NEXT: rd %y, %o2 +; SPARC-NO-PREDICTOR-NEXT: srl %o2, 31, %o3 +; SPARC-NO-PREDICTOR-NEXT: add %o2, %o3, %o2 +; SPARC-NO-PREDICTOR-NEXT: cmp %o0, 0 +; SPARC-NO-PREDICTOR-NEXT: move %icc, %o2, %o1 +; SPARC-NO-PREDICTOR-NEXT: retl +; SPARC-NO-PREDICTOR-NEXT: mov %o1, %o0 +; +; SPARC64-NO-PREDICTOR-LABEL: cdiv: +; SPARC64-NO-PREDICTOR: ! %bb.0: ! 
%entry +; SPARC64-NO-PREDICTOR-NEXT: sra %o1, 0, %o2 +; SPARC64-NO-PREDICTOR-NEXT: sethi 1398101, %o3 +; SPARC64-NO-PREDICTOR-NEXT: or %o3, 342, %o3 +; SPARC64-NO-PREDICTOR-NEXT: mulx %o2, %o3, %o2 +; SPARC64-NO-PREDICTOR-NEXT: srlx %o2, 63, %o3 +; SPARC64-NO-PREDICTOR-NEXT: srlx %o2, 32, %o2 +; SPARC64-NO-PREDICTOR-NEXT: add %o2, %o3, %o2 +; SPARC64-NO-PREDICTOR-NEXT: cmp %o0, 0 +; SPARC64-NO-PREDICTOR-NEXT: move %icc, %o2, %o1 +; SPARC64-NO-PREDICTOR-NEXT: retl +; SPARC64-NO-PREDICTOR-NEXT: mov %o1, %o0 +entry: + %div = sdiv i32 %num, 3 + %cmp = icmp eq i32 %cond, 0 + %ret = select i1 %cmp, i32 %div, i32 %num + ret i32 %ret +} + +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/X86/issue163738.ll b/llvm/test/CodeGen/X86/vpternlog.ll similarity index 59% rename from llvm/test/CodeGen/X86/issue163738.ll rename to llvm/test/CodeGen/X86/vpternlog.ll index 61fe043a970dd..bd7478d3a82d5 100644 --- a/llvm/test/CodeGen/X86/issue163738.ll +++ b/llvm/test/CodeGen/X86/vpternlog.ll @@ -11,3 +11,15 @@ define <8 x i64> @foo(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c) { %and3 = xor <8 x i64> %and3.demorgan, splat (i64 -1) ret <8 x i64> %and3 } + +define <8 x i64> @xorbitcast(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c) { +; CHECK-LABEL: xorbitcast: +; CHECK: # %bb.0: +; CHECK-NEXT: vpternlogq {{.*#+}} zmm0 = ~(zmm0 | zmm2 | zmm1) +; CHECK-NEXT: retq + %or1 = or <64 x i8> %a, %b + %or2 = or <64 x i8> %or1, %c + %cast = bitcast <64 x i8> %or2 to <8 x i64> + %xor = xor <8 x i64> %cast, splat (i64 -1) + ret <8 x i64> %xor +} diff --git a/llvm/test/MC/WebAssembly/reference-types.s b/llvm/test/MC/WebAssembly/reference-types.s index 7a838fc519493..a694abf25826b 100644 --- a/llvm/test/MC/WebAssembly/reference-types.s +++ b/llvm/test/MC/WebAssembly/reference-types.s @@ -105,3 +105,12 @@ ref_block_test: end_block drop end_function + +# CHECK-LABEL: ref_func_test: +# CHECK-NEXT: .functype ref_func_test () -> (funcref) +# CHECK-NEXT: ref.func ref_func_test # encoding: 
[0xd2,0x80'A',0x80'A',0x80'A',0x80'A',A] +# CHECK-NEXT: # fixup A - offset: 1, value: ref_func_test, kind: fixup_uleb128_i32 +ref_func_test: + .functype ref_func_test () -> (funcref) + ref.func ref_func_test + end_function diff --git a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll index 51e22bb86f331..25a70a026a0b7 100644 --- a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll +++ b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll @@ -762,6 +762,24 @@ declare float @nearbyintf(float) ; CHECK: declare x86_fp80 @nearbyintl(x86_fp80) [[MEMNONE_NOFREE_NOUNWIND_WILLRETURN:#[0-9]+]] declare x86_fp80 @nearbyintl(x86_fp80) +; CHECK: declare double @nextafter(double, double) [[ERRNOMEMONLY_NOFREE_NOUNWIND_WILLRETURN:#[0-9]+]] +declare double @nextafter(double, double) + +; CHECK: declare float @nextafterf(float, float) [[ERRNOMEMONLY_NOFREE_NOUNWIND_WILLRETURN:#[0-9]+]] +declare float @nextafterf(float, float) + +; CHECK: declare x86_fp80 @nextafterl(x86_fp80, x86_fp80) [[ERRNOMEMONLY_NOFREE_NOUNWIND_WILLRETURN:#[0-9]+]] +declare x86_fp80 @nextafterl(x86_fp80, x86_fp80) + +; CHECK: declare double @nexttoward(double, x86_fp80) [[ERRNOMEMONLY_NOFREE_NOUNWIND_WILLRETURN:#[0-9]+]] +declare double @nexttoward(double, x86_fp80) + +; CHECK: declare float @nexttowardf(float, x86_fp80) [[ERRNOMEMONLY_NOFREE_NOUNWIND_WILLRETURN:#[0-9]+]] +declare float @nexttowardf(float, x86_fp80) + +; CHECK: declare x86_fp80 @nexttowardl(x86_fp80, x86_fp80) [[ERRNOMEMONLY_NOFREE_NOUNWIND_WILLRETURN:#[0-9]+]] +declare x86_fp80 @nexttowardl(x86_fp80, x86_fp80) + ; CHECK-LINUX: declare noundef i32 @open(ptr noundef readonly captures(none), i32 noundef, ...) [[NOFREE]] ; CHECK-OPEN: declare noundef i32 @open(ptr noundef readonly captures(none), i32 noundef, ...) [[NOFREE:#[0-9]+]] declare i32 @open(ptr, i32, ...) 
diff --git a/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll b/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll new file mode 100644 index 0000000000000..d01bb748d9422 --- /dev/null +++ b/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll @@ -0,0 +1,70 @@ +; RUN: opt -passes="module(print),function(loop(loop-idiom)),module(print)" -mtriple=x86_64 -mcpu=core-avx2 %s -disable-output 2>&1 | FileCheck --check-prefix=PROFILE %s + +declare void @escape_inner(i8, i8, i8, i1, i8) +declare void @escape_outer(i8, i8, i8, i1, i8) + +declare i8 @gen.i8() + +; Most basic pattern; Note that iff the shift amount is offset, said offsetting +; must not cause an overflow, but `add nsw` is fine. +define i8 @p0(i8 %val, i8 %start, i8 %extraoffset) mustprogress { +entry: + br label %loop + +loop: + %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ] + %nbits = add nsw i8 %iv, %extraoffset + %val.shifted = ashr i8 %val, %nbits + %val.shifted.iszero = icmp eq i8 %val.shifted, 0 + %iv.next = add i8 %iv, 1 + + call void @escape_inner(i8 %iv, i8 %nbits, i8 %val.shifted, i1 %val.shifted.iszero, i8 %iv.next) + + br i1 %val.shifted.iszero, label %end, label %loop, !prof !{!"branch_weights", i32 1, i32 1000 } + +end: + %iv.res = phi i8 [ %iv, %loop ] + %nbits.res = phi i8 [ %nbits, %loop ] + %val.shifted.res = phi i8 [ %val.shifted, %loop ] + %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ] + %iv.next.res = phi i8 [ %iv.next, %loop ] + + call void @escape_outer(i8 %iv.res, i8 %nbits.res, i8 %val.shifted.res, i1 %val.shifted.iszero.res, i8 %iv.next.res) + + ret i8 %iv.res +} + +define i32 @p1(i32 %x, i32 %bit) { +entry: + %bitmask = shl i32 1, %bit + br label %loop + +loop: + %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ] + %x.curr.bitmasked = and i32 %x.curr, %bitmask + %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0 + %x.next = shl i32 %x.curr, 1 + br i1 %x.curr.isbitunset, label %loop, label %end, !prof !{!"branch_weights", i32 500, i32 1 } + +end: 
+ ret i32 %x.curr +} + +; +; PROFILE: Printing analysis results of BFI for function 'p0': +; PROFILE: block-frequency-info: p0 +; PROFILE: - entry: float = 1.0, +; PROFILE: - loop: float = 1001.0, +; PROFILE: - end: float = 1.0, +; PROFILE: block-frequency-info: p1 +; PROFILE: - entry: float = 1.0, +; PROFILE: - loop: float = 501.0, +; PROFILE: - end: float = 1.0, +; PROFILE: block-frequency-info: p0 +; PROFILE: - entry: float = 1.0, +; PROFILE: - loop: float = 1001.0, +; PROFILE: - end: float = 1.0, +; PROFILE: block-frequency-info: p1 +; PROFILE: - entry: float = 1.0, +; PROFILE: - loop: float = 501.0, +; PROFILE: - end: float = 1.0, diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll index dc52e644742e2..a49f089bd2085 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll @@ -150,10 +150,11 @@ define i32 @select_icmp_var_start_iv_trunc(i32 %N, i32 %start) #0 { ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) ; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4) -; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP1]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] -; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP1]], <4 x i32> [[STEP_ADD]], <4 x i32> [[VEC_PHI2]] -; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP1]], <4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI3]] -; CHECK-NEXT: [[TMP6]] = select <4 x i1> [[TMP1]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI4]] +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3]] = select i1 [[TMP2]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[TMP4]] = select i1 [[TMP2]], <4 x i32> [[STEP_ADD]], <4 x i32> [[VEC_PHI2]] +; CHECK-NEXT: [[TMP5]] = select i1 [[TMP2]], 
<4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI3]] +; CHECK-NEXT: [[TMP6]] = select i1 [[TMP2]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI4]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -191,7 +192,8 @@ define i32 @select_icmp_var_start_iv_trunc(i32 %N, i32 %start) #0 { ; CHECK-NEXT: [[INDEX11:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT17:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI12:%.*]] = phi <4 x i32> [ [[DOTSPLAT14]], %[[VEC_EPILOG_PH]] ], [ [[TMP14:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND15:%.*]] = phi <4 x i32> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT16:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP14]] = select <4 x i1> [[TMP11]], <4 x i32> [[VEC_IND15]], <4 x i32> [[VEC_PHI12]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP11]], i32 0 +; CHECK-NEXT: [[TMP14]] = select i1 [[TMP13]], <4 x i32> [[VEC_IND15]], <4 x i32> [[VEC_PHI12]] ; CHECK-NEXT: [[INDEX_NEXT17]] = add nuw i64 [[INDEX11]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT16]] = add <4 x i32> [[VEC_IND15]], splat (i32 4) ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT17]], [[N_VEC8]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll index 56a1abd2384c8..3c83c01929aae 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll @@ -66,13 +66,11 @@ define float @fmaxnum(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP15:%.*]] = freeze <4 x i1> [[TMP4]] ; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP18]], [[TMP15]] ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) -; CHECK-NEXT: 
[[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP6]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP6]], [[TMP9]] ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP7]] -; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI1]], <4 x float> [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP6]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP6]], <4 x float> [[VEC_PHI1]], <4 x float> [[TMP8]] ; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP6]], i64 [[IV]], i64 [[N_VEC]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP11]], <4 x float> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[RDX_MINMAX_SELECT]]) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll index d4f1227a38bda..711a9cd03ac15 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll @@ -66,13 +66,11 @@ define float @fminnum(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP18:%.*]] = freeze <4 x i1> [[TMP4]] ; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP15]], [[TMP18]] ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP6]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> 
zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP6]], [[TMP9]] ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP7]] -; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI1]], <4 x float> [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP6]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP6]], <4 x float> [[VEC_PHI1]], <4 x float> [[TMP8]] ; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP6]], i64 [[IV]], i64 [[N_VEC]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP11]], <4 x float> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[RDX_MINMAX_SELECT]]) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/invariant-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/AArch64/invariant-replicate-region.ll index d80fdd1ce7270..9dfb987bd24a6 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/invariant-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/invariant-replicate-region.ll @@ -11,8 +11,6 @@ define i32 @test_invariant_replicate_region(i32 %x, i1 %c) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_UREM_CONTINUE6:.*]] ] @@ -43,8 +41,8 @@ define i32 @test_invariant_replicate_region(i32 %x, i1 %c) { ; CHECK-NEXT: [[TMP14:%.*]] = 
insertelement <4 x i32> [[TMP11]], i32 [[TMP13]], i32 3 ; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE6]] ; CHECK: [[PRED_UREM_CONTINUE6]]: -; CHECK-NEXT: [[TMP15:%.*]] = phi <4 x i32> [ [[TMP11]], %[[PRED_UREM_CONTINUE4]] ], [ [[TMP14]], %[[PRED_UREM_IF5]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[TMP15]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = phi <4 x i32> [ [[TMP11]], %[[PRED_UREM_CONTINUE4]] ], [ [[TMP14]], %[[PRED_UREM_IF5]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[C]], <4 x i32> [[TMP12]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll index 35589573eed76..3311cbc11881b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll @@ -68,7 +68,8 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFCOMMON-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i64 0 ; TFCOMMON-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer ; TFCOMMON-NEXT: [[TMP9:%.*]] = fcmp ogt <2 x double> [[TMP8]], zeroinitializer -; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00) +; TFCOMMON-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0 +; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP7]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00) ; TFCOMMON-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 0 ; 
TFCOMMON-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; TFCOMMON: pred.store.if: @@ -109,7 +110,8 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFA_INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i64 0 ; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer ; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = fcmp ogt <2 x double> [[TMP12]], zeroinitializer -; TFA_INTERLEAVE-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP14]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00) +; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP14]], i32 0 +; TFA_INTERLEAVE-NEXT: [[PREDPHI3:%.*]] = select i1 [[TMP7]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00) ; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 0 ; TFA_INTERLEAVE-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; TFA_INTERLEAVE: pred.store.if: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops-chained.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops-chained.ll new file mode 100644 index 0000000000000..23bc21a49a8b3 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops-chained.ll @@ -0,0 +1,659 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph:" --version 6 +; RUN: opt -p loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck --check-prefixes=VF2 %s + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" +target triple = "arm64-apple-macosx" + 
+define void @test_2xi64_mul_add(ptr noalias %data, ptr noalias %factor) { +; VF2-LABEL: define void @test_2xi64_mul_add( +; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8 +; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]] +; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 +; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> +; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> +; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC]] +; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP3]], splat (i64 2) +; VF2-NEXT: [[TMP5:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]] +; VF2-NEXT: [[TMP6:%.*]] = add <2 x i64> [[TMP5]], splat (i64 2) +; VF2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = 
getelementptr inbounds i64, ptr %factor, i64 %iv + %l.factor = load i64, ptr %arrayidx, align 8 + %idx.0 = shl nsw i64 %iv, 1 + %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0 + %l.0 = load i64, ptr %data.0, align 8 + %mul.0 = mul i64 %l.factor, %l.0 + %add.0 = add i64 %mul.0, 2 + store i64 %add.0, ptr %data.0, align 8 + %idx.1 = or disjoint i64 %idx.0, 1 + %data.1 = getelementptr inbounds i64, ptr %data, i64 %idx.1 + %l.1 = load i64, ptr %data.1, align 8 + %mul.1 = mul i64 %l.factor, %l.1 + %add.1 = add i64 %mul.1, 2 + store i64 %add.1, ptr %data.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_mixed_opcodes1(ptr noalias %data, ptr noalias %factor) { +; VF2-LABEL: define void @test_2xi64_mixed_opcodes1( +; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8 +; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]] +; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 +; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> +; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> +; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC]] +; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP3]], splat (i64 2) +; VF2-NEXT: [[TMP5:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]] +; VF2-NEXT: [[TMP6:%.*]] = xor <2 x i64> 
[[TMP5]], splat (i64 2) +; VF2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv + %l.factor = load i64, ptr %arrayidx, align 8 + %idx.0 = shl nsw i64 %iv, 1 + %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0 + %l.0 = load i64, ptr %data.0, align 8 + %mul.0 = mul i64 %l.factor, %l.0 + %add.0 = add i64 %mul.0, 2 + store i64 %add.0, ptr %data.0, align 8 + %idx.1 = or disjoint i64 %idx.0, 1 + %data.1 = getelementptr inbounds i64, ptr %data, i64 %idx.1 + %l.1 = load i64, ptr %data.1, align 8 + %mul.1 = mul i64 %l.factor, %l.1 + %add.1 = xor i64 %mul.1, 2 + store i64 %add.1, ptr %data.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_mixed_opcodes2(ptr noalias %data, ptr noalias %factor) { +; VF2-LABEL: define void @test_2xi64_mixed_opcodes2( +; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] +; VF2-NEXT: 
[[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8 +; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]] +; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 +; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> +; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> +; VF2-NEXT: [[TMP3:%.*]] = xor <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC]] +; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP3]], splat (i64 2) +; VF2-NEXT: [[TMP5:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]] +; VF2-NEXT: [[TMP6:%.*]] = add <2 x i64> [[TMP5]], splat (i64 2) +; VF2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv + %l.factor = load i64, ptr %arrayidx, align 8 + %idx.0 = shl nsw i64 %iv, 1 + %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0 + %l.0 = load i64, ptr %data.0, align 8 + %mul.0 = xor i64 %l.factor, %l.0 + %add.0 = add i64 %mul.0, 2 + store i64 %add.0, ptr %data.0, align 8 + %idx.1 = or disjoint i64 %idx.0, 1 + %data.1 = getelementptr inbounds i64, ptr %data, i64 %idx.1 + %l.1 = load i64, ptr %data.1, align 8 + %mul.1 = mul i64 %l.factor, %l.1 + %add.1 = add i64 %mul.1, 2 + store i64 %add.1, ptr 
%data.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_mul_sub(ptr noalias %data, ptr noalias %factor) { +; VF2-LABEL: define void @test_2xi64_mul_sub( +; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8 +; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]] +; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 +; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> +; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> +; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC]] +; VF2-NEXT: [[TMP4:%.*]] = sub <2 x i64> [[TMP3]], splat (i64 2) +; VF2-NEXT: [[TMP5:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]] +; VF2-NEXT: [[TMP6:%.*]] = sub <2 x i64> [[TMP5]], splat (i64 2) +; VF2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; 
VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv + %l.factor = load i64, ptr %arrayidx, align 8 + %idx.0 = shl nsw i64 %iv, 1 + %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0 + %l.0 = load i64, ptr %data.0, align 8 + %mul.0 = mul i64 %l.factor, %l.0 + %sub.0 = sub i64 %mul.0, 2 + store i64 %sub.0, ptr %data.0, align 8 + %idx.1 = or disjoint i64 %idx.0, 1 + %data.1 = getelementptr inbounds i64, ptr %data, i64 %idx.1 + %l.1 = load i64, ptr %data.1, align 8 + %mul.1 = mul i64 %l.factor, %l.1 + %sub.1 = sub i64 %mul.1, 2 + store i64 %sub.1, ptr %data.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_mul_sub_mismatched_ops1(ptr noalias %data, ptr noalias %factor) { +; VF2-LABEL: define void @test_2xi64_mul_sub_mismatched_ops1( +; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8 +; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]] +; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 +; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> +; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> +; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC]] +; VF2-NEXT: [[TMP4:%.*]] = 
sub <2 x i64> [[TMP3]], splat (i64 2) +; VF2-NEXT: [[TMP5:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]] +; VF2-NEXT: [[TMP6:%.*]] = sub <2 x i64> [[TMP5]], splat (i64 3) +; VF2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv + %l.factor = load i64, ptr %arrayidx, align 8 + %idx.0 = shl nsw i64 %iv, 1 + %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0 + %l.0 = load i64, ptr %data.0, align 8 + %mul.0 = mul i64 %l.factor, %l.0 + %sub.0 = sub i64 %mul.0, 2 + store i64 %sub.0, ptr %data.0, align 8 + %idx.1 = or disjoint i64 %idx.0, 1 + %data.1 = getelementptr inbounds i64, ptr %data, i64 %idx.1 + %l.1 = load i64, ptr %data.1, align 8 + %mul.1 = mul i64 %l.factor, %l.1 + %sub.1 = sub i64 %mul.1, 3 + store i64 %sub.1, ptr %data.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_mul_sub_mismatched_ops2(ptr noalias %data, ptr noalias %factor) { +; VF2-LABEL: define void @test_2xi64_mul_sub_mismatched_ops2( +; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 
0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8 +; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]] +; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 +; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> +; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> +; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], splat (i64 3) +; VF2-NEXT: [[TMP4:%.*]] = sub <2 x i64> [[TMP3]], splat (i64 2) +; VF2-NEXT: [[TMP5:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]] +; VF2-NEXT: [[TMP6:%.*]] = sub <2 x i64> [[TMP5]], splat (i64 2) +; VF2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv + %l.factor = load i64, ptr %arrayidx, align 8 + %idx.0 = shl nsw i64 %iv, 1 + %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0 + %l.0 = load i64, ptr %data.0, align 8 + %mul.0 = mul i64 %l.factor, 3 + %sub.0 = sub i64 %mul.0, 2 + store i64 %sub.0, ptr %data.0, align 8 + %idx.1 = or disjoint i64 %idx.0, 1 + %data.1 = getelementptr inbounds 
i64, ptr %data, i64 %idx.1 + %l.1 = load i64, ptr %data.1, align 8 + %mul.1 = mul i64 %l.factor, %l.1 + %sub.1 = sub i64 %mul.1, 2 + store i64 %sub.1, ptr %data.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_mul_sub_mismatched_op_order(ptr noalias %data, ptr noalias %factor) { +; VF2-LABEL: define void @test_2xi64_mul_sub_mismatched_op_order( +; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8 +; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]] +; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 +; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> +; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> +; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC]] +; VF2-NEXT: [[TMP4:%.*]] = sub <2 x i64> [[TMP3]], splat (i64 2) +; VF2-NEXT: [[TMP5:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]] +; VF2-NEXT: [[TMP6:%.*]] = sub <2 x i64> splat (i64 2), [[TMP5]] +; VF2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 
[[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv + %l.factor = load i64, ptr %arrayidx, align 8 + %idx.0 = shl nsw i64 %iv, 1 + %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0 + %l.0 = load i64, ptr %data.0, align 8 + %mul.0 = mul i64 %l.factor, %l.0 + %sub.0 = sub i64 %mul.0, 2 + store i64 %sub.0, ptr %data.0, align 8 + %idx.1 = or disjoint i64 %idx.0, 1 + %data.1 = getelementptr inbounds i64, ptr %data, i64 %idx.1 + %l.1 = load i64, ptr %data.1, align 8 + %mul.1 = mul i64 %l.factor, %l.1 + %sub.1 = sub i64 2, %mul.1 + store i64 %sub.1, ptr %data.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_mul_add_xor(ptr noalias %data, ptr noalias %factor) { +; VF2-LABEL: define void @test_2xi64_mul_add_xor( +; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8 +; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]] +; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 +; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> +; VF2-NEXT: [[STRIDED_VEC1:%.*]] = 
shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> +; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC]] +; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP3]], splat (i64 2) +; VF2-NEXT: [[TMP5:%.*]] = xor <2 x i64> splat (i64 4), [[TMP4]] +; VF2-NEXT: [[TMP6:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]] +; VF2-NEXT: [[TMP7:%.*]] = add <2 x i64> [[TMP6]], splat (i64 2) +; VF2-NEXT: [[TMP8:%.*]] = xor <2 x i64> splat (i64 4), [[TMP7]] +; VF2-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP8]], <4 x i32> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv + %l.factor = load i64, ptr %arrayidx, align 8 + %idx.0 = shl nsw i64 %iv, 1 + %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0 + %l.0 = load i64, ptr %data.0, align 8 + %mul.0 = mul i64 %l.factor, %l.0 + %add.0 = add i64 %mul.0, 2 + %xor.0 = xor i64 4, %add.0 + store i64 %xor.0, ptr %data.0, align 8 + %idx.1 = or disjoint i64 %idx.0, 1 + %data.1 = getelementptr inbounds i64, ptr %data, i64 %idx.1 + %l.1 = load i64, ptr %data.1, align 8 + %mul.1 = mul i64 %l.factor, %l.1 + %add.1 = add i64 %mul.1, 2 + %xor.1 = xor i64 4, %add.1 + store i64 %xor.1, ptr %data.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void 
@test_2xi64_mul_add_xor_mismatched_opcodes1(ptr noalias %data, ptr noalias %factor) { +; VF2-LABEL: define void @test_2xi64_mul_add_xor_mismatched_opcodes1( +; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8 +; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]] +; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 +; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> +; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> +; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC]] +; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP3]], splat (i64 2) +; VF2-NEXT: [[TMP5:%.*]] = xor <2 x i64> splat (i64 4), [[TMP4]] +; VF2-NEXT: [[TMP6:%.*]] = sub <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]] +; VF2-NEXT: [[TMP7:%.*]] = add <2 x i64> [[TMP6]], splat (i64 2) +; VF2-NEXT: [[TMP8:%.*]] = xor <2 x i64> splat (i64 4), [[TMP7]] +; VF2-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP8]], <4 x i32> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br 
label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv + %l.factor = load i64, ptr %arrayidx, align 8 + %idx.0 = shl nsw i64 %iv, 1 + %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0 + %l.0 = load i64, ptr %data.0, align 8 + %mul.0 = mul i64 %l.factor, %l.0 + %add.0 = add i64 %mul.0, 2 + %xor.0 = xor i64 4, %add.0 + store i64 %xor.0, ptr %data.0, align 8 + %idx.1 = or disjoint i64 %idx.0, 1 + %data.1 = getelementptr inbounds i64, ptr %data, i64 %idx.1 + %l.1 = load i64, ptr %data.1, align 8 + %mul.1 = sub i64 %l.factor, %l.1 + %add.1 = add i64 %mul.1, 2 + %xor.1 = xor i64 4, %add.1 + store i64 %xor.1, ptr %data.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_mul_add_xor_mismatched_opcodes2(ptr noalias %data, ptr noalias %factor) { +; VF2-LABEL: define void @test_2xi64_mul_add_xor_mismatched_opcodes2( +; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8 +; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]] +; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 +; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> +; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> +; 
VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC]] +; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP3]], splat (i64 2) +; VF2-NEXT: [[TMP5:%.*]] = xor <2 x i64> splat (i64 4), [[TMP4]] +; VF2-NEXT: [[TMP6:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]] +; VF2-NEXT: [[TMP7:%.*]] = mul <2 x i64> [[TMP6]], splat (i64 2) +; VF2-NEXT: [[TMP8:%.*]] = xor <2 x i64> splat (i64 4), [[TMP7]] +; VF2-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP8]], <4 x i32> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv + %l.factor = load i64, ptr %arrayidx, align 8 + %idx.0 = shl nsw i64 %iv, 1 + %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0 + %l.0 = load i64, ptr %data.0, align 8 + %mul.0 = mul i64 %l.factor, %l.0 + %add.0 = add i64 %mul.0, 2 + %xor.0 = xor i64 4, %add.0 + store i64 %xor.0, ptr %data.0, align 8 + %idx.1 = or disjoint i64 %idx.0, 1 + %data.1 = getelementptr inbounds i64, ptr %data, i64 %idx.1 + %l.1 = load i64, ptr %data.1, align 8 + %mul.1 = mul i64 %l.factor, %l.1 + %add.1 = mul i64 %mul.1, 2 + %xor.1 = xor i64 4, %add.1 + store i64 %xor.1, ptr %data.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_mul_add_xor_mismatched_ops(ptr noalias %data, ptr noalias %factor) { +; VF2-LABEL: define void 
@test_2xi64_mul_add_xor_mismatched_ops( +; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8 +; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]] +; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 +; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> +; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> +; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], splat (i64 3) +; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP3]], splat (i64 2) +; VF2-NEXT: [[TMP5:%.*]] = xor <2 x i64> splat (i64 4), [[TMP4]] +; VF2-NEXT: [[TMP6:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]] +; VF2-NEXT: [[TMP7:%.*]] = add <2 x i64> [[TMP6]], splat (i64 2) +; VF2-NEXT: [[TMP8:%.*]] = xor <2 x i64> splat (i64 4), [[TMP7]] +; VF2-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP8]], <4 x i32> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, 
%entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv + %l.factor = load i64, ptr %arrayidx, align 8 + %idx.0 = shl nsw i64 %iv, 1 + %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0 + %l.0 = load i64, ptr %data.0, align 8 + %mul.0 = mul i64 %l.factor, 3 + %add.0 = add i64 %mul.0, 2 + %xor.0 = xor i64 4, %add.0 + store i64 %xor.0, ptr %data.0, align 8 + %idx.1 = or disjoint i64 %idx.0, 1 + %data.1 = getelementptr inbounds i64, ptr %data, i64 %idx.1 + %l.1 = load i64, ptr %data.1, align 8 + %mul.1 = mul i64 %l.factor, %l.1 + %add.1 = add i64 %mul.1, 2 + %xor.1 = xor i64 4, %add.1 + store i64 %xor.1, ptr %data.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll index 01b4502308c95..f88778b991b0b 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll @@ -285,7 +285,8 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP16:%.*]] = select [[TMP15]], [[TMP6]], zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = select [[TMP16]], [[BROADCAST_SPLAT]], splat (i64 1) ; CHECK-NEXT: [[TMP11:%.*]] = udiv [[WIDE_LOAD]], [[TMP10]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP6]], [[TMP11]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement [[TMP6]], i32 0 +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP9]], [[TMP11]], [[WIDE_LOAD]] ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[PREDPHI]], ptr align 8 [[TMP8]], splat (i1 true), i32 [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP13]], [[INDEX]] @@ -304,14 +305,16 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 
x i64> poison, i64 [[V:%.*]], i64 0 ; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXED-NEXT: [[TMP0:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT]], zeroinitializer -; FIXED-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1) +; FIXED-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 +; FIXED-NEXT: [[TMP5:%.*]] = select i1 [[TMP1]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1) ; FIXED-NEXT: br label [[VECTOR_BODY:%.*]] ; FIXED: vector.body: ; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; FIXED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; FIXED-NEXT: [[TMP8:%.*]] = udiv <4 x i64> [[WIDE_LOAD1]], [[TMP5]] -; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[TMP8]], <4 x i64> [[WIDE_LOAD1]] +; FIXED-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 +; FIXED-NEXT: [[PREDPHI2:%.*]] = select i1 [[TMP6]], <4 x i64> [[TMP8]], <4 x i64> [[WIDE_LOAD1]] ; FIXED-NEXT: store <4 x i64> [[PREDPHI2]], ptr [[TMP2]], align 8 ; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; FIXED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 @@ -366,7 +369,8 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP16:%.*]] = select [[TMP15]], [[TMP6]], zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = select [[TMP16]], [[BROADCAST_SPLAT]], splat (i64 1) ; CHECK-NEXT: [[TMP11:%.*]] = sdiv [[WIDE_LOAD]], [[TMP10]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP6]], [[TMP11]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement [[TMP6]], i32 0 +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP9]], [[TMP11]], [[WIDE_LOAD]] ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[PREDPHI]], ptr align 8 
[[TMP8]], splat (i1 true), i32 [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP13]], [[INDEX]] @@ -385,14 +389,16 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0 ; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXED-NEXT: [[TMP0:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT]], zeroinitializer -; FIXED-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1) +; FIXED-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 +; FIXED-NEXT: [[TMP5:%.*]] = select i1 [[TMP1]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1) ; FIXED-NEXT: br label [[VECTOR_BODY:%.*]] ; FIXED: vector.body: ; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; FIXED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; FIXED-NEXT: [[TMP8:%.*]] = sdiv <4 x i64> [[WIDE_LOAD1]], [[TMP5]] -; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[TMP8]], <4 x i64> [[WIDE_LOAD1]] +; FIXED-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 +; FIXED-NEXT: [[PREDPHI2:%.*]] = select i1 [[TMP6]], <4 x i64> [[TMP8]], <4 x i64> [[WIDE_LOAD1]] ; FIXED-NEXT: store <4 x i64> [[PREDPHI2]], ptr [[TMP2]], align 8 ; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; FIXED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 @@ -661,14 +667,12 @@ define i32 @udiv_sdiv_with_invariant_divisors(i8 %x, i16 %y, i1 %c) { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i8 [[X:%.*]], i64 0 ; 
CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i1 [[C:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i16 ; CHECK-NEXT: [[TMP4:%.*]] = add i16 -12, [[DOTCAST]] ; CHECK-NEXT: [[DOTCAST5:%.*]] = trunc i32 [[N_VEC]] to i8 ; CHECK-NEXT: [[TMP5:%.*]] = add i8 -12, [[DOTCAST5]] -; CHECK-NEXT: [[TMP6:%.*]] = select [[BROADCAST_SPLAT4]], splat (i8 1), [[BROADCAST_SPLAT2]] -; CHECK-NEXT: [[TMP7:%.*]] = select [[BROADCAST_SPLAT4]], splat (i16 1), [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[C:%.*]], splat (i8 1), [[BROADCAST_SPLAT2]] +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[C]], splat (i16 1), [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv2i8() ; CHECK-NEXT: [[TMP9:%.*]] = mul [[TMP8]], splat (i8 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add splat (i8 -12), [[TMP9]] @@ -683,7 +687,7 @@ define i32 @udiv_sdiv_with_invariant_divisors(i8 %x, i16 %y, i1 %c) { ; CHECK-NEXT: [[TMP12:%.*]] = zext [[TMP11]] to ; CHECK-NEXT: [[TMP13:%.*]] = sdiv [[TMP12]], [[TMP7]] ; CHECK-NEXT: [[TMP14:%.*]] = sext [[TMP13]] to -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[BROADCAST_SPLAT4]], zeroinitializer, [[TMP14]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[C]], zeroinitializer, [[TMP14]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT7]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -727,10 +731,8 @@ define i32 @udiv_sdiv_with_invariant_divisors(i8 %x, i16 %y, i1 %c) { ; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer ; FIXED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[X:%.*]], i64 0 ; 
FIXED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer -; FIXED-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i1> poison, i1 [[C:%.*]], i64 0 -; FIXED-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT3]], <4 x i1> poison, <4 x i32> zeroinitializer -; FIXED-NEXT: [[TMP0:%.*]] = select <4 x i1> [[BROADCAST_SPLAT4]], <4 x i8> splat (i8 1), <4 x i8> [[BROADCAST_SPLAT2]] -; FIXED-NEXT: [[TMP1:%.*]] = select <4 x i1> [[BROADCAST_SPLAT4]], <4 x i16> splat (i16 1), <4 x i16> [[BROADCAST_SPLAT]] +; FIXED-NEXT: [[TMP0:%.*]] = select i1 [[C:%.*]], <4 x i8> splat (i8 1), <4 x i8> [[BROADCAST_SPLAT2]] +; FIXED-NEXT: [[TMP1:%.*]] = select i1 [[C]], <4 x i16> splat (i16 1), <4 x i16> [[BROADCAST_SPLAT]] ; FIXED-NEXT: br label [[VECTOR_BODY:%.*]] ; FIXED: vector.body: ; FIXED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -739,7 +741,7 @@ define i32 @udiv_sdiv_with_invariant_divisors(i8 %x, i16 %y, i1 %c) { ; FIXED-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i16> ; FIXED-NEXT: [[TMP4:%.*]] = sdiv <4 x i16> [[TMP3]], [[TMP1]] ; FIXED-NEXT: [[TMP5:%.*]] = sext <4 x i16> [[TMP4]] to <4 x i32> -; FIXED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT4]], <4 x i32> zeroinitializer, <4 x i32> [[TMP5]] +; FIXED-NEXT: [[PREDPHI:%.*]] = select i1 [[C]], <4 x i32> zeroinitializer, <4 x i32> [[TMP5]] ; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; FIXED-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4) ; FIXED-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll index 1c6954c187e5f..212a5c99676f4 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll @@ 
-40,7 +40,7 @@ define void @predicated_uniform_load(ptr %src, i32 %n, ptr %dst, i1 %cond) { ; CHECK-NEXT: [[AVL:%.*]] = phi i32 [ [[TMP3]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 4, i1 true) ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv4i32.nxv4p0( align 4 [[BROADCAST_SPLAT]], [[TMP13]], i32 [[TMP10]]), !alias.scope [[META0:![0-9]+]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[BROADCAST_SPLAT1]], zeroinitializer, [[WIDE_MASKED_GATHER]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[COND]], zeroinitializer, [[WIDE_MASKED_GATHER]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0( [[PREDPHI]], align 4 [[BROADCAST_SPLAT4]], splat (i1 true), i32 [[TMP10]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[AVL_NEXT]], 0 diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll index baedf34b5548f..6ec010cdcc248 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll @@ -1193,7 +1193,7 @@ define i64 @test_predicated_udiv(i32 %d, i1 %c) #2 { ; CHECK: pred.udiv.continue62: ; CHECK-NEXT: [[TMP161:%.*]] = phi <32 x i32> [ [[TMP156]], [[PRED_UDIV_CONTINUE60]] ], [ [[TMP160]], [[PRED_UDIV_IF61]] ] ; CHECK-NEXT: [[TMP162:%.*]] = zext <32 x i32> [[TMP161]] to <32 x i64> -; CHECK-NEXT: [[PREDPHI:%.*]] = select <32 x i1> [[BROADCAST_SPLAT]], <32 x i64> zeroinitializer, <32 x i64> [[TMP162]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[C]], <32 x i64> zeroinitializer, <32 x i64> [[TMP162]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i32> [[VEC_IND]], splat (i32 32) ; CHECK-NEXT: [[TMP163:%.*]] = icmp 
eq i32 [[INDEX_NEXT]], 992 @@ -1289,7 +1289,7 @@ define i64 @test_predicated_udiv(i32 %d, i1 %c) #2 { ; CHECK: pred.udiv.continue84: ; CHECK-NEXT: [[TMP206:%.*]] = phi <8 x i32> [ [[TMP201]], [[PRED_UDIV_CONTINUE82]] ], [ [[TMP205]], [[PRED_UDIV_IF83]] ] ; CHECK-NEXT: [[TMP207:%.*]] = zext <8 x i32> [[TMP206]] to <8 x i64> -; CHECK-NEXT: [[PREDPHI85:%.*]] = select <8 x i1> [[BROADCAST_SPLAT64]], <8 x i64> zeroinitializer, <8 x i64> [[TMP207]] +; CHECK-NEXT: [[PREDPHI85:%.*]] = select i1 [[C]], <8 x i64> zeroinitializer, <8 x i64> [[TMP207]] ; CHECK-NEXT: [[INDEX_NEXT86]] = add nuw i32 [[INDEX67]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT87]] = add <8 x i32> [[VEC_IND68]], splat (i32 8) ; CHECK-NEXT: [[TMP208:%.*]] = icmp eq i32 [[INDEX_NEXT86]], 1000 diff --git a/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll b/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll index 0bc86fff9831b..7e5964ac30cba 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll @@ -37,7 +37,8 @@ define i32 @inv_load_conditional(ptr %a, i64 %n, ptr %b, i32 %k) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT5]], ptr [[TMP2]], align 4, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> align 4 [[BROADCAST_SPLAT]], <16 x i1> [[TMP1]], <16 x i32> poison), !alias.scope [[META3]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP1]], <16 x i32> [[WIDE_MASKED_GATHER]], <16 x i32> splat (i32 1) +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x i1> [[TMP1]], i32 0 +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP9]], <16 x i32> [[WIDE_MASKED_GATHER]], <16 x i32> splat (i32 1) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 
[[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] @@ -63,7 +64,8 @@ define i32 @inv_load_conditional(ptr %a, i64 %n, ptr %b, i32 %k) { ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX10]] ; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT12]], ptr [[TMP6]], align 4, !alias.scope [[META0]], !noalias [[META3]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER13:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> align 4 [[BROADCAST_SPLAT9]], <8 x i1> [[TMP5]], <8 x i32> poison), !alias.scope [[META3]] -; CHECK-NEXT: [[PREDPHI14:%.*]] = select <8 x i1> [[TMP5]], <8 x i32> [[WIDE_MASKED_GATHER13]], <8 x i32> splat (i32 1) +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP5]], i32 0 +; CHECK-NEXT: [[PREDPHI14:%.*]] = select i1 [[TMP10]], <8 x i32> [[WIDE_MASKED_GATHER13]], <8 x i32> splat (i32 1) ; CHECK-NEXT: [[INDEX_NEXT15]] = add nuw i64 [[INDEX10]], 8 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT15]], [[N_VEC7]] ; CHECK-NEXT: br i1 [[TMP7]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll b/llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll index 619693abf51e4..57cbe7f4c241b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll @@ -97,8 +97,7 @@ define i8 @pr141968(i1 %cond, i8 %v) { ; CHECK: [[PRED_SDIV_IF29]]: ; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE30]] ; CHECK: [[PRED_SDIV_CONTINUE30]]: -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i1> [[BROADCAST_SPLAT]], i32 0 -; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP18]], i8 0, i8 [[V]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[COND]], i8 0, i8 [[V]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 ; CHECK-NEXT: [[TMP17:%.*]] = 
icmp eq i32 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll b/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll index 4590dfc5326b5..715d6db50488f 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll @@ -199,8 +199,6 @@ define float @uniform_load_replicating_select(ptr %A, ptr %B, i64 %1) { ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 7 ; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[A]], align 4 ; CHECK-NEXT: [[TMP10:%.*]] = fcmp ogt float [[TMP6]], 0.000000e+00 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP10]], i64 0 -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]] @@ -209,7 +207,7 @@ define float @uniform_load_replicating_select(ptr %A, ptr %B, i64 %1) { ; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP10]], ptr [[A]], ptr [[TMP16]] ; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP10]], ptr [[A]], ptr [[TMP17]] ; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP10]], ptr [[A]], ptr [[TMP18]] -; CHECK-NEXT: [[TMP23:%.*]] = select <4 x i1> [[TMP14]], <4 x float> splat (float 1.000000e+01), <4 x float> splat (float 1.000000e+00) +; CHECK-NEXT: [[TMP23:%.*]] = select i1 [[TMP10]], <4 x float> splat (float 1.000000e+01), <4 x float> splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP19]], align 4 ; CHECK-NEXT: [[TMP25:%.*]] = load float, ptr 
[[TMP20]], align 4 ; CHECK-NEXT: [[TMP26:%.*]] = load float, ptr [[TMP21]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll index 22eb0ca380033..9cd5625e5f8e6 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll @@ -23,8 +23,8 @@ define void @scalarselect(i1 %cond) { %7 = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 %indvars.iv ; CHECK: cost of 1 for VF 1 {{.*}} select i1 %cond, i32 %6, i32 0 -; CHECK: Cost of 2 for VF 2: WIDEN-SELECT ir<%sel> = select ir<%cond>, ir<%6>, ir<0> (condition is loop invariant) -; CHECK: Cost of 2 for VF 4: WIDEN-SELECT ir<%sel> = select ir<%cond>, ir<%6>, ir<0> (condition is loop invariant) +; CHECK: Cost of 2 for VF 2: WIDEN-SELECT ir<%sel> = select ir<%cond>, ir<%6>, ir<0> (condition is single-scalar) +; CHECK: Cost of 2 for VF 4: WIDEN-SELECT ir<%sel> = select ir<%cond>, ir<%6>, ir<0> (condition is single-scalar) %sel = select i1 %cond, i32 %6, i32 zeroinitializer store i32 %sel, ptr %7, align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll index deef94aa3fe9d..67fe87a328976 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll @@ -39,7 +39,7 @@ define i32 @predicated_sdiv_masked_load(ptr %a, ptr %b, i32 %x, i1 %c) { ; CHECK: pred.sdiv.continue2: ; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x i32> [ [[TMP9]], [[PRED_SDIV_CONTINUE]] ], [ [[TMP13]], [[PRED_SDIV_IF1]] ] ; CHECK-NEXT: [[TMP15:%.*]] = add nsw <2 x i32> [[TMP14]], [[WIDE_LOAD]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP15]], <2 x i32> [[WIDE_LOAD]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[C]], <2 x i32> [[TMP15]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: 
[[TMP17]] = add <2 x i32> [[VEC_PHI]], [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 @@ -127,7 +127,7 @@ define i32 @predicated_sdiv_masked_load(ptr %a, ptr %b, i32 %x, i1 %c) { ; SINK-GATHER: pred.sdiv.continue14: ; SINK-GATHER-NEXT: [[TMP44:%.*]] = phi <8 x i32> [ [[TMP39]], [[PRED_SDIV_CONTINUE12]] ], [ [[TMP43]], [[PRED_SDIV_IF13]] ] ; SINK-GATHER-NEXT: [[TMP45:%.*]] = add nsw <8 x i32> [[TMP44]], [[WIDE_LOAD]] -; SINK-GATHER-NEXT: [[PREDPHI:%.*]] = select <8 x i1> [[BROADCAST_SPLAT]], <8 x i32> [[TMP45]], <8 x i32> [[WIDE_LOAD]] +; SINK-GATHER-NEXT: [[PREDPHI:%.*]] = select i1 [[C]], <8 x i32> [[TMP45]], <8 x i32> [[WIDE_LOAD]] ; SINK-GATHER-NEXT: [[TMP47]] = add <8 x i32> [[VEC_PHI]], [[PREDPHI]] ; SINK-GATHER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; SINK-GATHER-NEXT: [[TMP48:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 @@ -179,15 +179,13 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <2 x i1> poison, i1 [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT4]], <2 x i1> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT1]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE4:%.*]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[PRED_UDIV_CONTINUE4]] ] -; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF:%.*]], label 
[[PRED_UDIV_CONTINUE:%.*]] +; CHECK-NEXT: br i1 [[TMP1:%.*]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] ; CHECK: pred.udiv.if: ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP0]], 777 @@ -199,7 +197,7 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; CHECK: pred.udiv.continue: ; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_UDIV_IF]] ] ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4]] -; CHECK: pred.udiv.if3: +; CHECK: pred.udiv.if1: ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP7]], 777 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]] @@ -207,9 +205,9 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; CHECK-NEXT: [[TMP13:%.*]] = udiv i32 [[TMP12]], [[X]] ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP13]], i32 1 ; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE4]] -; CHECK: pred.udiv.continue4: +; CHECK: pred.udiv.continue2: ; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x i32> [ [[TMP8]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP14]], [[PRED_UDIV_IF3]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP16]], <2 x i32> [[BROADCAST_SPLAT4]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP1]], <2 x i32> [[TMP16]], <2 x i32> [[BROADCAST_SPLAT4]] ; CHECK-NEXT: [[TMP18]] = add <2 x i32> [[VEC_PHI]], [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -250,8 +248,6 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; SINK-GATHER: vector.ph: ; SINK-GATHER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 8 ; SINK-GATHER-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]] -; SINK-GATHER-NEXT: [[BROADCAST_SPLATINSERT16:%.*]] = insertelement <8 x i1> poison, i1 
[[TMP1:%.*]], i64 0 -; SINK-GATHER-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i1> [[BROADCAST_SPLATINSERT16]], <8 x i1> poison, <8 x i32> zeroinitializer ; SINK-GATHER-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[X:%.*]], i64 0 ; SINK-GATHER-NEXT: [[BROADCAST_SPLAT16:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer ; SINK-GATHER-NEXT: br label [[VECTOR_BODY:%.*]] @@ -260,7 +256,7 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; SINK-GATHER-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UDIV_CONTINUE16]] ] ; SINK-GATHER-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP66:%.*]], [[PRED_UDIV_CONTINUE16]] ] ; SINK-GATHER-NEXT: [[TMP0:%.*]] = mul <8 x i64> [[VEC_IND]], splat (i64 777) -; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] +; SINK-GATHER-NEXT: br i1 [[TMP1:%.*]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] ; SINK-GATHER: pred.udiv.if: ; SINK-GATHER-NEXT: [[TMP2:%.*]] = extractelement <8 x i64> [[TMP0]], i32 0 ; SINK-GATHER-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP2]] @@ -271,76 +267,76 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; SINK-GATHER: pred.udiv.continue: ; SINK-GATHER-NEXT: [[TMP8:%.*]] = phi <8 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_UDIV_IF]] ] ; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF5:%.*]], label [[PRED_UDIV_CONTINUE4:%.*]] -; SINK-GATHER: pred.udiv.if3: +; SINK-GATHER: pred.udiv.if1: ; SINK-GATHER-NEXT: [[TMP10:%.*]] = extractelement <8 x i64> [[TMP0]], i32 1 ; SINK-GATHER-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]] ; SINK-GATHER-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 ; SINK-GATHER-NEXT: [[TMP13:%.*]] = udiv i32 [[TMP12]], [[X]] ; SINK-GATHER-NEXT: 
[[TMP14:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[TMP13]], i32 1 ; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE4]] -; SINK-GATHER: pred.udiv.continue4: +; SINK-GATHER: pred.udiv.continue2: ; SINK-GATHER-NEXT: [[TMP16:%.*]] = phi <8 x i32> [ [[TMP8]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP14]], [[PRED_UDIV_IF5]] ] ; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF6:%.*]], label [[PRED_UDIV_CONTINUE6:%.*]] -; SINK-GATHER: pred.udiv.if5: +; SINK-GATHER: pred.udiv.if3: ; SINK-GATHER-NEXT: [[TMP18:%.*]] = extractelement <8 x i64> [[TMP0]], i32 2 ; SINK-GATHER-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP18]] ; SINK-GATHER-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 ; SINK-GATHER-NEXT: [[TMP21:%.*]] = udiv i32 [[TMP20]], [[X]] ; SINK-GATHER-NEXT: [[TMP22:%.*]] = insertelement <8 x i32> [[TMP16]], i32 [[TMP21]], i32 2 ; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE6]] -; SINK-GATHER: pred.udiv.continue6: +; SINK-GATHER: pred.udiv.continue4: ; SINK-GATHER-NEXT: [[TMP24:%.*]] = phi <8 x i32> [ [[TMP16]], [[PRED_UDIV_CONTINUE4]] ], [ [[TMP22]], [[PRED_UDIV_IF6]] ] ; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8:%.*]] -; SINK-GATHER: pred.udiv.if7: +; SINK-GATHER: pred.udiv.if5: ; SINK-GATHER-NEXT: [[TMP26:%.*]] = extractelement <8 x i64> [[TMP0]], i32 3 ; SINK-GATHER-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP26]] ; SINK-GATHER-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 ; SINK-GATHER-NEXT: [[TMP29:%.*]] = udiv i32 [[TMP28]], [[X]] ; SINK-GATHER-NEXT: [[TMP30:%.*]] = insertelement <8 x i32> [[TMP24]], i32 [[TMP29]], i32 3 ; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE8]] -; SINK-GATHER: pred.udiv.continue8: +; SINK-GATHER: pred.udiv.continue6: ; SINK-GATHER-NEXT: [[TMP32:%.*]] = phi <8 x i32> [ [[TMP24]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP30]], [[PRED_UDIV_IF7]] ] ; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF9:%.*]], label 
[[PRED_UDIV_CONTINUE10:%.*]] -; SINK-GATHER: pred.udiv.if9: +; SINK-GATHER: pred.udiv.if7: ; SINK-GATHER-NEXT: [[TMP34:%.*]] = extractelement <8 x i64> [[TMP0]], i32 4 ; SINK-GATHER-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP34]] ; SINK-GATHER-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 ; SINK-GATHER-NEXT: [[TMP37:%.*]] = udiv i32 [[TMP36]], [[X]] ; SINK-GATHER-NEXT: [[TMP38:%.*]] = insertelement <8 x i32> [[TMP32]], i32 [[TMP37]], i32 4 ; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE10]] -; SINK-GATHER: pred.udiv.continue10: +; SINK-GATHER: pred.udiv.continue8: ; SINK-GATHER-NEXT: [[TMP40:%.*]] = phi <8 x i32> [ [[TMP32]], [[PRED_UDIV_CONTINUE8]] ], [ [[TMP38]], [[PRED_UDIV_IF9]] ] ; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF11:%.*]], label [[PRED_UDIV_CONTINUE12:%.*]] -; SINK-GATHER: pred.udiv.if11: +; SINK-GATHER: pred.udiv.if9: ; SINK-GATHER-NEXT: [[TMP42:%.*]] = extractelement <8 x i64> [[TMP0]], i32 5 ; SINK-GATHER-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP42]] ; SINK-GATHER-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 ; SINK-GATHER-NEXT: [[TMP45:%.*]] = udiv i32 [[TMP44]], [[X]] ; SINK-GATHER-NEXT: [[TMP46:%.*]] = insertelement <8 x i32> [[TMP40]], i32 [[TMP45]], i32 5 ; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE12]] -; SINK-GATHER: pred.udiv.continue12: +; SINK-GATHER: pred.udiv.continue10: ; SINK-GATHER-NEXT: [[TMP48:%.*]] = phi <8 x i32> [ [[TMP40]], [[PRED_UDIV_CONTINUE10]] ], [ [[TMP46]], [[PRED_UDIV_IF11]] ] ; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF13:%.*]], label [[PRED_UDIV_CONTINUE14:%.*]] -; SINK-GATHER: pred.udiv.if13: +; SINK-GATHER: pred.udiv.if11: ; SINK-GATHER-NEXT: [[TMP50:%.*]] = extractelement <8 x i64> [[TMP0]], i32 6 ; SINK-GATHER-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP50]] ; SINK-GATHER-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 ; SINK-GATHER-NEXT: [[TMP53:%.*]] = udiv i32 [[TMP52]], 
[[X]] ; SINK-GATHER-NEXT: [[TMP54:%.*]] = insertelement <8 x i32> [[TMP48]], i32 [[TMP53]], i32 6 ; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE14]] -; SINK-GATHER: pred.udiv.continue14: +; SINK-GATHER: pred.udiv.continue12: ; SINK-GATHER-NEXT: [[TMP56:%.*]] = phi <8 x i32> [ [[TMP48]], [[PRED_UDIV_CONTINUE12]] ], [ [[TMP54]], [[PRED_UDIV_IF13]] ] ; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF15:%.*]], label [[PRED_UDIV_CONTINUE16]] -; SINK-GATHER: pred.udiv.if15: +; SINK-GATHER: pred.udiv.if13: ; SINK-GATHER-NEXT: [[TMP58:%.*]] = extractelement <8 x i64> [[TMP0]], i32 7 ; SINK-GATHER-NEXT: [[TMP59:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP58]] ; SINK-GATHER-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP59]], align 4 ; SINK-GATHER-NEXT: [[TMP61:%.*]] = udiv i32 [[TMP60]], [[X]] ; SINK-GATHER-NEXT: [[TMP62:%.*]] = insertelement <8 x i32> [[TMP56]], i32 [[TMP61]], i32 7 ; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE16]] -; SINK-GATHER: pred.udiv.continue16: +; SINK-GATHER: pred.udiv.continue14: ; SINK-GATHER-NEXT: [[TMP64:%.*]] = phi <8 x i32> [ [[TMP56]], [[PRED_UDIV_CONTINUE14]] ], [ [[TMP62]], [[PRED_UDIV_IF15]] ] -; SINK-GATHER-NEXT: [[PREDPHI:%.*]] = select <8 x i1> [[BROADCAST_SPLAT]], <8 x i32> [[TMP64]], <8 x i32> [[BROADCAST_SPLAT16]] +; SINK-GATHER-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP1]], <8 x i32> [[TMP64]], <8 x i32> [[BROADCAST_SPLAT16]] ; SINK-GATHER-NEXT: [[TMP66]] = add <8 x i32> [[VEC_PHI]], [[PREDPHI]] ; SINK-GATHER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; SINK-GATHER-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll index 198a30af814ba..8a579734a06e1 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll @@ 
-137,7 +137,8 @@ define i16 @for_phi_removed(ptr %src) { ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], zeroinitializer -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> splat (i16 1), <4 x i16> zeroinitializer +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 +; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = select i1 [[TMP4]], <4 x i16> splat (i16 1), <4 x i16> zeroinitializer ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 104 ; UNROLL-NO-IC-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -202,7 +203,8 @@ define i16 @for_phi_removed(ptr %src) { ; SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 ; SINK-AFTER-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; SINK-AFTER-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], zeroinitializer -; SINK-AFTER-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> splat (i16 1), <4 x i16> zeroinitializer +; SINK-AFTER-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 +; SINK-AFTER-NEXT: [[TMP2:%.*]] = select i1 [[TMP4]], <4 x i16> splat (i16 1), <4 x i16> zeroinitializer ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 108 ; SINK-AFTER-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll 
b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll index 5b7c27a0b5f1b..af648df9fc5c7 100644 --- a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll @@ -66,13 +66,11 @@ define float @fmaxnum(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP18:%.*]] = freeze <4 x i1> [[TMP4]] ; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP15]], [[TMP18]] ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP6]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP6]], [[TMP9]] ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP7]] -; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI1]], <4 x float> [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP6]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP6]], <4 x float> [[VEC_PHI1]], <4 x float> [[TMP8]] ; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP6]], i64 [[IV]], i64 [[N_VEC]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP11]], <4 x float> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[RDX_MINMAX_SELECT]]) diff --git a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll index 8b6a6e1e46101..242df1fcf7618 100644 --- a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll +++ 
b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll @@ -209,12 +209,10 @@ define float @fmaxnum_1(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP2:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP2]] ; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]]) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP3]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP3]], [[TMP5]] ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP3]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP3]], i64 [[IV]], i64 [[N_VEC]] ; CHECK-NEXT: [[TMP8:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP7]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -276,12 +274,10 @@ define float @fmaxnum_2(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP2:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP2]] ; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]]) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP3]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP3]], [[TMP5]] ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> 
[[VEC_PHI]], <4 x float> [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP3]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP3]], i64 [[IV]], i64 [[N_VEC]] ; CHECK-NEXT: [[TMP8:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP7]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -345,12 +341,10 @@ define float @fmaxnum_induction_starts_at_10(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP5:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP12:%.*]] = freeze <4 x i1> [[TMP5]] ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP12]]) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP6]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[TMP4]] ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP6]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP6]], i64 [[INDEX]], i64 [[N_VEC]] ; CHECK-NEXT: [[TMP10:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP8]]) ; CHECK-NEXT: [[TMP11:%.*]] = add i64 10, [[TMP9]] @@ -415,12 +409,10 @@ define float @fmaxnum_induction_starts_at_value(ptr %src, i64 %start, i64 %n) { ; CHECK-NEXT: [[TMP5:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP12:%.*]] = freeze <4 x i1> [[TMP5]] ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP12]]) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP6]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> 
[[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[TMP4]] ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP6]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP6]], i64 [[INDEX]], i64 [[N_VEC]] ; CHECK-NEXT: [[TMP10:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP8]]) ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[START]], [[TMP9]] diff --git a/llvm/test/Transforms/LoopVectorize/fmin-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/fmin-without-fast-math-flags.ll index 211d3bf4c1f6a..7f65306bcbe52 100644 --- a/llvm/test/Transforms/LoopVectorize/fmin-without-fast-math-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/fmin-without-fast-math-flags.ll @@ -209,12 +209,10 @@ define float @fminnum_1(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP2:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP2]] ; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]]) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP3]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP3]], [[TMP5]] ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP3]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 
[[TMP3]], i64 [[IV]], i64 [[N_VEC]] ; CHECK-NEXT: [[TMP8:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP7]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -276,12 +274,10 @@ define float @fminnum_2(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP2:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP2]] ; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]]) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP3]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP3]], [[TMP5]] ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP3]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP3]], i64 [[IV]], i64 [[N_VEC]] ; CHECK-NEXT: [[TMP8:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP7]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll index 7b9fcebb34049..c236b0af2a61d 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll @@ -716,15 +716,13 @@ define i32 @predicated_udiv_scalarized_operand(ptr %a, i1 %c, i32 %x, i64 %n) { ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = 
shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE2:%.*]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[PRED_UDIV_CONTINUE2]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4 -; CHECK-NEXT: br i1 [[C]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] +; CHECK-NEXT: br i1 [[C:%.*]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] ; CHECK: pred.udiv.if: ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = add nsw i32 [[TMP4]], [[X:%.*]] @@ -744,7 +742,7 @@ define i32 @predicated_udiv_scalarized_operand(ptr %a, i1 %c, i32 %x, i64 %n) { ; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE2]] ; CHECK: pred.udiv.continue2: ; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x i32> [ [[TMP9]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP15]], [[PRED_UDIV_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP16]], <2 x i32> [[WIDE_LOAD]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[C]], <2 x i32> [[TMP16]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP18]] = add <2 x i32> [[VEC_PHI]], [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll index e7913c583b938..f9dd626e523e8 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -296,8 +296,6 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) { ; VEC-NEXT: [[N_VEC:%.*]] = sub i64 
[[TMP4]], [[N_MOD_VF]] ; VEC-NEXT: [[IND_END:%.*]] = add i64 [[V_1]], [[N_VEC]] ; VEC-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> zeroinitializer, i32 [[V_2:%.*]], i32 0 -; VEC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[COND_2:%.*]], i64 0 -; VEC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer ; VEC-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC: vector.body: ; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] @@ -305,7 +303,7 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) { ; VEC-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[V_1]], [[INDEX]] ; VEC-NEXT: [[TMP7:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR:%.*]], i64 0, i64 [[OFFSET_IDX]] ; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4 -; VEC-NEXT: br i1 [[COND_2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE2]] +; VEC-NEXT: br i1 [[COND_2:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE2]] ; VEC: pred.store.if: ; VEC-NEXT: [[INDVARS_IV3:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VEC-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[INDVARS_IV3]] @@ -318,7 +316,7 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) { ; VEC-NEXT: br label [[PRED_STORE_CONTINUE2]] ; VEC: pred.store.continue2: ; VEC-NEXT: [[TMP15:%.*]] = add <2 x i32> [[VEC_PHI]], splat (i32 1) -; VEC-NEXT: [[PREDPHI]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP15]], <2 x i32> [[VEC_PHI]] +; VEC-NEXT: [[PREDPHI]] = select i1 [[COND_2]], <2 x i32> [[TMP15]], <2 x i32> [[VEC_PHI]] ; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VEC-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VEC-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] diff --git 
a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index e33995327b856..66e4de5da7955 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -1959,15 +1959,13 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[SMAX]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[SMAX]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE2:%.*]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[PRED_UDIV_CONTINUE2]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4 -; CHECK-NEXT: br i1 [[C]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] +; CHECK-NEXT: br i1 [[C:%.*]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] ; CHECK: pred.udiv.if: ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0 @@ -1985,7 +1983,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE2]] ; CHECK: pred.udiv.continue2: ; CHECK-NEXT: [[TMP11:%.*]] = phi <2 x i32> [ [[TMP12]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP10]], [[PRED_UDIV_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP11]], <2 x i32> [[WIDE_LOAD]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[C]], <2 x i32> [[TMP11]], <2 x i32> 
[[WIDE_LOAD]] ; CHECK-NEXT: [[TMP16]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -2024,8 +2022,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IND: vector.ph: ; IND-NEXT: [[N_VEC:%.*]] = and i32 [[SMAX]], 2147483646 -; IND-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i64 0 -; IND-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer ; IND-NEXT: br label [[VECTOR_BODY:%.*]] ; IND: vector.body: ; IND-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE2:%.*]] ] @@ -2033,7 +2029,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; IND-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64 ; IND-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] ; IND-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4 -; IND-NEXT: br i1 [[C]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] +; IND-NEXT: br i1 [[C:%.*]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] ; IND: pred.udiv.if: ; IND-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i64 0 ; IND-NEXT: [[TMP3:%.*]] = udiv i32 [[TMP2]], [[INDEX]] @@ -2049,8 +2045,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; IND-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP8]], i64 1 ; IND-NEXT: br label [[PRED_UDIV_CONTINUE2]] ; IND: pred.udiv.continue2: -; IND-NEXT: [[TMP10:%.*]] = phi <2 x i32> [ [[TMP5]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP9]], [[PRED_UDIV_IF1]] ] -; IND-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP10]], <2 x i32> [[WIDE_LOAD]] +; IND-NEXT: 
[[PREDPHI:%.*]] = phi <2 x i32> [ [[WIDE_LOAD]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP9]], [[PRED_UDIV_IF1]] ] ; IND-NEXT: [[TMP13]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]] ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; IND-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -2090,8 +2085,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL: vector.ph: ; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[SMAX]], 2147483644 -; UNROLL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i64 0 -; UNROLL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE8:%.*]] ] @@ -2102,7 +2095,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 8 ; UNROLL-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4 ; UNROLL-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 -; UNROLL-NEXT: br i1 [[C]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] +; UNROLL-NEXT: br i1 [[C:%.*]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] ; UNROLL: pred.udiv.if: ; UNROLL-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i64 0 ; UNROLL-NEXT: [[TMP4:%.*]] = udiv i32 [[TMP3]], [[INDEX]] @@ -2136,9 +2129,8 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; UNROLL-NEXT: [[TMP20:%.*]] = insertelement <2 x i32> [[TMP16]], i32 [[TMP19]], i64 1 ; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE8]] ; UNROLL: pred.udiv.continue8: -; UNROLL-NEXT: [[TMP21:%.*]] = phi <2 x i32> [ [[TMP16]], [[PRED_UDIV_CONTINUE6]] ], [ 
[[TMP20]], [[PRED_UDIV_IF7]] ] -; UNROLL-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP11]], <2 x i32> [[WIDE_LOAD]] -; UNROLL-NEXT: [[PREDPHI9:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP21]], <2 x i32> [[WIDE_LOAD2]] +; UNROLL-NEXT: [[PREDPHI9:%.*]] = phi <2 x i32> [ [[WIDE_LOAD2]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP20]], [[PRED_UDIV_IF7]] ] +; UNROLL-NEXT: [[PREDPHI:%.*]] = phi <2 x i32> [ [[WIDE_LOAD]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP11]], [[PRED_UDIV_IF7]] ] ; UNROLL-NEXT: [[TMP22]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]] ; UNROLL-NEXT: [[TMP23]] = add <2 x i32> [[PREDPHI9]], [[VEC_PHI1]] ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -2181,8 +2173,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; UNROLL-NO-IC: vector.ph: ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[SMAX]], 4 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[SMAX]], [[N_MOD_VF]] -; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i64 0 -; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE8:%.*]] ] @@ -2192,7 +2182,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 -; UNROLL-NO-IC-NEXT: br i1 [[C]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] +; UNROLL-NO-IC-NEXT: br i1 [[C:%.*]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] ; UNROLL-NO-IC: pred.udiv.if: ; UNROLL-NO-IC-NEXT: 
[[TMP7:%.*]] = add i32 [[INDEX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0 @@ -2228,8 +2218,8 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE8]] ; UNROLL-NO-IC: pred.udiv.continue8: ; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = phi <2 x i32> [ [[TMP23]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP28]], [[PRED_UDIV_IF7]] ] -; UNROLL-NO-IC-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP17]], <2 x i32> [[WIDE_LOAD]] -; UNROLL-NO-IC-NEXT: [[PREDPHI9:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP29]], <2 x i32> [[WIDE_LOAD2]] +; UNROLL-NO-IC-NEXT: [[PREDPHI:%.*]] = select i1 [[C]], <2 x i32> [[TMP17]], <2 x i32> [[WIDE_LOAD]] +; UNROLL-NO-IC-NEXT: [[PREDPHI9:%.*]] = select i1 [[C]], <2 x i32> [[TMP29]], <2 x i32> [[WIDE_LOAD2]] ; UNROLL-NO-IC-NEXT: [[TMP32]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]] ; UNROLL-NO-IC-NEXT: [[TMP33]] = add <2 x i32> [[PREDPHI9]], [[VEC_PHI1]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -2270,8 +2260,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; INTERLEAVE: vector.ph: ; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i32 [[SMAX]], 2147483640 -; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C:%.*]], i64 0 -; INTERLEAVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE16:%.*]] ] @@ -2282,7 +2270,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16 ; 
INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; INTERLEAVE-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 -; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] +; INTERLEAVE-NEXT: br i1 [[C:%.*]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] ; INTERLEAVE: pred.udiv.if: ; INTERLEAVE-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 0 ; INTERLEAVE-NEXT: [[TMP4:%.*]] = udiv i32 [[TMP3]], [[INDEX]] @@ -2352,9 +2340,8 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; INTERLEAVE-NEXT: [[TMP40:%.*]] = insertelement <4 x i32> [[TMP36]], i32 [[TMP39]], i64 3 ; INTERLEAVE-NEXT: br label [[PRED_UDIV_CONTINUE16]] ; INTERLEAVE: pred.udiv.continue16: -; INTERLEAVE-NEXT: [[TMP41:%.*]] = phi <4 x i32> [ [[TMP36]], [[PRED_UDIV_CONTINUE14]] ], [ [[TMP40]], [[PRED_UDIV_IF15]] ] -; INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[TMP21]], <4 x i32> [[WIDE_LOAD]] -; INTERLEAVE-NEXT: [[PREDPHI17:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[TMP41]], <4 x i32> [[WIDE_LOAD2]] +; INTERLEAVE-NEXT: [[PREDPHI17:%.*]] = phi <4 x i32> [ [[WIDE_LOAD2]], [[PRED_UDIV_CONTINUE14]] ], [ [[TMP40]], [[PRED_UDIV_IF15]] ] +; INTERLEAVE-NEXT: [[PREDPHI:%.*]] = phi <4 x i32> [ [[WIDE_LOAD]], [[PRED_UDIV_CONTINUE14]] ], [ [[TMP21]], [[PRED_UDIV_IF15]] ] ; INTERLEAVE-NEXT: [[TMP42]] = add <4 x i32> [[PREDPHI]], [[VEC_PHI]] ; INTERLEAVE-NEXT: [[TMP43]] = add <4 x i32> [[PREDPHI17]], [[VEC_PHI1]] ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll index a1cb361d20bee..9921f2916ce00 100644 --- a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll @@ -36,7 +36,8 @@ define i16 @test_true_and_false_branch_equal() { 
; CHECK-NEXT: br label [[PRED_SREM_CONTINUE2]] ; CHECK: pred.srem.continue2: ; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x i16> [ [[TMP6]], [[PRED_SREM_CONTINUE]] ], [ [[TMP9]], [[PRED_SREM_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i16> [[TMP10]], <2 x i16> splat (i16 5786) +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP13]], <2 x i16> [[TMP10]], <2 x i16> splat (i16 5786) ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 1 ; CHECK-NEXT: store i16 [[TMP11]], ptr @v_39, align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/pr45525.ll b/llvm/test/Transforms/LoopVectorize/pr45525.ll index f32de2d75cdef..b05cf6ef76675 100644 --- a/llvm/test/Transforms/LoopVectorize/pr45525.ll +++ b/llvm/test/Transforms/LoopVectorize/pr45525.ll @@ -9,14 +9,12 @@ define void @main(i1 %cond, ptr %arr) { ; CHECK-NEXT: [[BB_0:.*:]] ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[COND]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i32> [[VEC_IND]], splat (i32 3) -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> splat (i32 7), <4 x i32> [[TMP5]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[COND]], <4 x i32> splat (i32 7), <4 x i32> [[TMP5]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i32 [[INDEX]] ; CHECK-NEXT: store <4 x i32> [[PREDPHI]], ptr [[TMP1]], align 
4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -24,8 +22,9 @@ define void @main(i1 %cond, ptr %arr) { ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[BB_4:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[BB_4:.*]] +; CHECK: [[BB_4]]: +; CHECK-NEXT: ret void ; bb.0: br label %bb.1 diff --git a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll index 7b3500933314a..ebd532aa5032c 100644 --- a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll +++ b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll @@ -23,9 +23,9 @@ define i32 @test(i32 %a, i1 %c.1, i1 %c.2 ) #0 { ; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i32> [[VEC_PHI]], splat (i32 10) ; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[TMP0]], splat (i32 20) ; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[PREDPHI5:%.*]] = select <2 x i1> [[BROADCAST_SPLAT4]], <2 x i32> [[VEC_IND]], <2 x i32> splat (i32 9) +; CHECK-NEXT: [[PREDPHI5:%.*]] = select i1 [[C_2]], <2 x i32> [[VEC_IND]], <2 x i32> splat (i32 9) ; CHECK-NEXT: [[PREDPHI6:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> [[TMP0]], <2 x i32> [[TMP3]] -; CHECK-NEXT: [[PREDPHI7]] = select <2 x i1> [[BROADCAST_SPLAT4]], <2 x i32> [[VEC_PHI]], <2 x i32> [[PREDPHI6]] +; CHECK-NEXT: [[PREDPHI7]] = select i1 [[C_2]], <2 x i32> [[VEC_PHI]], <2 x i32> [[PREDPHI6]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 176 diff --git a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll index f01e562fe40c7..5f54b0ac7834a 100644 --- 
a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll @@ -13,9 +13,7 @@ define i8 @PR34687(i1 %c, i32 %x, i32 %n, i32 %divisor) { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i1> poison, i1 [[C:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT2]], <4 x i1> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[BROADCAST_SPLAT3]], <4 x i32> [[BROADCAST_SPLAT2]], <4 x i32> splat (i32 1) +; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[C:%.*]], <4 x i32> [[BROADCAST_SPLAT2]], <4 x i32> splat (i32 1) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i32> poison, i32 [[X1:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT3]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -24,7 +22,7 @@ define i8 @PR34687(i1 %c, i32 %x, i32 %n, i32 %divisor) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP6:%.*]] = sdiv <4 x i32> [[VEC_IND]], [[TMP0]] -; CHECK-NEXT: [[PREDPHI1:%.*]] = select <4 x i1> [[BROADCAST_SPLAT3]], <4 x i32> [[TMP6]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI1:%.*]] = select i1 [[C]], <4 x i32> [[TMP6]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], splat (i32 255) ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[BROADCAST_SPLAT4]] ; CHECK-NEXT: [[TMP3:%.*]] 
= trunc <4 x i32> [[TMP2]] to <4 x i8> @@ -100,11 +98,9 @@ define i8 @PR34687_no_undef(i1 %c, i32 %x, i32 %n) { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i1> poison, i1 [[C:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT2]], <4 x i1> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[BROADCAST_SPLAT2]], <4 x i32> splat (i32 1) +; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[C:%.*]], <4 x i32> [[BROADCAST_SPLAT2]], <4 x i32> splat (i32 1) ; CHECK-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> splat (i32 99), [[TMP0]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[TMP1]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[C]], <4 x i32> [[TMP1]], <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll index 71311db33cf1a..3b515a2acb1a7 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll @@ -96,13 +96,11 @@ define void @blend_chain_iv(i1 %c) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label 
%[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[PREDPHI1:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i64> [[PREDPHI1]], <4 x i64> poison +; CHECK-NEXT: [[PREDPHI2:%.*]] = select i1 [[C]], <4 x i64> [[PREDPHI1]], <4 x i64> poison ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i64> [[PREDPHI2]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[PREDPHI2]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[PREDPHI2]], i32 2 diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py index 89f1ca6935cff..ac0d40cf25a41 100644 --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -64,6 +64,8 @@ config.excludes.append("LoopVectorize") # exclude UpdateTestChecks - they fail because of inserted prof annotations config.excludes.append("UpdateTestChecks") + # TODO(#166655): Reenable Instrumentation tests + config.excludes.append("Instrumentation") # test_source_root: The root path where tests are located. 
config.test_source_root = os.path.dirname(__file__) diff --git a/llvm/test/tools/llc/new-pm/start-stop.ll b/llvm/test/tools/llc/new-pm/start-stop.ll index e4c454900fd38..0e68cdbe67b63 100644 --- a/llvm/test/tools/llc/new-pm/start-stop.ll +++ b/llvm/test/tools/llc/new-pm/start-stop.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=x86_64-pc-linux-gnu -enable-new-pm -print-pipeline-passes -start-before=mergeicmps -stop-after=gc-lowering -filetype=null %s | FileCheck --match-full-lines %s --check-prefix=NULL ; RUN: llc -mtriple=x86_64-pc-linux-gnu -enable-new-pm -print-pipeline-passes -start-before=mergeicmps -stop-after=gc-lowering -o /dev/null %s | FileCheck --match-full-lines %s --check-prefix=OBJ -; NULL: require,require,require,function(verify,mergeicmps,expand-memcmp,gc-lowering,verify) -; OBJ: require,require,require,function(verify,mergeicmps,expand-memcmp,gc-lowering,verify),PrintMIRPreparePass,function(machine-function(print),free-machine-function) +; NULL: require,require,require,require,function(verify,mergeicmps,expand-memcmp,gc-lowering,verify) +; OBJ: require,require,require,require,function(verify,mergeicmps,expand-memcmp,gc-lowering,verify),PrintMIRPreparePass,function(machine-function(print),free-machine-function) diff --git a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml index 51a5a63ba370c..ff2c9ae00bdb9 100644 --- a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml +++ b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml @@ -34,7 +34,7 @@ # # CHECK: << Total TLI yes SDK no: 18 # CHECK: >> Total TLI no SDK yes: 0 -# CHECK: == Total TLI yes SDK yes: 271 +# CHECK: == Total TLI yes SDK yes: 277 # # WRONG_DETAIL: << TLI yes SDK no : '_ZdaPv' aka operator delete[](void*) # WRONG_DETAIL: >> TLI no SDK yes: '_ZdaPvj' aka operator delete[](void*, unsigned int) @@ -48,14 +48,14 @@ # WRONG_DETAIL: << TLI yes SDK no : 'fminimum_numl' # WRONG_SUMMARY: << Total TLI yes SDK no: 19{{$}} # WRONG_SUMMARY: >> Total 
TLI no SDK yes: 1{{$}} -# WRONG_SUMMARY: == Total TLI yes SDK yes: 270 +# WRONG_SUMMARY: == Total TLI yes SDK yes: 276 # ## The -COUNT suffix doesn't care if there are too many matches, so check ## the exact count first; the two directives should add up to that. ## Yes, this means additions to TLI will fail this test, but the argument ## to -COUNT can't be an expression. -# AVAIL: TLI knows 524 symbols, 289 available -# AVAIL-COUNT-289: {{^}} available +# AVAIL: TLI knows 530 symbols, 295 available +# AVAIL-COUNT-295: {{^}} available # AVAIL-NOT: {{^}} available # UNAVAIL-COUNT-235: not available # UNAVAIL-NOT: not available @@ -778,6 +778,30 @@ DynamicSymbols: Type: STT_FUNC Section: .text Binding: STB_GLOBAL + - Name: nextafter + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + - Name: nextafterf + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + - Name: nextafterl + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + - Name: nexttoward + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + - Name: nexttowardf + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + - Name: nexttowardl + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL - Name: perror Type: STT_FUNC Section: .text diff --git a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp index b33419545efa8..787a32407ad95 100644 --- a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp +++ b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp @@ -277,6 +277,12 @@ TEST_F(TargetLibraryInfoTest, ValidProto) { "declare x86_fp80 @logbl(x86_fp80)\n" "declare float @logf(float)\n" "declare x86_fp80 @logl(x86_fp80)\n" + "declare double @nextafter(double, double)\n" + "declare float @nextafterf(float, float)\n" + "declare x86_fp80 @nextafterl(x86_fp80, x86_fp80)\n" + "declare double @nexttoward(double, x86_fp80)\n" + "declare float @nexttowardf(float, x86_fp80)\n" + "declare x86_fp80 @nexttowardl(x86_fp80, x86_fp80)\n" "declare i8* 
@malloc(i64)\n" "declare i8* @memccpy(i8*, i8*, i32, i64)\n" "declare i8* @memchr(i8*, i32, i64)\n" diff --git a/llvm/unittests/CodeGen/CMakeLists.txt b/llvm/unittests/CodeGen/CMakeLists.txt index 18332d20bf8ff..4d07462babefa 100644 --- a/llvm/unittests/CodeGen/CMakeLists.txt +++ b/llvm/unittests/CodeGen/CMakeLists.txt @@ -39,6 +39,7 @@ add_llvm_unittest(CodeGenTests MachineOperandTest.cpp MIR2VecTest.cpp RegAllocScoreTest.cpp + RegisterTest.cpp PassManagerTest.cpp ScalableVectorMVTsTest.cpp SchedBoundary.cpp diff --git a/llvm/unittests/CodeGen/RegisterTest.cpp b/llvm/unittests/CodeGen/RegisterTest.cpp new file mode 100644 index 0000000000000..db2747ccc718e --- /dev/null +++ b/llvm/unittests/CodeGen/RegisterTest.cpp @@ -0,0 +1,38 @@ +//===- RegisterTest.cpp -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Register.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { +TEST(RegisterTest, Idx2StackSlot) { + EXPECT_EQ(Register::index2StackSlot(0), Register::StackSlotZero); + EXPECT_EQ(Register::index2StackSlot(1), Register::StackSlotZero | 1); + EXPECT_EQ(Register::index2StackSlot(-1), + Register::StackSlotZero | Register::StackSlotMask); + int MaxPowOf2 = 1 << (Register::MaxFrameIndexBitwidth - 1); + // Check the highest possible value of frame index + EXPECT_EQ(Register::index2StackSlot(MaxPowOf2 - 1), + Register::StackSlotZero | (MaxPowOf2 - 1)); + // Check the lowest possible value of frame index + EXPECT_EQ(Register::index2StackSlot(-MaxPowOf2), + Register::StackSlotZero | (-MaxPowOf2 & Register::StackSlotMask)); +} + +TEST(RegisterTest, StackSlotIndex) { + int MaxPowOf2 = 1 << (Register::MaxFrameIndexBitwidth - 1); + std::vector 
FIs = {0, 1 - 1, MaxPowOf2 - 1, -MaxPowOf2}; + + for (int FI : FIs) { + Register Reg = Register::index2StackSlot(FI); + EXPECT_EQ(Reg.stackSlotIndex(), FI); + } +} +} // end namespace diff --git a/llvm/utils/gn/secondary/lldb/source/API/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/API/BUILD.gn index cf6e3515d0c09..24d96bcdf9eaf 100644 --- a/llvm/utils/gn/secondary/lldb/source/API/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/API/BUILD.gn @@ -68,6 +68,7 @@ target(liblldb_type, "liblldb") { "SBFileSpecList.cpp", "SBFormat.cpp", "SBFrame.cpp", + "SBFrameList.cpp", "SBFunction.cpp", "SBHostOS.cpp", "SBInstruction.cpp", diff --git a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn index 0d2106265d1d9..b7c471058815a 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn @@ -115,6 +115,7 @@ static_library("Analysis") { "RegionPass.cpp", "RegionPrinter.cpp", "ReplayInlineAdvisor.cpp", + "RuntimeLibcallInfo.cpp", "ScalarEvolution.cpp", "ScalarEvolutionAliasAnalysis.cpp", "ScalarEvolutionDivision.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn index 6879a92e7769c..e40a8ee04dd38 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn @@ -40,6 +40,7 @@ unittest("CodeGenTests") { "MachineOperandTest.cpp", "PassManagerTest.cpp", "RegAllocScoreTest.cpp", + "RegisterTest.cpp", "ScalableVectorMVTsTest.cpp", "SchedBoundary.cpp", "SelectionDAGAddressAnalysisTest.cpp", diff --git a/llvm/utils/profcheck-xfail.txt b/llvm/utils/profcheck-xfail.txt index 53757c73fb8a6..e9889ba9cbd81 100644 --- a/llvm/utils/profcheck-xfail.txt +++ b/llvm/utils/profcheck-xfail.txt @@ -83,451 +83,6 @@ DebugInfo/assignment-tracking/X86/hotcoldsplit.ll DebugInfo/Generic/block-asan.ll 
DebugInfo/KeyInstructions/Generic/loop-unswitch.ll DebugInfo/X86/asan_debug_info.ll -Instrumentation/AddressSanitizer/aarch64be.ll -Instrumentation/AddressSanitizer/adaptive_global_redzones.ll -Instrumentation/AddressSanitizer/AMDGPU/adaptive_constant_global_redzones.ll -Instrumentation/AddressSanitizer/AMDGPU/adaptive_global_redzones.ll -Instrumentation/AddressSanitizer/AMDGPU/asan_do_not_instrument_lds.ll -Instrumentation/AddressSanitizer/AMDGPU/asan_do_not_instrument_scratch.ll -Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_constant_address_space.ll -Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_generic_address_space.ll -Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_global_address_space.ll -Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_mem_intrinsics.ll -Instrumentation/AddressSanitizer/AMDGPU/global_metadata_addrspacecasts.ll -Instrumentation/AddressSanitizer/AMDGPU/instrument-stack.ll -Instrumentation/AddressSanitizer/AMDGPU/no_redzones_in_lds_globals.ll -Instrumentation/AddressSanitizer/AMDGPU/no_redzones_in_scratch_globals.ll -Instrumentation/AddressSanitizer/asan_address_space_attr.ll -Instrumentation/AddressSanitizer/asan-detect-invalid-pointer-pair.ll -Instrumentation/AddressSanitizer/asan-disable-sanitizer-instrumentation.ll -Instrumentation/AddressSanitizer/asan-funclet.ll -Instrumentation/AddressSanitizer/asan-masked-load-store.ll -Instrumentation/AddressSanitizer/asan-optimize-callbacks.ll -Instrumentation/AddressSanitizer/asan-pass-second-run.ll -Instrumentation/AddressSanitizer/asan-scalable-vector.ll -Instrumentation/AddressSanitizer/asan-stack-safety.ll -Instrumentation/AddressSanitizer/asan-struct-scalable.ll -Instrumentation/AddressSanitizer/asan-vp-load-store.ll -Instrumentation/AddressSanitizer/asan-vs-gvn.ll -Instrumentation/AddressSanitizer/asan-win-dont-instrument-catchpad.ll -Instrumentation/AddressSanitizer/basic.ll -Instrumentation/AddressSanitizer/basic-msvc64.ll 
-Instrumentation/AddressSanitizer/byref-args.ll -Instrumentation/AddressSanitizer/byval-args.ll -Instrumentation/AddressSanitizer/calls-only.ll -Instrumentation/AddressSanitizer/calls-only-smallfn.ll -Instrumentation/AddressSanitizer/coro-byval-param.ll -Instrumentation/AddressSanitizer/debug-info-alloca.ll -Instrumentation/AddressSanitizer/debug-info-global-var.ll -Instrumentation/AddressSanitizer/debug_info.ll -Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca2.ll -Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll -Instrumentation/AddressSanitizer/do-not-instrument-globals-darwin.ll -Instrumentation/AddressSanitizer/do-not-instrument-globals-linux.ll -Instrumentation/AddressSanitizer/do-not-instrument-globals-windows.ll -Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll -Instrumentation/AddressSanitizer/do-not-instrument-netbsd-link_set.ll -Instrumentation/AddressSanitizer/do-not-instrument-profiling-globals.ll -Instrumentation/AddressSanitizer/do-not-instrument-promotable-allocas.ll -Instrumentation/AddressSanitizer/do-not-instrument-sanitizers.ll -Instrumentation/AddressSanitizer/do-not-touch-comdat-global.ll -Instrumentation/AddressSanitizer/do-not-touch-odr-global.ll -Instrumentation/AddressSanitizer/do-not-touch-threadlocal.ll -Instrumentation/AddressSanitizer/dynamic-shadow-darwin.ll -Instrumentation/AddressSanitizer/experiment-call.ll -Instrumentation/AddressSanitizer/experiment.ll -Instrumentation/AddressSanitizer/fake-stack.ll -Instrumentation/AddressSanitizer/force-dynamic-shadow.ll -Instrumentation/AddressSanitizer/freebsd.ll -Instrumentation/AddressSanitizer/global_addrspace.ll -Instrumentation/AddressSanitizer/global_cstring_darwin.ll -Instrumentation/AddressSanitizer/global_lto_merge.ll -Instrumentation/AddressSanitizer/global_metadata_array.ll -Instrumentation/AddressSanitizer/global_metadata_bitcasts.ll -Instrumentation/AddressSanitizer/global-metadata-code-model-medium.ll 
-Instrumentation/AddressSanitizer/global-metadata-code-model-small.ll -Instrumentation/AddressSanitizer/global_metadata_darwin.ll -Instrumentation/AddressSanitizer/global_metadata_external_comdat.ll -Instrumentation/AddressSanitizer/global_metadata.ll -Instrumentation/AddressSanitizer/global_metadata_windows.ll -Instrumentation/AddressSanitizer/global_with_comdat.ll -Instrumentation/AddressSanitizer/hoist-argument-init-insts.ll -Instrumentation/AddressSanitizer/instrumentation-with-call-threshold.ll -Instrumentation/AddressSanitizer/instrument-dynamic-allocas.ll -Instrumentation/AddressSanitizer/instrument_global.ll -Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll -Instrumentation/AddressSanitizer/instrument_initializer_without_global.ll -Instrumentation/AddressSanitizer/instrument_late_initializer.ll -Instrumentation/AddressSanitizer/instrument_load_then_store.ll -Instrumentation/AddressSanitizer/instrument-no-return.ll -Instrumentation/AddressSanitizer/instrument-section-invalid-c-ident.ll -Instrumentation/AddressSanitizer/instrument-stack.ll -Instrumentation/AddressSanitizer/kcfi.ll -Instrumentation/AddressSanitizer/kcfi-offset.ll -Instrumentation/AddressSanitizer/keep_going.ll -Instrumentation/AddressSanitizer/lifetime.ll -Instrumentation/AddressSanitizer/lifetime-throw.ll -Instrumentation/AddressSanitizer/lifetime-uar-uas.ll -Instrumentation/AddressSanitizer/local_alias.ll -Instrumentation/AddressSanitizer/localescape.ll -Instrumentation/AddressSanitizer/local_stack_base.ll -Instrumentation/AddressSanitizer/mem-intrinsics.ll -Instrumentation/AddressSanitizer/missing_dbg.ll -Instrumentation/AddressSanitizer/module-flags-aarch64.ll -Instrumentation/AddressSanitizer/module-flags.ll -Instrumentation/AddressSanitizer/musttail.ll -Instrumentation/AddressSanitizer/no-global-ctors.ll -Instrumentation/AddressSanitizer/no_global_dtors.ll -Instrumentation/AddressSanitizer/no-globals.ll -Instrumentation/AddressSanitizer/odr-check-ignore.ll 
-Instrumentation/AddressSanitizer/program-addrspace.ll -Instrumentation/AddressSanitizer/ps4.ll -Instrumentation/AddressSanitizer/remove-memory-effects.ll -Instrumentation/AddressSanitizer/RISCV/asan-rvv-intrinsics.ll -Instrumentation/AddressSanitizer/scale-offset.ll -Instrumentation/AddressSanitizer/skip-coro.ll -Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll -Instrumentation/AddressSanitizer/stack_layout.ll -Instrumentation/AddressSanitizer/stack-poisoning-and-lifetime-be.ll -Instrumentation/AddressSanitizer/stack-poisoning-and-lifetime.ll -Instrumentation/AddressSanitizer/stack-poisoning-byval-args.ll -Instrumentation/AddressSanitizer/stack-poisoning.ll -Instrumentation/AddressSanitizer/str-nobuiltin.ll -Instrumentation/AddressSanitizer/test64.ll -Instrumentation/AddressSanitizer/twice.ll -Instrumentation/AddressSanitizer/ubsan.ll -Instrumentation/AddressSanitizer/vector-load-store.ll -Instrumentation/AddressSanitizer/version-mismatch-check.ll -Instrumentation/AddressSanitizer/win-sorted-sections.ll -Instrumentation/AddressSanitizer/win-string-literal.ll -Instrumentation/AddressSanitizer/with-ifunc.ll -Instrumentation/AddressSanitizer/X86/asm_cpuid.ll -Instrumentation/AddressSanitizer/X86/asm_more_registers_than_available.ll -Instrumentation/AddressSanitizer/X86/bug_11395.ll -Instrumentation/BoundsChecking/many-trap.ll -Instrumentation/BoundsChecking/many-traps-2.ll -Instrumentation/BoundsChecking/opt.ll -Instrumentation/BoundsChecking/phi.ll -Instrumentation/BoundsChecking/runtimes.ll -Instrumentation/BoundsChecking/simple.ll -Instrumentation/BoundsChecking/ubsan-unique-traps.ll -Instrumentation/DataFlowSanitizer/abilist_aggregate.ll -Instrumentation/DataFlowSanitizer/abilist.ll -Instrumentation/DataFlowSanitizer/atomics.ll -Instrumentation/DataFlowSanitizer/basic.ll -Instrumentation/DataFlowSanitizer/custom_fun_callback_attributes.ll -Instrumentation/DataFlowSanitizer/custom_fun_varargs_attributes.ll 
-Instrumentation/DataFlowSanitizer/dataflow-disable-sanitizer-instrumentation.ll -Instrumentation/DataFlowSanitizer/debug.ll -Instrumentation/DataFlowSanitizer/extern_weak.ll -Instrumentation/DataFlowSanitizer/ignore_persnality_routine.ll -Instrumentation/DataFlowSanitizer/origin_abilist.ll -Instrumentation/DataFlowSanitizer/origin_cached_shadows.ll -Instrumentation/DataFlowSanitizer/origin_load.ll -Instrumentation/DataFlowSanitizer/origin_other_ops.ll -Instrumentation/DataFlowSanitizer/origin_phi.ll -Instrumentation/DataFlowSanitizer/origin_select.ll -Instrumentation/DataFlowSanitizer/origin_store.ll -Instrumentation/DataFlowSanitizer/origin_track_load.ll -Instrumentation/DataFlowSanitizer/select.ll -Instrumentation/DataFlowSanitizer/shadow-args-zext.ll -Instrumentation/DataFlowSanitizer/struct.ll -Instrumentation/DataFlowSanitizer/uninstrumented_local_functions.ll -Instrumentation/HeapProfiler/basic-histogram.ll -Instrumentation/HeapProfiler/basic.ll -Instrumentation/HeapProfiler/filename.ll -Instrumentation/HeapProfiler/instrumentation-use-callbacks.ll -Instrumentation/HeapProfiler/masked-load-store.ll -Instrumentation/HeapProfiler/memprof-options.ll -Instrumentation/HeapProfiler/no-instrumentation.ll -Instrumentation/HeapProfiler/scale-granularity.ll -Instrumentation/HeapProfiler/shadow.ll -Instrumentation/HeapProfiler/skip-compiler-inserted.ll -Instrumentation/HeapProfiler/stack.ll -Instrumentation/HeapProfiler/version-mismatch-check.ll -Instrumentation/HWAddressSanitizer/alloca-array.ll -Instrumentation/HWAddressSanitizer/alloca-compat.ll -Instrumentation/HWAddressSanitizer/alloca.ll -Instrumentation/HWAddressSanitizer/alloca-uninteresting.ll -Instrumentation/HWAddressSanitizer/alloca-with-calls.ll -Instrumentation/HWAddressSanitizer/atomic.ll -Instrumentation/HWAddressSanitizer/basic-compat.ll -Instrumentation/HWAddressSanitizer/basic.ll -Instrumentation/HWAddressSanitizer/coro-byval-param.ll -Instrumentation/HWAddressSanitizer/dbg-assign-tag-offset.ll 
-Instrumentation/HWAddressSanitizer/dbg-declare-tag-offset.ll -Instrumentation/HWAddressSanitizer/dbg-value-tag-offset.ll -Instrumentation/HWAddressSanitizer/dbg-value-tag-offset-nopad.ll -Instrumentation/HWAddressSanitizer/exception-lifetime.ll -Instrumentation/HWAddressSanitizer/fixed-shadow.ll -Instrumentation/HWAddressSanitizer/fuchsia.ll -Instrumentation/HWAddressSanitizer/globals-access.ll -Instrumentation/HWAddressSanitizer/globals.ll -Instrumentation/HWAddressSanitizer/globals-tag.ll -Instrumentation/HWAddressSanitizer/hwasan-pass-second-run.ll -Instrumentation/HWAddressSanitizer/kernel-inline.ll -Instrumentation/HWAddressSanitizer/landingpad.ll -Instrumentation/HWAddressSanitizer/mapping-override.ll -Instrumentation/HWAddressSanitizer/memaccess-clobber.ll -Instrumentation/HWAddressSanitizer/mem-attr.ll -Instrumentation/HWAddressSanitizer/mem-intrinsics.ll -Instrumentation/HWAddressSanitizer/musttail.ll -Instrumentation/HWAddressSanitizer/personality-bti.ll -Instrumentation/HWAddressSanitizer/personality.ll -Instrumentation/HWAddressSanitizer/pgo-opt-out.ll -Instrumentation/HWAddressSanitizer/pgo-opt-out-no-ps.ll -Instrumentation/HWAddressSanitizer/prologue.ll -Instrumentation/HWAddressSanitizer/RISCV/alloca.ll -Instrumentation/HWAddressSanitizer/RISCV/alloca-with-calls.ll -Instrumentation/HWAddressSanitizer/RISCV/atomic.ll -Instrumentation/HWAddressSanitizer/RISCV/basic.ll -Instrumentation/HWAddressSanitizer/RISCV/exception-lifetime.ll -Instrumentation/HWAddressSanitizer/RISCV/use-after-scope-setjmp.ll -Instrumentation/HWAddressSanitizer/RISCV/with-calls.ll -Instrumentation/HWAddressSanitizer/stack-coloring.ll -Instrumentation/HWAddressSanitizer/stack-safety-analysis.ll -Instrumentation/HWAddressSanitizer/str-nobuiltin.ll -Instrumentation/HWAddressSanitizer/use-after-scope.ll -Instrumentation/HWAddressSanitizer/use-after-scope-setjmp.ll -Instrumentation/HWAddressSanitizer/vector-load-store.ll -Instrumentation/HWAddressSanitizer/with-calls.ll 
-Instrumentation/HWAddressSanitizer/X86/alloca-array.ll -Instrumentation/HWAddressSanitizer/X86/alloca.ll -Instrumentation/HWAddressSanitizer/X86/alloca-with-calls.ll -Instrumentation/HWAddressSanitizer/X86/atomic.ll -Instrumentation/HWAddressSanitizer/X86/basic.ll -Instrumentation/HWAddressSanitizer/X86/globals.ll -Instrumentation/HWAddressSanitizer/X86/with-calls.ll -Instrumentation/HWAddressSanitizer/zero-ptr.ll -Instrumentation/InstrProfiling/always_inline.ll -Instrumentation/InstrProfiling/atomic-updates.ll -Instrumentation/InstrProfiling/comdat.ll -Instrumentation/InstrProfiling/conditional-counter-updates.ll -Instrumentation/InstrProfiling/early-exit.ll -Instrumentation/InstrProfiling/icall-nocomdat.ll -Instrumentation/InstrProfiling/no-counters.ll -Instrumentation/InstrProfiling/platform.ll -Instrumentation/InstrProfiling/profiling.ll -Instrumentation/JustMyCode/jmc-instrument-elf.ll -Instrumentation/JustMyCode/jmc-instrument.ll -Instrumentation/JustMyCode/jmc-instrument-x86.ll -Instrumentation/MemorySanitizer/AArch64/arm64-cvt.ll -Instrumentation/MemorySanitizer/AArch64/arm64-fminv.ll -Instrumentation/MemorySanitizer/AArch64/arm64-ld1.ll -Instrumentation/MemorySanitizer/AArch64/arm64-smaxv.ll -Instrumentation/MemorySanitizer/AArch64/arm64-sminv.ll -Instrumentation/MemorySanitizer/AArch64/arm64-st1_lane.ll -Instrumentation/MemorySanitizer/AArch64/arm64-st1.ll -Instrumentation/MemorySanitizer/AArch64/arm64-st1_origins.ll -Instrumentation/MemorySanitizer/AArch64/arm64-tbl.ll -Instrumentation/MemorySanitizer/AArch64/arm64-umaxv.ll -Instrumentation/MemorySanitizer/AArch64/arm64-uminv.ll -Instrumentation/MemorySanitizer/AArch64/arm64-vadd.ll -Instrumentation/MemorySanitizer/AArch64/arm64-vaddlv.ll -Instrumentation/MemorySanitizer/AArch64/arm64-vaddv.ll -Instrumentation/MemorySanitizer/AArch64/arm64-vcvt.ll -Instrumentation/MemorySanitizer/AArch64/arm64-vmax.ll -Instrumentation/MemorySanitizer/AArch64/arm64-vmovn.ll 
-Instrumentation/MemorySanitizer/AArch64/arm64-vmul.ll -Instrumentation/MemorySanitizer/AArch64/arm64-vshift.ll -Instrumentation/MemorySanitizer/AArch64/module-flags-aarch64.ll -Instrumentation/MemorySanitizer/AArch64/neon_vst_float.ll -Instrumentation/MemorySanitizer/AArch64/qshrn.ll -Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll -Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll -Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll -Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll -Instrumentation/MemorySanitizer/AArch64/vararg.ll -Instrumentation/MemorySanitizer/AArch64/vararg_shadow.ll -Instrumentation/MemorySanitizer/abs-vector.ll -Instrumentation/MemorySanitizer/alloca.ll -Instrumentation/MemorySanitizer/ARM32/vararg-arm32.ll -Instrumentation/MemorySanitizer/array_types.ll -Instrumentation/MemorySanitizer/atomics.ll -Instrumentation/MemorySanitizer/attributes.ll -Instrumentation/MemorySanitizer/bitreverse.ll -Instrumentation/MemorySanitizer/bmi.ll -Instrumentation/MemorySanitizer/byval-alignment.ll -Instrumentation/MemorySanitizer/byval.ll -Instrumentation/MemorySanitizer/check_access_address.ll -Instrumentation/MemorySanitizer/check-array.ll -Instrumentation/MemorySanitizer/check-constant-shadow.ll -Instrumentation/MemorySanitizer/check-struct.ll -Instrumentation/MemorySanitizer/clmul.ll -Instrumentation/MemorySanitizer/count-zeroes.ll -Instrumentation/MemorySanitizer/csr.ll -Instrumentation/MemorySanitizer/disambiguate-origin.ll -Instrumentation/MemorySanitizer/expand-experimental-reductions.ll -Instrumentation/MemorySanitizer/freeze.ll -Instrumentation/MemorySanitizer/funnel_shift.ll -Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll -Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll -Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll -Instrumentation/MemorySanitizer/i386/msan_i386intrinsics.ll -Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll 
-Instrumentation/MemorySanitizer/i386/sse41-intrinsics-i386.ll -Instrumentation/MemorySanitizer/i386/sse-intrinsics-i386.ll -Instrumentation/MemorySanitizer/i386/vararg_call.ll -Instrumentation/MemorySanitizer/i386/vararg.ll -Instrumentation/MemorySanitizer/i386/vararg_shadow.ll -Instrumentation/MemorySanitizer/i386/vararg-too-large.ll -Instrumentation/MemorySanitizer/instrumentation-with-call-threshold.ll -Instrumentation/MemorySanitizer/invalidate_global_aa.ll -Instrumentation/MemorySanitizer/is-fpclass.ll -Instrumentation/MemorySanitizer/libatomic.ll -Instrumentation/MemorySanitizer/LoongArch/vararg.ll -Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll -Instrumentation/MemorySanitizer/manual-shadow.ll -Instrumentation/MemorySanitizer/masked-store-load.ll -Instrumentation/MemorySanitizer/Mips32/vararg-mipsel.ll -Instrumentation/MemorySanitizer/Mips32/vararg-mips.ll -Instrumentation/MemorySanitizer/Mips/vararg-mips64el.ll -Instrumentation/MemorySanitizer/Mips/vararg-mips64.ll -Instrumentation/MemorySanitizer/missing_origin.ll -Instrumentation/MemorySanitizer/msan_asm_conservative.ll -Instrumentation/MemorySanitizer/msan_basic.ll -Instrumentation/MemorySanitizer/msan_debug_info.ll -Instrumentation/MemorySanitizer/msan-disable-checks.ll -Instrumentation/MemorySanitizer/msan_eager.ll -Instrumentation/MemorySanitizer/msan_invalidate.ll -Instrumentation/MemorySanitizer/msan_llvm_is_constant.ll -Instrumentation/MemorySanitizer/msan_llvm_launder_invariant.ll -Instrumentation/MemorySanitizer/msan_llvm_strip_invariant.ll -Instrumentation/MemorySanitizer/msan-pass-second-run.ll -Instrumentation/MemorySanitizer/mul_by_constant.ll -Instrumentation/MemorySanitizer/no-check-rt-unaligned.ll -Instrumentation/MemorySanitizer/nosanitize.ll -Instrumentation/MemorySanitizer/opaque-ptr.ll -Instrumentation/MemorySanitizer/origin-alignment.ll -Instrumentation/MemorySanitizer/origin-array.ll -Instrumentation/MemorySanitizer/or.ll -Instrumentation/MemorySanitizer/overflow.ll 
-Instrumentation/MemorySanitizer/partial-poison.ll -Instrumentation/MemorySanitizer/PowerPC32/vararg-ppcle.ll -Instrumentation/MemorySanitizer/PowerPC32/vararg-ppc.ll -Instrumentation/MemorySanitizer/PowerPC/vararg-ppc64le.ll -Instrumentation/MemorySanitizer/PowerPC/vararg-ppc64.ll -Instrumentation/MemorySanitizer/pr32842.ll -Instrumentation/MemorySanitizer/reduce.ll -Instrumentation/MemorySanitizer/return_from_main.ll -Instrumentation/MemorySanitizer/RISCV32/vararg-riscv32.ll -Instrumentation/MemorySanitizer/saturating.ll -Instrumentation/MemorySanitizer/scmp.ll -Instrumentation/MemorySanitizer/stable_set_alloca_origin.ll -Instrumentation/MemorySanitizer/store-long-origin.ll -Instrumentation/MemorySanitizer/store-origin.ll -Instrumentation/MemorySanitizer/str-nobuiltin.ll -Instrumentation/MemorySanitizer/SystemZ/vararg.ll -Instrumentation/MemorySanitizer/ucmp.ll -Instrumentation/MemorySanitizer/unreachable.ll -Instrumentation/MemorySanitizer/unsized_type.ll -Instrumentation/MemorySanitizer/vector_arith.ll -Instrumentation/MemorySanitizer/vector_cmp.ll -Instrumentation/MemorySanitizer/vector_cvt.ll -Instrumentation/MemorySanitizer/vector-load-store.ll -Instrumentation/MemorySanitizer/vector_pack.ll -Instrumentation/MemorySanitizer/vector-reduce-fadd.ll -Instrumentation/MemorySanitizer/vector-reduce-fmul.ll -Instrumentation/MemorySanitizer/vector_shift.ll -Instrumentation/MemorySanitizer/vector-track-origins-neon.ll -Instrumentation/MemorySanitizer/vector-track-origins-struct.ll -Instrumentation/MemorySanitizer/vscale.ll -Instrumentation/MemorySanitizer/with-call-type-size.ll -Instrumentation/MemorySanitizer/X86/avx10_2_512ni-intrinsics.ll -Instrumentation/MemorySanitizer/X86/avx10_2ni-intrinsics.ll -Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll -Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll -Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll -Instrumentation/MemorySanitizer/X86/avx512fp16-arith-intrinsics.ll 
-Instrumentation/MemorySanitizer/X86/avx512fp16-arith-vl-intrinsics.ll -Instrumentation/MemorySanitizer/X86/avx512fp16-intrinsics.ll -Instrumentation/MemorySanitizer/X86/avx512-gfni-intrinsics.ll -Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll -Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll -Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll -Instrumentation/MemorySanitizer/X86/avx512vl_vnni-intrinsics.ll -Instrumentation/MemorySanitizer/X86/avx512vl_vnni-intrinsics-upgrade.ll -Instrumentation/MemorySanitizer/X86/avx512vnni-intrinsics.ll -Instrumentation/MemorySanitizer/X86/avx512vnni-intrinsics-upgrade.ll -Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll -Instrumentation/MemorySanitizer/X86/avxvnniint16-intrinsics.ll -Instrumentation/MemorySanitizer/X86/avxvnniint8-intrinsics.ll -Instrumentation/MemorySanitizer/X86/avx_vnni-intrinsics.ll -Instrumentation/MemorySanitizer/X86/f16c-intrinsics.ll -Instrumentation/MemorySanitizer/X86/f16c-intrinsics-upgrade.ll -Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll -Instrumentation/MemorySanitizer/X86/msan_x86intrinsics.ll -Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll -Instrumentation/MemorySanitizer/X86/sse41-intrinsics-x86.ll -Instrumentation/MemorySanitizer/X86/sse-intrinsics-x86.ll -Instrumentation/MemorySanitizer/X86/vararg_call.ll -Instrumentation/MemorySanitizer/X86/vararg_shadow.ll -Instrumentation/MemorySanitizer/X86/vararg-too-large.ll -Instrumentation/MemorySanitizer/X86/x86-vpermi2.ll -Instrumentation/NumericalStabilitySanitizer/basic.ll -Instrumentation/NumericalStabilitySanitizer/cfg.ll -Instrumentation/NumericalStabilitySanitizer/invoke.ll -Instrumentation/NumericalStabilitySanitizer/memory.ll -Instrumentation/NumericalStabilitySanitizer/non_float_store.ll -Instrumentation/NumericalStabilitySanitizer/scalable_vector.ll -Instrumentation/RealtimeSanitizer/rtsan_blocking.ll -Instrumentation/RealtimeSanitizer/rtsan.ll 
-Instrumentation/RealtimeSanitizer/rtsan_multi_return.ll -Instrumentation/SanitizerBinaryMetadata/atomics.ll -Instrumentation/SanitizerBinaryMetadata/ctor.ll -Instrumentation/SanitizerBinaryMetadata/pretend-atomic-access.ll -Instrumentation/SanitizerBinaryMetadata/shared-mutable.ll -Instrumentation/SanitizerCoverage/backedge-pruning.ll -Instrumentation/SanitizerCoverage/cmp-tracing-api-x86_32.ll -Instrumentation/SanitizerCoverage/cmp-tracing-api-x86_64.ll -Instrumentation/SanitizerCoverage/cmp-tracing.ll -Instrumentation/SanitizerCoverage/coff-comdat.ll -Instrumentation/SanitizerCoverage/coff-pc-table-inline-8bit-counters.ll -Instrumentation/SanitizerCoverage/coff-pc-table-inline-bool-flag.ll -Instrumentation/SanitizerCoverage/coff-used-ctor.ll -Instrumentation/SanitizerCoverage/const-cmp-tracing.ll -Instrumentation/SanitizerCoverage/control-flow.ll -Instrumentation/SanitizerCoverage/coverage2-dbg.ll -Instrumentation/SanitizerCoverage/coverage-dbg.ll -Instrumentation/SanitizerCoverage/coverage-disable-sanitizer-instrumentation.ll -Instrumentation/SanitizerCoverage/coverage.ll -Instrumentation/SanitizerCoverage/div-tracing.ll -Instrumentation/SanitizerCoverage/gep-tracing.ll -Instrumentation/SanitizerCoverage/inline-8bit-counters.ll -Instrumentation/SanitizerCoverage/inline-bool-flag.ll -Instrumentation/SanitizerCoverage/interposable-symbol.ll -Instrumentation/SanitizerCoverage/missing_dbg.ll -Instrumentation/SanitizerCoverage/opaque-ptr.ll -Instrumentation/SanitizerCoverage/pc-table.ll -Instrumentation/SanitizerCoverage/seh.ll -Instrumentation/SanitizerCoverage/stack-depth.ll -Instrumentation/SanitizerCoverage/switch-tracing.ll -Instrumentation/SanitizerCoverage/trace-pc-guard-inline-8bit-counters.ll -Instrumentation/SanitizerCoverage/trace-pc-guard-inline-bool-flag.ll -Instrumentation/SanitizerCoverage/trace-pc-guard.ll -Instrumentation/SanitizerCoverage/tracing-comdat.ll -Instrumentation/SanitizerCoverage/tracing.ll 
-Instrumentation/SanitizerCoverage/unreachable-critedge.ll -Instrumentation/sanitizers-naked.ll -Instrumentation/ThreadSanitizer/do-not-instrument-memory-access.ll -Instrumentation/ThreadSanitizer/tsan_basic.ll -Instrumentation/ThreadSanitizer/tsan-pass-second-run.ll -Instrumentation/TypeSanitizer/access-with-offset.ll -Instrumentation/TypeSanitizer/alloca.ll -Instrumentation/TypeSanitizer/alloca-only.ll -Instrumentation/TypeSanitizer/anon.ll -Instrumentation/TypeSanitizer/basic.ll -Instrumentation/TypeSanitizer/basic-nosan.ll -Instrumentation/TypeSanitizer/basic_outlined.ll -Instrumentation/TypeSanitizer/basic_verify_outlined.ll -Instrumentation/TypeSanitizer/byval.ll -Instrumentation/TypeSanitizer/globals.ll -Instrumentation/TypeSanitizer/globals_outlined.ll -Instrumentation/TypeSanitizer/invalid-metadata.ll -Instrumentation/TypeSanitizer/memintrinsics.ll -Instrumentation/TypeSanitizer/nosanitize.ll -Instrumentation/TypeSanitizer/sanitize-no-tbaa.ll -Instrumentation/TypeSanitizer/swifterror.ll LTO/X86/diagnostic-handler-remarks-with-hotness.ll Other/optimization-remarks-auto.ll Other/X86/debugcounter-partiallyinlinelibcalls.ll diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index ad013035251f5..bb35815a18d71 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -693,6 +693,8 @@ cc_binary( "utils/TableGen/Basic/DirectiveEmitter.cpp", "utils/TableGen/Basic/IntrinsicEmitter.cpp", "utils/TableGen/Basic/RISCVTargetDefEmitter.cpp", + "utils/TableGen/Basic/RuntimeLibcalls.cpp", + "utils/TableGen/Basic/RuntimeLibcalls.h", "utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp", "utils/TableGen/Basic/SDNodeProperties.cpp", "utils/TableGen/Basic/SDNodeProperties.h",