diff --git a/clang/Maintainers.rst b/clang/Maintainers.rst index 847d37d124083..8a5a7ed7c2a41 100644 --- a/clang/Maintainers.rst +++ b/clang/Maintainers.rst @@ -147,7 +147,6 @@ Clang static analyzer | Balázs Benics | benicsbalazs\@gmail.com (email), steakhal (Phabricator), steakhal (GitHub) -| balazs.benics\@sonarsource.com (email), balazs-benics-sonarsource (GitHub) Compiler options ~~~~~~~~~~~~~~~~ diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst index fd0b304cba0df..31edf9e99dc7d 100644 --- a/clang/docs/analyzer/checkers.rst +++ b/clang/docs/analyzer/checkers.rst @@ -198,7 +198,7 @@ as error. Specifically on x86/x86-64 target if the pointer address space is 256 (x86 GS Segment), 257 (x86 FS Segment), 258 (x86 SS Segment) then the dereference is not defined as error. See `X86/X86-64 Language Extensions `__ for reference. - + If the analyzer option ``suppress-dereferences-from-any-address-space`` is set to true (the default value), then this checker never reports dereference of pointers with a specified address space. If the option is set to false, then @@ -1664,6 +1664,23 @@ Warn on uses of the 'bzero' function. bzero(ptr, n); // warn } +.. _security-insecureAPI-decodeValueOfObjCType: + +security.insecureAPI.decodeValueOfObjCType (C) +"""""""""""""""""""""""""""""""""""""""""""""" +Warn on uses of the Objective-C method ``-decodeValueOfObjCType:at:``. + +.. code-block:: objc + + void test(NSCoder *decoder) { + unsigned int x; + [decoder decodeValueOfObjCType:"I" at:&x]; // warn + } + +This diagnostic is emitted only on Apple platforms where the safer +``-decodeValueOfObjCType:at:size:`` alternative is available +(iOS 11+, macOS 10.13+, tvOS 11+, watchOS 4.0+). + .. _security-insecureAPI-getpw: security.insecureAPI.getpw (C) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index bbe0aa3657c06..a656fe341c8e0 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -92,8 +92,8 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { def cmpsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant char)">; } - - let Features = "sse3" in { + let Features = "sse3", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { foreach Op = ["addsub"] in { def Op#ps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">; def Op#pd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">; @@ -121,8 +121,9 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { } // AVX -let Attributes = [Const, NoThrow, RequiredVectorWidth<256>], Features = "avx" in { - foreach Op = ["addsub", "max", "min"] in { +let Attributes = [Const, NoThrow, RequiredVectorWidth<256>], + Features = "avx" in { + foreach Op = ["max", "min"] in { def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">; def Op#ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">; } @@ -571,6 +572,15 @@ let Features = "avx", def movmskps256 : X86Builtin<"int(_Vector<8, float>)">; } +let Features = "avx", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def addsubpd256 + : X86Builtin< + "_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">; + def addsubps256 + : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">; +} + let Features = "avx", Attributes = [NoThrow] in { def vzeroall : X86Builtin<"void()">; def vzeroupper : X86Builtin<"void()">; } diff --git
a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index a2f99c7c234fe..30426565407ba 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2715,6 +2715,35 @@ static bool interp__builtin_horizontal_fp_binop( return true; } +static bool interp__builtin_ia32_addsub(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + // Addsub: alternates between subtraction and addition + // Result[i] = (i % 2 == 0) ? (a[i] - b[i]) : (a[i] + b[i]) + const Pointer &RHS = S.Stk.pop<Pointer>(); + const Pointer &LHS = S.Stk.pop<Pointer>(); + const Pointer &Dst = S.Stk.peek<Pointer>(); + FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts()); + llvm::RoundingMode RM = getRoundingMode(FPO); + const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>(); + unsigned NumElems = VT->getNumElements(); + + using T = PrimConv<PT_Float>::T; + for (unsigned I = 0; I != NumElems; ++I) { + APFloat LElem = LHS.elem<T>(I).getAPFloat(); + APFloat RElem = RHS.elem<T>(I).getAPFloat(); + if (I % 2 == 0) { + // Even indices: subtract + LElem.subtract(RElem, RM); + } else { + // Odd indices: add + LElem.add(RElem, RM); + } + Dst.elem<T>(I) = static_cast<T>(LElem); + } + Dst.initializeAllElements(); + return true; +} + static bool interp__builtin_elementwise_triop_fp( InterpState &S, CodePtr OpPC, const CallExpr *Call, llvm::function_refgetArg(0), SourceLHS) || + !EvaluateAsRValue(Info, E->getArg(1), SourceRHS)) + return false; + unsigned NumElems = SourceLHS.getVectorLength(); + SmallVector<APValue, 4> ResultElements; + ResultElements.reserve(NumElems); + llvm::RoundingMode RM = getActiveRoundingMode(getEvalInfo(), E); + + for (unsigned I = 0; I != NumElems; ++I) { + APFloat LHS = SourceLHS.getVectorElt(I).getFloat(); + APFloat RHS = SourceRHS.getVectorElt(I).getFloat(); + if (I % 2 == 0) { + // Even indices: subtract + LHS.subtract(RHS, RM); + } else { + // Odd indices: add + LHS.add(RHS, RM); + } + ResultElements.push_back(APValue(LHS)); + } + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } case Builtin::BI__builtin_elementwise_fshl: case Builtin::BI__builtin_elementwise_fshr: { APValue SourceHi, SourceLo, SourceShift; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 4460f20088414..16074215e8275 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2727,6 +2727,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_acosf16: case Builtin::BI__builtin_acosl: case Builtin::BI__builtin_acosf128: + case Builtin::BI__builtin_elementwise_acos: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( *this, E, Intrinsic::acos, Intrinsic::experimental_constrained_acos)); @@ -2738,6 +2739,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_asinf16: case Builtin::BI__builtin_asinl: case Builtin::BI__builtin_asinf128: + case Builtin::BI__builtin_elementwise_asin: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( *this, E, Intrinsic::asin, Intrinsic::experimental_constrained_asin)); @@ -2749,6 +2751,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_atanf16: case Builtin::BI__builtin_atanl: case Builtin::BI__builtin_atanf128: + case Builtin::BI__builtin_elementwise_atan: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( *this, E, Intrinsic::atan, Intrinsic::experimental_constrained_atan)); @@ -2760,6 +2763,7 @@ RValue
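For reference, both constant evaluators above implement the same alternating rule, Result[i] = (i % 2 == 0) ? a[i] - b[i] : a[i] + b[i]. Below is a minimal standalone C++ model of that rule, useful for sanity-checking the TEST_CONSTEXPR vectors added later in this patch; vec4d and addsub_model are illustrative names, not part of the patch:

#include <array>
#include <cassert>

using vec4d = std::array<double, 4>;

// Scalar model of the addsub builtins: even lanes subtract, odd lanes add.
vec4d addsub_model(const vec4d &a, const vec4d &b) {
  vec4d r{};
  for (unsigned i = 0; i != 4; ++i)
    r[i] = (i % 2 == 0) ? a[i] - b[i]  // even index: subtract
                        : a[i] + b[i]; // odd index: add
  return r;
}

int main() {
  // Same operands as the TEST_CONSTEXPR case added to avx-builtins.c below.
  assert(addsub_model({1.0, 2.0, 3.0, 4.0}, {1.0, 1.0, 1.0, 1.0}) ==
         (vec4d{0.0, 3.0, 2.0, 5.0}));
}

The subtract-first ordering matches the hardware addsubps/addsubpd instructions, which is why the even-index branch subtracts.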
CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_atan2f16: case Builtin::BI__builtin_atan2l: case Builtin::BI__builtin_atan2f128: + case Builtin::BI__builtin_elementwise_atan2: return RValue::get(emitBinaryMaybeConstrainedFPBuiltin( *this, E, Intrinsic::atan2, Intrinsic::experimental_constrained_atan2)); @@ -2772,6 +2776,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_ceilf16: case Builtin::BI__builtin_ceill: case Builtin::BI__builtin_ceilf128: + case Builtin::BI__builtin_elementwise_ceil: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::ceil, Intrinsic::experimental_constrained_ceil)); @@ -2795,6 +2800,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_cosf16: case Builtin::BI__builtin_cosl: case Builtin::BI__builtin_cosf128: + case Builtin::BI__builtin_elementwise_cos: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::cos, Intrinsic::experimental_constrained_cos)); @@ -2807,6 +2813,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_coshf16: case Builtin::BI__builtin_coshl: case Builtin::BI__builtin_coshf128: + case Builtin::BI__builtin_elementwise_cosh: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( *this, E, Intrinsic::cosh, Intrinsic::experimental_constrained_cosh)); @@ -2818,6 +2825,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_expf16: case Builtin::BI__builtin_expl: case Builtin::BI__builtin_expf128: + case Builtin::BI__builtin_elementwise_exp: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::exp, Intrinsic::experimental_constrained_exp)); @@ -2830,6 +2838,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_exp2f16: case Builtin::BI__builtin_exp2l: case Builtin::BI__builtin_exp2f128: + case Builtin::BI__builtin_elementwise_exp2: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::exp2, Intrinsic::experimental_constrained_exp2)); @@ -2837,7 +2846,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_exp10f: case Builtin::BI__builtin_exp10f16: case Builtin::BI__builtin_exp10l: - case Builtin::BI__builtin_exp10f128: { + case Builtin::BI__builtin_exp10f128: + case Builtin::BI__builtin_elementwise_exp10: { // TODO: strictfp support if (Builder.getIsFPConstrained()) break; @@ -2863,6 +2873,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_floorf16: case Builtin::BI__builtin_floorl: case Builtin::BI__builtin_floorf128: + case Builtin::BI__builtin_elementwise_floor: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::floor, Intrinsic::experimental_constrained_floor)); @@ -2875,6 +2886,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_fmaf16: case Builtin::BI__builtin_fmal: case Builtin::BI__builtin_fmaf128: + case Builtin::BI__builtin_elementwise_fma: return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::fma, Intrinsic::experimental_constrained_fma)); @@ -2939,7 +2951,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, 
E); Value *Arg1 = EmitScalarExpr(E->getArg(0)); Value *Arg2 = EmitScalarExpr(E->getArg(1)); - return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod")); + if (Builder.getIsFPConstrained()) { + Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_frem, + Arg1->getType()); + return RValue::get(Builder.CreateConstrainedFPCall(F, {Arg1, Arg2})); + } else { + return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod")); + } } case Builtin::BIlog: @@ -2950,6 +2968,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_logf16: case Builtin::BI__builtin_logl: case Builtin::BI__builtin_logf128: + case Builtin::BI__builtin_elementwise_log: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::log, Intrinsic::experimental_constrained_log)); @@ -2962,6 +2981,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_log10f16: case Builtin::BI__builtin_log10l: case Builtin::BI__builtin_log10f128: + case Builtin::BI__builtin_elementwise_log10: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::log10, Intrinsic::experimental_constrained_log10)); @@ -2974,6 +2994,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_log2f16: case Builtin::BI__builtin_log2l: case Builtin::BI__builtin_log2f128: + case Builtin::BI__builtin_elementwise_log2: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::log2, Intrinsic::experimental_constrained_log2)); @@ -2985,6 +3006,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_nearbyintf: case Builtin::BI__builtin_nearbyintl: case Builtin::BI__builtin_nearbyintf128: + case Builtin::BI__builtin_elementwise_nearbyint: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::nearbyint, Intrinsic::experimental_constrained_nearbyint)); @@ -2997,6 +3019,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_powf16: case Builtin::BI__builtin_powl: case Builtin::BI__builtin_powf128: + case Builtin::BI__builtin_elementwise_pow: return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::pow, Intrinsic::experimental_constrained_pow)); @@ -3009,6 +3032,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_rintf16: case Builtin::BI__builtin_rintl: case Builtin::BI__builtin_rintf128: + case Builtin::BI__builtin_elementwise_rint: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::rint, Intrinsic::experimental_constrained_rint)); @@ -3021,6 +3045,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_roundf16: case Builtin::BI__builtin_roundl: case Builtin::BI__builtin_roundf128: + case Builtin::BI__builtin_elementwise_round: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::round, Intrinsic::experimental_constrained_round)); @@ -3033,6 +3058,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_roundevenf16: case Builtin::BI__builtin_roundevenl: case Builtin::BI__builtin_roundevenf128: + case Builtin::BI__builtin_elementwise_roundeven: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::roundeven, Intrinsic::experimental_constrained_roundeven)); @@ -3045,6 
+3071,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_sinf16: case Builtin::BI__builtin_sinl: case Builtin::BI__builtin_sinf128: + case Builtin::BI__builtin_elementwise_sin: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::sin, Intrinsic::experimental_constrained_sin)); @@ -3057,6 +3084,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_sinhf16: case Builtin::BI__builtin_sinhl: case Builtin::BI__builtin_sinhf128: + case Builtin::BI__builtin_elementwise_sinh: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( *this, E, Intrinsic::sinh, Intrinsic::experimental_constrained_sinh)); @@ -3104,6 +3132,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_tanf16: case Builtin::BI__builtin_tanl: case Builtin::BI__builtin_tanf128: + case Builtin::BI__builtin_elementwise_tan: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( *this, E, Intrinsic::tan, Intrinsic::experimental_constrained_tan)); @@ -3115,6 +3144,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_tanhf16: case Builtin::BI__builtin_tanhl: case Builtin::BI__builtin_tanhf128: + case Builtin::BI__builtin_elementwise_tanh: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( *this, E, Intrinsic::tanh, Intrinsic::experimental_constrained_tanh)); @@ -3126,6 +3156,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_truncf16: case Builtin::BI__builtin_truncl: case Builtin::BI__builtin_truncf128: + case Builtin::BI__builtin_elementwise_trunc: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::trunc, Intrinsic::experimental_constrained_trunc)); @@ -3177,11 +3208,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_ldexpf: case Builtin::BI__builtin_ldexpl: case Builtin::BI__builtin_ldexpf16: - case Builtin::BI__builtin_ldexpf128: { + case Builtin::BI__builtin_ldexpf128: + case Builtin::BI__builtin_elementwise_ldexp: return RValue::get(emitBinaryExpMaybeConstrainedFPBuiltin( *this, E, Intrinsic::ldexp, Intrinsic::experimental_constrained_ldexp)); - } default: break; } @@ -3977,100 +4008,18 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(Result); } - case Builtin::BI__builtin_elementwise_acos: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::acos, "elt.acos")); - case Builtin::BI__builtin_elementwise_asin: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::asin, "elt.asin")); - case Builtin::BI__builtin_elementwise_atan: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::atan, "elt.atan")); - case Builtin::BI__builtin_elementwise_atan2: - return RValue::get(emitBuiltinWithOneOverloadedType<2>( - *this, E, Intrinsic::atan2, "elt.atan2")); - case Builtin::BI__builtin_elementwise_ceil: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::ceil, "elt.ceil")); - case Builtin::BI__builtin_elementwise_exp: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::exp, "elt.exp")); - case Builtin::BI__builtin_elementwise_exp2: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::exp2, "elt.exp2")); - case 
Builtin::BI__builtin_elementwise_exp10: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::exp10, "elt.exp10")); - case Builtin::BI__builtin_elementwise_ldexp: { - Value *Src = EmitScalarExpr(E->getArg(0)); - Value *Exp = EmitScalarExpr(E->getArg(1)); - Value *Result = Builder.CreateLdexp(Src, Exp, {}, "elt.ldexp"); - return RValue::get(Result); - } - case Builtin::BI__builtin_elementwise_log: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::log, "elt.log")); - case Builtin::BI__builtin_elementwise_log2: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::log2, "elt.log2")); - case Builtin::BI__builtin_elementwise_log10: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::log10, "elt.log10")); - case Builtin::BI__builtin_elementwise_pow: { - return RValue::get( - emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::pow)); - } case Builtin::BI__builtin_elementwise_bitreverse: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, Intrinsic::bitreverse, "elt.bitreverse")); - case Builtin::BI__builtin_elementwise_cos: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::cos, "elt.cos")); - case Builtin::BI__builtin_elementwise_cosh: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::cosh, "elt.cosh")); - case Builtin::BI__builtin_elementwise_floor: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::floor, "elt.floor")); case Builtin::BI__builtin_elementwise_popcount: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, Intrinsic::ctpop, "elt.ctpop")); - case Builtin::BI__builtin_elementwise_roundeven: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::roundeven, "elt.roundeven")); - case Builtin::BI__builtin_elementwise_round: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::round, "elt.round")); - case Builtin::BI__builtin_elementwise_rint: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::rint, "elt.rint")); - case Builtin::BI__builtin_elementwise_nearbyint: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::nearbyint, "elt.nearbyint")); - case Builtin::BI__builtin_elementwise_sin: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::sin, "elt.sin")); - case Builtin::BI__builtin_elementwise_sinh: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::sinh, "elt.sinh")); - case Builtin::BI__builtin_elementwise_tan: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::tan, "elt.tan")); - case Builtin::BI__builtin_elementwise_tanh: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::tanh, "elt.tanh")); - case Builtin::BI__builtin_elementwise_trunc: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::trunc, "elt.trunc")); case Builtin::BI__builtin_elementwise_canonicalize: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, Intrinsic::canonicalize, "elt.canonicalize")); case Builtin::BI__builtin_elementwise_copysign: return RValue::get( emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::copysign)); - case Builtin::BI__builtin_elementwise_fma: - return RValue::get( - emitBuiltinWithOneOverloadedType<3>(*this, E, Intrinsic::fma)); case Builtin::BI__builtin_elementwise_fshl: return 
RValue::get( emitBuiltinWithOneOverloadedType<3>(*this, E, Intrinsic::fshl)); diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 4aef9245323fb..3e1618ed192c8 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -147,9 +147,8 @@ static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_sub_ps(__m256 __a, /// A 256-bit vector of [4 x double] containing the right source operand. /// \returns A 256-bit vector of [4 x double] containing the alternating sums /// and differences between both operands. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_addsub_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_addsub_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b); } @@ -166,9 +165,8 @@ _mm256_addsub_pd(__m256d __a, __m256d __b) /// A 256-bit vector of [8 x float] containing the right source operand. /// \returns A 256-bit vector of [8 x float] containing the alternating sums and /// differences between both operands. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_addsub_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_addsub_ps(__m256 __a, __m256 __b) { return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b); } diff --git a/clang/lib/Headers/pmmintrin.h b/clang/lib/Headers/pmmintrin.h index 6b152bde29fc1..a9a65440363c3 100644 --- a/clang/lib/Headers/pmmintrin.h +++ b/clang/lib/Headers/pmmintrin.h @@ -60,9 +60,8 @@ _mm_lddqu_si128(__m128i_u const *__p) /// A 128-bit vector of [4 x float] containing the right source operand. /// \returns A 128-bit vector of [4 x float] containing the alternating sums and /// differences of both operands. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_addsub_ps(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_addsub_ps(__m128 __a, __m128 __b) { return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b); } @@ -166,7 +165,7 @@ _mm_moveldup_ps(__m128 __a) /// A 128-bit vector of [2 x double] containing the right source operand. /// \returns A 128-bit vector of [2 x double] containing the alternating sums /// and differences of both operands. 
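Because the builtins are now marked Constexpr in BuiltinsX86.td and the wrappers in avxintrin.h and pmmintrin.h switch to __DEFAULT_FN_ATTRS_CONSTEXPR, the intrinsics become usable in constant expressions. A hedged sketch of what this enables (assumes a recent clang in C++ mode with SSE3 enabled; illustrative only, not taken from the patch):

#include <pmmintrin.h>

// With the constexpr-enabled builtin, the result can be checked at compile
// time; operands mirror the TEST_CONSTEXPR case added to sse3-builtins.c.
constexpr __m128d a = {2.0, 2.0};
constexpr __m128d b = {1.0, 2.0};
constexpr __m128d r = _mm_addsub_pd(a, b);
static_assert(r[0] == 1.0, "even lane: 2.0 - 1.0");
static_assert(r[1] == 4.0, "odd lane: 2.0 + 2.0");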
-static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_addsub_pd(__m128d __a, __m128d __b) { return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b); } diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index 737febbc7fef6..46bc28b85d8db 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -45,12 +45,14 @@ __m256d test_mm256_addsub_pd(__m256d A, __m256d B) { // CHECK: call {{.*}}<4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_addsub_pd(A, B); } +TEST_CONSTEXPR(match_m256d(_mm256_addsub_pd((__m256d){+1.0, +2.0, +3.0, +4.0}, (__m256d){+1.0, +1.0, +1.0, +1.0}), +0.0, +3.0, +2.0, +5.0)); __m256 test_mm256_addsub_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_addsub_ps // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}) return _mm256_addsub_ps(A, B); } +TEST_CONSTEXPR(match_m256(_mm256_addsub_ps((__m256){+1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f}, (__m256){+1.0f, +1.0f, +1.0f, +1.0f, +1.0f, +1.0f, +1.0f, +1.0f}), +0.0f, +3.0f, +2.0f, +5.0f, +4.0f, +7.0f, +6.0f, +9.0f)); __m256d test_mm256_and_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_and_pd diff --git a/clang/test/CodeGen/X86/sse3-builtins.c b/clang/test/CodeGen/X86/sse3-builtins.c index a82dd4080670b..44389fbdc6f77 100644 --- a/clang/test/CodeGen/X86/sse3-builtins.c +++ b/clang/test/CodeGen/X86/sse3-builtins.c @@ -19,12 +19,14 @@ __m128d test_mm_addsub_pd(__m128d A, __m128d B) { // CHECK: call {{.*}}<2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_addsub_pd(A, B); } +TEST_CONSTEXPR(match_m128d(_mm_addsub_pd((__m128d){+2.0, +2.0}, (__m128d){+1.0, +2.0}), +1.0, +4.0)); __m128 test_mm_addsub_ps(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_addsub_ps // CHECK: call {{.*}}<4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}) return _mm_addsub_ps(A, B); } +TEST_CONSTEXPR(match_m128(_mm_addsub_ps((__m128){+3.0f, +4.0f, +5.0f, +6.0f}, (__m128){+1.0f, +2.0f, +3.0f, +4.0f}), +2.0f, +6.0f, +2.0f, +10.0f)); __m128d test_mm_hadd_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_hadd_pd diff --git a/clang/test/CodeGen/strictfp-elementwise-builtins.cpp b/clang/test/CodeGen/strictfp-elementwise-builtins.cpp index b250512efc5c7..6453d50f044aa 100644 --- a/clang/test/CodeGen/strictfp-elementwise-builtins.cpp +++ b/clang/test/CodeGen/strictfp-elementwise-builtins.cpp @@ -68,180 +68,170 @@ float4 strict_elementwise_minimum(float4 a, float4 b) { } // CHECK-LABEL: define dso_local noundef <4 x float> @_Z23strict_elementwise_ceilDv4_f -// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_CEIL:%.*]] = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> [[A]]) #[[ATTR4]] -// CHECK-NEXT: ret <4 x float> [[ELT_CEIL]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.experimental.constrained.ceil.v4f32(<4 x float> [[A]], metadata !"fpexcept.strict") #[[ATTR4]] +// CHECK-NEXT: ret <4 x float> [[TMP0]] // float4 strict_elementwise_ceil(float4 a) { return __builtin_elementwise_ceil(a); } // CHECK-LABEL: define dso_local noundef <4 x float> @_Z23strict_elementwise_acosDv4_f -// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-SAME: (<4 x float> 
noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_ACOS:%.*]] = tail call <4 x float> @llvm.acos.v4f32(<4 x float> [[A]]) #[[ATTR4]] -// CHECK-NEXT: ret <4 x float> [[ELT_ACOS]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.experimental.constrained.acos.v4f32(<4 x float> [[A]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// CHECK-NEXT: ret <4 x float> [[TMP0]] // float4 strict_elementwise_acos(float4 a) { return __builtin_elementwise_acos(a); } // CHECK-LABEL: define dso_local noundef <4 x float> @_Z22strict_elementwise_cosDv4_f -// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_COS:%.*]] = tail call <4 x float> @llvm.cos.v4f32(<4 x float> [[A]]) #[[ATTR4]] -// CHECK-NEXT: ret <4 x float> [[ELT_COS]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.experimental.constrained.cos.v4f32(<4 x float> [[A]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// CHECK-NEXT: ret <4 x float> [[TMP0]] // float4 strict_elementwise_cos(float4 a) { return __builtin_elementwise_cos(a); } // CHECK-LABEL: define dso_local noundef <4 x float> @_Z23strict_elementwise_coshDv4_f -// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_COSH:%.*]] = tail call <4 x float> @llvm.cosh.v4f32(<4 x float> [[A]]) #[[ATTR4]] -// CHECK-NEXT: ret <4 x float> [[ELT_COSH]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.experimental.constrained.cosh.v4f32(<4 x float> [[A]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// CHECK-NEXT: ret <4 x float> [[TMP0]] // float4 strict_elementwise_cosh(float4 a) { return __builtin_elementwise_cosh(a); } // CHECK-LABEL: define dso_local noundef <4 x float> @_Z22strict_elementwise_expDv4_f -// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_EXP:%.*]] = tail call <4 x float> @llvm.exp.v4f32(<4 x float> [[A]]) #[[ATTR4]] -// CHECK-NEXT: ret <4 x float> [[ELT_EXP]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.experimental.constrained.exp.v4f32(<4 x float> [[A]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// CHECK-NEXT: ret <4 x float> [[TMP0]] // float4 strict_elementwise_exp(float4 a) { return __builtin_elementwise_exp(a); } // CHECK-LABEL: define dso_local noundef <4 x float> @_Z23strict_elementwise_exp2Dv4_f -// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_EXP2:%.*]] = tail call <4 x float> @llvm.exp2.v4f32(<4 x float> [[A]]) #[[ATTR4]] -// CHECK-NEXT: ret <4 x float> [[ELT_EXP2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.experimental.constrained.exp2.v4f32(<4 x float> [[A]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// CHECK-NEXT: ret <4 x float> [[TMP0]] // float4 strict_elementwise_exp2(float4 a) { return __builtin_elementwise_exp2(a); } -// CHECK-LABEL: define dso_local noundef <4 x float> @_Z24strict_elementwise_exp10Dv4_f -// CHECK-SAME: (<4 x float> noundef 
[[A:%.*]]) local_unnamed_addr #[[ATTR2]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_EXP10:%.*]] = tail call <4 x float> @llvm.exp10.v4f32(<4 x float> [[A]]) #[[ATTR4]] -// CHECK-NEXT: ret <4 x float> [[ELT_EXP10]] -// -float4 strict_elementwise_exp10(float4 a) { - return __builtin_elementwise_exp10(a); -} - // CHECK-LABEL: define dso_local noundef <4 x float> @_Z24strict_elementwise_floorDv4_f -// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_FLOOR:%.*]] = tail call <4 x float> @llvm.floor.v4f32(<4 x float> [[A]]) #[[ATTR4]] -// CHECK-NEXT: ret <4 x float> [[ELT_FLOOR]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x float> [[A]], metadata !"fpexcept.strict") #[[ATTR4]] +// CHECK-NEXT: ret <4 x float> [[TMP0]] // float4 strict_elementwise_floor(float4 a) { return __builtin_elementwise_floor(a); } // CHECK-LABEL: define dso_local noundef <4 x float> @_Z22strict_elementwise_logDv4_f -// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_LOG:%.*]] = tail call <4 x float> @llvm.log.v4f32(<4 x float> [[A]]) #[[ATTR4]] -// CHECK-NEXT: ret <4 x float> [[ELT_LOG]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.experimental.constrained.log.v4f32(<4 x float> [[A]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// CHECK-NEXT: ret <4 x float> [[TMP0]] // float4 strict_elementwise_log(float4 a) { return __builtin_elementwise_log(a); } // CHECK-LABEL: define dso_local noundef <4 x float> @_Z23strict_elementwise_log2Dv4_f -// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_LOG2:%.*]] = tail call <4 x float> @llvm.log2.v4f32(<4 x float> [[A]]) #[[ATTR4]] -// CHECK-NEXT: ret <4 x float> [[ELT_LOG2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.experimental.constrained.log2.v4f32(<4 x float> [[A]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// CHECK-NEXT: ret <4 x float> [[TMP0]] // float4 strict_elementwise_log2(float4 a) { return __builtin_elementwise_log2(a); } // CHECK-LABEL: define dso_local noundef <4 x float> @_Z24strict_elementwise_log10Dv4_f -// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_LOG2:%.*]] = tail call <4 x float> @llvm.log2.v4f32(<4 x float> [[A]]) #[[ATTR4]] -// CHECK-NEXT: ret <4 x float> [[ELT_LOG2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.experimental.constrained.log2.v4f32(<4 x float> [[A]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// CHECK-NEXT: ret <4 x float> [[TMP0]] // float4 strict_elementwise_log10(float4 a) { return __builtin_elementwise_log2(a); } // CHECK-LABEL: define dso_local noundef <4 x float> @_Z28strict_elementwise_roundevenDv4_f -// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_ROUNDEVEN:%.*]] = tail call <4 x float> @llvm.roundeven.v4f32(<4 x 
float> [[A]]) #[[ATTR4]] -// CHECK-NEXT: ret <4 x float> [[ELT_ROUNDEVEN]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.experimental.constrained.roundeven.v4f32(<4 x float> [[A]], metadata !"fpexcept.strict") #[[ATTR4]] +// CHECK-NEXT: ret <4 x float> [[TMP0]] // float4 strict_elementwise_roundeven(float4 a) { return __builtin_elementwise_roundeven(a); } // CHECK-LABEL: define dso_local noundef <4 x float> @_Z24strict_elementwise_roundDv4_f -// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_ROUND:%.*]] = tail call <4 x float> @llvm.round.v4f32(<4 x float> [[A]]) #[[ATTR4]] -// CHECK-NEXT: ret <4 x float> [[ELT_ROUND]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.experimental.constrained.round.v4f32(<4 x float> [[A]], metadata !"fpexcept.strict") #[[ATTR4]] +// CHECK-NEXT: ret <4 x float> [[TMP0]] // float4 strict_elementwise_round(float4 a) { return __builtin_elementwise_round(a); } // CHECK-LABEL: define dso_local noundef <4 x float> @_Z23strict_elementwise_rintDv4_f -// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_RINT:%.*]] = tail call <4 x float> @llvm.rint.v4f32(<4 x float> [[A]]) #[[ATTR4]] -// CHECK-NEXT: ret <4 x float> [[ELT_RINT]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.experimental.constrained.rint.v4f32(<4 x float> [[A]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// CHECK-NEXT: ret <4 x float> [[TMP0]] // float4 strict_elementwise_rint(float4 a) { return __builtin_elementwise_rint(a); } // CHECK-LABEL: define dso_local noundef <4 x float> @_Z28strict_elementwise_nearbyintDv4_f -// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_NEARBYINT:%.*]] = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[A]]) #[[ATTR4]] -// CHECK-NEXT: ret <4 x float> [[ELT_NEARBYINT]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float> [[A]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// CHECK-NEXT: ret <4 x float> [[TMP0]] // float4 strict_elementwise_nearbyint(float4 a) { return __builtin_elementwise_nearbyint(a); } // CHECK-LABEL: define dso_local noundef <4 x float> @_Z23strict_elementwise_asinDv4_f -// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_ASIN:%.*]] = tail call <4 x float> @llvm.asin.v4f32(<4 x float> [[A]]) #[[ATTR4]] -// CHECK-NEXT: ret <4 x float> [[ELT_ASIN]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.experimental.constrained.asin.v4f32(<4 x float> [[A]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// CHECK-NEXT: ret <4 x float> [[TMP0]] // float4 strict_elementwise_asin(float4 a) { return __builtin_elementwise_asin(a); } // CHECK-LABEL: define dso_local noundef <4 x float> @_Z22strict_elementwise_sinDv4_f -// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: 
entry: -// CHECK-NEXT: [[ELT_SIN:%.*]] = tail call <4 x float> @llvm.sin.v4f32(<4 x float> [[A]]) #[[ATTR4]] -// CHECK-NEXT: ret <4 x float> [[ELT_SIN]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.experimental.constrained.sin.v4f32(<4 x float> [[A]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// CHECK-NEXT: ret <4 x float> [[TMP0]] // float4 strict_elementwise_sin(float4 a) { return __builtin_elementwise_sin(a); } // CHECK-LABEL: define dso_local noundef <4 x float> @_Z23strict_elementwise_sinhDv4_f -// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_SINH:%.*]] = tail call <4 x float> @llvm.sinh.v4f32(<4 x float> [[A]]) #[[ATTR4]] -// CHECK-NEXT: ret <4 x float> [[ELT_SINH]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.experimental.constrained.sinh.v4f32(<4 x float> [[A]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// CHECK-NEXT: ret <4 x float> [[TMP0]] // float4 strict_elementwise_sinh(float4 a) { return __builtin_elementwise_sinh(a); @@ -258,79 +248,59 @@ float4 strict_elementwise_sqrt(float4 a) { } // CHECK-LABEL: define dso_local noundef <4 x float> @_Z23strict_elementwise_atanDv4_f -// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_ATAN:%.*]] = tail call <4 x float> @llvm.atan.v4f32(<4 x float> [[A]]) #[[ATTR4]] -// CHECK-NEXT: ret <4 x float> [[ELT_ATAN]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.experimental.constrained.atan.v4f32(<4 x float> [[A]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// CHECK-NEXT: ret <4 x float> [[TMP0]] // float4 strict_elementwise_atan(float4 a) { return __builtin_elementwise_atan(a); } // CHECK-LABEL: define dso_local noundef <4 x float> @_Z22strict_elementwise_tanDv4_f -// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_TAN:%.*]] = tail call <4 x float> @llvm.tan.v4f32(<4 x float> [[A]]) #[[ATTR4]] -// CHECK-NEXT: ret <4 x float> [[ELT_TAN]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.experimental.constrained.tan.v4f32(<4 x float> [[A]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// CHECK-NEXT: ret <4 x float> [[TMP0]] // float4 strict_elementwise_tan(float4 a) { return __builtin_elementwise_tan(a); } // CHECK-LABEL: define dso_local noundef <4 x float> @_Z23strict_elementwise_tanhDv4_f -// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_TANH:%.*]] = tail call <4 x float> @llvm.tanh.v4f32(<4 x float> [[A]]) #[[ATTR4]] -// CHECK-NEXT: ret <4 x float> [[ELT_TANH]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.experimental.constrained.tanh.v4f32(<4 x float> [[A]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// CHECK-NEXT: ret <4 x float> [[TMP0]] // float4 strict_elementwise_tanh(float4 a) { return __builtin_elementwise_tanh(a); } // CHECK-LABEL: define dso_local noundef <4 x float> @_Z24strict_elementwise_atan2Dv4_fS_ -// CHECK-SAME: (<4 x float> noundef 
[[A:%.*]], <4 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_ATAN2:%.*]] = tail call <4 x float> @llvm.atan2.v4f32(<4 x float> [[A]], <4 x float> [[B]]) #[[ATTR4]] -// CHECK-NEXT: ret <4 x float> [[ELT_ATAN2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.experimental.constrained.atan2.v4f32(<4 x float> [[A]], <4 x float> [[B]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// CHECK-NEXT: ret <4 x float> [[TMP0]] // float4 strict_elementwise_atan2(float4 a, float4 b) { return __builtin_elementwise_atan2(a, b); } // CHECK-LABEL: define dso_local noundef <4 x float> @_Z24strict_elementwise_truncDv4_f -// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_TRUNC:%.*]] = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> [[A]]) #[[ATTR4]] -// CHECK-NEXT: ret <4 x float> [[ELT_TRUNC]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.experimental.constrained.trunc.v4f32(<4 x float> [[A]], metadata !"fpexcept.strict") #[[ATTR4]] +// CHECK-NEXT: ret <4 x float> [[TMP0]] // float4 strict_elementwise_trunc(float4 a) { return __builtin_elementwise_trunc(a); } -// CHECK-LABEL: define dso_local noundef <4 x float> @_Z31strict_elementwise_canonicalizeDv4_f -// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_CANONICALIZE:%.*]] = tail call <4 x float> @llvm.canonicalize.v4f32(<4 x float> [[A]]) #[[ATTR4]] -// CHECK-NEXT: ret <4 x float> [[ELT_CANONICALIZE]] -// -float4 strict_elementwise_canonicalize(float4 a) { - return __builtin_elementwise_canonicalize(a); -} - -// CHECK-LABEL: define dso_local noundef <4 x float> @_Z27strict_elementwise_copysignDv4_fS_ -// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.copysign.v4f32(<4 x float> [[A]], <4 x float> [[B]]) #[[ATTR4]] -// CHECK-NEXT: ret <4 x float> [[TMP0]] -// -float4 strict_elementwise_copysign(float4 a, float4 b) { - return __builtin_elementwise_copysign(a, b); -} - // CHECK-LABEL: define dso_local noundef <4 x float> @_Z22strict_elementwise_fmaDv4_fS_S_ -// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.fma.v4f32(<4 x float> [[A]], <4 x float> [[B]], <4 x float> [[C]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> [[A]], <4 x float> [[B]], <4 x float> [[C]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] // CHECK-NEXT: ret <4 x float> [[TMP0]] // float4 strict_elementwise_fma(float4 a, float4 b, float4 c) { @@ -338,9 +308,9 @@ float4 strict_elementwise_fma(float4 a, float4 b, float4 c) { } // CHECK-LABEL: define dso_local noundef <4 x float> @_Z22strict_elementwise_powDv4_fS_ -// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { 
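The FileCheck updates in this test follow from the CGBuiltin.cpp change above: under strict floating-point semantics the elementwise builtins are now routed through the MaybeConstrainedFPBuiltin emitters and so lower to llvm.experimental.constrained.* intrinsics. A sketch of source that exercises the strict path (the float4 typedef is assumed to match this test file; compile with -ffp-exception-behavior=strict):

typedef float float4 __attribute__((ext_vector_type(4)));

// Under -ffp-exception-behavior=strict, this is now expected to lower to
//   @llvm.experimental.constrained.pow.v4f32(<4 x float> %a, <4 x float> %b,
//       metadata !"round.dynamic", metadata !"fpexcept.strict")
// rather than the plain @llvm.pow.v4f32 intrinsic.
float4 pow_strict(float4 a, float4 b) {
  return __builtin_elementwise_pow(a, b);
}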
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.pow.v4f32(<4 x float> [[A]], <4 x float> [[B]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.experimental.constrained.pow.v4f32(<4 x float> [[A]], <4 x float> [[B]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] // CHECK-NEXT: ret <4 x float> [[TMP0]] // float4 strict_elementwise_pow(float4 a, float4 b) { @@ -350,8 +320,8 @@ float4 strict_elementwise_pow(float4 a, float4 b) { // CHECK-LABEL: define dso_local noundef <4 x float> @_Z23strict_elementwise_fmodDv4_fS_ // CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[FMOD:%.*]] = tail call <4 x float> @llvm.experimental.constrained.frem.v4f32(<4 x float> [[A]], <4 x float> [[B]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] -// CHECK-NEXT: ret <4 x float> [[FMOD]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.experimental.constrained.frem.v4f32(<4 x float> [[A]], <4 x float> [[B]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// CHECK-NEXT: ret <4 x float> [[TMP0]] // float4 strict_elementwise_fmod(float4 a, float4 b) { return __builtin_elementwise_fmod(a, b); diff --git a/clang/test/CodeGenHLSL/builtins/exp-overloads.hlsl b/clang/test/CodeGenHLSL/builtins/exp-overloads.hlsl index df34beeba7a8c..c22f012421e3a 100644 --- a/clang/test/CodeGenHLSL/builtins/exp-overloads.hlsl +++ b/clang/test/CodeGenHLSL/builtins/exp-overloads.hlsl @@ -3,86 +3,86 @@ // RUN: FileCheck %s --check-prefixes=CHECK // CHECK-LABEL: define hidden noundef nofpclass(nan inf) float {{.*}}test_exp_double -// CHECK: %elt.exp = call reassoc nnan ninf nsz arcp afn float @llvm.exp.f32( -// CHECK: ret float %elt.exp +// CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.exp.f32( +// CHECK: ret float [[EXP]] float test_exp_double(double p0) { return exp(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <2 x float> {{.*}}test_exp_double2 -// CHECK: %elt.exp = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp.v2f32 -// CHECK: ret <2 x float> %elt.exp +// CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp.v2f32 +// CHECK: ret <2 x float> [[EXP]] float2 test_exp_double2(double2 p0) { return exp(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <3 x float> {{.*}}test_exp_double3 -// CHECK: %elt.exp = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp.v3f32 -// CHECK: ret <3 x float> %elt.exp +// CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp.v3f32 +// CHECK: ret <3 x float> [[EXP]] float3 test_exp_double3(double3 p0) { return exp(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> {{.*}}test_exp_double4 -// CHECK: %elt.exp = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp.v4f32 -// CHECK: ret <4 x float> %elt.exp +// CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp.v4f32 +// CHECK: ret <4 x float> [[EXP]] float4 test_exp_double4(double4 p0) { return exp(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) float {{.*}}test_exp_int -// CHECK: %elt.exp = call reassoc nnan ninf nsz arcp afn float @llvm.exp.f32( -// CHECK: ret float %elt.exp +// CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn float 
@llvm.exp.f32( +// CHECK: ret float [[EXP]] float test_exp_int(int p0) { return exp(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <2 x float> {{.*}}test_exp_int2 -// CHECK: %elt.exp = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp.v2f32 -// CHECK: ret <2 x float> %elt.exp +// CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp.v2f32 +// CHECK: ret <2 x float> [[EXP]] float2 test_exp_int2(int2 p0) { return exp(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <3 x float> {{.*}}test_exp_int3 -// CHECK: %elt.exp = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp.v3f32 -// CHECK: ret <3 x float> %elt.exp +// CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp.v3f32 +// CHECK: ret <3 x float> [[EXP]] float3 test_exp_int3(int3 p0) { return exp(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> {{.*}}test_exp_int4 -// CHECK: %elt.exp = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp.v4f32 -// CHECK: ret <4 x float> %elt.exp +// CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp.v4f32 +// CHECK: ret <4 x float> [[EXP]] float4 test_exp_int4(int4 p0) { return exp(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) float {{.*}}test_exp_uint -// CHECK: %elt.exp = call reassoc nnan ninf nsz arcp afn float @llvm.exp.f32( -// CHECK: ret float %elt.exp +// CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.exp.f32( +// CHECK: ret float [[EXP]] float test_exp_uint(uint p0) { return exp(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <2 x float> {{.*}}test_exp_uint2 -// CHECK: %elt.exp = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp.v2f32 -// CHECK: ret <2 x float> %elt.exp +// CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp.v2f32 +// CHECK: ret <2 x float> [[EXP]] float2 test_exp_uint2(uint2 p0) { return exp(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <3 x float> {{.*}}test_exp_uint3 -// CHECK: %elt.exp = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp.v3f32 -// CHECK: ret <3 x float> %elt.exp +// CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp.v3f32 +// CHECK: ret <3 x float> [[EXP]] float3 test_exp_uint3(uint3 p0) { return exp(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> {{.*}}test_exp_uint4 -// CHECK: %elt.exp = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp.v4f32 -// CHECK: ret <4 x float> %elt.exp +// CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp.v4f32 +// CHECK: ret <4 x float> [[EXP]] float4 test_exp_uint4(uint4 p0) { return exp(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) float {{.*}}test_exp_int64_t -// CHECK: %elt.exp = call reassoc nnan ninf nsz arcp afn float @llvm.exp.f32( -// CHECK: ret float %elt.exp +// CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.exp.f32( +// CHECK: ret float [[EXP]] float test_exp_int64_t(int64_t p0) { return exp(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <2 x float> {{.*}}test_exp_int64_t2 -// CHECK: %elt.exp = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp.v2f32 -// CHECK: ret <2 x float> %elt.exp +// CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp.v2f32 +// CHECK: ret <2 x float> [[EXP]] float2 test_exp_int64_t2(int64_t2 p0) { return exp(p0); } // CHECK-LABEL: define hidden 
noundef nofpclass(nan inf) <3 x float> {{.*}}test_exp_int64_t3 -// CHECK: %elt.exp = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp.v3f32 -// CHECK: ret <3 x float> %elt.exp +// CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp.v3f32 +// CHECK: ret <3 x float> [[EXP]] float3 test_exp_int64_t3(int64_t3 p0) { return exp(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> {{.*}}test_exp_int64_t4 -// CHECK: %elt.exp = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp.v4f32 -// CHECK: ret <4 x float> %elt.exp +// CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp.v4f32 +// CHECK: ret <4 x float> [[EXP]] float4 test_exp_int64_t4(int64_t4 p0) { return exp(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) float {{.*}}test_exp_uint64_t -// CHECK: %elt.exp = call reassoc nnan ninf nsz arcp afn float @llvm.exp.f32( -// CHECK: ret float %elt.exp +// CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.exp.f32( +// CHECK: ret float [[EXP]] float test_exp_uint64_t(uint64_t p0) { return exp(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <2 x float> {{.*}}test_exp_uint64_t2 -// CHECK: %elt.exp = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp.v2f32 -// CHECK: ret <2 x float> %elt.exp +// CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp.v2f32 +// CHECK: ret <2 x float> [[EXP]] float2 test_exp_uint64_t2(uint64_t2 p0) { return exp(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <3 x float> {{.*}}test_exp_uint64_t3 -// CHECK: %elt.exp = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp.v3f32 -// CHECK: ret <3 x float> %elt.exp +// CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp.v3f32 +// CHECK: ret <3 x float> [[EXP]] float3 test_exp_uint64_t3(uint64_t3 p0) { return exp(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> {{.*}}test_exp_uint64_t4 -// CHECK: %elt.exp = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp.v4f32 -// CHECK: ret <4 x float> %elt.exp +// CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp.v4f32 +// CHECK: ret <4 x float> [[EXP]] float4 test_exp_uint64_t4(uint64_t4 p0) { return exp(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/exp.hlsl b/clang/test/CodeGenHLSL/builtins/exp.hlsl index d50ef021eecb8..56efb03d1f98b 100644 --- a/clang/test/CodeGenHLSL/builtins/exp.hlsl +++ b/clang/test/CodeGenHLSL/builtins/exp.hlsl @@ -6,47 +6,47 @@ // RUN: FileCheck %s --check-prefixes=CHECK,NO_HALF // NATIVE_HALF-LABEL: define hidden noundef nofpclass(nan inf) half @_Z13test_exp_half -// NATIVE_HALF: %elt.exp = call reassoc nnan ninf nsz arcp afn half @llvm.exp.f16( -// NATIVE_HALF: ret half %elt.exp +// NATIVE_HALF: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.exp.f16( +// NATIVE_HALF: ret half [[EXP]] // NO_HALF-LABEL: define hidden noundef nofpclass(nan inf) float @_Z13test_exp_half -// NO_HALF: %elt.exp = call reassoc nnan ninf nsz arcp afn float @llvm.exp.f32( -// NO_HALF: ret float %elt.exp +// NO_HALF: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.exp.f32( +// NO_HALF: ret float [[EXP]] half test_exp_half(half p0) { return exp(p0); } // NATIVE_HALF-LABEL: define hidden noundef nofpclass(nan inf) <2 x half> @_Z14test_exp_half2 -// NATIVE_HALF: %elt.exp = call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.exp.v2f16 -// NATIVE_HALF: ret <2 x half> %elt.exp +// 
NATIVE_HALF: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.exp.v2f16 +// NATIVE_HALF: ret <2 x half> [[EXP]] // NO_HALF-LABEL: define hidden noundef nofpclass(nan inf) <2 x float> @_Z14test_exp_half2 -// NO_HALF: %elt.exp = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp.v2f32( -// NO_HALF: ret <2 x float> %elt.exp +// NO_HALF: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp.v2f32( +// NO_HALF: ret <2 x float> [[EXP]] half2 test_exp_half2(half2 p0) { return exp(p0); } // NATIVE_HALF-LABEL: define hidden noundef nofpclass(nan inf) <3 x half> @_Z14test_exp_half3 -// NATIVE_HALF: %elt.exp = call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.exp.v3f16 -// NATIVE_HALF: ret <3 x half> %elt.exp +// NATIVE_HALF: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.exp.v3f16 +// NATIVE_HALF: ret <3 x half> [[EXP]] // NO_HALF-LABEL: define hidden noundef nofpclass(nan inf) <3 x float> @_Z14test_exp_half3 -// NO_HALF: %elt.exp = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp.v3f32( -// NO_HALF: ret <3 x float> %elt.exp +// NO_HALF: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp.v3f32( +// NO_HALF: ret <3 x float> [[EXP]] half3 test_exp_half3(half3 p0) { return exp(p0); } // NATIVE_HALF-LABEL: define hidden noundef nofpclass(nan inf) <4 x half> @_Z14test_exp_half4 -// NATIVE_HALF: %elt.exp = call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.exp.v4f16 -// NATIVE_HALF: ret <4 x half> %elt.exp +// NATIVE_HALF: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.exp.v4f16 +// NATIVE_HALF: ret <4 x half> [[EXP]] // NO_HALF-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> @_Z14test_exp_half4 -// NO_HALF: %elt.exp = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp.v4f32( -// NO_HALF: ret <4 x float> %elt.exp +// NO_HALF: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp.v4f32( +// NO_HALF: ret <4 x float> [[EXP]] half4 test_exp_half4(half4 p0) { return exp(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z14test_exp_float -// CHECK: %elt.exp = call reassoc nnan ninf nsz arcp afn float @llvm.exp.f32( -// CHECK: ret float %elt.exp +// CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.exp.f32( +// CHECK: ret float [[EXP]] float test_exp_float(float p0) { return exp(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <2 x float> @_Z15test_exp_float2 -// CHECK: %elt.exp = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp.v2f32 -// CHECK: ret <2 x float> %elt.exp +// CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp.v2f32 +// CHECK: ret <2 x float> [[EXP]] float2 test_exp_float2(float2 p0) { return exp(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <3 x float> @_Z15test_exp_float3 -// CHECK: %elt.exp = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp.v3f32 -// CHECK: ret <3 x float> %elt.exp +// CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp.v3f32 +// CHECK: ret <3 x float> [[EXP]] float3 test_exp_float3(float3 p0) { return exp(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> @_Z15test_exp_float4 -// CHECK: %elt.exp = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp.v4f32 -// CHECK: ret <4 x float> %elt.exp +// CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp.v4f32 +// CHECK: ret <4 x float> [[EXP]] float4 test_exp_float4(float4 p0) {
return exp(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/exp2-overloads.hlsl b/clang/test/CodeGenHLSL/builtins/exp2-overloads.hlsl index 20482777a18de..a8a6f3ba76b4f 100644 --- a/clang/test/CodeGenHLSL/builtins/exp2-overloads.hlsl +++ b/clang/test/CodeGenHLSL/builtins/exp2-overloads.hlsl @@ -3,86 +3,86 @@ // RUN: FileCheck %s --check-prefixes=CHECK // CHECK-LABEL: define hidden noundef nofpclass(nan inf) float {{.*}}test_exp2_double -// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn float @llvm.exp2.f32( -// CHECK: ret float %elt.exp2 +// CHECK: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.exp2.f32( +// CHECK: ret float [[EXP2]] float test_exp2_double(double p0) { return exp2(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <2 x float> {{.*}}test_exp2_double2 -// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp2.v2f32 -// CHECK: ret <2 x float> %elt.exp2 +// CHECK: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp2.v2f32 +// CHECK: ret <2 x float> [[EXP2]] float2 test_exp2_double2(double2 p0) { return exp2(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <3 x float> {{.*}}test_exp2_double3 -// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp2.v3f32 -// CHECK: ret <3 x float> %elt.exp2 +// CHECK: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp2.v3f32 +// CHECK: ret <3 x float> [[EXP2]] float3 test_exp2_double3(double3 p0) { return exp2(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> {{.*}}test_exp2_double4 -// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp2.v4f32 -// CHECK: ret <4 x float> %elt.exp2 +// CHECK: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp2.v4f32 +// CHECK: ret <4 x float> [[EXP2]] float4 test_exp2_double4(double4 p0) { return exp2(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) float {{.*}}test_exp2_int -// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn float @llvm.exp2.f32( -// CHECK: ret float %elt.exp2 +// CHECK: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.exp2.f32( +// CHECK: ret float [[EXP2]] float test_exp2_int(int p0) { return exp2(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <2 x float> {{.*}}test_exp2_int2 -// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp2.v2f32 -// CHECK: ret <2 x float> %elt.exp2 +// CHECK: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp2.v2f32 +// CHECK: ret <2 x float> [[EXP2]] float2 test_exp2_int2(int2 p0) { return exp2(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <3 x float> {{.*}}test_exp2_int3 -// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp2.v3f32 -// CHECK: ret <3 x float> %elt.exp2 +// CHECK: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp2.v3f32 +// CHECK: ret <3 x float> [[EXP2]] float3 test_exp2_int3(int3 p0) { return exp2(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> {{.*}}test_exp2_int4 -// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp2.v4f32 -// CHECK: ret <4 x float> %elt.exp2 +// CHECK: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp2.v4f32 +// CHECK: ret <4 x float> [[EXP2]] float4 test_exp2_int4(int4 p0) { return exp2(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) float 
{{.*}}test_exp2_uint -// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn float @llvm.exp2.f32( -// CHECK: ret float %elt.exp2 +// CHECK: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.exp2.f32( +// CHECK: ret float [[EXP2]] float test_exp2_uint(uint p0) { return exp2(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <2 x float> {{.*}}test_exp2_uint2 -// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp2.v2f32 -// CHECK: ret <2 x float> %elt.exp2 +// CHECK: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp2.v2f32 +// CHECK: ret <2 x float> [[EXP2]] float2 test_exp2_uint2(uint2 p0) { return exp2(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <3 x float> {{.*}}test_exp2_uint3 -// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp2.v3f32 -// CHECK: ret <3 x float> %elt.exp2 +// CHECK: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp2.v3f32 +// CHECK: ret <3 x float> [[EXP2]] float3 test_exp2_uint3(uint3 p0) { return exp2(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> {{.*}}test_exp2_uint4 -// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp2.v4f32 -// CHECK: ret <4 x float> %elt.exp2 +// CHECK: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp2.v4f32 +// CHECK: ret <4 x float> [[EXP2]] float4 test_exp2_uint4(uint4 p0) { return exp2(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) float {{.*}}test_exp2_int64_t -// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn float @llvm.exp2.f32( -// CHECK: ret float %elt.exp2 +// CHECK: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.exp2.f32( +// CHECK: ret float [[EXP2]] float test_exp2_int64_t(int64_t p0) { return exp2(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <2 x float> {{.*}}test_exp2_int64_t2 -// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp2.v2f32 -// CHECK: ret <2 x float> %elt.exp2 +// CHECK: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp2.v2f32 +// CHECK: ret <2 x float> [[EXP2]] float2 test_exp2_int64_t2(int64_t2 p0) { return exp2(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <3 x float> {{.*}}test_exp2_int64_t3 -// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp2.v3f32 -// CHECK: ret <3 x float> %elt.exp2 +// CHECK: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp2.v3f32 +// CHECK: ret <3 x float> [[EXP2]] float3 test_exp2_int64_t3(int64_t3 p0) { return exp2(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> {{.*}}test_exp2_int64_t4 -// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp2.v4f32 -// CHECK: ret <4 x float> %elt.exp2 +// CHECK: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp2.v4f32 +// CHECK: ret <4 x float> [[EXP2]] float4 test_exp2_int64_t4(int64_t4 p0) { return exp2(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) float {{.*}}test_exp2_uint64_t -// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn float @llvm.exp2.f32( -// CHECK: ret float %elt.exp2 +// CHECK: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.exp2.f32( +// CHECK: ret float [[EXP2]] float test_exp2_uint64_t(uint64_t p0) { return exp2(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <2 x float> {{.*}}test_exp2_uint64_t2 -// 
CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp2.v2f32 -// CHECK: ret <2 x float> %elt.exp2 +// CHECK: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp2.v2f32 +// CHECK: ret <2 x float> [[EXP2]] float2 test_exp2_uint64_t2(uint64_t2 p0) { return exp2(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <3 x float> {{.*}}test_exp2_uint64_t3 -// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp2.v3f32 -// CHECK: ret <3 x float> %elt.exp2 +// CHECK: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp2.v3f32 +// CHECK: ret <3 x float> [[EXP2]] float3 test_exp2_uint64_t3(uint64_t3 p0) { return exp2(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> {{.*}}test_exp2_uint64_t4 -// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp2.v4f32 -// CHECK: ret <4 x float> %elt.exp2 +// CHECK: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp2.v4f32 +// CHECK: ret <4 x float> [[EXP2]] float4 test_exp2_uint64_t4(uint64_t4 p0) { return exp2(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/exp2.hlsl b/clang/test/CodeGenHLSL/builtins/exp2.hlsl index ed8cfcf47b04b..b4d9c411681d1 100644 --- a/clang/test/CodeGenHLSL/builtins/exp2.hlsl +++ b/clang/test/CodeGenHLSL/builtins/exp2.hlsl @@ -6,47 +6,47 @@ // RUN: FileCheck %s --check-prefixes=CHECK,NO_HALF // NATIVE_HALF-LABEL: define hidden noundef nofpclass(nan inf) half @_Z14test_exp2_half -// NATIVE_HALF: %elt.exp2 = call reassoc nnan ninf nsz arcp afn half @llvm.exp2.f16( -// NATIVE_HALF: ret half %elt.exp2 +// NATIVE_HALF: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.exp2.f16( +// NATIVE_HALF: ret half [[EXP2]] // NO_HALF-LABEL: define hidden noundef nofpclass(nan inf) float @_Z14test_exp2_half -// NO_HALF: %elt.exp2 = call reassoc nnan ninf nsz arcp afn float @llvm.exp2.f32( -// NO_HALF: ret float %elt.exp2 +// NO_HALF: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.exp2.f32( +// NO_HALF: ret float [[EXP2]] half test_exp2_half(half p0) { return exp2(p0); } // NATIVE_HALF-LABEL: define hidden noundef nofpclass(nan inf) <2 x half> @_Z15test_exp2_half2 -// NATIVE_HALF: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.exp2.v2f16 -// NATIVE_HALF: ret <2 x half> %elt.exp2 +// NATIVE_HALF: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.exp2.v2f16 +// NATIVE_HALF: ret <2 x half> [[EXP2]] // NO_HALF-LABEL: define hidden noundef nofpclass(nan inf) <2 x float> @_Z15test_exp2_half2 -// NO_HALF: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp2.v2f32( -// NO_HALF: ret <2 x float> %elt.exp2 +// NO_HALF: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp2.v2f32( +// NO_HALF: ret <2 x float> [[EXP2]] half2 test_exp2_half2(half2 p0) { return exp2(p0); } // NATIVE_HALF-LABEL: define hidden noundef nofpclass(nan inf) <3 x half> @_Z15test_exp2_half3 -// NATIVE_HALF: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.exp2.v3f16 -// NATIVE_HALF: ret <3 x half> %elt.exp2 +// NATIVE_HALF: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.exp2.v3f16 +// NATIVE_HALF: ret <3 x half> [[EXP2]] // NO_HALF-LABEL: define hidden noundef nofpclass(nan inf) <3 x float> @_Z15test_exp2_half3 -// NO_HALF: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp2.v3f32( -// NO_HALF: ret <3 x float> %elt.exp2 +// NO_HALF: [[EXP2:%.*]] = call reassoc nnan ninf 
nsz arcp afn <3 x float> @llvm.exp2.v3f32( +// NO_HALF: ret <3 x float> [[EXP2]] half3 test_exp2_half3(half3 p0) { return exp2(p0); } // NATIVE_HALF-LABEL: define hidden noundef nofpclass(nan inf) <4 x half> @_Z15test_exp2_half4 -// NATIVE_HALF: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.exp2.v4f16 -// NATIVE_HALF: ret <4 x half> %elt.exp2 +// NATIVE_HALF: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.exp2.v4f16 +// NATIVE_HALF: ret <4 x half> [[EXP2]] // NO_HALF-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> @_Z15test_exp2_half4 -// NO_HALF: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp2.v4f32( -// NO_HALF: ret <4 x float> %elt.exp2 +// NO_HALF: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp2.v4f32( +// NO_HALF: ret <4 x float> [[EXP2]] half4 test_exp2_half4(half4 p0) { return exp2(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z15test_exp2_float -// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn float @llvm.exp2.f32( -// CHECK: ret float %elt.exp2 +// CHECK: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.exp2.f32( +// CHECK: ret float [[EXP2]] float test_exp2_float(float p0) { return exp2(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <2 x float> @_Z16test_exp2_float2 -// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp2.v2f32 -// CHECK: ret <2 x float> %elt.exp2 +// CHECK: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp2.v2f32 +// CHECK: ret <2 x float> [[EXP2]] float2 test_exp2_float2(float2 p0) { return exp2(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <3 x float> @_Z16test_exp2_float3 -// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp2.v3f32 -// CHECK: ret <3 x float> %elt.exp2 +// CHECK: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp2.v3f32 +// CHECK: ret <3 x float> [[EXP2]] float3 test_exp2_float3(float3 p0) { return exp2(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> @_Z16test_exp2_float4 -// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp2.v4f32 -// CHECK: ret <4 x float> %elt.exp2 +// CHECK: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp2.v4f32 +// CHECK: ret <4 x float> [[EXP2]] float4 test_exp2_float4(float4 p0) { return exp2(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/ldexp.hlsl b/clang/test/CodeGenHLSL/builtins/ldexp.hlsl index 012adc588ddfa..2dec126788956 100644 --- a/clang/test/CodeGenHLSL/builtins/ldexp.hlsl +++ b/clang/test/CodeGenHLSL/builtins/ldexp.hlsl @@ -1,49 +1,49 @@ // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s -fnative-half-type -fnative-int16-type -emit-llvm -disable-llvm-passes -o - | FileCheck %s // CHECK-LABEL: define linkonce_odr hidden noundef nofpclass(nan inf) half @_ZN4hlsl8__detail10ldexp_implIDhEET_S2_S2_ -// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn half @llvm.exp2.f16(half %{{.*}}) -// CHECK: %mul = fmul reassoc nnan ninf nsz arcp afn half %elt.exp2, %{{.*}} +// CHECK: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.exp2.f16(half %{{.*}}) +// CHECK: %mul = fmul reassoc nnan ninf nsz arcp afn half [[EXP2]], %{{.*}} // CHECK: ret half %mul half test_ldexp_half(half X, half Exp) { return ldexp(X, Exp); } // CHECK-LABEL: define linkonce_odr hidden noundef nofpclass(nan inf) <2 x half> 
@_ZN4hlsl8__detail10ldexp_implIDv2_DhEET_S3_S3_ -// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.exp2.v2f16(<2 x half> %{{.*}}) -// CHECK: %mul = fmul reassoc nnan ninf nsz arcp afn <2 x half> %elt.exp2, %{{.*}} +// CHECK: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.exp2.v2f16(<2 x half> %{{.*}}) +// CHECK: %mul = fmul reassoc nnan ninf nsz arcp afn <2 x half> [[EXP2]], %{{.*}} // CHECK: ret <2 x half> %mul half2 test_ldexp_half2(half2 X, half2 Exp) { return ldexp(X, Exp); } // CHECK-LABEL: define linkonce_odr hidden noundef nofpclass(nan inf) <3 x half> @_ZN4hlsl8__detail10ldexp_implIDv3_DhEET_S3_S3_ -// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.exp2.v3f16(<3 x half> %{{.*}}) -// CHECK: %mul = fmul reassoc nnan ninf nsz arcp afn <3 x half> %elt.exp2, %{{.*}} +// CHECK: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.exp2.v3f16(<3 x half> %{{.*}}) +// CHECK: %mul = fmul reassoc nnan ninf nsz arcp afn <3 x half> [[EXP2]], %{{.*}} // CHECK: ret <3 x half> %mul half3 test_ldexp_half3(half3 X, half3 Exp) { return ldexp(X, Exp); } // CHECK-LABEL: define linkonce_odr hidden noundef nofpclass(nan inf) <4 x half> @_ZN4hlsl8__detail10ldexp_implIDv4_DhEET_S3_S3_ -// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.exp2.v4f16(<4 x half> %{{.*}}) -// CHECK: %mul = fmul reassoc nnan ninf nsz arcp afn <4 x half> %elt.exp2, %{{.*}} +// CHECK: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.exp2.v4f16(<4 x half> %{{.*}}) +// CHECK: %mul = fmul reassoc nnan ninf nsz arcp afn <4 x half> [[EXP2]], %{{.*}} // CHECK: ret <4 x half> %mul half4 test_ldexp_half4(half4 X, half4 Exp) { return ldexp(X, Exp); } // CHECK-LABEL: define linkonce_odr hidden noundef nofpclass(nan inf) float @_ZN4hlsl8__detail10ldexp_implIfEET_S2_S2_ -// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn float @llvm.exp2.f32(float %{{.*}}) -// CHECK: %mul = fmul reassoc nnan ninf nsz arcp afn float %elt.exp2, %{{.*}} +// CHECK: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.exp2.f32(float %{{.*}}) +// CHECK: %mul = fmul reassoc nnan ninf nsz arcp afn float [[EXP2]], %{{.*}} // CHECK: ret float %mul float test_ldexp_float(float X, float Exp) { return ldexp(X, Exp); } // CHECK-LABEL: define linkonce_odr hidden noundef nofpclass(nan inf) <2 x float> @_ZN4hlsl8__detail10ldexp_implIDv2_fEET_S3_S3_ -// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp2.v2f32(<2 x float> %{{.*}}) -// CHECK: %mul = fmul reassoc nnan ninf nsz arcp afn <2 x float> %elt.exp2, %{{.*}} +// CHECK: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp2.v2f32(<2 x float> %{{.*}}) +// CHECK: %mul = fmul reassoc nnan ninf nsz arcp afn <2 x float> [[EXP2]], %{{.*}} // CHECK: ret <2 x float> %mul float2 test_ldexp_float2(float2 X, float2 Exp) { return ldexp(X, Exp); } // CHECK-LABEL: define linkonce_odr hidden noundef nofpclass(nan inf) <3 x float> @_ZN4hlsl8__detail10ldexp_implIDv3_fEET_S3_S3_ -// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp2.v3f32(<3 x float> %{{.*}}) -// CHECK: %mul = fmul reassoc nnan ninf nsz arcp afn <3 x float> %elt.exp2, %{{.*}} +// CHECK: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp2.v3f32(<3 x float> %{{.*}}) +// CHECK: %mul = fmul reassoc nnan ninf nsz arcp afn <3 x float> [[EXP2]], %{{.*}} // CHECK: ret <3 x float> %mul float3 test_ldexp_float3(float3 X, float3 Exp) { return 
ldexp(X, Exp); } // CHECK-LABEL: define linkonce_odr hidden noundef nofpclass(nan inf) <4 x float> @_ZN4hlsl8__detail10ldexp_implIDv4_fEET_S3_S3_ -// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp2.v4f32(<4 x float> %{{.*}}) -// CHECK: %mul = fmul reassoc nnan ninf nsz arcp afn <4 x float> %elt.exp2, %{{.*}} +// CHECK: [[EXP2:%.*]] = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp2.v4f32(<4 x float> %{{.*}}) +// CHECK: %mul = fmul reassoc nnan ninf nsz arcp afn <4 x float> [[EXP2]], %{{.*}} // CHECK: ret <4 x float> %mul float4 test_ldexp_float4(float4 X, float4 Exp) { return ldexp(X, Exp); } diff --git a/clang/test/CodeGenHLSL/builtins/lit.hlsl b/clang/test/CodeGenHLSL/builtins/lit.hlsl index c0b109a75906b..b7979960de9f6 100644 --- a/clang/test/CodeGenHLSL/builtins/lit.hlsl +++ b/clang/test/CodeGenHLSL/builtins/lit.hlsl @@ -7,9 +7,9 @@ // CHECK: %vecinit2.i = insertelement <4 x half> %{{.*}}, half 0xH3C00, i32 3 // CHECK: %cmp4.i = fcmp reassoc nnan ninf nsz arcp afn olt half %{{.*}}, 0xH0000 // CHECK: %hlsl.or.i = or i1 %{{.*}}, %cmp4.i -// CHECK: %elt.log.i = call reassoc nnan ninf nsz arcp afn half @llvm.log.f16(half %{{.*}}) -// CHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn half %elt.log.i, %{{.*}} -// CHECK: %elt.exp.i = call reassoc nnan ninf nsz arcp afn half @llvm.exp.f16(half %mul.i) +// CHECK: [[LOG:%.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.log.f16(half %{{.*}}) +// CHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn half [[LOG]], %{{.*}} +// CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.exp.f16(half %mul.i) // CHECK: %hlsl.select7.i = select reassoc nnan ninf nsz arcp afn i1 %{{.*}}, half 0xH0000, half %{{.*}} // CHECK: %vecins.i = insertelement <4 x half> %{{.*}}, half %hlsl.select7.i, i32 2 // CHECK: ret <4 x half> %{{.*}} @@ -22,9 +22,9 @@ half4 test_lit_half(half NDotL, half NDotH, half M) { return lit(NDotL, NDotH, M // CHECK: %vecinit2.i = insertelement <4 x float> %{{.*}}, float 1.000000e+00, i32 3 // CHECK: %cmp4.i = fcmp reassoc nnan ninf nsz arcp afn olt float %{{.*}}, 0.000000e+00 // CHECK: %hlsl.or.i = or i1 %{{.*}}, %cmp4.i -// CHECK: %elt.log.i = call reassoc nnan ninf nsz arcp afn float @llvm.log.f32(float %{{.*}}) -// CHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn float %elt.log.i, %{{.*}} -// CHECK: %elt.exp.i = call reassoc nnan ninf nsz arcp afn float @llvm.exp.f32(float %mul.i) +// CHECK: [[LOG:%.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.log.f32(float %{{.*}}) +// CHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn float [[LOG]], %{{.*}} +// CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.exp.f32(float %mul.i) // CHECK: %hlsl.select7.i = select reassoc nnan ninf nsz arcp afn i1 %{{.*}}, float 0.000000e+00, float %{{.*}} // CHECK: %vecins.i = insertelement <4 x float> %{{.*}}, float %hlsl.select7.i, i32 2 // CHECK: ret <4 x float> %{{.*}} diff --git a/clang/test/CodeGenHLSL/builtins/round-overloads.hlsl b/clang/test/CodeGenHLSL/builtins/round-overloads.hlsl index 3b07fcec064d8..5719d9d92991e 100644 --- a/clang/test/CodeGenHLSL/builtins/round-overloads.hlsl +++ b/clang/test/CodeGenHLSL/builtins/round-overloads.hlsl @@ -3,86 +3,86 @@ // RUN: FileCheck %s --check-prefixes=CHECK // CHECK-LABEL: define hidden noundef nofpclass(nan inf) float {{.*}}test_round_double -// CHECK: %elt.roundeven = call reassoc nnan ninf nsz arcp afn float @llvm.roundeven.f32( -// CHECK: ret float %elt.roundeven +// CHECK: [[ROUNDEVEN:%.*]] = call reassoc nnan ninf nsz 
arcp afn float @llvm.roundeven.f32( +// CHECK: ret float [[ROUNDEVEN]] float test_round_double(double p0) { return round(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <2 x float> {{.*}}test_round_double2 -// CHECK: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.roundeven.v2f32 -// CHECK: ret <2 x float> %elt.roundeven +// CHECK: [[ROUNDEVEN:%.*]] = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.roundeven.v2f32 +// CHECK: ret <2 x float> [[ROUNDEVEN]] float2 test_round_double2(double2 p0) { return round(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <3 x float> {{.*}}test_round_double3 -// CHECK: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.roundeven.v3f32 -// CHECK: ret <3 x float> %elt.roundeven +// CHECK: [[ROUNDEVEN:%.*]] = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.roundeven.v3f32 +// CHECK: ret <3 x float> [[ROUNDEVEN]] float3 test_round_double3(double3 p0) { return round(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> {{.*}}test_round_double4 -// CHECK: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.roundeven.v4f32 -// CHECK: ret <4 x float> %elt.roundeven +// CHECK: [[ROUNDEVEN:%.*]] = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.roundeven.v4f32 +// CHECK: ret <4 x float> [[ROUNDEVEN]] float4 test_round_double4(double4 p0) { return round(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) float {{.*}}test_round_int -// CHECK: %elt.roundeven = call reassoc nnan ninf nsz arcp afn float @llvm.roundeven.f32( -// CHECK: ret float %elt.roundeven +// CHECK: [[ROUNDEVEN:%.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.roundeven.f32( +// CHECK: ret float [[ROUNDEVEN]] float test_round_int(int p0) { return round(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <2 x float> {{.*}}test_round_int2 -// CHECK: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.roundeven.v2f32 -// CHECK: ret <2 x float> %elt.roundeven +// CHECK: [[ROUNDEVEN:%.*]] = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.roundeven.v2f32 +// CHECK: ret <2 x float> [[ROUNDEVEN]] float2 test_round_int2(int2 p0) { return round(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <3 x float> {{.*}}test_round_int3 -// CHECK: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.roundeven.v3f32 -// CHECK: ret <3 x float> %elt.roundeven +// CHECK: [[ROUNDEVEN:%.*]] = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.roundeven.v3f32 +// CHECK: ret <3 x float> [[ROUNDEVEN]] float3 test_round_int3(int3 p0) { return round(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> {{.*}}test_round_int4 -// CHECK: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.roundeven.v4f32 -// CHECK: ret <4 x float> %elt.roundeven +// CHECK: [[ROUNDEVEN:%.*]] = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.roundeven.v4f32 +// CHECK: ret <4 x float> [[ROUNDEVEN]] float4 test_round_int4(int4 p0) { return round(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) float {{.*}}test_round_uint -// CHECK: %elt.roundeven = call reassoc nnan ninf nsz arcp afn float @llvm.roundeven.f32( -// CHECK: ret float %elt.roundeven +// CHECK: [[ROUNDEVEN:%.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.roundeven.f32( +// CHECK: ret float [[ROUNDEVEN]] float test_round_uint(uint p0) { return round(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan 
inf) <2 x float> {{.*}}test_round_uint2 -// CHECK: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.roundeven.v2f32 -// CHECK: ret <2 x float> %elt.roundeven +// CHECK: [[ROUNDEVEN:%.*]] = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.roundeven.v2f32 +// CHECK: ret <2 x float> [[ROUNDEVEN]] float2 test_round_uint2(uint2 p0) { return round(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <3 x float> {{.*}}test_round_uint3 -// CHECK: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.roundeven.v3f32 -// CHECK: ret <3 x float> %elt.roundeven +// CHECK: [[ROUNDEVEN:%.*]] = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.roundeven.v3f32 +// CHECK: ret <3 x float> [[ROUNDEVEN]] float3 test_round_uint3(uint3 p0) { return round(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> {{.*}}test_round_uint4 -// CHECK: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.roundeven.v4f32 -// CHECK: ret <4 x float> %elt.roundeven +// CHECK: [[ROUNDEVEN:%.*]] = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.roundeven.v4f32 +// CHECK: ret <4 x float> [[ROUNDEVEN]] float4 test_round_uint4(uint4 p0) { return round(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) float {{.*}}test_round_int64_t -// CHECK: %elt.roundeven = call reassoc nnan ninf nsz arcp afn float @llvm.roundeven.f32( -// CHECK: ret float %elt.roundeven +// CHECK: [[ROUNDEVEN:%.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.roundeven.f32( +// CHECK: ret float [[ROUNDEVEN]] float test_round_int64_t(int64_t p0) { return round(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <2 x float> {{.*}}test_round_int64_t2 -// CHECK: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.roundeven.v2f32 -// CHECK: ret <2 x float> %elt.roundeven +// CHECK: [[ROUNDEVEN:%.*]] = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.roundeven.v2f32 +// CHECK: ret <2 x float> [[ROUNDEVEN]] float2 test_round_int64_t2(int64_t2 p0) { return round(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <3 x float> {{.*}}test_round_int64_t3 -// CHECK: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.roundeven.v3f32 -// CHECK: ret <3 x float> %elt.roundeven +// CHECK: [[ROUNDEVEN:%.*]] = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.roundeven.v3f32 +// CHECK: ret <3 x float> [[ROUNDEVEN]] float3 test_round_int64_t3(int64_t3 p0) { return round(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> {{.*}}test_round_int64_t4 -// CHECK: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.roundeven.v4f32 -// CHECK: ret <4 x float> %elt.roundeven +// CHECK: [[ROUNDEVEN:%.*]] = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.roundeven.v4f32 +// CHECK: ret <4 x float> [[ROUNDEVEN]] float4 test_round_int64_t4(int64_t4 p0) { return round(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) float {{.*}}test_round_uint64_t -// CHECK: %elt.roundeven = call reassoc nnan ninf nsz arcp afn float @llvm.roundeven.f32( -// CHECK: ret float %elt.roundeven +// CHECK: [[ROUNDEVEN:%.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.roundeven.f32( +// CHECK: ret float [[ROUNDEVEN]] float test_round_uint64_t(uint64_t p0) { return round(p0); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <2 x float> {{.*}}test_round_uint64_t2 -// CHECK: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <2 x float> 
@llvm.roundeven.v2f32
-// CHECK: ret <2 x float> %elt.roundeven
+// CHECK: [[ROUNDEVEN:%.*]] = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.roundeven.v2f32
+// CHECK: ret <2 x float> [[ROUNDEVEN]]
float2 test_round_uint64_t2(uint64_t2 p0) { return round(p0); }
// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <3 x float> {{.*}}test_round_uint64_t3
-// CHECK: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.roundeven.v3f32
-// CHECK: ret <3 x float> %elt.roundeven
+// CHECK: [[ROUNDEVEN:%.*]] = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.roundeven.v3f32
+// CHECK: ret <3 x float> [[ROUNDEVEN]]
float3 test_round_uint64_t3(uint64_t3 p0) { return round(p0); }
// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> {{.*}}test_round_uint64_t4
-// CHECK: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.roundeven.v4f32
-// CHECK: ret <4 x float> %elt.roundeven
+// CHECK: [[ROUNDEVEN:%.*]] = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.roundeven.v4f32
+// CHECK: ret <4 x float> [[ROUNDEVEN]]
float4 test_round_uint64_t4(uint64_t4 p0) { return round(p0); }
diff --git a/clang/test/CodeGenHLSL/builtins/round.hlsl b/clang/test/CodeGenHLSL/builtins/round.hlsl
index 0d4afee6ba9a8..8161b0c1c3256 100644
--- a/clang/test/CodeGenHLSL/builtins/round.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/round.hlsl
@@ -6,47 +6,47 @@
// RUN: FileCheck %s --check-prefixes=CHECK,NO_HALF
// NATIVE_HALF-LABEL: define hidden noundef nofpclass(nan inf) half @_Z15test_round_half
-// NATIVE_HALF: %elt.roundeven = call reassoc nnan ninf nsz arcp afn half @llvm.roundeven.f16(
-// NATIVE_HALF: ret half %elt.roundeven
+// NATIVE_HALF: [[ROUNDEVEN:%.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.roundeven.f16(
+// NATIVE_HALF: ret half [[ROUNDEVEN]]
// NO_HALF-LABEL: define hidden noundef nofpclass(nan inf) float @_Z15test_round_half
-// NO_HALF: %elt.roundeven = call reassoc nnan ninf nsz arcp afn float @llvm.roundeven.f32(
-// NO_HALF: ret float %elt.roundeven
+// NO_HALF: [[ROUNDEVEN:%.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.roundeven.f32(
+// NO_HALF: ret float [[ROUNDEVEN]]
half test_round_half(half p0) { return round(p0); }
// NATIVE_HALF-LABEL: define hidden noundef nofpclass(nan inf) <2 x half> @_Z16test_round_half2
-// NATIVE_HALF: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.roundeven.v2f16
-// NATIVE_HALF: ret <2 x half> %elt.roundeven
+// NATIVE_HALF: [[ROUNDEVEN:%.*]] = call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.roundeven.v2f16
+// NATIVE_HALF: ret <2 x half> [[ROUNDEVEN]]
// NO_HALF-LABEL: define hidden noundef nofpclass(nan inf) <2 x float> @_Z16test_round_half2
-// NO_HALF: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.roundeven.v2f32(
-// NO_HALF: ret <2 x float> %elt.roundeven
+// NO_HALF: [[ROUNDEVEN:%.*]] = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.roundeven.v2f32(
+// NO_HALF: ret <2 x float> [[ROUNDEVEN]]
half2 test_round_half2(half2 p0) { return round(p0); }
// NATIVE_HALF-LABEL: define hidden noundef nofpclass(nan inf) <3 x half> @_Z16test_round_half3
-// NATIVE_HALF: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.roundeven.v3f16
-// NATIVE_HALF: ret <3 x half> %elt.roundeven
+// NATIVE_HALF: [[ROUNDEVEN:%.*]] = call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.roundeven.v3f16
+// NATIVE_HALF: ret <3 x half> [[ROUNDEVEN]]
// NO_HALF-LABEL: define hidden noundef nofpclass(nan inf) <3 x float>
@_Z16test_round_half3
-// NO_HALF: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.roundeven.v3f32(
-// NO_HALF: ret <3 x float> %elt.roundeven
+// NO_HALF: [[ROUNDEVEN:%.*]] = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.roundeven.v3f32(
+// NO_HALF: ret <3 x float> [[ROUNDEVEN]]
half3 test_round_half3(half3 p0) { return round(p0); }
// NATIVE_HALF-LABEL: define hidden noundef nofpclass(nan inf) <4 x half> @_Z16test_round_half4
-// NATIVE_HALF: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.roundeven.v4f16
-// NATIVE_HALF: ret <4 x half> %elt.roundeven
+// NATIVE_HALF: [[ROUNDEVEN:%.*]] = call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.roundeven.v4f16
+// NATIVE_HALF: ret <4 x half> [[ROUNDEVEN]]
// NO_HALF-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> @_Z16test_round_half4
-// NO_HALF: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.roundeven.v4f32(
-// NO_HALF: ret <4 x float> %elt.roundeven
+// NO_HALF: [[ROUNDEVEN:%.*]] = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.roundeven.v4f32(
+// NO_HALF: ret <4 x float> [[ROUNDEVEN]]
half4 test_round_half4(half4 p0) { return round(p0); }
// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z16test_round_float
-// CHECK: %elt.roundeven = call reassoc nnan ninf nsz arcp afn float @llvm.roundeven.f32(
-// CHECK: ret float %elt.roundeven
+// CHECK: [[ROUNDEVEN:%.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.roundeven.f32(
+// CHECK: ret float [[ROUNDEVEN]]
float test_round_float(float p0) { return round(p0); }
// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <2 x float> @_Z17test_round_float2
-// CHECK: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.roundeven.v2f32
-// CHECK: ret <2 x float> %elt.roundeven
+// CHECK: [[ROUNDEVEN:%.*]] = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.roundeven.v2f32
+// CHECK: ret <2 x float> [[ROUNDEVEN]]
float2 test_round_float2(float2 p0) { return round(p0); }
// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <3 x float> @_Z17test_round_float3
-// CHECK: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.roundeven.v3f32
-// CHECK: ret <3 x float> %elt.roundeven
+// CHECK: [[ROUNDEVEN:%.*]] = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.roundeven.v3f32
+// CHECK: ret <3 x float> [[ROUNDEVEN]]
float3 test_round_float3(float3 p0) { return round(p0); }
// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> @_Z17test_round_float4
-// CHECK: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.roundeven.v4f32
-// CHECK: ret <4 x float> %elt.roundeven
+// CHECK: [[ROUNDEVEN:%.*]] = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.roundeven.v4f32
+// CHECK: ret <4 x float> [[ROUNDEVEN]]
float4 test_round_float4(float4 p0) { return round(p0); }
diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c
index cf2cd4a10b056..27feeb57b5de2 100644
--- a/clang/test/Preprocessor/predefined-arch-macros.c
+++ b/clang/test/Preprocessor/predefined-arch-macros.c
@@ -2525,15 +2525,32 @@
// RUN: %clang -march=wildcatlake -m32 -E -dM %s -o - 2>&1 \
// RUN: -target i386-unknown-linux \
// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M32,CHECK_ARLS_M32,CHECK_NKL_M32
-// RUN: %clang -march=novalake -m32 -E -dM %s -o - 2>&1 \
-// RUN: -target i386-unknown-linux \
-// RUN: | FileCheck -match-full-lines %s
-check-prefixes=CHECK_ARL_M32,CHECK_ARLS_M32,CHECK_NVL_M32,CHECK_NKL_M32 // RUN: %clang -march=clearwaterforest -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ -// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_SRF_M32,CHECK_ARLS_M32,CHECK_NVL_M32,CHECK_UMSR_M32,CHECK_NKL_M32 +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_SRF_M32,CHECK_ARLS_M32,CHECK_UMSR_M32,CHECK_CWF_M32,CHECK_NKL_M32 +// RUN: %clang -march=novalake -m32 -E -dM %s -o - 2>&1 \ +// RUN: -target i386-unknown-linux \ +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M32,CHECK_ARLS_M32,CHECK_CWF_M32,CHECK_NVL_M32,CHECK_NKL_M32 // CHECK_ARL_M32: #define __ADX__ 1 // CHECK_ARL_M32: #define __AES__ 1 +// CHECK_NVL_M32: #define __AVX10_1__ 1 +// CHECK_NVL_M32: #define __AVX10_2__ 1 // CHECK_ARL_M32: #define __AVX2__ 1 +// CHECK_NVL_M32: #define __AVX512BF16__ 1 +// CHECK_NVL_M32: #define __AVX512BITALG__ 1 +// CHECK_NVL_M32: #define __AVX512BW__ 1 +// CHECK_NVL_M32: #define __AVX512CD__ 1 +// CHECK_NVL_M32: #define __AVX512DQ__ 1 +// CHECK_NVL_M32: #define __AVX512FP16__ 1 +// CHECK_NVL_M32: #define __AVX512F__ 1 +// CHECK_NVL_M32: #define __AVX512IFMA__ 1 +// CHECK_NVL_M32: #define __AVX512VBMI2__ 1 +// CHECK_NVL_M32: #define __AVX512VBMI__ 1 +// CHECK_NVL_M32: #define __AVX512VL__ 1 +// CHECK_NVL_M32: #define __AVX512VNNI__ 1 +// CHECK_NVL_M32: #define __AVX512VPOPCNTDQ__ 1 +// We check for NOT AVX512 after all checks for AVX512, so +// if we missed some check on NVL, the test will fail. // CHECK_ARL_M32-NOT: AVX512 // CHECK_ARL_M32: #define __AVXIFMA__ 1 // CHECK_ARL_M32: #define __AVXNECONVERT__ 1 @@ -2544,11 +2561,13 @@ // CHECK_ARL_M32: #define __AVX__ 1 // CHECK_ARL_M32: #define __BMI2__ 1 // CHECK_ARL_M32: #define __BMI__ 1 +// CHECK_NVL_M32: #define __CCMP__ 1 // CHECK_ARLS_M32-NOT: __CLDEMOTE__ // CHECK_SRF_M32: #define __CLDEMOTE__ 1 // CHECK_ARL_M32: #define __CLFLUSHOPT__ 1 // CHECK_ARL_M32: #define __CLWB__ 1 // CHECK_ARL_M32: #define __CMPCCXADD__ 1 +// CHECK_NVL_M32: #define __EGPR__ 1 // CHECK_ARL_M32: #define __ENQCMD__ 1 // CHECK_ARL_M32: #define __F16C__ 1 // CHECK_ARL_M32: #define __FMA__ 1 @@ -2564,15 +2583,20 @@ // CHECK_ARL_M32: #define __MOVBE__ 1 // CHECK_ARL_M32: #define __MOVDIR64B__ 1 // CHECK_ARL_M32: #define __MOVDIRI__ 1 +// CHECK_NVL_M32: #define __MOVRS__ 1 +// CHECK_NVL_M32: #define __NDD__ 1 +// CHECK_NVL_M32: #define __NF__ 1 // CHECK_ARL_M32: #define __PCLMUL__ 1 // CHECK_ARL_M32: #define __PCONFIG__ 1 // CHECK_ARL_M32: #define __PKU__ 1 // CHECK_ARL_M32: #define __POPCNT__ 1 +// CHECK_NVL_M32: #define __PPX__ 1 // CHECK_ARL_M32-NOT: #define __PREFETCHI__ 1 // CHECK_ARLS_M32-NOT: #define __PREFETCHI__ 1 -// CHECK_NVL_M32: #define __PREFETCHI__ 1 +// CHECK_CWF_M32: #define __PREFETCHI__ 1 // CHECK_ARL_M32: #define __PRFCHW__ 1 // CHECK_ARL_M32: #define __PTWRITE__ 1 +// CHECK_NVL_M32: #define __PUSH2POP2__ 1 // CHECK_ARL_M32-NOT: #define __RAOINT__ 1 // CHECK_ARL_M32: #define __RDPID__ 1 // CHECK_ARL_M32: #define __RDRND__ 1 @@ -2607,6 +2631,7 @@ // CHECK_ARL_M32: #define __XSAVEOPT__ 1 // CHECK_ARL_M32: #define __XSAVES__ 1 // CHECK_ARL_M32: #define __XSAVE__ 1 +// CHECK_NVL_M32: #define __ZU__ 1 // CHECK_ARL_M32: #define __corei7 1 // CHECK_ARL_M32: #define __corei7__ 1 // CHECK_ARL_M32: #define __i386 1 @@ -2635,15 +2660,30 @@ // RUN: %clang -march=wildcatlake -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_ARLS_M64,CHECK_NKL_M64 -// 
RUN: %clang -march=novalake -m64 -E -dM %s -o - 2>&1 \ -// RUN: -target i386-unknown-linux \ -// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_ARLS_M64,CHECK_NVL_M64,CHECK_NKL_M64 // RUN: %clang -march=clearwaterforest -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ -// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_SRF_M64,CHECK_ARLS_M64,CHECK_NVL_M64,CHECK_UMSR_M64,CHECK_NKL_M64 +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_SRF_M64,CHECK_ARLS_M64,CHECK_UMSR_M64,CHECK_CWF_M64,CHECK_NKL_M64 +// RUN: %clang -march=novalake -m64 -E -dM %s -o - 2>&1 \ +// RUN: -target i386-unknown-linux \ +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_ARLS_M64,CHECK_CWF_M64,CHECK_NVL_M64,CHECK_NKL_M64 // CHECK_ARL_M64: #define __ADX__ 1 // CHECK_ARL_M64: #define __AES__ 1 +// CHECK_NVL_M64: #define __AVX10_1__ 1 +// CHECK_NVL_M64: #define __AVX10_2__ 1 // CHECK_ARL_M64: #define __AVX2__ 1 +// CHECK_NVL_M64: #define __AVX512BF16__ 1 +// CHECK_NVL_M64: #define __AVX512BITALG__ 1 +// CHECK_NVL_M64: #define __AVX512BW__ 1 +// CHECK_NVL_M64: #define __AVX512CD__ 1 +// CHECK_NVL_M64: #define __AVX512DQ__ 1 +// CHECK_NVL_M64: #define __AVX512FP16__ 1 +// CHECK_NVL_M64: #define __AVX512F__ 1 +// CHECK_NVL_M64: #define __AVX512IFMA__ 1 +// CHECK_NVL_M64: #define __AVX512VBMI2__ 1 +// CHECK_NVL_M64: #define __AVX512VBMI__ 1 +// CHECK_NVL_M64: #define __AVX512VL__ 1 +// CHECK_NVL_M64: #define __AVX512VNNI__ 1 +// CHECK_NVL_M64: #define __AVX512VPOPCNTDQ__ 1 // CHECK_ARL_M64-NOT: AVX512 // CHECK_ARL_M64: #define __AVXIFMA__ 1 // CHECK_ARL_M64: #define __AVXNECONVERT__ 1 @@ -2654,11 +2694,13 @@ // CHECK_ARL_M64: #define __AVX__ 1 // CHECK_ARL_M64: #define __BMI2__ 1 // CHECK_ARL_M64: #define __BMI__ 1 +// CHECK_NVL_M64: #define __CCMP__ 1 // CHECK_ARLS_M64-NOT: __CLDEMOTE__ // CHECK_SRF_M64: #define __CLDEMOTE__ 1 // CHECK_ARL_M64: #define __CLFLUSHOPT__ 1 // CHECK_ARL_M64: #define __CLWB__ 1 // CHECK_ARL_M64: #define __CMPCCXADD__ 1 +// CHECK_NVL_M64: #define __EGPR__ 1 // CHECK_ARL_M64: #define __ENQCMD__ 1 // CHECK_ARL_M64: #define __F16C__ 1 // CHECK_ARL_M64: #define __FMA__ 1 @@ -2674,15 +2716,20 @@ // CHECK_ARL_M64: #define __MOVBE__ 1 // CHECK_ARL_M64: #define __MOVDIR64B__ 1 // CHECK_ARL_M64: #define __MOVDIRI__ 1 +// CHECK_NVL_M64: #define __MOVRS__ 1 +// CHECK_NVL_M64: #define __NDD__ 1 +// CHECK_NVL_M64: #define __NF__ 1 // CHECK_ARL_M64: #define __PCLMUL__ 1 // CHECK_ARL_M64: #define __PCONFIG__ 1 // CHECK_ARL_M64: #define __PKU__ 1 // CHECK_ARL_M64: #define __POPCNT__ 1 +// CHECK_NVL_M64: #define __PPX__ 1 // CHECK_ARL_M64-NOT: #define __PREFETCHI__ 1 // CHECK_ARLS_M64-NOT: #define __PREFETCHI__ 1 -// CHECK_NVL_M64: #define __PREFETCHI__ 1 +// CHECK_CWF_M64: #define __PREFETCHI__ 1 // CHECK_ARL_M64: #define __PRFCHW__ 1 // CHECK_ARL_M64: #define __PTWRITE__ 1 +// CHECK_NVL_M64: #define __PUSH2POP2__ 1 // CHECK_ARL_M64-NOT: #define __RAOINT__ 1 // CHECK_ARL_M64: #define __RDPID__ 1 // CHECK_ARL_M64: #define __RDRND__ 1 @@ -2718,6 +2765,7 @@ // CHECK_ARL_M64: #define __XSAVEOPT__ 1 // CHECK_ARL_M64: #define __XSAVES__ 1 // CHECK_ARL_M64: #define __XSAVE__ 1 +// CHECK_NVL_M64: #define __ZU__ 1 // CHECK_ARL_M64: #define __amd64 1 // CHECK_ARL_M64: #define __amd64__ 1 // CHECK_ARL_M64: #define __corei7 1 diff --git a/compiler-rt/test/xray/TestCases/Posix/basic-filtering.cpp b/compiler-rt/test/xray/TestCases/Posix/basic-filtering.cpp index 1440460c9de27..7b46fe29a00e3 100644 --- 
a/compiler-rt/test/xray/TestCases/Posix/basic-filtering.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/basic-filtering.cpp @@ -3,23 +3,25 @@ // RUN: %clangxx_xray -std=c++11 %s -o %t -g // RUN: rm -f basic-filtering-* -// RUN: XRAY_OPTIONS="patch_premain=true xray_mode=xray-basic verbosity=1 \ +// RUN: env XRAY_OPTIONS="patch_premain=true xray_mode=xray-basic verbosity=1 \ // RUN: xray_logfile_base=basic-filtering- \ // RUN: xray_naive_log_func_duration_threshold_us=1000 \ // RUN: xray_naive_log_max_stack_depth=2" %run %t 2>&1 | \ // RUN: FileCheck %s +// RUN: ls basic-filtering-* | head -1 | tr -d '\n' > %t.log // RUN: %llvm_xray convert --symbolize --output-format=yaml -instr_map=%t \ -// RUN: "`ls basic-filtering-* | head -1`" | \ +// RUN: "%{readfile:%t.log}" | \ // RUN: FileCheck %s --check-prefix TRACE // RUN: rm -f basic-filtering-* // // Now check support for the XRAY_BASIC_OPTIONS environment variable. -// RUN: XRAY_OPTIONS="patch_premain=true xray_mode=xray-basic verbosity=1 \ +// RUN: env XRAY_OPTIONS="patch_premain=true xray_mode=xray-basic verbosity=1 \ // RUN: xray_logfile_base=basic-filtering-" \ -// RUN: XRAY_BASIC_OPTIONS="func_duration_threshold_us=1000 max_stack_depth=2" \ +// RUN: env XRAY_BASIC_OPTIONS="func_duration_threshold_us=1000 max_stack_depth=2" \ // RUN: %run %t 2>&1 | FileCheck %s +// RUN: ls basic-filtering-* | head -1 | tr -d '\n' > %t.log // RUN: %llvm_xray convert --symbolize --output-format=yaml -instr_map=%t \ -// RUN: "`ls basic-filtering-* | head -1`" | \ +// RUN: "%{readfile:%t.log}" | \ // RUN: FileCheck %s --check-prefix TRACE // RUN: rm -f basic-filtering-* diff --git a/compiler-rt/test/xray/TestCases/Posix/basic-mode-dso.cpp b/compiler-rt/test/xray/TestCases/Posix/basic-mode-dso.cpp index d40dcd808bcba..954c7ef9626d8 100644 --- a/compiler-rt/test/xray/TestCases/Posix/basic-mode-dso.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/basic-mode-dso.cpp @@ -4,8 +4,9 @@ // RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-shared -shared -std=c++11 %t/testlib.cpp -o %t/testlib.so // RUN: %clangxx_xray -g -fPIC -fxray-instrument -fxray-shared -std=c++11 %t/main.cpp %t/testlib.so -Wl,-rpath,%t -o %t/main.o -// RUN: XRAY_OPTIONS="patch_premain=false,xray_mode=xray-basic,xray_logfile_base=basic-mode-dso-,verbosity=1" XRAY_BASIC_OPTIONS="func_duration_threshold_us=0" %run %t/main.o 2>&1 | FileCheck %s -// RUN: %llvm_xray account --format=csv --sort=funcid "`ls basic-mode-dso-* | head -1`" | FileCheck --check-prefix=ACCOUNT %s +// RUN: env XRAY_OPTIONS="patch_premain=false,xray_mode=xray-basic,xray_logfile_base=basic-mode-dso-,verbosity=1" XRAY_BASIC_OPTIONS="func_duration_threshold_us=0" %run %t/main.o 2>&1 | FileCheck %s +// RUN: ls basic-mode-dso-* | head -1 | tr -d '\n' > %t.log +// RUN: %llvm_xray account --format=csv --sort=funcid "%{readfile:%t.log}" | FileCheck --check-prefix=ACCOUNT %s // RUN: rm basic-mode-dso-* // REQUIRES: target={{(aarch64|x86_64)-.*}} diff --git a/compiler-rt/test/xray/TestCases/Posix/c-test.cpp b/compiler-rt/test/xray/TestCases/Posix/c-test.cpp index 6427566186514..d7c766682f38f 100644 --- a/compiler-rt/test/xray/TestCases/Posix/c-test.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/c-test.cpp @@ -1,6 +1,6 @@ // RUN: %clang_xray -g -fxray-modes=xray-basic,xray-fdr,xray-profiling -o %t %s // RUN: rm -f xray-log.c-test.* -// RUN: XRAY_OPTIONS=patch_premain=true:verbosity=1:xray_mode=xray-basic %t \ +// RUN: env XRAY_OPTIONS=patch_premain=true:verbosity=1:xray_mode=xray-basic %t \ // RUN: 2>&1 | FileCheck %s // RUN: rm -f 
xray-log.c-test.* // diff --git a/compiler-rt/test/xray/TestCases/Posix/common-trampoline-alignment.cpp b/compiler-rt/test/xray/TestCases/Posix/common-trampoline-alignment.cpp index f9189644b40b8..d072e57f5cdbc 100644 --- a/compiler-rt/test/xray/TestCases/Posix/common-trampoline-alignment.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/common-trampoline-alignment.cpp @@ -2,7 +2,7 @@ // expect 16-byte alignment of the stack. // // RUN: %clangxx_xray -std=c++11 %s -o %t -// RUN: XRAY_OPTIONS="patch_premain=false verbosity=1" \ +// RUN: env XRAY_OPTIONS="patch_premain=false verbosity=1" \ // RUN: %run %t 2>&1 // REQUIRES: x86_64-target-arch // REQUIRES: built-in-llvm-tree diff --git a/compiler-rt/test/xray/TestCases/Posix/custom-event-handler-alignment.cpp b/compiler-rt/test/xray/TestCases/Posix/custom-event-handler-alignment.cpp index 9c61cba83b0da..19154820d5089 100644 --- a/compiler-rt/test/xray/TestCases/Posix/custom-event-handler-alignment.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/custom-event-handler-alignment.cpp @@ -2,7 +2,7 @@ // calls. // // RUN: %clangxx_xray -std=c++11 %s -o %t -// RUN: XRAY_OPTIONS="patch_premain=false verbosity=1" \ +// RUN: env XRAY_OPTIONS="patch_premain=false verbosity=1" \ // RUN: %run %t 2>&1 // REQUIRES: x86_64-target-arch // REQUIRES: built-in-llvm-tree diff --git a/compiler-rt/test/xray/TestCases/Posix/custom-event-logging.cpp b/compiler-rt/test/xray/TestCases/Posix/custom-event-logging.cpp index 30f4fffa429ee..cbdfe6c57b38e 100644 --- a/compiler-rt/test/xray/TestCases/Posix/custom-event-logging.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/custom-event-logging.cpp @@ -1,9 +1,9 @@ // Use the clang feature for custom xray event logging. // // RUN: %clangxx_xray -std=c++11 %s -o %t -// RUN: XRAY_OPTIONS="patch_premain=false verbosity=1 xray_logfile_base=custom-event-logging.xray-" %run %t 2>&1 | FileCheck %s +// RUN: env XRAY_OPTIONS="patch_premain=false verbosity=1 xray_logfile_base=custom-event-logging.xray-" %run %t 2>&1 | FileCheck %s // RUN: %clangxx_xray -std=c++11 -fpic -fpie %s -o %t -// RUN: XRAY_OPTIONS="patch_premain=false verbosity=1 xray_logfile_base=custom-event-logging.xray-" %run %t 2>&1 | FileCheck %s +// RUN: env XRAY_OPTIONS="patch_premain=false verbosity=1 xray_logfile_base=custom-event-logging.xray-" %run %t 2>&1 | FileCheck %s // FIXME: Support this in non-x86_64 as well // REQUIRES: target={{(aarch64|x86_64)-.*linux.*}} // REQUIRES: built-in-llvm-tree diff --git a/compiler-rt/test/xray/TestCases/Posix/fdr-mode-inmemory.cpp b/compiler-rt/test/xray/TestCases/Posix/fdr-mode-inmemory.cpp index 6c94dbd8b21d2..58a800a7c382a 100644 --- a/compiler-rt/test/xray/TestCases/Posix/fdr-mode-inmemory.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/fdr-mode-inmemory.cpp @@ -2,12 +2,12 @@ // RUN: rm -rf %t.dir // RUN: mkdir -p %t.dir // RUN: cd %t.dir -// RUN: XRAY_OPTIONS="patch_premain=false xray_logfile_base=fdr-inmemory-test- \ +// RUN: env XRAY_OPTIONS="patch_premain=false xray_logfile_base=fdr-inmemory-test- \ // RUN: verbosity=1" \ -// RUN: XRAY_FDR_OPTIONS="no_file_flush=true func_duration_threshold_us=0" \ +// RUN: env XRAY_FDR_OPTIONS="no_file_flush=true func_duration_threshold_us=0" \ // RUN: %run %t 2>&1 | FileCheck %s -// RUN: FILES=`find %t.dir -name 'fdr-inmemory-test-*' | wc -l` -// RUN: [ $FILES -eq 0 ] +// RUN: find %t.dir -name 'fdr-inmemory-test-*' | wc -l | tr -d '\n' > %t.file_count +// RUN: %python -c "import sys; sys.exit(int(sys.argv[1]))" %{readfile:%t.file_count} // RUN: rm -rf %t.dir // // REQUIRES: 
built-in-llvm-tree diff --git a/compiler-rt/test/xray/TestCases/Posix/fdr-mode-multiple.cpp b/compiler-rt/test/xray/TestCases/Posix/fdr-mode-multiple.cpp index f9288d9002de0..ffe43c9b516d7 100644 --- a/compiler-rt/test/xray/TestCases/Posix/fdr-mode-multiple.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/fdr-mode-multiple.cpp @@ -2,12 +2,12 @@ // RUN: rm -rf %t.dir // RUN: mkdir -p %t.dir // RUN: cd %t.dir -// RUN: XRAY_OPTIONS="patch_premain=false xray_logfile_base=fdr-inmemory-test- \ +// RUN: env XRAY_OPTIONS="patch_premain=false xray_logfile_base=fdr-inmemory-test- \ // RUN: verbosity=1" \ -// RUN: XRAY_FDR_OPTIONS="no_file_flush=true func_duration_threshold_us=0" \ +// RUN: env XRAY_FDR_OPTIONS="no_file_flush=true func_duration_threshold_us=0" \ // RUN: %run %t 2>&1 | FileCheck %s -// RUN: FILES=`find %t.dir -name 'fdr-inmemory-test-*' | wc -l` -// RUN: [ $FILES -eq 0 ] +// RUN: find %t.dir -name 'fdr-inmemory-test-*' | wc -l | tr -d '\n' > %t.file_count +// RUN: %python -c "import sys; sys.exit(int(sys.argv[1]))" %{readfile:%t.file_count} // RUN: rm -rf %t.dir // // REQUIRES: built-in-llvm-tree diff --git a/compiler-rt/test/xray/TestCases/Posix/fdr-mode.cpp b/compiler-rt/test/xray/TestCases/Posix/fdr-mode.cpp index 0ee8aaa755d5a..fa83b5767e015 100644 --- a/compiler-rt/test/xray/TestCases/Posix/fdr-mode.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/fdr-mode.cpp @@ -1,20 +1,22 @@ // RUN: %clangxx_xray -g -std=c++11 %s -o %t // RUN: rm -f fdr-logging-test-* // RUN: rm -f fdr-unwrite-test-* -// RUN: XRAY_OPTIONS="patch_premain=false xray_logfile_base=fdr-logging-test- \ +// RUN: env XRAY_OPTIONS="patch_premain=false xray_logfile_base=fdr-logging-test- \ // RUN: xray_mode=xray-fdr verbosity=1" \ -// RUN: XRAY_FDR_OPTIONS="func_duration_threshold_us=0" \ +// RUN: env XRAY_FDR_OPTIONS="func_duration_threshold_us=0" \ // RUN: %run %t 2>&1 | FileCheck %s -// RUN: XRAY_OPTIONS="patch_premain=false \ +// RUN: env XRAY_OPTIONS="patch_premain=false \ // RUN: xray_logfile_base=fdr-unwrite-test- xray_mode=xray-fdr \ // RUN: verbosity=1" \ -// RUN: XRAY_FDR_OPTIONS="func_duration_threshold_us=5000" \ +// RUN: env XRAY_FDR_OPTIONS="func_duration_threshold_us=5000" \ // RUN: %run %t 2>&1 | FileCheck %s +// RUN: ls fdr-logging-test-* | head -1 | tr -d '\n' > %t.log // RUN: %llvm_xray convert --symbolize --output-format=yaml -instr_map=%t \ -// RUN: "`ls fdr-logging-test-* | head -1`" \ +// RUN: "%{readfile:%t.log}" \ // RUN: | FileCheck %s --check-prefix=TRACE +// RUN: ls fdr-unwrite-test-* | head -1 | tr -d '\n' > %t.log // RUN: %llvm_xray convert --symbolize --output-format=yaml -instr_map=%t \ -// RUN: "`ls fdr-unwrite-test-* | head -1`" \ +// RUN: "%{readfile:%t.log}" \ // RUN: | FileCheck %s --check-prefix=UNWRITE // RUN: rm fdr-logging-test-* // RUN: rm fdr-unwrite-test-* diff --git a/compiler-rt/test/xray/TestCases/Posix/fdr-thread-order.cpp b/compiler-rt/test/xray/TestCases/Posix/fdr-thread-order.cpp index 85284fc27ab38..d3fd2536ead00 100644 --- a/compiler-rt/test/xray/TestCases/Posix/fdr-thread-order.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/fdr-thread-order.cpp @@ -1,6 +1,6 @@ // RUN: rm -rf %t && mkdir %t // RUN: %clangxx_xray -g -std=c++11 %s -o %t.exe -// RUN: XRAY_OPTIONS="patch_premain=false \ +// RUN: env XRAY_OPTIONS="patch_premain=false \ // RUN: xray_logfile_base=%t/ xray_mode=xray-fdr verbosity=1" \ // RUN: XRAY_FDR_OPTIONS=func_duration_threshold_us=0 %run %t.exe 2>&1 | \ // RUN: FileCheck %s diff --git a/compiler-rt/test/xray/TestCases/Posix/fork_basic_logging.cpp 
b/compiler-rt/test/xray/TestCases/Posix/fork_basic_logging.cpp index 58f310e3a1083..d0905c18cdc14 100644 --- a/compiler-rt/test/xray/TestCases/Posix/fork_basic_logging.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/fork_basic_logging.cpp @@ -1,11 +1,12 @@ // Check that when forking in basic logging mode, we get the different tids for child and parent // RUN: %clangxx_xray -g -std=c++11 %s -o %t // RUN: rm -f fork-basic-logging-test-* -// RUN: XRAY_OPTIONS="patch_premain=true xray_logfile_base=fork-basic-logging-test- \ +// RUN: env XRAY_OPTIONS="patch_premain=true xray_logfile_base=fork-basic-logging-test- \ // RUN: xray_mode=xray-basic verbosity=1 xray_naive_log_func_duration_threshold_us=0" \ // RUN: %run %t 2>&1 | FileCheck %s +// RUN: ls -S fork-basic-logging-test-* | head -1 | tr -d '\n' > %t.log // RUN: %llvm_xray convert --symbolize --output-format=yaml -instr_map=%t \ -// RUN: "`ls -S fork-basic-logging-test-* | head -1`" \ +// RUN: "%{readfile:%t.log}" \ // RUN: | FileCheck %s --check-prefix=TRACE // REQUIRES: x86_64-target-arch diff --git a/compiler-rt/test/xray/TestCases/Posix/profiling-multi-threaded.cpp b/compiler-rt/test/xray/TestCases/Posix/profiling-multi-threaded.cpp index b850c053681a1..b1b8edc659ea6 100644 --- a/compiler-rt/test/xray/TestCases/Posix/profiling-multi-threaded.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/profiling-multi-threaded.cpp @@ -4,11 +4,11 @@ // FIXME: Make -fxray-modes=xray-profiling part of the default? // RUN: %clangxx_xray -std=c++11 %s -o %t -fxray-modes=xray-profiling // RUN: rm -f xray-log.profiling-multi-* -// RUN: XRAY_OPTIONS=verbosity=1 \ +// RUN: env XRAY_OPTIONS=verbosity=1 \ // RUN: XRAY_PROFILING_OPTIONS=no_flush=1 %run %t -// RUN: XRAY_OPTIONS=verbosity=1 %run %t -// RUN: PROFILES=`ls xray-log.profiling-multi-* | wc -l` -// RUN: [ $PROFILES -eq 1 ] +// RUN: env XRAY_OPTIONS=verbosity=1 %run %t +// RUN: ls xray-log.profiling-multi-* | wc -l | tr -d '\n' > %t.profiles +// RUN: %python -c "import sys; sys.exit(int(sys.argv[1]) - 1)" %{readfile:%t.profiles} // RUN: rm -f xray-log.profiling-multi-* // // REQUIRES: built-in-llvm-tree diff --git a/compiler-rt/test/xray/TestCases/Posix/profiling-single-threaded.cpp b/compiler-rt/test/xray/TestCases/Posix/profiling-single-threaded.cpp index b2359607379d6..d84f75bcac0da 100644 --- a/compiler-rt/test/xray/TestCases/Posix/profiling-single-threaded.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/profiling-single-threaded.cpp @@ -4,11 +4,11 @@ // FIXME: Make -fxray-modes=xray-profiling part of the default? // RUN: %clangxx_xray -std=c++11 %s -o %t -fxray-modes=xray-profiling // RUN: rm -f xray-log.profiling-single-* -// RUN: XRAY_OPTIONS=verbosity=1 \ +// RUN: env XRAY_OPTIONS=verbosity=1 \ // RUN: XRAY_PROFILING_OPTIONS=no_flush=true %run %t -// RUN: XRAY_OPTIONS=verbosity=1 %run %t -// RUN: PROFILES=`ls xray-log.profiling-single-* | wc -l` -// RUN: [ $PROFILES -eq 2 ] +// RUN: env XRAY_OPTIONS=verbosity=1 %run %t +// RUN: ls xray-log.profiling-single-* | wc -l | tr -d '\n' > %t.profiles +// RUN: %python -c "import sys; sys.exit(int(sys.argv[1]) - 2)" %{readfile:%t.profiles} // RUN: rm -f xray-log.profiling-single-* // // REQUIRES: built-in-llvm-tree diff --git a/compiler-rt/test/xray/TestCases/Posix/quiet-start.cpp b/compiler-rt/test/xray/TestCases/Posix/quiet-start.cpp index 48830017047c0..d8e96e0297bd1 100644 --- a/compiler-rt/test/xray/TestCases/Posix/quiet-start.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/quiet-start.cpp @@ -2,11 +2,11 @@ // instrumentation sleds. 
//
// RUN: %clangxx -std=c++11 %s -o %t %xraylib
-// RUN: XRAY_OPTIONS="patch_premain=true verbosity=1" %run %t 2>&1 | \
+// RUN: env XRAY_OPTIONS="patch_premain=true verbosity=1" %run %t 2>&1 | \
// RUN: FileCheck %s --check-prefix NOISY
-// RUN: XRAY_OPTIONS="patch_premain=true verbosity=0" %run %t 2>&1 | \
+// RUN: env XRAY_OPTIONS="patch_premain=true verbosity=0" %run %t 2>&1 | \
// RUN: FileCheck %s --check-prefix QUIET
-// RUN: XRAY_OPTIONS="" %run %t 2>&1 | FileCheck %s --check-prefix DEFAULT
+// RUN: env XRAY_OPTIONS="" %run %t 2>&1 | FileCheck %s --check-prefix DEFAULT
// REQUIRES: built-in-llvm-tree
diff --git a/cross-project-tests/dtlto/dtlto-cache.test b/cross-project-tests/dtlto/dtlto-cache.test
new file mode 100644
index 0000000000000..b98d4dbb433bb
--- /dev/null
+++ b/cross-project-tests/dtlto/dtlto-cache.test
@@ -0,0 +1,89 @@
+REQUIRES: x86-registered-target, ld.lld
+
+# Show that the ThinLTO cache works with DTLTO.
+
+RUN: rm -rf %t && split-file %s %t && cd %t
+
+# Compile source files into bitcode files.
+RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c foo.c main.c
+
+# Execute the linker and check that the cache is populated.
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
+RUN: main.o foo.o -o populate1.elf \
+RUN: -Wl,--thinlto-distributor=%python \
+RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
+RUN: -Wl,--thinlto-remote-compiler=%clang \
+RUN: -Wl,--thinlto-cache-dir=cache.dir \
+RUN: -Wl,--save-temps
+
+# Check that two backend compilation jobs occurred.
+RUN: grep -wo args populate1.*.dist-file.json | wc -l | grep -qx 3
+RUN: ls cache.dir/llvmcache.timestamp
+RUN: ls cache.dir | count 3
+
+# Execute the linker again and check that a fully populated cache is used correctly,
+# i.e., no additional cache entries are created for cache hits.
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
+RUN: main.o foo.o -o populate2.elf \
+RUN: -Wl,--thinlto-distributor=%python \
+RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
+RUN: -Wl,--thinlto-remote-compiler=%clang \
+RUN: -Wl,--thinlto-cache-dir=cache.dir \
+RUN: -Wl,--save-temps
+
+# Check that no backend compilation jobs occurred.
+RUN: grep -wo args populate2.*.dist-file.json | wc -l | grep -qx 1
+RUN: ls cache.dir | count 3
+
+RUN: %clang -O0 --target=x86_64-linux-gnu -flto=thin -c foo.c -o foo.O0.o
+RUN: %clang -O0 --target=x86_64-linux-gnu -flto=thin -c main.c -o main.O0.o
+
+# Execute the linker again and check that the cache is populated correctly when there
+# are no cache hits but there are existing cache entries.
+# As a side effect, this also verifies that the optimization level is considered when
+# evaluating the cache entry key.
+
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
+RUN: main.O0.o foo.O0.o -o populate3.elf \
+RUN: -Wl,--thinlto-distributor=%python \
+RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
+RUN: -Wl,--thinlto-remote-compiler=%clang \
+RUN: -Wl,--thinlto-cache-dir=cache.dir \
+RUN: -Wl,--save-temps
+
+# Check that two new backend compilation jobs occurred.
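+# (A note on the counting convention used by these checks, inferred from the
+# checks themselves rather than from any documented DTLTO output format: the
+# dist-file JSON appears to carry one "args" array for the common linker
+# invocation plus one "args" array per distributed backend job, so a
+# `grep -wo args | wc -l` result of N+1 corresponds to N backend jobs.)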
+RUN: grep -wo args populate3.*.dist-file.json | wc -l | grep -qx 3 +RUN: ls cache.dir | count 5 + +RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c main-partial.c + +# Execute the linker and check that everything works correctly with the partially populated cache; +# One more cache entry should be generated after this run. + +RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \ +RUN: main-partial.o foo.o -o main-partial.elf \ +RUN: -Wl,--thinlto-distributor=%python \ +RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \ +RUN: -Wl,--thinlto-remote-compiler=%clang \ +RUN: -Wl,--thinlto-cache-dir=cache.dir \ +RUN: -Wl,--save-temps + +# Check that one new backend compilation job occurred. +RUN: grep -wo args main-partial.*.dist-file.json | wc -l | grep -qx 2 +RUN: ls cache.dir | count 6 + +#--- foo.c +volatile int foo_int; +__attribute__((retain)) int foo(int x) { return x + foo_int; } + +#--- main.c +extern int foo(int x); +__attribute__((retain)) int main(int argc, char** argv) { + return foo(argc); +} + +#--- main-partial.c +extern int foo(int x); +__attribute__((retain)) int main(int argc, char** argv) { + return foo(argc+1); +} diff --git a/cross-project-tests/dtlto/dtlto-thinlto-cache.test b/cross-project-tests/dtlto/dtlto-thinlto-cache.test new file mode 100644 index 0000000000000..c177112e2dbbd --- /dev/null +++ b/cross-project-tests/dtlto/dtlto-thinlto-cache.test @@ -0,0 +1,70 @@ +REQUIRES: x86-registered-target, ld.lld + +# This test verifies that a cache populated by an in-process ThinLTO codegen is +# not reused by an out-of-process (DTLTO) codegen and vice versa. + +RUN: rm -rf %t && split-file %s %t && cd %t + +# Compile source files into bitcode files. +RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c foo.c main.c + +# Execute the linker and check that the in-process ThinLTO cache is populated. +RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \ +RUN: main.o foo.o -o main.elf \ +RUN: -Wl,--thinlto-cache-dir=cache.dir \ +RUN: -Wl,--save-temps + +RUN: ls cache.dir/llvmcache.timestamp +RUN: ls cache.dir | count 3 + +# Execute the linker and check that out-of-process codegen (DTLTO) adds +# additional entries to the cache, implying that in-process and +# out-of-process codegens do not share cache entries. +RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \ +RUN: main.o foo.o -o populate1.elf \ +RUN: -Wl,--thinlto-distributor=%python \ +RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \ +RUN: -Wl,--thinlto-remote-compiler=%clang \ +RUN: -Wl,--thinlto-cache-dir=cache.dir \ +RUN: -Wl,--save-temps + +# Check that two backend compilation jobs occurred. +RUN: grep -wo args populate1.*.dist-file.json | wc -l | grep -qx 3 +RUN: ls cache.dir | count 5 + +# Clean up cache directory. +RUN: rm -rf cache.dir + +# Execute the linker and check that the out-of-process (DTLTO) cache is populated. +RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \ +RUN: main.o foo.o -o populate2.elf \ +RUN: -Wl,--thinlto-distributor=%python \ +RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \ +RUN: -Wl,--thinlto-remote-compiler=%clang \ +RUN: -Wl,--thinlto-cache-dir=cache.dir \ +RUN: -Wl,--save-temps + +# Check that two backend compilation jobs occurred.
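+# (The two modes are expected to compute different cache keys for identical
+# inputs, so a cache.dir populated by one mode yields no hits for the other:
+# each mode contributes its own two entries alongside the single shared
+# llvmcache.timestamp file, giving the counts of 3 and 5 checked here.)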
+RUN: grep -wo args populate2.*.dist-file.json | wc -l | grep -qx 3 +RUN: ls cache.dir/llvmcache.timestamp +RUN: ls cache.dir | count 3 + +# Execute the linker and check that in-process codegen adds additional entries +# to the cache, implying that in-process and out-of-process codegens do +# not share cache entries. +RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \ +RUN: main.o foo.o -o main.elf \ +RUN: -Wl,--thinlto-cache-dir=cache.dir \ +RUN: -Wl,--save-temps + +RUN: ls cache.dir | count 5 + +#--- foo.c +volatile int foo_int; +__attribute__((retain)) int foo(int x) { return x + foo_int; } + +#--- main.c +extern int foo(int x); +__attribute__((retain)) int main(int argc, char** argv) { + return foo(argc); +} diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index b1765f927d6c9..60d2ad0b764b9 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -5345,12 +5345,10 @@ struct OmpEndLoopDirective : public OmpEndDirective { }; // OpenMP directives enclosing do loop -using NestedConstruct = - std::variant<DoConstruct, common::Indirection<OpenMPLoopConstruct>>; struct OpenMPLoopConstruct { TUPLE_CLASS_BOILERPLATE(OpenMPLoopConstruct); OpenMPLoopConstruct(OmpBeginLoopDirective &&a) - : t({std::move(a), std::nullopt, std::nullopt}) {} + : t({std::move(a), Block{}, std::nullopt}) {} const OmpBeginLoopDirective &BeginDir() const { return std::get<OmpBeginLoopDirective>(t); @@ -5358,8 +5356,10 @@ struct OpenMPLoopConstruct { const std::optional<OmpEndLoopDirective> &EndDir() const { return std::get<std::optional<OmpEndLoopDirective>>(t); } - std::tuple<OmpBeginLoopDirective, std::optional<NestedConstruct>, - std::optional<OmpEndLoopDirective>> + const DoConstruct *GetNestedLoop() const; + const OpenMPLoopConstruct *GetNestedConstruct() const; + + std::tuple<OmpBeginLoopDirective, Block, std::optional<OmpEndLoopDirective>> t; }; diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index abf65b5b6c762..ff72d09edeaf3 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -3988,27 +3988,22 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, mlir::Location currentLocation = converter.genLocation(beginSpec.source); - auto &optLoopCons = - std::get<std::optional<parser::NestedConstruct>>(loopConstruct.t); - if (optLoopCons.has_value()) { - if (auto *ompNestedLoopCons{ - std::get_if<common::Indirection<parser::OpenMPLoopConstruct>>( - &*optLoopCons)}) { - llvm::omp::Directive nestedDirective = - parser::omp::GetOmpDirectiveName(*ompNestedLoopCons).v; - switch (nestedDirective) { - case llvm::omp::Directive::OMPD_tile: - // Skip OMPD_tile since the tile sizes will be retrieved when - // generating the omp.loop_nest op. - break; - default: { - unsigned version = semaCtx.langOptions().OpenMPVersion; - TODO(currentLocation, - "Applying a loop-associated on the loop generated by the " + - llvm::omp::getOpenMPDirectiveName(nestedDirective, version) + - " construct"); - } - } + if (const parser::OpenMPLoopConstruct *ompNestedLoopCons = + loopConstruct.GetNestedConstruct()) { + llvm::omp::Directive nestedDirective = + parser::omp::GetOmpDirectiveName(*ompNestedLoopCons).v; + switch (nestedDirective) { + case llvm::omp::Directive::OMPD_tile: + // Skip OMPD_tile since the tile sizes will be retrieved when + // generating the omp.loop_nest op.
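+      // (Hypothetical sketch, for illustration only: the accessor pattern
+      // this patch adopts throughout lowering and semantics. Both accessors
+      // return nullptr when the construct's body is empty.
+      //   if (const parser::OpenMPLoopConstruct *inner =
+      //           loopConstruct.GetNestedConstruct())
+      //     ...; // a nested loop-transformation construct (e.g. tile)
+      //   else if (const parser::DoConstruct *doLoop =
+      //                loopConstruct.GetNestedLoop())
+      //     ...; // the associated DO loop itself
+      // )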
+ break; + default: { + unsigned version = semaCtx.langOptions().OpenMPVersion; + TODO(currentLocation, + "Applying a loop-associated on the loop generated by the " + + llvm::omp::getOpenMPDirectiveName(nestedDirective, version) + + " construct"); + } } } diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 6cb2508658d33..55b3a36261233 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -780,17 +780,9 @@ static void processTileSizesFromOpenMPConstruct( if (!ompCons) return; if (auto *ompLoop{std::get_if<parser::OpenMPLoopConstruct>(&ompCons->u)}) { - const auto &nestedOptional = - std::get<std::optional<parser::NestedConstruct>>(ompLoop->t); - assert(nestedOptional.has_value() && - "Expected a DoConstruct or OpenMPLoopConstruct"); - const auto *innerConstruct = - std::get_if<common::Indirection<parser::OpenMPLoopConstruct>>( - &(nestedOptional.value())); - if (innerConstruct) { - const auto &innerLoopDirective = innerConstruct->value(); + if (auto *innerConstruct = ompLoop->GetNestedConstruct()) { const parser::OmpDirectiveSpecification &innerBeginSpec = - innerLoopDirective.BeginDir(); + innerConstruct->BeginDir(); if (innerBeginSpec.DirId() == llvm::omp::Directive::OMPD_tile) { // Get the size values from parse tree and convert to a vector. for (const auto &clause : innerBeginSpec.Clauses().v) { diff --git a/flang/lib/Parser/parse-tree.cpp b/flang/lib/Parser/parse-tree.cpp index ad0016e1404f9..60e51895cdcea 100644 --- a/flang/lib/Parser/parse-tree.cpp +++ b/flang/lib/Parser/parse-tree.cpp @@ -7,8 +7,10 @@ //===----------------------------------------------------------------------===// #include "flang/Parser/parse-tree.h" + #include "flang/Common/idioms.h" #include "flang/Common/indirection.h" +#include "flang/Parser/openmp-utils.h" #include "flang/Parser/tools.h" #include "flang/Parser/user-state.h" #include "llvm/ADT/ArrayRef.h" @@ -432,6 +434,20 @@ const OmpClauseList &OmpDirectiveSpecification::Clauses() const { return empty; } +const DoConstruct *OpenMPLoopConstruct::GetNestedLoop() const { + if (auto &body{std::get<Block>(t)}; !body.empty()) { + return Unwrap<DoConstruct>(body.front()); + } + return nullptr; +} + +const OpenMPLoopConstruct *OpenMPLoopConstruct::GetNestedConstruct() const { + if (auto &body{std::get<Block>(t)}; !body.empty()) { + return Unwrap<OpenMPLoopConstruct>(body.front()); + } + return nullptr; +} + static bool InitCharBlocksFromStrings(llvm::MutableArrayRef blocks, llvm::ArrayRef strings) { for (auto [i, n] : llvm::enumerate(strings)) { diff --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp index 8cccd84f9fa19..5e8d50be277a0 100644 --- a/flang/lib/Parser/prescan.cpp +++ b/flang/lib/Parser/prescan.cpp @@ -845,6 +845,9 @@ bool Prescanner::NextToken(TokenSequence &tokens) { if (InFixedFormSource()) { SkipSpaces(); } + if (inFixedForm_ && (IsOpenMPDirective() && parenthesisNesting_ > 0)) { + SkipSpaces(); + } if ((*at_ == '\'' || *at_ == '"') && tokens.CharAt(tokens.SizeInChars() - 1) == '_') { // kind_"..."
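// (The fixed-form change above makes blanks insignificant inside the
// parenthesized argument list of an OpenMP directive, so a name written as
// `var 1` or a common block written as `/c c/` tokenizes as `var1` and
// `/cc/`; the new flang/test/Parser/OpenMP/name-with-space.f test later in
// this patch exercises exactly this.)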
QuotedCharacterLiteral(tokens, start); diff --git a/flang/lib/Parser/prescan.h b/flang/lib/Parser/prescan.h index 5e7481781d944..4f691d56975a9 100644 --- a/flang/lib/Parser/prescan.h +++ b/flang/lib/Parser/prescan.h @@ -183,6 +183,9 @@ class Prescanner { bool InConditionalLine() const { return InOpenMPConditionalLine() || InOpenACCOrCUDAConditionalLine(); } + bool IsOpenMPDirective() const { + return directiveSentinel_ && std::strcmp(directiveSentinel_, "$omp") == 0; + } bool InFixedFormSource() const { return inFixedForm_ && !inPreprocessorDirective_ && !InCompilerDirective(); } diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index e3bc3cdc42ffb..f81200d092b11 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2706,12 +2706,6 @@ class UnparseVisitor { Put("\n"); EndOpenMP(); } - void Unparse(const OpenMPLoopConstruct &x) { - Walk(std::get(x.t)); - Walk(std::get>>>(x.t)); - Walk(std::get>(x.t)); - } void Unparse(const BasedPointer &x) { Put('('), Walk(std::get<0>(x.t)), Put(","), Walk(std::get<1>(x.t)); Walk("(", std::get>(x.t), ")"), Put(')'); diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index a11c5250b1ab4..0cec1969e0978 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -143,6 +143,8 @@ class CanonicalizationOfOmp { parser::ToUpperCaseLetters(dirName.source.ToString())); }; + auto &body{std::get(x.t)}; + nextIt = it; while (++nextIt != block.end()) { // Ignore compiler directives. @@ -152,9 +154,7 @@ class CanonicalizationOfOmp { if (auto *doCons{GetConstructIf(*nextIt)}) { if (doCons->GetLoopControl()) { // move DoConstruct - std::get>>>(x.t) = - std::move(*doCons); + body.push_back(std::move(*nextIt)); nextIt = block.erase(nextIt); // try to match OmpEndLoopDirective if (nextIt != block.end()) { @@ -198,10 +198,7 @@ class CanonicalizationOfOmp { ++endIt; } RewriteOpenMPLoopConstruct(*ompLoopCons, block, nextIt); - auto &ompLoop = std::get>(x.t); - ompLoop = - std::optional{parser::NestedConstruct{ - common::Indirection{std::move(*ompLoopCons)}}}; + body.push_back(std::move(*nextIt)); nextIt = block.erase(nextIt); } else if (nestedBeginName.v == llvm::omp::Directive::OMPD_unroll && beginName.v == llvm::omp::Directive::OMPD_tile) { diff --git a/flang/lib/Semantics/check-omp-loop.cpp b/flang/lib/Semantics/check-omp-loop.cpp index aaaa2d6e78280..3d3596b500880 100644 --- a/flang/lib/Semantics/check-omp-loop.cpp +++ b/flang/lib/Semantics/check-omp-loop.cpp @@ -285,13 +285,9 @@ void OmpStructureChecker::Enter(const parser::OpenMPLoopConstruct &x) { } SetLoopInfo(x); - auto &optLoopCons = std::get>(x.t); - if (optLoopCons.has_value()) { - if (const auto &doConstruct{ - std::get_if(&*optLoopCons)}) { - const auto &doBlock{std::get(doConstruct->t)}; - CheckNoBranching(doBlock, beginName.v, beginName.source); - } + if (const auto *doConstruct{x.GetNestedLoop()}) { + const auto &doBlock{std::get(doConstruct->t)}; + CheckNoBranching(doBlock, beginName.v, beginName.source); } CheckLoopItrVariableIsInt(x); CheckAssociatedLoopConstraints(x); @@ -314,46 +310,34 @@ const parser::Name OmpStructureChecker::GetLoopIndex( } void OmpStructureChecker::SetLoopInfo(const parser::OpenMPLoopConstruct &x) { - auto &optLoopCons = std::get>(x.t); - if (optLoopCons.has_value()) { - if (const auto &loopConstruct{ - std::get_if(&*optLoopCons)}) { - const parser::DoConstruct *loop{&*loopConstruct}; - if (loop && loop->IsDoNormal()) { - const parser::Name 
&itrVal{GetLoopIndex(loop)}; - SetLoopIv(itrVal.symbol); - } + if (const auto *loop{x.GetNestedLoop()}) { + if (loop->IsDoNormal()) { + const parser::Name &itrVal{GetLoopIndex(loop)}; + SetLoopIv(itrVal.symbol); } } } void OmpStructureChecker::CheckLoopItrVariableIsInt( const parser::OpenMPLoopConstruct &x) { - auto &optLoopCons = std::get>(x.t); - if (optLoopCons.has_value()) { - if (const auto &loopConstruct{ - std::get_if(&*optLoopCons)}) { - - for (const parser::DoConstruct *loop{&*loopConstruct}; loop;) { - if (loop->IsDoNormal()) { - const parser::Name &itrVal{GetLoopIndex(loop)}; - if (itrVal.symbol) { - const auto *type{itrVal.symbol->GetType()}; - if (!type->IsNumeric(TypeCategory::Integer)) { - context_.Say(itrVal.source, - "The DO loop iteration" - " variable must be of the type integer."_err_en_US, - itrVal.ToString()); - } - } + for (const parser::DoConstruct *loop{x.GetNestedLoop()}; loop;) { + if (loop->IsDoNormal()) { + const parser::Name &itrVal{GetLoopIndex(loop)}; + if (itrVal.symbol) { + const auto *type{itrVal.symbol->GetType()}; + if (!type->IsNumeric(TypeCategory::Integer)) { + context_.Say(itrVal.source, + "The DO loop iteration" + " variable must be of the type integer."_err_en_US, + itrVal.ToString()); } - // Get the next DoConstruct if block is not empty. - const auto &block{std::get(loop->t)}; - const auto it{block.begin()}; - loop = it != block.end() ? parser::Unwrap(*it) - : nullptr; } } + // Get the next DoConstruct if block is not empty. + const auto &block{std::get(loop->t)}; + const auto it{block.begin()}; + loop = + it != block.end() ? parser::Unwrap(*it) : nullptr; } } @@ -417,29 +401,23 @@ void OmpStructureChecker::CheckDistLinear( // Match the loop index variables with the collected symbols from linear // clauses. - auto &optLoopCons = std::get>(x.t); - if (optLoopCons.has_value()) { - if (const auto &loopConstruct{ - std::get_if(&*optLoopCons)}) { - for (const parser::DoConstruct *loop{&*loopConstruct}; loop;) { - if (loop->IsDoNormal()) { - const parser::Name &itrVal{GetLoopIndex(loop)}; - if (itrVal.symbol) { - // Remove the symbol from the collected set - indexVars.erase(&itrVal.symbol->GetUltimate()); - } - collapseVal--; - if (collapseVal == 0) { - break; - } - } - // Get the next DoConstruct if block is not empty. - const auto &block{std::get(loop->t)}; - const auto it{block.begin()}; - loop = it != block.end() ? parser::Unwrap(*it) - : nullptr; + for (const parser::DoConstruct *loop{x.GetNestedLoop()}; loop;) { + if (loop->IsDoNormal()) { + const parser::Name &itrVal{GetLoopIndex(loop)}; + if (itrVal.symbol) { + // Remove the symbol from the collected set + indexVars.erase(&itrVal.symbol->GetUltimate()); + } + collapseVal--; + if (collapseVal == 0) { + break; } } + // Get the next DoConstruct if block is not empty. + const auto &block{std::get(loop->t)}; + const auto it{block.begin()}; + loop = it != block.end() ? 
parser::Unwrap(*it) + : nullptr; } // Show error for the remaining variables diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 1b9852fb89486..4b3c0903b95ec 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -2047,13 +2047,9 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { SetContextAssociatedLoopLevel(GetNumAffectedLoopsFromLoopConstruct(x)); if (beginName.v == llvm::omp::Directive::OMPD_do) { - auto &optLoopCons = std::get>(x.t); - if (optLoopCons.has_value()) { - if (const auto &doConstruct{ - std::get_if(&*optLoopCons)}) { - if (doConstruct->IsDoWhile()) { - return true; - } + if (const parser::DoConstruct *doConstruct{x.GetNestedLoop()}) { + if (doConstruct->IsDoWhile()) { + return true; } } } @@ -2210,18 +2206,8 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromInnerLoopContruct( const parser::OpenMPLoopConstruct &x, llvm::SmallVector &levels, llvm::SmallVector &clauses) { - - const auto &nestedOptional = - std::get>(x.t); - assert(nestedOptional.has_value() && - "Expected a DoConstruct or OpenMPLoopConstruct"); - const auto *innerConstruct = - std::get_if>( - &(nestedOptional.value())); - - if (innerConstruct) { - CollectNumAffectedLoopsFromLoopConstruct( - innerConstruct->value(), levels, clauses); + if (auto *innerConstruct{x.GetNestedConstruct()}) { + CollectNumAffectedLoopsFromLoopConstruct(*innerConstruct, levels, clauses); } } @@ -2286,24 +2272,12 @@ void OmpAttributeVisitor::CheckPerfectNestAndRectangularLoop( // Find the associated region by skipping nested loop-associated constructs // such as loop transformations - const parser::NestedConstruct *innermostAssocRegion{nullptr}; const parser::OpenMPLoopConstruct *innermostConstruct{&x}; - while (const auto &innerAssocStmt{ - std::get>( - innermostConstruct->t)}) { - innermostAssocRegion = &(innerAssocStmt.value()); - if (const auto *innerConstruct{ - std::get_if>( - innermostAssocRegion)}) { - innermostConstruct = &innerConstruct->value(); - } else { - break; - } + while (auto *nested{innermostConstruct->GetNestedConstruct()}) { + innermostConstruct = nested; } - if (!innermostAssocRegion) - return; - const auto &outer{std::get_if(innermostAssocRegion)}; + const auto *outer{innermostConstruct->GetNestedLoop()}; if (!outer) return; @@ -2398,61 +2372,51 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel( const parser::OmpClause *clause{GetAssociatedClause()}; bool hasCollapseClause{ clause ? 
(clause->Id() == llvm::omp::OMPC_collapse) : false}; - const parser::OpenMPLoopConstruct *innerMostLoop = &x; - const parser::NestedConstruct *innerMostNest = nullptr; - while (auto &optLoopCons{ - std::get>(innerMostLoop->t)}) { - innerMostNest = &(optLoopCons.value()); - if (const auto *innerLoop{ - std::get_if>( - innerMostNest)}) { - innerMostLoop = &(innerLoop->value()); - } else - break; - } - if (innerMostNest) { - if (const auto &outer{std::get_if(innerMostNest)}) { - for (const parser::DoConstruct *loop{&*outer}; loop && level > 0; - --level) { - if (loop->IsDoConcurrent()) { - // DO CONCURRENT is explicitly allowed for the LOOP construct so long - // as there isn't a COLLAPSE clause - if (isLoopConstruct) { - if (hasCollapseClause) { - // hasCollapseClause implies clause != nullptr - context_.Say(clause->source, - "DO CONCURRENT loops cannot be used with the COLLAPSE clause."_err_en_US); - } - } else { - auto &stmt = - std::get>(loop->t); - context_.Say(stmt.source, - "DO CONCURRENT loops cannot form part of a loop nest."_err_en_US); + const parser::OpenMPLoopConstruct *innerMostNest = &x; + while (auto *nested{innerMostNest->GetNestedConstruct()}) { + innerMostNest = nested; + } + + if (const auto *outer{innerMostNest->GetNestedLoop()}) { + for (const parser::DoConstruct *loop{&*outer}; loop && level > 0; --level) { + if (loop->IsDoConcurrent()) { + // DO CONCURRENT is explicitly allowed for the LOOP construct so long + // as there isn't a COLLAPSE clause + if (isLoopConstruct) { + if (hasCollapseClause) { + // hasCollapseClause implies clause != nullptr + context_.Say(clause->source, + "DO CONCURRENT loops cannot be used with the COLLAPSE clause."_err_en_US); } + } else { + auto &stmt = + std::get>(loop->t); + context_.Say(stmt.source, + "DO CONCURRENT loops cannot form part of a loop nest."_err_en_US); } - // go through all the nested do-loops and resolve index variables - const parser::Name *iv{GetLoopIndex(*loop)}; - if (iv) { - if (auto *symbol{ResolveOmp(*iv, ivDSA, currScope())}) { - SetSymbolDSA(*symbol, {Symbol::Flag::OmpPreDetermined, ivDSA}); - iv->symbol = symbol; // adjust the symbol within region - AddToContextObjectWithDSA(*symbol, ivDSA); - } - - const auto &block{std::get(loop->t)}; - const auto it{block.begin()}; - loop = it != block.end() ? GetDoConstructIf(*it) : nullptr; + } + // go through all the nested do-loops and resolve index variables + const parser::Name *iv{GetLoopIndex(*loop)}; + if (iv) { + if (auto *symbol{ResolveOmp(*iv, ivDSA, currScope())}) { + SetSymbolDSA(*symbol, {Symbol::Flag::OmpPreDetermined, ivDSA}); + iv->symbol = symbol; // adjust the symbol within region + AddToContextObjectWithDSA(*symbol, ivDSA); } + + const auto &block{std::get(loop->t)}; + const auto it{block.begin()}; + loop = it != block.end() ? 
GetDoConstructIf(*it) : nullptr; } - CheckAssocLoopLevel(level, GetAssociatedClause()); - } else { - context_.Say(GetContext().directiveSource, - "A DO loop must follow the %s directive"_err_en_US, - parser::ToUpperCaseLetters( - llvm::omp::getOpenMPDirectiveName(GetContext().directive, version) - .str())); } + CheckAssocLoopLevel(level, GetAssociatedClause()); + } else { + context_.Say(GetContext().directiveSource, + "A DO loop must follow the %s directive"_err_en_US, + parser::ToUpperCaseLetters( + llvm::omp::getOpenMPDirectiveName(GetContext().directive, version) + .str())); } } diff --git a/flang/lib/Semantics/rewrite-parse-tree.cpp b/flang/lib/Semantics/rewrite-parse-tree.cpp index 5b7dab309eda7..b5a07680a3377 100644 --- a/flang/lib/Semantics/rewrite-parse-tree.cpp +++ b/flang/lib/Semantics/rewrite-parse-tree.cpp @@ -195,18 +195,16 @@ void RewriteMutator::OpenMPSimdOnly( ++it; continue; } - auto &nest = - std::get>(ompLoop->t); - if (auto *doConstruct = - std::get_if(&nest.value())) { + const_cast(ompLoop->GetNestedLoop())) { auto &loopBody = std::get(doConstruct->t); // We can only remove some constructs from a loop when it's _not_ a // OpenMP simd loop - OpenMPSimdOnly(loopBody, /*isNonSimdLoopBody=*/true); - auto newDoConstruct = std::move(*doConstruct); + OpenMPSimdOnly(const_cast(loopBody), + /*isNonSimdLoopBody=*/true); + auto newLoop = parser::ExecutionPartConstruct{ - parser::ExecutableConstruct{std::move(newDoConstruct)}}; + parser::ExecutableConstruct{std::move(*doConstruct)}}; it = block.erase(it); block.insert(it, std::move(newLoop)); continue; @@ -386,11 +384,8 @@ bool RewriteMutator::Pre(parser::OpenMPLoopConstruct &ompLoop) { // If we're looking at a non-simd OpenMP loop, we need to explicitly // call OpenMPSimdOnly on the nested loop block while indicating where // the block comes from. - auto &nest = std::get>(ompLoop.t); - if (!nest.has_value()) { - return true; - } - if (auto *doConstruct = std::get_if(&*nest)) { + if (auto *doConstruct = + const_cast(ompLoop.GetNestedLoop())) { auto &innerBlock = std::get(doConstruct->t); OpenMPSimdOnly(innerBlock, /*isNonSimdLoopBody=*/true); } diff --git a/flang/test/Integration/unroll-loops.f90 b/flang/test/Integration/unroll-loops.f90 index 87ab9efeb703b..2c4a3495eb0b7 100644 --- a/flang/test/Integration/unroll-loops.f90 +++ b/flang/test/Integration/unroll-loops.f90 @@ -25,7 +25,7 @@ subroutine unroll(a) ! NO-UNROLL-NEXT: %[[GEP:.*]] = getelementptr i64, ptr %[[ARG0]], i64 %[[IND]] ! NO-UNROLL-NEXT: store <2 x i64> %[[VIND]], ptr %[[GEP]] ! NO-UNROLL-NEXT: %[[NIV:.*]] = add nuw i64 %{{.*}}, 2 - ! NO-UNROLL-NEXT: %[[NVIND]] = add <2 x i64> %[[VIND]], splat (i64 2) + ! NO-UNROLL-NEXT: %[[NVIND]] = add nuw nsw <2 x i64> %[[VIND]], splat (i64 2) ! ! UNROLL-NEXT: %[[VIND1:.*]] = add <2 x i64> %[[VIND]], splat (i64 2) ! 
UNROLL-NEXT: %[[GEP0:.*]] = getelementptr i64, ptr %[[ARG0]], i64 %[[IND]] diff --git a/flang/test/Lower/HLFIR/unroll-loops.fir b/flang/test/Lower/HLFIR/unroll-loops.fir index 89e8ce82d6f3f..1ccb6b1bd0315 100644 --- a/flang/test/Lower/HLFIR/unroll-loops.fir +++ b/flang/test/Lower/HLFIR/unroll-loops.fir @@ -27,7 +27,7 @@ func.func @unroll(%arg0: !fir.ref> {fir.bindc_name = "a // NO-UNROLL-NEXT: %[[GEP:.*]] = getelementptr i64, ptr %[[ARG0]], i64 %[[IND]] // NO-UNROLL-NEXT: store <2 x i64> %[[VIND]], ptr %[[GEP]] // NO-UNROLL-NEXT: %[[NIV:.*]] = add nuw i64 %{{.*}}, 2 - // NO-UNROLL-NEXT: %[[NVIND]] = add <2 x i64> %[[VIND]], splat (i64 2) + // NO-UNROLL-NEXT: %[[NVIND]] = add nuw nsw <2 x i64> %[[VIND]], splat (i64 2) // UNROLL-NEXT: %[[VIND1:.*]] = add <2 x i64> %[[VIND]], splat (i64 2) // UNROLL-NEXT: %[[GEP0:.*]] = getelementptr i64, ptr %[[ARG0]], i64 %[[IND]] diff --git a/flang/test/Lower/OpenACC/Todo/do-loops-to-acc-loops-todo.f90 b/flang/test/Lower/OpenACC/Todo/do-loops-to-acc-loops-todo.f90 index aa1d44365e5eb..3f2b77a9a1484 100644 --- a/flang/test/Lower/OpenACC/Todo/do-loops-to-acc-loops-todo.f90 +++ b/flang/test/Lower/OpenACC/Todo/do-loops-to-acc-loops-todo.f90 @@ -72,9 +72,9 @@ subroutine nested_loop_with_inner_goto() integer :: ii = 0, jj = 0 integer, parameter :: nn = 3 real, dimension(nn, nn) :: aa - + aa = -1 - + ! Nested loop with goto from inner loop - unstructured control flow is not converted. !$acc kernels do ii = 1, nn @@ -88,4 +88,4 @@ subroutine nested_loop_with_inner_goto() ! CHECK4: not yet implemented: unstructured do loop in acc kernels -end subroutine \ No newline at end of file +end subroutine diff --git a/flang/test/Lower/OpenACC/acc-atomic-capture.f90 b/flang/test/Lower/OpenACC/acc-atomic-capture.f90 index 30e60e34b13a2..ccdd4d3014e94 100644 --- a/flang/test/Lower/OpenACC/acc-atomic-capture.f90 +++ b/flang/test/Lower/OpenACC/acc-atomic-capture.f90 @@ -36,7 +36,7 @@ program acc_atomic_capture_test !CHECK: } !$acc atomic capture - y = x * y + y = x * y x = y !$acc end atomic @@ -53,8 +53,8 @@ program acc_atomic_capture_test !$acc atomic capture x = y - y = 2 * 10 + (8 - x) - !$acc end atomic + y = 2 * 10 + (8 - x) + !$acc end atomic end program @@ -123,8 +123,8 @@ subroutine capture_with_convert_f32_to_i32() ! CHECK: } subroutine capture_with_convert_i32_to_f64() - real(8) :: x - integer :: v, u + real(8) :: x + integer :: v, u x = 1.0 v = 0 u = 1 diff --git a/flang/test/Lower/OpenACC/acc-atomic-update-array.f90 b/flang/test/Lower/OpenACC/acc-atomic-update-array.f90 index 184c2a6fb0aeb..b04b0c9d5a143 100644 --- a/flang/test/Lower/OpenACC/acc-atomic-update-array.f90 +++ b/flang/test/Lower/OpenACC/acc-atomic-update-array.f90 @@ -5,7 +5,7 @@ subroutine atomic_update_array1(r, n, x) integer :: n real :: r(n), x integer :: i - + !$acc data copy(r) !$acc parallel loop @@ -51,7 +51,7 @@ subroutine atomic_write_array1(r, n, x) implicit none integer :: n real :: r(n), x - + !$acc atomic write x = r(n) end subroutine @@ -61,7 +61,7 @@ subroutine atomic_write_array1(r, n, x) ! CHECK: %[[DECL_X:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %{{[0-9]+}} arg {{[0-9]+}} {uniq_name = "_QFatomic_write_array1Ex"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) ! CHECK: %[[DECL_R:.*]]:2 = hlfir.declare %[[ARG0]](%{{.*}}) dummy_scope %{{[0-9]+}} arg {{[0-9]+}} {uniq_name = "_QFatomic_write_array1Er"} : (!fir.ref>, !fir.shape<1>, !fir.dscope) -> (!fir.box>, !fir.ref>) ! CHECK: %[[DES:.*]] = hlfir.designate %[[DECL_R]]#0 (%{{.*}}) : (!fir.box>, i64) -> !fir.ref -! 
CHECK: %[[LOAD:.*]] = fir.load %[[DES]] : !fir.ref +! CHECK: %[[LOAD:.*]] = fir.load %[[DES]] : !fir.ref ! CHECK: acc.atomic.write %[[DECL_X]]#0 = %[[LOAD]] : !fir.ref, f32 subroutine atomic_capture_array1(r, n, x, y) diff --git a/flang/test/Lower/OpenACC/acc-atomic-update.f90 b/flang/test/Lower/OpenACC/acc-atomic-update.f90 index 71aa69fd64eba..f4c305a332640 100644 --- a/flang/test/Lower/OpenACC/acc-atomic-update.f90 +++ b/flang/test/Lower/OpenACC/acc-atomic-update.f90 @@ -42,7 +42,7 @@ end function func !CHECK: } !$acc atomic update - a = a + b + a = a + b !CHECK: {{.*}} = arith.constant 1 : i32 !CHECK: acc.atomic.update %[[Y_DECL]]#0 : !fir.ref { @@ -56,10 +56,10 @@ end function func !CHECK: %[[RESULT:.*]] = arith.muli %[[LOADED_X]], %[[ARG]] : i32 !CHECK: acc.yield %[[RESULT]] : i32 !CHECK: } - !$acc atomic + !$acc atomic y = y + 1 !$acc atomic update - z = x * z + z = x * z !CHECK: %[[C1_VAL:.*]] = arith.constant 1 : i32 !CHECK: acc.atomic.update %[[I1_DECL]]#0 : !fir.ref { diff --git a/flang/test/Lower/OpenACC/acc-bounds.f90 b/flang/test/Lower/OpenACC/acc-bounds.f90 index 44ca2514f6eea..03779ac0cfe51 100644 --- a/flang/test/Lower/OpenACC/acc-bounds.f90 +++ b/flang/test/Lower/OpenACC/acc-bounds.f90 @@ -114,7 +114,7 @@ subroutine acc_optional_data(a) !$acc data attach(a) !$acc end data end subroutine - + ! CHECK-LABEL: func.func @_QMopenacc_boundsPacc_optional_data( ! CHECK-SAME: %[[ARG0:.*]]: !fir.ref>>> {fir.bindc_name = "a", fir.optional}) { ! CHECK: %[[ARG0_DECL:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} arg {{[0-9]+}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QMopenacc_boundsFacc_optional_dataEa"} : (!fir.ref>>>, !fir.dscope) -> (!fir.ref>>>, !fir.ref>>>) diff --git a/flang/test/Lower/OpenACC/acc-host-data.f90 b/flang/test/Lower/OpenACC/acc-host-data.f90 index 4d09b25b983b9..2cf8060bcf8d1 100644 --- a/flang/test/Lower/OpenACC/acc-host-data.f90 +++ b/flang/test/Lower/OpenACC/acc-host-data.f90 @@ -26,7 +26,7 @@ subroutine acc_host_data() ! CHECK: acc.host_data dataOperands(%[[DA0]], %[[DA1]] : !fir.ref>, !fir.ref>) ! CHECK: } attributes {ifPresent} - !$acc host_data use_device(a) if_present + !$acc host_data use_device(a) if_present !$acc end host_data ! CHECK: acc.host_data dataOperands(%{{.*}}{{.*}} : !fir.ref>{{.*}}) { ! CHECK: } attributes {ifPresent} diff --git a/flang/test/Lower/OpenACC/acc-loop-exit.f90 b/flang/test/Lower/OpenACC/acc-loop-exit.f90 index 0b35a86c41b2e..6ab215fdbd842 100644 --- a/flang/test/Lower/OpenACC/acc-loop-exit.f90 +++ b/flang/test/Lower/OpenACC/acc-loop-exit.f90 @@ -11,7 +11,7 @@ subroutine sub1(x, a) end do i = 2 -end +end ! CHECK-LABEL: func.func @_QPsub1 ! CHECK: %[[A:.*]]:2 = hlfir.declare %arg1 dummy_scope %{{[0-9]+}} arg {{[0-9]+}} {uniq_name = "_QFsub1Ea"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) @@ -20,9 +20,9 @@ subroutine sub1(x, a) ! CHECK: %[[I:.*]]:2 = hlfir.declare %{{[0-9]+}} {uniq_name = "_QFsub1Ei"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: ^bb{{.*}}: ! CHECK: ^bb{{.*}}: -! CHECK: %[[LOAD_I:.*]] = fir.load %[[I]]#0 : !fir.ref -! CHECK: %[[LOAD_I:.*]] = fir.load %[[I]]#0 : !fir.ref -! CHECK: %[[LOAD_A:.*]] = fir.load %[[A]]#0 : !fir.ref +! CHECK: %[[LOAD_I:.*]] = fir.load %[[I]]#0 : !fir.ref +! CHECK: %[[LOAD_I:.*]] = fir.load %[[I]]#0 : !fir.ref +! CHECK: %[[LOAD_A:.*]] = fir.load %[[A]]#0 : !fir.ref ! CHECK: %[[CMP:.*]] = arith.cmpi eq, %[[LOAD_I]], %[[LOAD_A]] : i32 ! CHECK: cf.cond_br %[[CMP]], ^[[EARLY_RET:.*]], ^[[NO_RET:.*]] ! 
CHECK: ^[[EARLY_RET]]: diff --git a/flang/test/Lower/OpenACC/acc-loop.f90 b/flang/test/Lower/OpenACC/acc-loop.f90 index f9f5e8c2165d5..b3fadbc8b388b 100644 --- a/flang/test/Lower/OpenACC/acc-loop.f90 +++ b/flang/test/Lower/OpenACC/acc-loop.f90 @@ -376,8 +376,8 @@ subroutine sub1(i, j, k) ! CHECK-SAME: %[[ARG_J:.*]]: !fir.ref {fir.bindc_name = "j"} ! CHECK-SAME: %[[ARG_K:.*]]: !fir.ref {fir.bindc_name = "k"} ! CHECK: %[[DC_I:.*]]:2 = hlfir.declare %[[ARG_I]] dummy_scope %0 -! CHECK: %[[DC_J:.*]]:2 = hlfir.declare %[[ARG_J]] dummy_scope %0 -! CHECK: %[[DC_K:.*]]:2 = hlfir.declare %[[ARG_K]] dummy_scope %0 +! CHECK: %[[DC_J:.*]]:2 = hlfir.declare %[[ARG_J]] dummy_scope %0 +! CHECK: %[[DC_K:.*]]:2 = hlfir.declare %[[ARG_K]] dummy_scope %0 ! CHECK: acc.parallel combined(loop) ! CHECK: %[[P_I:.*]] = acc.private varPtr(%[[DC_I]]#0 : !fir.ref) -> !fir.ref {implicit = true, name = "i"} ! CHECK: %[[P_J:.*]] = acc.private varPtr(%[[DC_J]]#0 : !fir.ref) -> !fir.ref {implicit = true, name = "j"} diff --git a/flang/test/Lower/OpenACC/acc-private.f90 b/flang/test/Lower/OpenACC/acc-private.f90 index 10d103c84f8de..ea12da7a5a99f 100644 --- a/flang/test/Lower/OpenACC/acc-private.f90 +++ b/flang/test/Lower/OpenACC/acc-private.f90 @@ -195,8 +195,8 @@ ! CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_ref_i32 : !fir.ref init { ! CHECK: ^bb0(%{{.*}}: !fir.ref): ! CHECK: %[[ALLOCA:.*]] = fir.alloca i32 -! CHECK: %[[DECLARE:.*]]:2 = hlfir.declare %[[ALLOCA]] {uniq_name = "acc.private.init"} : (!fir.ref) -> (!fir.ref, !fir.ref) -! CHECK: acc.yield %[[DECLARE]]#0 : !fir.ref +! CHECK: %[[DECLARE:.*]]:2 = hlfir.declare %[[ALLOCA]] {uniq_name = "acc.private.init"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: acc.yield %[[DECLARE]]#0 : !fir.ref ! CHECK: } copy { ! CHECK: ^bb0(%[[SRC:.*]]: !fir.ref, %[[DST:.*]]: !fir.ref): ! CHECK: %[[VALUE:.*]] = fir.load %[[SRC]] : !fir.ref @@ -223,8 +223,8 @@ ! CHECK-LABEL: acc.private.recipe @privatization_ref_i32 : !fir.ref init { ! CHECK: ^bb0(%{{.*}}: !fir.ref): ! CHECK: %[[ALLOCA:.*]] = fir.alloca i32 -! CHECK: %[[DECLARE:.*]]:2 = hlfir.declare %[[ALLOCA]] {uniq_name = "acc.private.init"} : (!fir.ref) -> (!fir.ref, !fir.ref) -! CHECK: acc.yield %[[DECLARE]]#0 : !fir.ref +! CHECK: %[[DECLARE:.*]]:2 = hlfir.declare %[[ALLOCA]] {uniq_name = "acc.private.init"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: acc.yield %[[DECLARE]]#0 : !fir.ref ! CHECK: } program acc_private diff --git a/flang/test/Lower/OpenACC/acc-routine-named.f90 b/flang/test/Lower/OpenACC/acc-routine-named.f90 index de9784a1146cc..24d47e58b6e1b 100644 --- a/flang/test/Lower/OpenACC/acc-routine-named.f90 +++ b/flang/test/Lower/OpenACC/acc-routine-named.f90 @@ -14,7 +14,7 @@ module acc_routines subroutine acc1() end subroutine -! CHECK-LABEL: func.func @_QMacc_routinesPacc1() +! CHECK-LABEL: func.func @_QMacc_routinesPacc1() ! 
CHECK-SAME:attributes {acc.routine_info = #acc.routine_info<[@[[r1]]]>} subroutine acc2() diff --git a/flang/test/Lower/OpenACC/acc-routine.f90 b/flang/test/Lower/OpenACC/acc-routine.f90 index 1a63b4120235c..c281ca5dfc287 100644 --- a/flang/test/Lower/OpenACC/acc-routine.f90 +++ b/flang/test/Lower/OpenACC/acc-routine.f90 @@ -127,13 +127,13 @@ subroutine acc_routine16() end subroutine subroutine acc_routine17() - !$acc routine device_type(host) worker dtype(multicore) vector + !$acc routine device_type(host) worker dtype(multicore) vector end subroutine subroutine acc_routine18() - !$acc routine device_type(host) bind(acc_routine17) dtype(multicore) bind(acc_routine16) + !$acc routine device_type(host) bind(acc_routine17) dtype(multicore) bind(acc_routine16) end subroutine subroutine acc_routine19() - !$acc routine device_type(host,default) bind(acc_routine17) dtype(multicore) bind(acc_routine16) + !$acc routine device_type(host,default) bind(acc_routine17) dtype(multicore) bind(acc_routine16) end subroutine diff --git a/flang/test/Lower/OpenACC/acc-routine02.f90 b/flang/test/Lower/OpenACC/acc-routine02.f90 index 1c15cb409e634..dd07cba4b20e3 100644 --- a/flang/test/Lower/OpenACC/acc-routine02.f90 +++ b/flang/test/Lower/OpenACC/acc-routine02.f90 @@ -17,4 +17,4 @@ program test ! CHECK-LABEL: acc.routine @acc_routine_0 func(@_QPsub1) -! CHECK: func.func @_QPsub1(%ar{{.*}}: !fir.ref> {fir.bindc_name = "a"}, %arg1: !fir.ref {fir.bindc_name = "n"}) attributes {acc.routine_info = #acc.routine_info<[@acc_routine_0]>} +! CHECK: func.func @_QPsub1(%ar{{.*}}: !fir.ref> {fir.bindc_name = "a"}, %arg1: !fir.ref {fir.bindc_name = "n"}) attributes {acc.routine_info = #acc.routine_info<[@acc_routine_0]>} diff --git a/flang/test/Lower/OpenACC/acc-routine03.f90 b/flang/test/Lower/OpenACC/acc-routine03.f90 index ddd6bda0367e4..3fc307746849f 100644 --- a/flang/test/Lower/OpenACC/acc-routine03.f90 +++ b/flang/test/Lower/OpenACC/acc-routine03.f90 @@ -20,7 +20,7 @@ subroutine sub1(a) !$acc routine worker bind(sub2) real :: a(:) end subroutine - + subroutine sub2(a) !$acc routine worker nohost real :: a(:) diff --git a/flang/test/Lower/OpenACC/acc-routine04.f90 b/flang/test/Lower/OpenACC/acc-routine04.f90 index 655e2762b9694..470440728d2f5 100644 --- a/flang/test/Lower/OpenACC/acc-routine04.f90 +++ b/flang/test/Lower/OpenACC/acc-routine04.f90 @@ -14,17 +14,17 @@ subroutine sub1(i) program test_acc_routine use dummy_mod - + !$acc routine(sub2) seq - + implicit none - + integer :: i contains subroutine sub2() end subroutine - + end program ! 
CHECK: acc.routine @acc_routine_1 func(@_QFPsub2) seq diff --git a/flang/test/Lower/OpenACC/acc-shutdown.f90 b/flang/test/Lower/OpenACC/acc-shutdown.f90 index 304dd4fae6db5..de6191d7f0cd2 100644 --- a/flang/test/Lower/OpenACC/acc-shutdown.f90 +++ b/flang/test/Lower/OpenACC/acc-shutdown.f90 @@ -23,9 +23,9 @@ subroutine acc_shutdown !$acc shutdown device_num(1) device_type(default, nvidia) !CHECK: [[DEVNUM:%.*]] = arith.constant 1 : i32 -!CHECK: acc.shutdown device_num([[DEVNUM]] : i32) attributes {device_types = [#acc.device_type, #acc.device_type]} +!CHECK: acc.shutdown device_num([[DEVNUM]] : i32) attributes {device_types = [#acc.device_type, #acc.device_type]} !$acc shutdown device_type(default) device_type(nvidia) -!CHECK: acc.shutdown attributes {device_types = [#acc.device_type, #acc.device_type]} +!CHECK: acc.shutdown attributes {device_types = [#acc.device_type, #acc.device_type]} end subroutine acc_shutdown diff --git a/flang/test/Lower/OpenACC/acc-terminator.f90 b/flang/test/Lower/OpenACC/acc-terminator.f90 index 53ae1a5e54675..16100f9e36cd5 100644 --- a/flang/test/Lower/OpenACC/acc-terminator.f90 +++ b/flang/test/Lower/OpenACC/acc-terminator.f90 @@ -17,7 +17,7 @@ program main !$acc data copyin(a(:,:,i),b(:,:,i),c(:,:,i)) copyout(c2(:,:,i)) !$acc host_data use_device(a(:,:,i),b(:,:,i),c(:,:,i)) - + !$acc end host_data if ( stat .ne. 0 ) then diff --git a/flang/test/Lower/OpenACC/acc-use-device.f90 b/flang/test/Lower/OpenACC/acc-use-device.f90 index 30fefdb44a2bf..4f9ed2d70b3ec 100644 --- a/flang/test/Lower/OpenACC/acc-use-device.f90 +++ b/flang/test/Lower/OpenACC/acc-use-device.f90 @@ -9,7 +9,7 @@ subroutine test() ! CHECK: %[[A0:.*]] = fir.alloca !fir.array, %{{.*}} {bindc_name = "b", uniq_name = "_QFtestEb"} ! CHECK: %[[A1:.*]] = fir.shape_shift {{.*}} : (index, index) -> !fir.shapeshift<1> ! CHECK: %[[A:.*]]:2 = hlfir.declare %[[A0]](%[[A1]]) {uniq_name = "_QFtestEb"} : (!fir.ref>, !fir.shapeshift<1>) -> (!fir.box>, !fir.ref>) - + !$acc data copy(b) ! CHECK: %[[B:.*]] = acc.copyin var(%[[A]]#0 : !fir.box>) -> !fir.box> {dataClause = #acc, name = "b"} ! CHECK: acc.data dataOperands(%[[B]] : !fir.box>) { @@ -23,7 +23,7 @@ subroutine test() ! CHECK: fir.call @_QPvadd(%[[A]]#1) fastmath : (!fir.ref>) -> () !$acc end data ! CHECK: acc.copyout accVar(%[[B]] : !fir.box>) to var(%[[A]]#0 : !fir.box>) {dataClause = #acc, name = "b"} -end +end ! Test for allocatable, pointer and assumed-shape variables appearing in use_device clause. subroutine test2(a, b, c) diff --git a/flang/test/Lower/OpenACC/locations.f90 b/flang/test/Lower/OpenACC/locations.f90 index 69873b3fbca4f..8e00721ba2b4d 100644 --- a/flang/test/Lower/OpenACC/locations.f90 +++ b/flang/test/Lower/OpenACC/locations.f90 @@ -114,7 +114,7 @@ subroutine if_clause_expr_location(arr) subroutine atomic_read_loc() integer(4) :: x integer(8) :: y - + !$acc atomic read y = x end @@ -123,10 +123,10 @@ subroutine atomic_read_loc() subroutine atomic_capture_loc() implicit none integer :: k, v, i - + k = 1 v = 0 - + !$acc atomic capture v = k k = (i + 1) * 3.14 @@ -142,13 +142,13 @@ subroutine atomic_capture_loc() subroutine atomic_update_loc() implicit none integer :: x, y, z - - !$acc atomic + + !$acc atomic y = y + 1 ! CHECK: acc.atomic.update %{{.*}} : !fir.ref { ! CHECK: ^bb0(%{{.*}}: i32 loc("{{.*}}locations.f90":142:3)): ! 
CHECK: } loc("{{.*}}locations.f90":142:3) - + !$acc atomic update z = x * z end subroutine @@ -183,5 +183,3 @@ subroutine data_end_locations(arr) !CHECK-SAME: loc("{{.*}}locations.f90":181:11) end subroutine end module - - diff --git a/flang/test/Parser/OpenMP/bind-clause.f90 b/flang/test/Parser/OpenMP/bind-clause.f90 index a4fb3aa66c1c8..6910ffbba204f 100644 --- a/flang/test/Parser/OpenMP/bind-clause.f90 +++ b/flang/test/Parser/OpenMP/bind-clause.f90 @@ -22,5 +22,5 @@ subroutine f00 !PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = loop !PARSE-TREE: | | OmpClauseList -> OmpClause -> Bind -> OmpBindClause -> Binding = Parallel !PARSE-TREE: | | Flags = None -!PARSE-TREE: | DoConstruct +!PARSE-TREE: | ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct diff --git a/flang/test/Parser/OpenMP/declare-reduction-multi.f90 b/flang/test/Parser/OpenMP/declare-reduction-multi.f90 index 88566613bd412..f8104254aa6b1 100644 --- a/flang/test/Parser/OpenMP/declare-reduction-multi.f90 +++ b/flang/test/Parser/OpenMP/declare-reduction-multi.f90 @@ -198,7 +198,8 @@ program omp_examples !PARSE-TREE: | | | Modifier -> OmpReductionIdentifier -> DefinedOperator -> IntrinsicOperator = Add !PARSE-TREE: | | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'sum' !PARSE-TREE: | | Flags = None -!PARSE-TREE: | DoConstruct +!PARSE-TREE: | Block +!PARSE-TREE: | | ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct do i = 1, n sum%r = sum%r + values(i)%r @@ -215,7 +216,8 @@ program omp_examples !PARSE-TREE: | | | Modifier -> OmpReductionIdentifier -> DefinedOperator -> IntrinsicOperator = Multiply !PARSE-TREE: | | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'prod' !PARSE-TREE: | | Flags = None -!PARSE-TREE: | DoConstruct +!PARSE-TREE: | Block +!PARSE-TREE: | | ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct do i = 1, n prod%r = prod%r * (values(i)%r+0.6) @@ -232,7 +234,8 @@ program omp_examples !PARSE-TREE: | | | Modifier -> OmpReductionIdentifier -> ProcedureDesignator -> Name = 'max' !PARSE-TREE: | | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'big' !PARSE-TREE: | | Flags = None -!PARSE-TREE: | DoConstruct +!PARSE-TREE: | Block +!PARSE-TREE: | | ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct do i = 1, n big = mymax(values(i), big) @@ -249,7 +252,8 @@ program omp_examples !PARSE-TREE: | | | Modifier -> OmpReductionIdentifier -> ProcedureDesignator -> Name = 'min' !PARSE-TREE: | | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'small' !PARSE-TREE: | | Flags = None -!PARSE-TREE: | DoConstruct +!PARSE-TREE: | Block +!PARSE-TREE: | | ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct do i = 1, n small%r = min(values(i)%r, small%r) diff --git a/flang/test/Parser/OpenMP/declare-reduction-unparse.f90 b/flang/test/Parser/OpenMP/declare-reduction-unparse.f90 index 7897eb0fb46f0..31431f5d20c45 100644 --- a/flang/test/Parser/OpenMP/declare-reduction-unparse.f90 +++ b/flang/test/Parser/OpenMP/declare-reduction-unparse.f90 @@ -70,7 +70,8 @@ end subroutine initme !PARSE-TREE: | | | Modifier -> OmpReductionIdentifier -> ProcedureDesignator -> Name = 'red_add' !PARSE-TREE: | | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'res' !PARSE-TREE: | | Flags = None -!PARSE-TREE: | DoConstruct +!PARSE-TREE: | Block +!PARSE-TREE: | | ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct do i=1,n res=res+x(i) diff --git a/flang/test/Parser/OpenMP/do-tile-size.f90 
b/flang/test/Parser/OpenMP/do-tile-size.f90 index 9ba6a3a6c2c41..b8d175c236bf9 100644 --- a/flang/test/Parser/OpenMP/do-tile-size.f90 +++ b/flang/test/Parser/OpenMP/do-tile-size.f90 @@ -21,9 +21,11 @@ subroutine openmp_do_tiles(x) !PARSE-TREE:| | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct !PARSE-TREE:| | | OmpBeginLoopDirective -!PARSE-TREE:| | | OpenMPLoopConstruct -!PARSE-TREE:| | | | OmpBeginLoopDirective -!PARSE-TREE:| | | | | OmpDirectiveName -> llvm::omp::Directive = tile -!PARSE-TREE:| | | | | OmpClauseList -> OmpClause -> Sizes -> Scalar -> Integer -> Expr = '2_4' -!PARSE-TREE: | | | | DoConstruct +!PARSE-TREE:| | | Block +!PARSE-TREE:| | | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct +!PARSE-TREE:| | | | | OmpBeginLoopDirective +!PARSE-TREE:| | | | | | OmpDirectiveName -> llvm::omp::Directive = tile +!PARSE-TREE:| | | | | | OmpClauseList -> OmpClause -> Sizes -> Scalar -> Integer -> Expr = '2_4' +!PARSE-TREE:| | | | | Block +!PARSE-TREE:| | | | | | ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct END subroutine openmp_do_tiles diff --git a/flang/test/Parser/OpenMP/loop-transformation-construct01.f90 b/flang/test/Parser/OpenMP/loop-transformation-construct01.f90 index 9595889b1bf98..8b314d8d823db 100644 --- a/flang/test/Parser/OpenMP/loop-transformation-construct01.f90 +++ b/flang/test/Parser/OpenMP/loop-transformation-construct01.f90 @@ -24,40 +24,42 @@ subroutine loop_transformation_construct !CHECK-PARSE-NEXT: | | | | OmpDirectiveName -> llvm::omp::Directive = do !CHECK-PARSE-NEXT: | | | | OmpClauseList -> !CHECK-PARSE-NEXT: | | | | Flags = None -!CHECK-PARSE-NEXT: | | | OpenMPLoopConstruct -!CHECK-PARSE-NEXT: | | | | OmpBeginLoopDirective -!CHECK-PARSE-NEXT: | | | | | OmpDirectiveName -> llvm::omp::Directive = unroll -!CHECK-PARSE-NEXT: | | | | | OmpClauseList -> -!CHECK-PARSE-NEXT: | | | | | Flags = None -!CHECK-PARSE-NEXT: | | | | DoConstruct -!CHECK-PARSE-NEXT: | | | | | NonLabelDoStmt -!CHECK-PARSE-NEXT: | | | | | | LoopControl -> LoopBounds -!CHECK-PARSE-NEXT: | | | | | | | Scalar -> Name = 'i' -!CHECK-PARSE-NEXT: | | | | | | | Scalar -> Expr = '1_4' -!CHECK-PARSE-NEXT: | | | | | | | | LiteralConstant -> IntLiteralConstant = '1' -!CHECK-PARSE-NEXT: | | | | | | | Scalar -> Expr = 'i' -!CHECK-PARSE-NEXT: | | | | | | | | Designator -> DataRef -> Name = 'i' +!CHECK-PARSE-NEXT: | | | Block +!CHECK-PARSE-NEXT: | | | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct +!CHECK-PARSE-NEXT: | | | | | OmpBeginLoopDirective +!CHECK-PARSE-NEXT: | | | | | | OmpDirectiveName -> llvm::omp::Directive = unroll +!CHECK-PARSE-NEXT: | | | | | | OmpClauseList -> +!CHECK-PARSE-NEXT: | | | | | | Flags = None !CHECK-PARSE-NEXT: | | | | | Block -!CHECK-PARSE-NEXT: | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AssignmentStmt = 'y(int(i,kind=8))=5_4*y(int(i,kind=8))' -!CHECK-PARSE-NEXT: | | | | | | | Variable = 'y(int(i,kind=8))' -!CHECK-PARSE-NEXT: | | | | | | | | Designator -> DataRef -> ArrayElement -!CHECK-PARSE-NEXT: | | | | | | | | | DataRef -> Name = 'y' -!CHECK-PARSE-NEXT: | | | | | | | | | SectionSubscript -> Integer -> Expr = 'i' +!CHECK-PARSE-NEXT: | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct +!CHECK-PARSE-NEXT: | | | | | | | NonLabelDoStmt +!CHECK-PARSE-NEXT: | | | | | | | | LoopControl -> LoopBounds +!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Name = 'i' +!CHECK-PARSE-NEXT: | | | | | | | | | 
Scalar -> Expr = '1_4' +!CHECK-PARSE-NEXT: | | | | | | | | | | LiteralConstant -> IntLiteralConstant = '1' +!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Expr = 'i' !CHECK-PARSE-NEXT: | | | | | | | | | | Designator -> DataRef -> Name = 'i' -!CHECK-PARSE-NEXT: | | | | | | | Expr = '5_4*y(int(i,kind=8))' -!CHECK-PARSE-NEXT: | | | | | | | | Multiply -!CHECK-PARSE-NEXT: | | | | | | | | | Expr = 'y(int(i,kind=8))' +!CHECK-PARSE-NEXT: | | | | | | | Block +!CHECK-PARSE-NEXT: | | | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AssignmentStmt = 'y(int(i,kind=8))=5_4*y(int(i,kind=8))' +!CHECK-PARSE-NEXT: | | | | | | | | | Variable = 'y(int(i,kind=8))' !CHECK-PARSE-NEXT: | | | | | | | | | | Designator -> DataRef -> ArrayElement !CHECK-PARSE-NEXT: | | | | | | | | | | | DataRef -> Name = 'y' !CHECK-PARSE-NEXT: | | | | | | | | | | | SectionSubscript -> Integer -> Expr = 'i' !CHECK-PARSE-NEXT: | | | | | | | | | | | | Designator -> DataRef -> Name = 'i' -!CHECK-PARSE-NEXT: | | | | | | | | | Expr = '5_4' -!CHECK-PARSE-NEXT: | | | | | | | | | | LiteralConstant -> IntLiteralConstant = '5' -!CHECK-PARSE-NEXT: | | | | | EndDoStmt -> -!CHECK-PARSE-NEXT: | | | | OmpEndLoopDirective -!CHECK-PARSE-NEXT: | | | | | OmpDirectiveName -> llvm::omp::Directive = unroll -!CHECK-PARSE-NEXT: | | | | | OmpClauseList -> -!CHECK-PARSE-NEXT: | | | | | Flags = None +!CHECK-PARSE-NEXT: | | | | | | | | | Expr = '5_4*y(int(i,kind=8))' +!CHECK-PARSE-NEXT: | | | | | | | | | | Multiply +!CHECK-PARSE-NEXT: | | | | | | | | | | | Expr = 'y(int(i,kind=8))' +!CHECK-PARSE-NEXT: | | | | | | | | | | | | Designator -> DataRef -> ArrayElement +!CHECK-PARSE-NEXT: | | | | | | | | | | | | | DataRef -> Name = 'y' +!CHECK-PARSE-NEXT: | | | | | | | | | | | | | SectionSubscript -> Integer -> Expr = 'i' +!CHECK-PARSE-NEXT: | | | | | | | | | | | | | | Designator -> DataRef -> Name = 'i' +!CHECK-PARSE-NEXT: | | | | | | | | | | | Expr = '5_4' +!CHECK-PARSE-NEXT: | | | | | | | | | | | | LiteralConstant -> IntLiteralConstant = '5' +!CHECK-PARSE-NEXT: | | | | | | | EndDoStmt -> +!CHECK-PARSE-NEXT: | | | | | OmpEndLoopDirective +!CHECK-PARSE-NEXT: | | | | | | OmpDirectiveName -> llvm::omp::Directive = unroll +!CHECK-PARSE-NEXT: | | | | | | OmpClauseList -> +!CHECK-PARSE-NEXT: | | | | | | Flags = None !CHECK-PARSE-NEXT: | | | OmpEndLoopDirective !CHECK-PARSE-NEXT: | | | | OmpDirectiveName -> llvm::omp::Directive = do !CHECK-PARSE-NEXT: | | | | OmpClauseList -> diff --git a/flang/test/Parser/OpenMP/loop-transformation-construct02.f90 b/flang/test/Parser/OpenMP/loop-transformation-construct02.f90 index a876c77a274b5..5b5b591b35f8f 100644 --- a/flang/test/Parser/OpenMP/loop-transformation-construct02.f90 +++ b/flang/test/Parser/OpenMP/loop-transformation-construct02.f90 @@ -26,50 +26,53 @@ subroutine loop_transformation_construct !CHECK-PARSE-NEXT: | | | | OmpDirectiveName -> llvm::omp::Directive = do !CHECK-PARSE-NEXT: | | | | OmpClauseList -> !CHECK-PARSE-NEXT: | | | | Flags = None -!CHECK-PARSE-NEXT: | | | OpenMPLoopConstruct -!CHECK-PARSE-NEXT: | | | | OmpBeginLoopDirective -!CHECK-PARSE-NEXT: | | | | | OmpDirectiveName -> llvm::omp::Directive = unroll -!CHECK-PARSE-NEXT: | | | | | OmpClauseList -> -!CHECK-PARSE-NEXT: | | | | | Flags = None -!CHECK-PARSE-NEXT: | | | | OpenMPLoopConstruct +!CHECK-PARSE-NEXT: | | | Block +!CHECK-PARSE-NEXT: | | | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct !CHECK-PARSE-NEXT: | | | | | OmpBeginLoopDirective -!CHECK-PARSE-NEXT: | | | | | | OmpDirectiveName 
-> llvm::omp::Directive = tile -!CHECK-PARSE-NEXT: | | | | | | OmpClauseList -> OmpClause -> Sizes -> Scalar -> Integer -> Expr = '2_4' -!CHECK-PARSE-NEXT: | | | | | | | LiteralConstant -> IntLiteralConstant = '2' +!CHECK-PARSE-NEXT: | | | | | | OmpDirectiveName -> llvm::omp::Directive = unroll +!CHECK-PARSE-NEXT: | | | | | | OmpClauseList -> !CHECK-PARSE-NEXT: | | | | | | Flags = None -!CHECK-PARSE-NEXT: | | | | | DoConstruct -!CHECK-PARSE-NEXT: | | | | | | NonLabelDoStmt -!CHECK-PARSE-NEXT: | | | | | | | LoopControl -> LoopBounds -!CHECK-PARSE-NEXT: | | | | | | | | Scalar -> Name = 'i' -!CHECK-PARSE-NEXT: | | | | | | | | Scalar -> Expr = '1_4' -!CHECK-PARSE-NEXT: | | | | | | | | | LiteralConstant -> IntLiteralConstant = '1' -!CHECK-PARSE-NEXT: | | | | | | | | Scalar -> Expr = 'i' -!CHECK-PARSE-NEXT: | | | | | | | | | Designator -> DataRef -> Name = 'i' -!CHECK-PARSE-NEXT: | | | | | | Block -!CHECK-PARSE-NEXT: | | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AssignmentStmt = 'y(int(i,kind=8))=5_4*y(int(i,kind=8))' -!CHECK-PARSE-NEXT: | | | | | | | | Variable = 'y(int(i,kind=8))' -!CHECK-PARSE-NEXT: | | | | | | | | | Designator -> DataRef -> ArrayElement -!CHECK-PARSE-NEXT: | | | | | | | | | | DataRef -> Name = 'y' -!CHECK-PARSE-NEXT: | | | | | | | | | | SectionSubscript -> Integer -> Expr = 'i' -!CHECK-PARSE-NEXT: | | | | | | | | | | | Designator -> DataRef -> Name = 'i' -!CHECK-PARSE-NEXT: | | | | | | | | Expr = '5_4*y(int(i,kind=8))' -!CHECK-PARSE-NEXT: | | | | | | | | | Multiply -!CHECK-PARSE-NEXT: | | | | | | | | | | Expr = 'y(int(i,kind=8))' -!CHECK-PARSE-NEXT: | | | | | | | | | | | Designator -> DataRef -> ArrayElement -!CHECK-PARSE-NEXT: | | | | | | | | | | | | DataRef -> Name = 'y' -!CHECK-PARSE-NEXT: | | | | | | | | | | | | SectionSubscript -> Integer -> Expr = 'i' -!CHECK-PARSE-NEXT: | | | | | | | | | | | | | Designator -> DataRef -> Name = 'i' -!CHECK-PARSE-NEXT: | | | | | | | | | | Expr = '5_4' -!CHECK-PARSE-NEXT: | | | | | | | | | | | LiteralConstant -> IntLiteralConstant = '5' -!CHECK-PARSE-NEXT: | | | | | | EndDoStmt -> +!CHECK-PARSE-NEXT: | | | | | Block +!CHECK-PARSE-NEXT: | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct +!CHECK-PARSE-NEXT: | | | | | | | OmpBeginLoopDirective +!CHECK-PARSE-NEXT: | | | | | | | | OmpDirectiveName -> llvm::omp::Directive = tile +!CHECK-PARSE-NEXT: | | | | | | | | OmpClauseList -> OmpClause -> Sizes -> Scalar -> Integer -> Expr = '2_4' +!CHECK-PARSE-NEXT: | | | | | | | | | LiteralConstant -> IntLiteralConstant = '2' +!CHECK-PARSE-NEXT: | | | | | | | | Flags = None +!CHECK-PARSE-NEXT: | | | | | | | Block +!CHECK-PARSE-NEXT: | | | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct +!CHECK-PARSE-NEXT: | | | | | | | | | NonLabelDoStmt +!CHECK-PARSE-NEXT: | | | | | | | | | | LoopControl -> LoopBounds +!CHECK-PARSE-NEXT: | | | | | | | | | | | Scalar -> Name = 'i' +!CHECK-PARSE-NEXT: | | | | | | | | | | | Scalar -> Expr = '1_4' +!CHECK-PARSE-NEXT: | | | | | | | | | | | | LiteralConstant -> IntLiteralConstant = '1' +!CHECK-PARSE-NEXT: | | | | | | | | | | | Scalar -> Expr = 'i' +!CHECK-PARSE-NEXT: | | | | | | | | | | | | Designator -> DataRef -> Name = 'i' +!CHECK-PARSE-NEXT: | | | | | | | | | Block +!CHECK-PARSE-NEXT: | | | | | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AssignmentStmt = 'y(int(i,kind=8))=5_4*y(int(i,kind=8))' +!CHECK-PARSE-NEXT: | | | | | | | | | | | Variable = 'y(int(i,kind=8))' +!CHECK-PARSE-NEXT: | | | | 
| | | | | | | | Designator -> DataRef -> ArrayElement +!CHECK-PARSE-NEXT: | | | | | | | | | | | | | DataRef -> Name = 'y' +!CHECK-PARSE-NEXT: | | | | | | | | | | | | | SectionSubscript -> Integer -> Expr = 'i' +!CHECK-PARSE-NEXT: | | | | | | | | | | | | | | Designator -> DataRef -> Name = 'i' +!CHECK-PARSE-NEXT: | | | | | | | | | | | Expr = '5_4*y(int(i,kind=8))' +!CHECK-PARSE-NEXT: | | | | | | | | | | | | Multiply +!CHECK-PARSE-NEXT: | | | | | | | | | | | | | Expr = 'y(int(i,kind=8))' +!CHECK-PARSE-NEXT: | | | | | | | | | | | | | | Designator -> DataRef -> ArrayElement +!CHECK-PARSE-NEXT: | | | | | | | | | | | | | | | DataRef -> Name = 'y' +!CHECK-PARSE-NEXT: | | | | | | | | | | | | | | | SectionSubscript -> Integer -> Expr = 'i' +!CHECK-PARSE-NEXT: | | | | | | | | | | | | | | | | Designator -> DataRef -> Name = 'i' +!CHECK-PARSE-NEXT: | | | | | | | | | | | | | Expr = '5_4' +!CHECK-PARSE-NEXT: | | | | | | | | | | | | | | LiteralConstant -> IntLiteralConstant = '5' +!CHECK-PARSE-NEXT: | | | | | | | | | EndDoStmt -> +!CHECK-PARSE-NEXT: | | | | | | | OmpEndLoopDirective +!CHECK-PARSE-NEXT: | | | | | | | | OmpDirectiveName -> llvm::omp::Directive = tile +!CHECK-PARSE-NEXT: | | | | | | | | OmpClauseList -> +!CHECK-PARSE-NEXT: | | | | | | | | Flags = None !CHECK-PARSE-NEXT: | | | | | OmpEndLoopDirective -!CHECK-PARSE-NEXT: | | | | | | OmpDirectiveName -> llvm::omp::Directive = tile +!CHECK-PARSE-NEXT: | | | | | | OmpDirectiveName -> llvm::omp::Directive = unroll !CHECK-PARSE-NEXT: | | | | | | OmpClauseList -> !CHECK-PARSE-NEXT: | | | | | | Flags = None -!CHECK-PARSE-NEXT: | | | | OmpEndLoopDirective -!CHECK-PARSE-NEXT: | | | | | OmpDirectiveName -> llvm::omp::Directive = unroll -!CHECK-PARSE-NEXT: | | | | | OmpClauseList -> -!CHECK-PARSE-NEXT: | | | | | Flags = None !CHECK-PARSE-NEXT: | | | OmpEndLoopDirective !CHECK-PARSE-NEXT: | | | | OmpDirectiveName -> llvm::omp::Directive = do !CHECK-PARSE-NEXT: | | | | OmpClauseList -> diff --git a/flang/test/Parser/OpenMP/loop-transformation-construct03.f90 b/flang/test/Parser/OpenMP/loop-transformation-construct03.f90 index 8725025a51321..e431b6d535ff5 100644 --- a/flang/test/Parser/OpenMP/loop-transformation-construct03.f90 +++ b/flang/test/Parser/OpenMP/loop-transformation-construct03.f90 @@ -26,41 +26,42 @@ subroutine loop_transformation_construct7 !CHECK-PARSE-NEXT: | | | | | LiteralConstant -> IntLiteralConstant = '2' !CHECK-PARSE-NEXT: | | | | OmpClause -> Private -> OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'b' !CHECK-PARSE-NEXT: | | | | Flags = None -!CHECK-PARSE-NEXT: | | | DoConstruct -!CHECK-PARSE-NEXT: | | | | NonLabelDoStmt -!CHECK-PARSE-NEXT: | | | | | LoopControl -> LoopBounds -!CHECK-PARSE-NEXT: | | | | | | Scalar -> Name = 'b' -!CHECK-PARSE-NEXT: | | | | | | Scalar -> Expr = '1_4' -!CHECK-PARSE-NEXT: | | | | | | | LiteralConstant -> IntLiteralConstant = '1' -!CHECK-PARSE-NEXT: | | | | | | Scalar -> Expr = '10_4' -!CHECK-PARSE-NEXT: | | | | | | | LiteralConstant -> IntLiteralConstant = '10' -!CHECK-PARSE-NEXT: | | | | Block -!CHECK-PARSE-NEXT: | | | | | ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct -!CHECK-PARSE-NEXT: | | | | | | NonLabelDoStmt -!CHECK-PARSE-NEXT: | | | | | | | LoopControl -> LoopBounds -!CHECK-PARSE-NEXT: | | | | | | | | Scalar -> Name = 'c' -!CHECK-PARSE-NEXT: | | | | | | | | Scalar -> Expr = '1_4' -!CHECK-PARSE-NEXT: | | | | | | | | | LiteralConstant -> IntLiteralConstant = '1' -!CHECK-PARSE-NEXT: | | | | | | | | Scalar -> Expr = '10_4' -!CHECK-PARSE-NEXT: | | | | | | | | | 
LiteralConstant -> IntLiteralConstant = '10' -!CHECK-PARSE-NEXT: | | | | | | Block -!CHECK-PARSE-NEXT: | | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AssignmentStmt = 'a(int(b,kind=8),2_8)=a(int(c,kind=8),1_8)' -!CHECK-PARSE-NEXT: | | | | | | | | Variable = 'a(int(b,kind=8),2_8)' -!CHECK-PARSE-NEXT: | | | | | | | | | Designator -> DataRef -> ArrayElement -!CHECK-PARSE-NEXT: | | | | | | | | | | DataRef -> Name = 'a' -!CHECK-PARSE-NEXT: | | | | | | | | | | SectionSubscript -> Integer -> Expr = 'b' -!CHECK-PARSE-NEXT: | | | | | | | | | | | Designator -> DataRef -> Name = 'b' -!CHECK-PARSE-NEXT: | | | | | | | | | | SectionSubscript -> Integer -> Expr = '2_4' -!CHECK-PARSE-NEXT: | | | | | | | | | | | LiteralConstant -> IntLiteralConstant = '2' -!CHECK-PARSE-NEXT: | | | | | | | | Expr = 'a(int(c,kind=8),1_8)' -!CHECK-PARSE-NEXT: | | | | | | | | | Designator -> DataRef -> ArrayElement -!CHECK-PARSE-NEXT: | | | | | | | | | | DataRef -> Name = 'a' -!CHECK-PARSE-NEXT: | | | | | | | | | | SectionSubscript -> Integer -> Expr = 'c' -!CHECK-PARSE-NEXT: | | | | | | | | | | | Designator -> DataRef -> Name = 'c' -!CHECK-PARSE-NEXT: | | | | | | | | | | SectionSubscript -> Integer -> Expr = '1_4' -!CHECK-PARSE-NEXT: | | | | | | | | | | | LiteralConstant -> IntLiteralConstant = '1' -!CHECK-PARSE-NEXT: | | | | | | EndDoStmt -> -!CHECK-PARSE-NEXT: | | | | EndDoStmt -> +!CHECK-PARSE-NEXT: | | | Block +!CHECK-PARSE-NEXT: | | | | ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct +!CHECK-PARSE-NEXT: | | | | | NonLabelDoStmt +!CHECK-PARSE-NEXT: | | | | | | LoopControl -> LoopBounds +!CHECK-PARSE-NEXT: | | | | | | | Scalar -> Name = 'b' +!CHECK-PARSE-NEXT: | | | | | | | Scalar -> Expr = '1_4' +!CHECK-PARSE-NEXT: | | | | | | | | LiteralConstant -> IntLiteralConstant = '1' +!CHECK-PARSE-NEXT: | | | | | | | Scalar -> Expr = '10_4' +!CHECK-PARSE-NEXT: | | | | | | | | LiteralConstant -> IntLiteralConstant = '10' +!CHECK-PARSE-NEXT: | | | | | Block +!CHECK-PARSE-NEXT: | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct +!CHECK-PARSE-NEXT: | | | | | | | NonLabelDoStmt +!CHECK-PARSE-NEXT: | | | | | | | | LoopControl -> LoopBounds +!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Name = 'c' +!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Expr = '1_4' +!CHECK-PARSE-NEXT: | | | | | | | | | | LiteralConstant -> IntLiteralConstant = '1' +!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Expr = '10_4' +!CHECK-PARSE-NEXT: | | | | | | | | | | LiteralConstant -> IntLiteralConstant = '10' +!CHECK-PARSE-NEXT: | | | | | | | Block +!CHECK-PARSE-NEXT: | | | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AssignmentStmt = 'a(int(b,kind=8),2_8)=a(int(c,kind=8),1_8)' +!CHECK-PARSE-NEXT: | | | | | | | | | Variable = 'a(int(b,kind=8),2_8)' +!CHECK-PARSE-NEXT: | | | | | | | | | | Designator -> DataRef -> ArrayElement +!CHECK-PARSE-NEXT: | | | | | | | | | | | DataRef -> Name = 'a' +!CHECK-PARSE-NEXT: | | | | | | | | | | | SectionSubscript -> Integer -> Expr = 'b' +!CHECK-PARSE-NEXT: | | | | | | | | | | | | Designator -> DataRef -> Name = 'b' +!CHECK-PARSE-NEXT: | | | | | | | | | | | SectionSubscript -> Integer -> Expr = '2_4' +!CHECK-PARSE-NEXT: | | | | | | | | | | | | LiteralConstant -> IntLiteralConstant = '2' +!CHECK-PARSE-NEXT: | | | | | | | | | Expr = 'a(int(c,kind=8),1_8)' +!CHECK-PARSE-NEXT: | | | | | | | | | | Designator -> DataRef -> ArrayElement +!CHECK-PARSE-NEXT: | | | | | | | | | | | DataRef -> Name = 'a' +!CHECK-PARSE-NEXT: | | | | | | | | | | | 
SectionSubscript -> Integer -> Expr = 'c' +!CHECK-PARSE-NEXT: | | | | | | | | | | | | Designator -> DataRef -> Name = 'c' +!CHECK-PARSE-NEXT: | | | | | | | | | | | SectionSubscript -> Integer -> Expr = '1_4' +!CHECK-PARSE-NEXT: | | | | | | | | | | | | LiteralConstant -> IntLiteralConstant = '1' +!CHECK-PARSE-NEXT: | | | | | | | EndDoStmt -> +!CHECK-PARSE-NEXT: | | | | | EndDoStmt -> !CHECK-PARSE-NEXT: | EndSubroutineStmt -> !CHECK-UNPARSE: SUBROUTINE loop_transformation_construct7 diff --git a/flang/test/Parser/OpenMP/name-with-space.f b/flang/test/Parser/OpenMP/name-with-space.f new file mode 100644 index 0000000000000..603ebc40c9f4c --- /dev/null +++ b/flang/test/Parser/OpenMP/name-with-space.f @@ -0,0 +1,15 @@ +! RUN: %flang_fc1 -fopenmp -fdebug-unparse-no-sema %s 2>&1 | FileCheck %s + + program name_with_space +!CHECK: !$OMP THREADPRIVATE(/cc/, var1) +!$omp threadprivate(/c c/, var 1) + +!CHECK: !$OMP PARALLEL PRIVATE(somevar,expr1,expr2) IF(expr2>expr1) +!$omp parallel private(some var, expr 1, ex pr2) +!$omp+ if (exp r2 > ex pr1) +!$omp critical (x_x) + print '(a)', 'Hello World' +!CHECK: !$OMP END CRITICAL(x_x) +!$omp end critical (x _x) +!$omp end parallel + end program name_with_space diff --git a/flang/test/Parser/OpenMP/transparent-clause.f90 b/flang/test/Parser/OpenMP/transparent-clause.f90 index 8f669546f2dea..3512326b321e6 100644 --- a/flang/test/Parser/OpenMP/transparent-clause.f90 +++ b/flang/test/Parser/OpenMP/transparent-clause.f90 @@ -74,4 +74,5 @@ subroutine f02 !PARSE-TREE: | | OmpClauseList -> OmpClause -> Transparent -> OmpTransparentClause -> Scalar -> Integer -> Expr = '2_4' !PARSE-TREE: | | | LiteralConstant -> IntLiteralConstant = '2' !PARSE-TREE: | | Flags = None -!PARSE-TREE: | DoConstruct +!PARSE-TREE: | Block +!PARSE-TREE: | | ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct diff --git a/flang/test/Parser/OpenMP/unroll-heuristic.f90 b/flang/test/Parser/OpenMP/unroll-heuristic.f90 index bbc2df3b57df6..c181a06b457f3 100644 --- a/flang/test/Parser/OpenMP/unroll-heuristic.f90 +++ b/flang/test/Parser/OpenMP/unroll-heuristic.f90 @@ -23,22 +23,23 @@ END subroutine openmp_parse_unroll_heuristic !PTREE-NEXT: | | OmpDirectiveName -> llvm::omp::Directive = unroll !PTREE-NEXT: | | OmpClauseList -> !PTREE-NEXT: | | Flags = None -!PTREE-NEXT: | DoConstruct -!PTREE-NEXT: | | NonLabelDoStmt -!PTREE-NEXT: | | | LoopControl -> LoopBounds -!PTREE-NEXT: | | | | Scalar -> Name = 'i' -!PTREE-NEXT: | | | | Scalar -> Expr = '1_4' -!PTREE-NEXT: | | | | | LiteralConstant -> IntLiteralConstant = '1' -!PTREE-NEXT: | | | | Scalar -> Expr = '100_4' -!PTREE-NEXT: | | | | | LiteralConstant -> IntLiteralConstant = '100' -!PTREE-NEXT: | | Block -!PTREE-NEXT: | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> CallStmt = 'CALL func(i)' -!PTREE-NEXT: | | | | | | Call -!PTREE-NEXT: | | | | | ProcedureDesignator -> Name = 'func' -!PTREE-NEXT: | | | | | ActualArgSpec -!PTREE-NEXT: | | | | | | ActualArg -> Expr = 'i' -!PTREE-NEXT: | | | | | | | Designator -> DataRef -> Name = 'i' -!PTREE-NEXT: | | EndDoStmt -> +!PTREE-NEXT: | Block +!PTREE-NEXT: | | ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct +!PTREE-NEXT: | | | NonLabelDoStmt +!PTREE-NEXT: | | | | LoopControl -> LoopBounds +!PTREE-NEXT: | | | | | Scalar -> Name = 'i' +!PTREE-NEXT: | | | | | Scalar -> Expr = '1_4' +!PTREE-NEXT: | | | | | | LiteralConstant -> IntLiteralConstant = '1' +!PTREE-NEXT: | | | | | Scalar -> Expr = '100_4' +!PTREE-NEXT: | | | | | | LiteralConstant -> IntLiteralConstant = '100' 
+!PTREE-NEXT: | | | Block +!PTREE-NEXT: | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> CallStmt = 'CALL func(i)' +!PTREE-NEXT: | | | | | | | Call +!PTREE-NEXT: | | | | | | ProcedureDesignator -> Name = 'func' +!PTREE-NEXT: | | | | | | ActualArgSpec +!PTREE-NEXT: | | | | | | | ActualArg -> Expr = 'i' +!PTREE-NEXT: | | | | | | | | Designator -> DataRef -> Name = 'i' +!PTREE-NEXT: | | | EndDoStmt -> !PTREE-NEXT: | OmpEndLoopDirective !PTREE-NEXT: | | OmpDirectiveName -> llvm::omp::Directive = unroll !PTREE-NEXT: | | OmpClauseList -> diff --git a/flang/test/Semantics/OpenMP/simd-only.f90 b/flang/test/Semantics/OpenMP/simd-only.f90 index e137ef7d82929..4e29329e15cac 100644 --- a/flang/test/Semantics/OpenMP/simd-only.f90 +++ b/flang/test/Semantics/OpenMP/simd-only.f90 @@ -10,7 +10,7 @@ subroutine test_simd() ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct ! CHECK: OmpDirectiveName -> llvm::omp::Directive = simd - ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct + ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct !$omp simd do i = 1, 100 end do @@ -22,7 +22,7 @@ subroutine test_do_simd() ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct ! CHECK: OmpDirectiveName -> llvm::omp::Directive = do simd - ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct + ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct !$omp do simd do i = 1, 100 end do @@ -35,7 +35,7 @@ subroutine test_parallel_do_simd() ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct ! CHECK: OmpDirectiveName -> llvm::omp::Directive = parallel do simd - ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct + ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct !$omp parallel do simd do i = 1, 100 end do @@ -65,7 +65,7 @@ subroutine test_simd_atomic() ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct ! CHECK: OmpDirectiveName -> llvm::omp::Directive = simd - ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct + ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct !$omp simd do i = 1, 100 ! 
CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPAtomicConstruct diff --git a/libcxx/include/string b/libcxx/include/string index 09fc6228c4fdb..c4806069d0b44 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -2273,7 +2273,9 @@ private: // Allocate a buffer of __capacity size with __alloc and return it _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX20 __long __allocate_long_buffer(_Allocator& __alloc, size_type __capacity) { - auto __buffer = std::__allocate_at_least(__alloc, __recommend(__capacity) + 1); + _LIBCPP_ASSERT_INTERNAL(!__fits_in_sso(__capacity), + "Trying to allocate a long buffer for a capacity that would fit into the small buffer"); + auto __buffer = std::__allocate_at_least(__alloc, __align_allocation_size(__capacity)); if (__libcpp_is_constant_evaluated()) { for (size_type __i = 0; __i != __buffer.count; ++__i) @@ -2365,16 +2367,20 @@ private: return (__s + (__a - 1)) & ~(__a - 1); } enum { __alignment = 8 }; - static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type __recommend(size_type __s) _NOEXCEPT { - if (__s < __min_cap) { - return static_cast(__min_cap) - 1; - } + + // This makes sure that we're using a capacity with some extra alignment, since allocators almost always over-align + // the allocations anyway, improving memory usage. More importantly, this ensures that the lowest bit is never set + // if __endian_factor == 2, allowing us to store whether we're in the long string inside the lowest bit. + _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type + __align_allocation_size(size_type __size) _NOEXCEPT { + _LIBCPP_ASSERT_INTERNAL( + !__fits_in_sso(__size), "Trying to align an allocation of a size that would fit into the SSO"); const size_type __boundary = sizeof(value_type) < __alignment ? __alignment / sizeof(value_type) : __endian_factor; - size_type __guess = __align_it<__boundary>(__s + 1) - 1; - if (__guess == __min_cap) + size_type __guess = __align_it<__boundary>(__size + 1); + if (__guess == __min_cap + 1) __guess += __endian_factor; - _LIBCPP_ASSERT_INTERNAL(__guess >= __s, "recommendation is below the requested size"); + _LIBCPP_ASSERT_INTERNAL(__guess >= __size, "aligned allocation size is below the requested size"); return __guess; } @@ -2712,8 +2718,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void basic_string<_CharT, _Traits, _Allocator>::__ if (__delta_cap > __ms - __old_cap) __throw_length_error(); pointer __old_p = __get_pointer(); - size_type __cap = - __old_cap < __ms / 2 - __alignment ? __recommend(std::max(__old_cap + __delta_cap, 2 * __old_cap)) : __ms; + size_type __cap = __old_cap < __ms / 2 - __alignment ? std::max(__old_cap + __delta_cap, 2 * __old_cap) : __ms; __annotate_delete(); auto __guard = std::__make_scope_guard(__annotate_new_size(*this)); __long __buffer = __allocate_long_buffer(__alloc_, __cap); @@ -2750,8 +2755,7 @@ _LIBCPP_DEPRECATED_("use __grow_by_without_replace") basic_string<_CharT, _Trait if (__delta_cap > __ms - __old_cap) this->__throw_length_error(); pointer __old_p = __get_pointer(); - size_type __cap = - __old_cap < __ms / 2 - __alignment ? __recommend(std::max(__old_cap + __delta_cap, 2 * __old_cap)) : __ms; + size_type __cap = __old_cap < __ms / 2 - __alignment ?
std::max(__old_cap + __delta_cap, 2 * __old_cap) : __ms; __long __buffer = __allocate_long_buffer(__alloc_, __cap); if (__n_copy != 0) traits_type::copy(std::__to_address(__buffer.__data_), std::__to_address(__old_p), __n_copy); @@ -3417,18 +3421,15 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void basic_string<_CharT, _Traits, _Allocator>::re template inline _LIBCPP_CONSTEXPR_SINCE_CXX20 void basic_string<_CharT, _Traits, _Allocator>::shrink_to_fit() _NOEXCEPT { - size_type __target_capacity = __recommend(size()); - if (__target_capacity == capacity()) + if (!__is_long()) return; - _LIBCPP_ASSERT_INTERNAL(__is_long(), "Trying to shrink small string"); - - // We're a long string and we're shrinking into the small buffer. const auto __ptr = __get_long_pointer(); const auto __size = __get_long_size(); const auto __cap = __get_long_cap(); - if (__fits_in_sso(__target_capacity)) { + // We're a long string and we're shrinking into the small buffer. + if (__fits_in_sso(__size)) { __annotation_guard __g(*this); __set_short_size(__size); traits_type::copy(std::__to_address(__get_short_pointer()), std::__to_address(__ptr), __size + 1); @@ -3436,6 +3437,9 @@ inline _LIBCPP_CONSTEXPR_SINCE_CXX20 void basic_string<_CharT, _Traits, _Allocat return; } + if (__align_allocation_size(__size) == __cap) + return; + # if _LIBCPP_HAS_EXCEPTIONS try { # endif // _LIBCPP_HAS_EXCEPTIONS diff --git a/llvm/include/llvm/ADT/DenseMap.h b/llvm/include/llvm/ADT/DenseMap.h index aa5d8a8729647..a706f68fab81b 100644 --- a/llvm/include/llvm/ADT/DenseMap.h +++ b/llvm/include/llvm/ADT/DenseMap.h @@ -162,7 +162,7 @@ class DenseMapBase : public DebugEpochBase { return; } derived().deallocateBuckets(); - derived().init(NewNumBuckets); + initWithExactBucketCount(NewNumBuckets); } /// Return true if the specified key is in the map, false otherwise. @@ -558,7 +558,12 @@ class DenseMapBase : public DebugEpochBase { void grow(unsigned MinNumBuckets) { unsigned NumBuckets = DerivedT::roundUpNumBuckets(MinNumBuckets); - derived().grow(NumBuckets); + DerivedT Tmp(NumBuckets, ExactBucketCount{}); + Tmp.moveFrom(derived()); + if (derived().maybeMoveFast(std::move(Tmp))) + return; + initWithExactBucketCount(NumBuckets); + moveFrom(Tmp); } template @@ -750,9 +755,9 @@ class DenseMap : public DenseMapBase, public: /// Create a DenseMap with an optional \p NumElementsToReserve to guarantee /// that this number of elements can be inserted in the map without grow(). - explicit DenseMap(unsigned NumElementsToReserve = 0) { - init(NumElementsToReserve); - } + explicit DenseMap(unsigned NumElementsToReserve = 0) + : DenseMap(BaseT::getMinBucketToReserveForEntries(NumElementsToReserve), + typename BaseT::ExactBucketCount{}) {} DenseMap(const DenseMap &other) : DenseMap() { this->copyFrom(other); } @@ -784,7 +789,7 @@ class DenseMap : public DenseMapBase, DenseMap &operator=(DenseMap &&other) { this->destroyAll(); deallocateBuckets(); - init(0); + this->initWithExactBucketCount(0); this->swap(other); return *this; } @@ -825,11 +830,6 @@ class DenseMap : public DenseMapBase, return true; } - void init(unsigned InitNumEntries) { - auto InitBuckets = BaseT::getMinBucketToReserveForEntries(InitNumEntries); - this->initWithExactBucketCount(InitBuckets); - } - // Put the zombie instance in a known good state after a move. 
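+ // A map becomes such a zombie once moveFrom() has transferred its elements:
+ // its bucket storage is empty but may still be allocated, so kill() releases
+ // that storage and resets the map to a state that is safe to destroy or
+ // reassign.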
void kill() { deallocateBuckets(); @@ -842,10 +842,9 @@ class DenseMap : public DenseMapBase, static_cast(NextPowerOf2(MinNumBuckets - 1))); } - void grow(unsigned AtLeast) { - DenseMap Tmp(AtLeast, typename BaseT::ExactBucketCount{}); - Tmp.moveFrom(*this); - swapImpl(Tmp); + bool maybeMoveFast(DenseMap &&Other) { + swapImpl(Other); + return true; } // Plan how to shrink the bucket table. Return: @@ -898,9 +897,10 @@ class SmallDenseMap } public: - explicit SmallDenseMap(unsigned NumElementsToReserve = 0) { - init(NumElementsToReserve); - } + explicit SmallDenseMap(unsigned NumElementsToReserve = 0) + : SmallDenseMap( + BaseT::getMinBucketToReserveForEntries(NumElementsToReserve), + typename BaseT::ExactBucketCount{}) {} SmallDenseMap(const SmallDenseMap &other) : SmallDenseMap() { this->copyFrom(other); @@ -935,7 +935,7 @@ class SmallDenseMap SmallDenseMap &operator=(SmallDenseMap &&other) { this->destroyAll(); deallocateBuckets(); - init(0); + this->initWithExactBucketCount(0); this->swap(other); return *this; } @@ -1091,11 +1091,6 @@ class SmallDenseMap return true; } - void init(unsigned InitNumEntries) { - auto InitBuckets = BaseT::getMinBucketToReserveForEntries(InitNumEntries); - this->initWithExactBucketCount(InitBuckets); - } - // Put the zombie instance in a known good state after a move. void kill() { deallocateBuckets(); @@ -1110,23 +1105,16 @@ class SmallDenseMap static_cast(NextPowerOf2(MinNumBuckets - 1))); } - void grow(unsigned NumBuckets) { - SmallDenseMap Tmp(NumBuckets, typename BaseT::ExactBucketCount{}); - Tmp.moveFrom(*this); + bool maybeMoveFast(SmallDenseMap &&Other) { + if (Other.Small) + return false; - if (Tmp.Small) { - // Use moveFrom in those rare cases where we stay in the small mode. This - // can happen when we have many tombstones. - Small = true; - this->BaseT::initEmpty(); - this->moveFrom(Tmp); - } else { - Small = false; - NumEntries = Tmp.NumEntries; - NumTombstones = Tmp.NumTombstones; - *getLargeRep() = std::move(*Tmp.getLargeRep()); - Tmp.getLargeRep()->NumBuckets = 0; - } + Small = false; + NumEntries = Other.NumEntries; + NumTombstones = Other.NumTombstones; + *getLargeRep() = std::move(*Other.getLargeRep()); + Other.getLargeRep()->NumBuckets = 0; + return true; } // Plan how to shrink the bucket table. Return: diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 96cb7cdf2d531..9de1a643f1000 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -647,6 +647,12 @@ class CombinerHelper { bool matchRotateOutOfRange(MachineInstr &MI) const; void applyRotateOutOfRange(MachineInstr &MI) const; + bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, + Register &UnmergeSrc) const; + void applyCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B, + Register &UnmergeSrc) const; + bool matchUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const; void applyUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const; diff --git a/llvm/include/llvm/LTO/Config.h b/llvm/include/llvm/LTO/Config.h index 4e08e78545701..ded870c925552 100644 --- a/llvm/include/llvm/LTO/Config.h +++ b/llvm/include/llvm/LTO/Config.h @@ -94,6 +94,11 @@ struct Config { /// need to create copies, so it can set this field to false. 
bool KeepSymbolNameCopies = true; + /// This flag is used as one of the parameters to calculate cache entries and + /// to ensure that in-process cache entries and out-of-process (DTLTO) cache + /// entries are distinguished. + mutable bool Dtlto = false; + /// Allows non-imported definitions to get the potentially more constraining /// visibility from the prevailing definition. FromPrevailing is the default /// because it works for many binary formats. ELF can use the more optimized diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 119695e53c3cb..0ab2d9487a295 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -921,6 +921,15 @@ def merge_of_x_and_zero : GICombineRule < [{ return Helper.matchMergeXAndZero(*${MI}, ${matchinfo}); }]), (apply [{ Helper.applyBuildFn(*${MI}, ${matchinfo}); }])>; +// Transform build_vector(unmerge(src, 0), ... unmerge(src, n), undef, ..., undef) +// => concat_vectors(src, undef) +def combine_build_unmerge : GICombineRule< + (defs root:$root, register_matchinfo:$unmergeSrc), + (match (G_BUILD_VECTOR $dst, GIVariadic<>:$unused):$root, + [{ return Helper.matchCombineBuildUnmerge(*${root}, MRI, ${unmergeSrc}); }]), + (apply [{ Helper.applyCombineBuildUnmerge(*${root}, MRI, B, ${unmergeSrc}); }]) +>; + def merge_combines: GICombineGroup<[ unmerge_anyext_build_vector, unmerge_merge, @@ -930,7 +939,8 @@ def merge_combines: GICombineGroup<[ unmerge_dead_to_trunc, unmerge_zext_to_zext, merge_of_x_and_undef, - merge_of_x_and_zero + merge_of_x_and_zero, + combine_build_unmerge ]>; // Under certain conditions, transform: diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index ec4d13f1cd1b3..45a08347b1ec2 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -3463,6 +3463,88 @@ static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, isConstTrueVal(TLI, Cst, IsVector, IsFP); } +// This pattern aims to match the following shape to avoid extra mov +// instructions: +// G_BUILD_VECTOR( +// G_UNMERGE_VALUES(src, 0) +// G_UNMERGE_VALUES(src, 1) +// G_IMPLICIT_DEF +// G_IMPLICIT_DEF +// ) +// -> +// G_CONCAT_VECTORS( +// src, +// undef +// ) +bool CombinerHelper::matchCombineBuildUnmerge(MachineInstr &MI, + MachineRegisterInfo &MRI, + Register &UnmergeSrc) const { + auto &BV = cast(MI); + + unsigned BuildUseCount = BV.getNumSources(); + if (BuildUseCount % 2 != 0) + return false; + + unsigned NumUnmerge = BuildUseCount / 2; + + auto *Unmerge = getOpcodeDef(BV.getSourceReg(0), MRI); + + // Check that the first operand is an unmerge with the correct number of + // defs. + if (!Unmerge || Unmerge->getNumDefs() != NumUnmerge) + return false; + + UnmergeSrc = Unmerge->getSourceReg(); + + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + LLT UnmergeSrcTy = MRI.getType(UnmergeSrc); + + // Ensure we only generate legal instructions post-legalizer. + if (!IsPreLegalize && + !isLegal({TargetOpcode::G_CONCAT_VECTORS, {DstTy, UnmergeSrcTy}})) + return false; + + // Check that all of the operands before the midpoint come from the same + // unmerge and are in the same order as they are used in the build_vector. + for (unsigned I = 0; I < NumUnmerge; ++I) { + auto MaybeUnmergeReg = BV.getSourceReg(I); + auto *LoopUnmerge = getOpcodeDef(MaybeUnmergeReg, MRI); + + if (!LoopUnmerge || LoopUnmerge != Unmerge) + return false; + + if
(LoopUnmerge->getOperand(I).getReg() != MaybeUnmergeReg) + return false; + } + + // Check that all of the operands after the midpoint are undefs. + for (unsigned I = NumUnmerge; I < BuildUseCount; ++I) { + auto *Undef = getDefIgnoringCopies(BV.getSourceReg(I), MRI); + + if (Undef->getOpcode() != TargetOpcode::G_IMPLICIT_DEF) + return false; + } + + return true; +} + +void CombinerHelper::applyCombineBuildUnmerge(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B, + Register &UnmergeSrc) const { + assert(UnmergeSrc && "Expected there to be one matching G_UNMERGE_VALUES"); + B.setInstrAndDebugLoc(MI); + + Register UndefVec = B.buildUndef(MRI.getType(UnmergeSrc)).getReg(0); + B.buildConcatVectors(MI.getOperand(0), {UnmergeSrc, UndefVec}); + + MI.eraseFromParent(); +} + // This combine tries to reduce the number of scalarised G_TRUNC instructions by // using vector truncates instead // diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index fefc733fa7697..a02af59600c44 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -169,6 +169,7 @@ std::string llvm::computeLTOCacheKey( AddString(Conf.OverrideTriple); AddString(Conf.DefaultTriple); AddString(Conf.DwoDir); + AddUint8(Conf.Dtlto); // Include the hash for the current module auto ModHash = Index.getModuleHash(ModuleID); @@ -2226,7 +2227,8 @@ class OutOfProcessThinBackend : public CGThinBackend { SmallVector CodegenOptions; DenseSet CommonInputs; - + // Number of object files that have already been cached. + std::atomic CachedJobs{0}; // Information specific to individual backend compilation job. struct Job { unsigned Task; StringRef ModuleID; StringRef NativeObjectPath; StringRef SummaryIndexPath; ImportsFilesContainer ImportsFiles; + std::string CacheKey; + AddStreamFn CacheAddStream; + bool Cached = false; }; // The set of backend compilations jobs. SmallVector Jobs; @@ -2247,12 +2252,15 @@ class OutOfProcessThinBackend : public CGThinBackend { // The target triple to supply for backend compilations.
llvm::Triple Triple; + // Cache for storing and retrieving backend compilation results. + FileCache Cache; + public: OutOfProcessThinBackend( const Config &Conf, ModuleSummaryIndex &CombinedIndex, ThreadPoolStrategy ThinLTOParallelism, const DenseMap &ModuleToDefinedGVSummaries, - AddStreamFn AddStream, lto::IndexWriteCallback OnWrite, + AddStreamFn AddStream, FileCache CacheFn, lto::IndexWriteCallback OnWrite, bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles, StringRef LinkerOutputFile, StringRef Distributor, ArrayRef DistributorArgs, StringRef RemoteCompiler, @@ -2264,7 +2272,8 @@ class OutOfProcessThinBackend : public CGThinBackend { LinkerOutputFile(LinkerOutputFile), DistributorPath(Distributor), DistributorArgs(DistributorArgs), RemoteCompiler(RemoteCompiler), RemoteCompilerPrependArgs(RemoteCompilerPrependArgs), - RemoteCompilerArgs(RemoteCompilerArgs), SaveTemps(SaveTemps) {} + RemoteCompilerArgs(RemoteCompilerArgs), SaveTemps(SaveTemps), + Cache(std::move(CacheFn)) {} void setup(unsigned ThinLTONumTasks, unsigned ThinLTOTaskOffset, llvm::Triple Triple) override { @@ -2272,6 +2281,54 @@ class OutOfProcessThinBackend : public CGThinBackend { Jobs.resize((size_t)ThinLTONumTasks); this->ThinLTOTaskOffset = ThinLTOTaskOffset; this->Triple = Triple; + this->Conf.Dtlto = true; + } + + virtual Error runThinLTOBackendThread( + Job &J, const FunctionImporter::ImportMapTy &ImportList, + const FunctionImporter::ExportSetTy &ExportList, + const std::map + &ResolvedODR) { + + llvm::TimeTraceScope timeScope( + "Run ThinLTO backend thread (out-of-process)", J.ModuleID); + + if (auto E = emitFiles(ImportList, J.ModuleID, J.ModuleID.str(), + J.SummaryIndexPath, J.ImportsFiles)) + return E; + + if (!Cache.isValid() || !CombinedIndex.modulePaths().count(J.ModuleID) || + all_of(CombinedIndex.getModuleHash(J.ModuleID), + [](uint32_t V) { return V == 0; })) + // Cache disabled or no entry for this module in the combined index or + // no module hash. + return Error::success(); + + const GVSummaryMapTy &DefinedGlobals = + ModuleToDefinedGVSummaries.find(J.ModuleID)->second; + + // Compute the key that identifies this module's entry in the cache. + J.CacheKey = computeLTOCacheKey(Conf, CombinedIndex, J.ModuleID, ImportList, + ExportList, ResolvedODR, DefinedGlobals, + CfiFunctionDefs, CfiFunctionDecls); + + // Query the cache using the computed key. + auto CacheAddStreamExp = Cache(J.Task, J.CacheKey, J.ModuleID); + if (Error Err = CacheAddStreamExp.takeError()) + return Err; + AddStreamFn &CacheAddStream = *CacheAddStreamExp; + // If CacheAddStream is null, we have a cache hit and at this point the + // object file has already been passed back to the linker. + if (!CacheAddStream) { + J.Cached = true; // Cache hit, mark the job as cached. + CachedJobs.fetch_add(1); + } else { + // If CacheAddStream is not null, we have a cache miss and we need to + // run the backend for codegen. Save the cache 'add stream' + // function for later use. + J.CacheAddStream = std::move(CacheAddStream); + } + return Error::success(); } Error start( @@ -2288,22 +2345,27 @@ class OutOfProcessThinBackend : public CGThinBackend { itostr(Task) + "." + UID + ".native.o"); Job &J = Jobs[Task - ThinLTOTaskOffset]; - J = { - Task, - ModulePath, - Saver.save(ObjFilePath.str()), - Saver.save(ObjFilePath.str() + ".thinlto.bc"), - {} // Filled in by emitFiles below. - }; + J = {Task, + ModulePath, + Saver.save(ObjFilePath.str()), + Saver.save(ObjFilePath.str() + ".thinlto.bc"), + {}, // Filled in by emitFiles below.
+ /*CacheKey=*/"", + /*CacheAddStream=*/nullptr, + /*Cached=*/false}; assert(ModuleToDefinedGVSummaries.count(ModulePath)); // The BackendThreadPool is only used here to write the sharded index files // (similar to WriteIndexesThinBackend). BackendThreadPool.async( - [=](Job &J, const FunctionImporter::ImportMapTy &ImportList) { - if (auto E = emitFiles(ImportList, J.ModuleID, J.ModuleID.str(), - J.SummaryIndexPath, J.ImportsFiles)) { + [=](Job &J, const FunctionImporter::ImportMapTy &ImportList, + const FunctionImporter::ExportSetTy &ExportList, + const std::map + &ResolvedODR) { + Error E = + runThinLTOBackendThread(J, ImportList, ExportList, ResolvedODR); + if (E) { std::unique_lock L(ErrMu); if (Err) Err = joinErrors(std::move(*Err), std::move(E)); @@ -2311,7 +2373,8 @@ Err = std::move(E); } }, - std::ref(J), std::ref(ImportList)); + std::ref(J), std::ref(ImportList), std::ref(ExportList), + std::ref(ResolvedODR)); return Error::success(); } @@ -2405,6 +2468,10 @@ JOS.attributeArray("jobs", [&]() { for (const auto &J : Jobs) { assert(J.Task != 0); + if (J.Cached) { + assert(!Cache.getCacheDirectoryPath().empty()); + continue; + } SmallVector Inputs; SmallVector Outputs; @@ -2477,20 +2544,28 @@ removeFile(JsonFile); }); - SmallVector Args = {DistributorPath}; - llvm::append_range(Args, DistributorArgs); - Args.push_back(JsonFile); - std::string ErrMsg; - if (sys::ExecuteAndWait(Args[0], Args, - /*Env=*/std::nullopt, /*Redirects=*/{}, - /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg)) { - return make_error( - BCError + "distributor execution failed" + - (!ErrMsg.empty() ? ": " + ErrMsg + Twine(".") : Twine(".")), - inconvertibleErrorCode()); + // Check whether any jobs lack corresponding cache entries. + if (CachedJobs.load() < Jobs.size()) { + SmallVector Args = {DistributorPath}; + llvm::append_range(Args, DistributorArgs); + Args.push_back(JsonFile); + std::string ErrMsg; + if (sys::ExecuteAndWait(Args[0], Args, + /*Env=*/std::nullopt, /*Redirects=*/{}, + /*SecondsToWait=*/0, /*MemoryLimit=*/0, + &ErrMsg)) { + return make_error( + BCError + "distributor execution failed" + + (!ErrMsg.empty() ? ": " + ErrMsg + Twine(".") : Twine(".")), + inconvertibleErrorCode()); + } } for (auto &Job : Jobs) { + if (!Job.CacheKey.empty() && Job.Cached) { + assert(Cache.isValid()); + continue; + } // Load the native object from a file into a memory buffer // and store its contents in the output buffer. auto ObjFileMbOrErr = @@ -2501,15 +2576,35 @@ BCError + "cannot open native object file: " + Job.NativeObjectPath + ": " + EC.message(), inconvertibleErrorCode()); - auto StreamOrErr = AddStream(Job.Task, Job.ModuleID); - if (Error Err = StreamOrErr.takeError()) - report_fatal_error(std::move(Err)); - auto &Stream = *StreamOrErr->get(); - *Stream.OS << ObjFileMbOrErr->get()->getMemBufferRef().getBuffer(); - if (Error Err = Stream.commit()) - report_fatal_error(std::move(Err)); - } + MemoryBufferRef ObjFileMbRef = ObjFileMbOrErr->get()->getMemBufferRef(); + if (Cache.isValid()) { + // Cache hits are taken care of earlier. At this point, we can only + // have cache misses. + assert(Job.CacheAddStream); + // Obtain a file stream for storing a cache entry.
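+ // Committing the returned stream is expected to both store the entry in
+ // the cache and hand the object back to the linker, mirroring the
+ // cache-hit path handled earlier in runThinLTOBackendThread.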
+ auto CachedFileStreamOrErr = Job.CacheAddStream(Job.Task, Job.ModuleID); + if (!CachedFileStreamOrErr) + return joinErrors( + CachedFileStreamOrErr.takeError(), + createStringError(inconvertibleErrorCode(), + "Cannot get a cache file stream: %s", + Job.NativeObjectPath.data())); + // Store a file buffer into the cache stream. + auto &CacheStream = *(CachedFileStreamOrErr->get()); + *(CacheStream.OS) << ObjFileMbRef.getBuffer(); + if (Error Err = CacheStream.commit()) + return Err; + } else { + auto StreamOrErr = AddStream(Job.Task, Job.ModuleID); + if (Error Err = StreamOrErr.takeError()) + report_fatal_error(std::move(Err)); + auto &Stream = *StreamOrErr->get(); + *Stream.OS << ObjFileMbRef.getBuffer(); + if (Error Err = Stream.commit()) + report_fatal_error(std::move(Err)); + } + } return Error::success(); } }; @@ -2525,12 +2620,13 @@ ThinBackend lto::createOutOfProcessThinBackend( auto Func = [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex, const DenseMap &ModuleToDefinedGVSummaries, - AddStreamFn AddStream, FileCache /*Cache*/) { + AddStreamFn AddStream, FileCache Cache) { return std::make_unique( Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries, - AddStream, OnWrite, ShouldEmitIndexFiles, ShouldEmitImportsFiles, - LinkerOutputFile, Distributor, DistributorArgs, RemoteCompiler, - RemoteCompilerPrependArgs, RemoteCompilerArgs, SaveTemps); + AddStream, Cache, OnWrite, ShouldEmitIndexFiles, + ShouldEmitImportsFiles, LinkerOutputFile, Distributor, + DistributorArgs, RemoteCompiler, RemoteCompilerPrependArgs, + RemoteCompilerArgs, SaveTemps); }; return ThinBackend(Func, Parallelism); } diff --git a/llvm/lib/Option/OptTable.cpp b/llvm/lib/Option/OptTable.cpp index 20398b5f582f4..065036cedc2ae 100644 --- a/llvm/lib/Option/OptTable.cpp +++ b/llvm/lib/Option/OptTable.cpp @@ -796,8 +796,7 @@ void OptTable::internalPrintHelp( unsigned ActiveSubCommandID = ActiveSubCommand - &SubCommands[0]; // Print if the ActiveSubCommandID is registered with the CandidateInfo // Option. 
- return std::find(SubCommandIDs.begin(), SubCommandIDs.end(), - ActiveSubCommandID) != SubCommandIDs.end(); + return llvm::is_contained(SubCommandIDs, ActiveSubCommandID); }; for (unsigned Id = 1, e = getNumOptions() + 1; Id != e; ++Id) { diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index ff71be9a6bcb4..f313d3f1347d4 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -88,9 +88,10 @@ static cl::opt cl::init(true)); // TODO: Support more ops -static const unsigned ZvfbfaVPOps[] = {ISD::VP_FNEG, ISD::VP_FABS, - ISD::VP_FCOPYSIGN}; -static const unsigned ZvfbfaOps[] = {ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN}; +static const unsigned ZvfbfaVPOps[] = { + ISD::VP_FNEG, ISD::VP_FABS, ISD::VP_FCOPYSIGN, ISD::EXPERIMENTAL_VP_SPLAT}; +static const unsigned ZvfbfaOps[] = {ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, + ISD::SPLAT_VECTOR}; RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI) @@ -1272,17 +1273,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, VT, Custom); setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom); setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom); + setOperationAction(ISD::EXPERIMENTAL_VP_SPLAT, VT, Custom); setOperationAction(ISD::FCOPYSIGN, VT, Legal); + setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); setOperationAction(ZvfbfaVPOps, VT, Custom); - MVT EltVT = VT.getVectorElementType(); - if (isTypeLegal(EltVT)) - setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT}, VT, - Custom); - else - setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT}, - EltVT, Custom); setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD, @@ -4870,7 +4866,7 @@ static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, if (VT.isFloatingPoint()) { if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || - EltVT == MVT::bf16) { + (EltVT == MVT::bf16 && !Subtarget.hasVInstructionsBF16())) { if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) || (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Scalar); diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 9e291a6ae431f..27ec052cfda40 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -1334,8 +1334,18 @@ def ProcessorFeatures { !listremove(ARLSFeatures, [FeatureWIDEKL]); // Novalake + list NVLAdditionalFeatures = [FeatureAVX10_2, + FeatureMOVRS, + FeatureEGPR, + FeaturePush2Pop2, + FeaturePPX, + FeatureNF, + FeatureNDD, + FeatureZU, + FeatureCCMP, + FeaturePREFETCHI]; list NVLFeatures = - !listconcat(PTLFeatures, [FeaturePREFETCHI]); + !listconcat(PTLFeatures, NVLAdditionalFeatures); // Clearwaterforest list CWFAdditionalFeatures = [FeaturePREFETCHI, diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 630bcb63be6ed..d8620b9865508 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -512,11 +512,11 @@ StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) { // Look for the CPU features. 
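+ // An s390x /proc/cpuinfo features line looks like (illustrative):
+ //   features : esan3 zarch stfle msa ldisp eimm dfp vx
+ // where the 'vx' token advertises the vector facility.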
SmallVector CPUFeatures; - for (unsigned I = 0, E = Lines.size(); I != E; ++I) - if (Lines[I].starts_with("features")) { - size_t Pos = Lines[I].find(':'); + for (StringRef Line : Lines) + if (Line.starts_with("features")) { + size_t Pos = Line.find(':'); if (Pos != StringRef::npos) { - Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' '); + Line.drop_front(Pos + 1).split(CPUFeatures, ' '); break; } } @@ -524,20 +524,16 @@ StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) { // We need to check for the presence of vector support independently of // the machine type, since we may only use the vector register set when // supported by the kernel (and hypervisor). - bool HaveVectorSupport = false; - for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { - if (CPUFeatures[I] == "vx") - HaveVectorSupport = true; - } + bool HaveVectorSupport = llvm::is_contained(CPUFeatures, "vx"); // Now check the processor machine type. - for (unsigned I = 0, E = Lines.size(); I != E; ++I) { - if (Lines[I].starts_with("processor ")) { - size_t Pos = Lines[I].find("machine = "); + for (StringRef Line : Lines) { + if (Line.starts_with("processor ")) { + size_t Pos = Line.find("machine = "); if (Pos != StringRef::npos) { Pos += sizeof("machine = ") - 1; unsigned int Id; - if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) + if (!Line.drop_front(Pos).getAsInteger(10, Id)) return getCPUNameFromS390Model(Id, HaveVectorSupport); } break; @@ -554,9 +550,9 @@ StringRef sys::detail::getHostCPUNameForRISCV(StringRef ProcCpuinfoContent) { // Look for uarch line to determine cpu name StringRef UArch; - for (unsigned I = 0, E = Lines.size(); I != E; ++I) { - if (Lines[I].starts_with("uarch")) { - UArch = Lines[I].substr(5).ltrim("\t :"); + for (StringRef Line : Lines) { + if (Line.starts_with("uarch")) { + UArch = Line.substr(5).ltrim("\t :"); break; } } diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp index 293cc42ab81c1..02c33b0af2e2f 100644 --- a/llvm/lib/TargetParser/X86TargetParser.cpp +++ b/llvm/lib/TargetParser/X86TargetParser.cpp @@ -176,7 +176,9 @@ constexpr FeatureBitset FeaturesArrowlakeS = constexpr FeatureBitset FeaturesPantherlake = (FeaturesArrowlakeS ^ FeatureWIDEKL); constexpr FeatureBitset FeaturesNovalake = - FeaturesPantherlake | FeaturePREFETCHI; + FeaturesPantherlake | FeaturePREFETCHI | FeatureAVX10_2 | FeatureMOVRS | + FeatureEGPR | FeatureZU | FeatureCCMP | FeaturePush2Pop2 | FeaturePPX | + FeatureNDD | FeatureNF; constexpr FeatureBitset FeaturesClearwaterforest = (FeaturesSierraforest ^ FeatureWIDEKL) | FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 | FeatureSM4 | FeaturePREFETCHI | FeatureUSERMSR; diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp index eab776d7f25a3..d14b5ec2d457b 100644 --- a/llvm/lib/Transforms/IPO/IROutliner.cpp +++ b/llvm/lib/Transforms/IPO/IROutliner.cpp @@ -2111,8 +2111,7 @@ static void createAndInsertBasicBlocks(DenseMap &OldMap, for (Value *RetVal : SortedKeys) { BasicBlock *NewBB = BasicBlock::Create( - ParentFunc->getContext(), - Twine(BaseName) + Twine("_") + Twine(static_cast(Idx++)), + ParentFunc->getContext(), Twine(BaseName) + Twine("_") + Twine(Idx++), ParentFunc); NewMap.insert(std::make_pair(RetVal, NewBB)); } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 04b05627fa769..5dc3175382254 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h 
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -178,11 +178,10 @@ class VPBuilder { new VPInstructionWithType(Opcode, Operands, ResultTy, Flags, DL, Name)); } - VPInstruction *createOverflowingOp(unsigned Opcode, - ArrayRef Operands, - VPRecipeWithIRFlags::WrapFlagsTy WrapFlags, - DebugLoc DL = DebugLoc::getUnknown(), - const Twine &Name = "") { + VPInstruction *createOverflowingOp( + unsigned Opcode, ArrayRef Operands, + VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false}, + DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "") { return tryInsertInstruction( new VPInstruction(Opcode, Operands, WrapFlags, {}, DL, Name)); } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index cbfbc29360b0b..10bd6cd471152 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7639,6 +7639,10 @@ createWidenInductionRecipes(VPInstruction *PhiR, assert(Plan.getLiveIn(IndDesc.getStartValue()) == Start && "Start VPValue must match IndDesc's start value"); + // It is always safe to copy over the NoWrap and FastMath flags. In + // particular, when folding tail by masking, the masked-off lanes are never + // used, so it is safe. + VPIRFlags Flags = vputils::getFlagsFromIndDesc(IndDesc); VPValue *Step = vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep()); @@ -7651,7 +7655,7 @@ createWidenInductionRecipes(VPInstruction *PhiR, PHINode *Phi = cast(PhiR->getUnderlyingInstr()); return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, &Plan.getVF(), - IndDesc, PhiR->getDebugLoc()); + IndDesc, Flags, PhiR->getDebugLoc()); } VPHeaderPHIRecipe * @@ -7705,10 +7709,15 @@ VPRecipeBuilder::tryToOptimizeInductionTruncate(VPInstruction *VPI, PHINode *Phi = WidenIV->getPHINode(); VPValue *Start = WidenIV->getStartValue(); const InductionDescriptor &IndDesc = WidenIV->getInductionDescriptor(); + + // It is always safe to copy over the NoWrap and FastMath flags. In + // particular, when folding tail by masking, the masked-off lanes are never + // used, so it is safe. + VPIRFlags Flags = vputils::getFlagsFromIndDesc(IndDesc); VPValue *Step = vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep()); - return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, &Plan.getVF(), - IndDesc, I, VPI->getDebugLoc()); + return new VPWidenIntOrFpInductionRecipe( + Phi, Start, Step, &Plan.getVF(), IndDesc, I, Flags, VPI->getDebugLoc()); } VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(VPInstruction *VPI, diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 13131a2b61722..0932922c07126 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2151,7 +2151,8 @@ class VPWidenInductionRecipe : public VPHeaderPHIRecipe { /// A recipe for handling phi nodes of integer and floating-point inductions, /// producing their vector values. This is an abstract recipe and must be /// converted to concrete recipes before executing. -class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe { +class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe, + public VPIRFlags { TruncInst *Trunc; // If this recipe is unrolled it will have 2 additional operands. 
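+ // The inherited VPIRFlags carry the nuw/nsw or fast-math flags of the
+ // original induction increment; expandVPWidenIntOrFpInduction consumes
+ // them when materializing the wide add/mul operations.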
@@ -2160,19 +2161,20 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe { public: VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, - DebugLoc DL) + const VPIRFlags &Flags, DebugLoc DL) : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start, Step, IndDesc, DL), - Trunc(nullptr) { + VPIRFlags(Flags), Trunc(nullptr) { addOperand(VF); } VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, - TruncInst *Trunc, DebugLoc DL) + TruncInst *Trunc, const VPIRFlags &Flags, + DebugLoc DL) : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start, Step, IndDesc, DL), - Trunc(Trunc) { + VPIRFlags(Flags), Trunc(Trunc) { addOperand(VF); SmallVector> Metadata; (void)Metadata; @@ -2186,7 +2188,7 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe { VPWidenIntOrFpInductionRecipe *clone() override { return new VPWidenIntOrFpInductionRecipe( getPHINode(), getStartValue(), getStepValue(), getVFValue(), - getInductionDescriptor(), Trunc, getDebugLoc()); + getInductionDescriptor(), Trunc, *this, getDebugLoc()); } VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index cf95b4eac9d75..e2a8e495d5ed5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -2387,7 +2387,9 @@ void VPWidenIntOrFpInductionRecipe::printRecipe( raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << Indent; printAsOperand(O, SlotTracker); - O << " = WIDEN-INDUCTION "; + O << " = WIDEN-INDUCTION"; + printFlags(O); + O << " "; printOperands(O, SlotTracker); if (auto *TI = getTruncInst()) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 9bb61308cb7d9..bbeb447de45cb 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -76,8 +76,13 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes( VPValue *Start = Plan.getOrAddLiveIn(II->getStartValue()); VPValue *Step = vputils::getOrCreateVPValueForSCEVExpr(Plan, II->getStep()); + // It is always safe to copy over the NoWrap and FastMath flags. In + // particular, when folding tail by masking, the masked-off lanes are + // never used, so it is safe. + VPIRFlags Flags = vputils::getFlagsFromIndDesc(*II); NewRecipe = new VPWidenIntOrFpInductionRecipe( - Phi, Start, Step, &Plan.getVF(), *II, Ingredient.getDebugLoc()); + Phi, Start, Step, &Plan.getVF(), *II, Flags, + Ingredient.getDebugLoc()); } } else { assert(isa(&Ingredient) && @@ -542,6 +547,11 @@ static void removeRedundantCanonicalIVs(VPlan &Plan) { // only. if (!vputils::onlyScalarValuesUsed(WidenOriginalIV) || vputils::onlyFirstLaneUsed(WidenNewIV)) { + // We are replacing a wide canonical IV with a suitable wide induction. + // The replacement is used to compute the header mask, hence all lanes + // will be used and we need to drop wrap flags that only apply to lanes + // guaranteed to execute in the original scalar loop.
+ WidenOriginalIV->dropPoisonGeneratingFlags(); WidenNewIV->replaceAllUsesWith(WidenOriginalIV); WidenNewIV->eraseFromParent(); return; @@ -3285,16 +3295,13 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR, const InductionDescriptor &ID = WidenIVR->getInductionDescriptor(); Instruction::BinaryOps AddOp; Instruction::BinaryOps MulOp; - // FIXME: The newly created binary instructions should contain nsw/nuw - // flags, which can be found from the original scalar operations. - VPIRFlags Flags; + VPIRFlags Flags = *WidenIVR; if (ID.getKind() == InductionDescriptor::IK_IntInduction) { AddOp = Instruction::Add; MulOp = Instruction::Mul; } else { AddOp = ID.getInductionOpcode(); MulOp = Instruction::FMul; - Flags = ID.getInductionBinOp()->getFastMathFlags(); } // If the phi is truncated, truncate the start and step values. @@ -3406,7 +3413,7 @@ static void expandVPWidenPointerInduction(VPWidenPointerInductionRecipe *R, Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi()); Type *StepTy = TypeInfo.inferScalarType(Step); VPValue *Offset = Builder.createNaryOp(VPInstruction::StepVector, {}, StepTy); - Offset = Builder.createNaryOp(Instruction::Mul, {Offset, Step}); + Offset = Builder.createOverflowingOp(Instruction::Mul, {Offset, Step}); VPValue *PtrAdd = Builder.createNaryOp( VPInstruction::WidePtrAdd, {ScalarPtrPhi, Offset}, DL, "vector.gep"); R->replaceAllUsesWith(PtrAdd); @@ -3416,7 +3423,7 @@ static void expandVPWidenPointerInduction(VPWidenPointerInductionRecipe *R, Builder.setInsertPoint(ExitingBB, ExitingBB->getTerminator()->getIterator()); VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.inferScalarType(VF), DL); - VPValue *Inc = Builder.createNaryOp(Instruction::Mul, {Step, VF}); + VPValue *Inc = Builder.createOverflowingOp(Instruction::Mul, {Step, VF}); VPValue *InductionGEP = Builder.createPtrAdd(ScalarPtrPhi, Inc, DL, "ptr.ind"); diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h index df1613d760a04..51bafe0846141 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h @@ -73,6 +73,19 @@ std::optional getRecipesForUncountableExit(VPlan &Plan, SmallVectorImpl &Recipes, SmallVectorImpl &GEPs); + +/// Extracts and returns NoWrap and FastMath flags from the induction binop in +/// \p ID. 
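+/// FP inductions yield the induction binop's fast-math flags, integer
+/// inductions its nuw/nsw wrap flags; if there is no induction binop, no
+/// flags are returned.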
+inline VPIRFlags getFlagsFromIndDesc(const InductionDescriptor &ID) { + if (ID.getKind() == InductionDescriptor::IK_FpInduction) + return ID.getInductionBinOp()->getFastMathFlags(); + + if (auto *OBO = dyn_cast_if_present( + ID.getInductionBinOp())) + return VPIRFlags::WrapFlagsTy(OBO->hasNoUnsignedWrap(), + OBO->hasNoSignedWrap()); + return {}; +} } // namespace vputils //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AArch64/fptrunc.ll b/llvm/test/CodeGen/AArch64/fptrunc.ll index da19991d56259..ae86129286ddc 100644 --- a/llvm/test/CodeGen/AArch64/fptrunc.ll +++ b/llvm/test/CodeGen/AArch64/fptrunc.ll @@ -345,19 +345,11 @@ entry: } define <2 x half> @fptrunc_v2f32_v2f16(<2 x float> %a) { -; CHECK-SD-LABEL: fptrunc_v2f32_v2f16: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: fcvtn v0.4h, v0.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fptrunc_v2f32_v2f16: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov v1.s[0], v0.s[0] -; CHECK-GI-NEXT: mov v1.s[1], v0.s[1] -; CHECK-GI-NEXT: fcvtn v0.4h, v1.4s -; CHECK-GI-NEXT: ret +; CHECK-LABEL: fptrunc_v2f32_v2f16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NEXT: ret entry: %c = fptrunc <2 x float> %a to <2 x half> ret <2 x half> %c diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll index fce4f8e69f14d..e526a9f7bc0f6 100644 --- a/llvm/test/CodeGen/AArch64/itofp.ll +++ b/llvm/test/CodeGen/AArch64/itofp.ll @@ -5763,18 +5763,14 @@ define <2 x half> @stofp_v2i64_v2f16(<2 x i64> %a) { ; CHECK-NOFP16-GI: // %bb.0: // %entry ; CHECK-NOFP16-GI-NEXT: scvtf v0.2d, v0.2d ; CHECK-NOFP16-GI-NEXT: fcvtn v0.2s, v0.2d -; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0] -; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1] -; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s +; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NOFP16-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: stofp_v2i64_v2f16: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: scvtf v0.2d, v0.2d ; CHECK-FP16-GI-NEXT: fcvtn v0.2s, v0.2d -; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0] -; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1] -; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s +; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s ; CHECK-FP16-GI-NEXT: ret entry: %c = sitofp <2 x i64> %a to <2 x half> @@ -5808,18 +5804,14 @@ define <2 x half> @utofp_v2i64_v2f16(<2 x i64> %a) { ; CHECK-NOFP16-GI: // %bb.0: // %entry ; CHECK-NOFP16-GI-NEXT: ucvtf v0.2d, v0.2d ; CHECK-NOFP16-GI-NEXT: fcvtn v0.2s, v0.2d -; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0] -; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1] -; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s +; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NOFP16-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: utofp_v2i64_v2f16: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: ucvtf v0.2d, v0.2d ; CHECK-FP16-GI-NEXT: fcvtn v0.2s, v0.2d -; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0] -; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1] -; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s +; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s ; CHECK-FP16-GI-NEXT: ret entry: %c = uitofp <2 x i64> %a to <2 x half> @@ -6232,17 +6224,13 @@ define <2 x half> @stofp_v2i32_v2f16(<2 x i32> %a) { ; CHECK-NOFP16-GI-LABEL: stofp_v2i32_v2f16: ; CHECK-NOFP16-GI: // %bb.0: // %entry ; CHECK-NOFP16-GI-NEXT: scvtf v0.2s, v0.2s -; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0] -; 
CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1] -; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s +; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NOFP16-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: stofp_v2i32_v2f16: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: scvtf v0.2s, v0.2s -; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0] -; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1] -; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s +; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s ; CHECK-FP16-GI-NEXT: ret entry: %c = sitofp <2 x i32> %a to <2 x half> @@ -6267,17 +6255,13 @@ define <2 x half> @utofp_v2i32_v2f16(<2 x i32> %a) { ; CHECK-NOFP16-GI-LABEL: utofp_v2i32_v2f16: ; CHECK-NOFP16-GI: // %bb.0: // %entry ; CHECK-NOFP16-GI-NEXT: ucvtf v0.2s, v0.2s -; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0] -; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1] -; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s +; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NOFP16-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: utofp_v2i32_v2f16: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: ucvtf v0.2s, v0.2s -; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0] -; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1] -; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s +; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s ; CHECK-FP16-GI-NEXT: ret entry: %c = uitofp <2 x i32> %a to <2 x half> @@ -6480,9 +6464,7 @@ define <2 x half> @stofp_v2i16_v2f16(<2 x i16> %a) { ; CHECK-NOFP16-GI-NEXT: shl v0.2s, v0.2s, #16 ; CHECK-NOFP16-GI-NEXT: sshr v0.2s, v0.2s, #16 ; CHECK-NOFP16-GI-NEXT: scvtf v0.2s, v0.2s -; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0] -; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1] -; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s +; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NOFP16-GI-NEXT: ret entry: %c = sitofp <2 x i16> %a to <2 x half> @@ -6509,9 +6491,7 @@ define <2 x half> @utofp_v2i16_v2f16(<2 x i16> %a) { ; CHECK-NOFP16-GI-NEXT: movi d1, #0x00ffff0000ffff ; CHECK-NOFP16-GI-NEXT: and v0.8b, v0.8b, v1.8b ; CHECK-NOFP16-GI-NEXT: ucvtf v0.2s, v0.2s -; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0] -; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1] -; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s +; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NOFP16-GI-NEXT: ret entry: %c = uitofp <2 x i16> %a to <2 x half> @@ -6766,9 +6746,7 @@ define <2 x half> @stofp_v2i8_v2f16(<2 x i8> %a) { ; CHECK-NOFP16-GI-NEXT: shl v0.2s, v0.2s, #24 ; CHECK-NOFP16-GI-NEXT: sshr v0.2s, v0.2s, #24 ; CHECK-NOFP16-GI-NEXT: scvtf v0.2s, v0.2s -; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0] -; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1] -; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s +; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NOFP16-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: stofp_v2i8_v2f16: @@ -6817,9 +6795,7 @@ define <2 x half> @utofp_v2i8_v2f16(<2 x i8> %a) { ; CHECK-NOFP16-GI-NEXT: movi d1, #0x0000ff000000ff ; CHECK-NOFP16-GI-NEXT: and v0.8b, v0.8b, v1.8b ; CHECK-NOFP16-GI-NEXT: ucvtf v0.2s, v0.2s -; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0] -; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1] -; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s +; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NOFP16-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: utofp_v2i8_v2f16: diff --git a/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll b/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll index c1b8bc6031b18..f7dbcd137e742 100644 --- a/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll +++ b/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll @@ -762,25 +762,13 @@ define void @undef_hi3_v4f16(half %arg0) { } define 
void @undef_hi2_v4i16(<2 x i16> %arg0) { -; GFX8-SDAG-LABEL: undef_hi2_v4i16: -; GFX8-SDAG: ; %bb.0: -; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-SDAG-NEXT: ;;#ASMSTART -; GFX8-SDAG-NEXT: ; use v[0:1] -; GFX8-SDAG-NEXT: ;;#ASMEND -; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-GISEL-LABEL: undef_hi2_v4i16: -; GFX8-GISEL: ; %bb.0: -; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 -; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX8-GISEL-NEXT: ;;#ASMSTART -; GFX8-GISEL-NEXT: ; use v[0:1] -; GFX8-GISEL-NEXT: ;;#ASMEND -; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX8-LABEL: undef_hi2_v4i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use v[0:1] +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: undef_hi2_v4i16: ; GFX9: ; %bb.0: @@ -803,25 +791,13 @@ define void @undef_hi2_v4i16(<2 x i16> %arg0) { } define void @undef_hi2_v4f16(<2 x half> %arg0) { -; GFX8-SDAG-LABEL: undef_hi2_v4f16: -; GFX8-SDAG: ; %bb.0: -; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-SDAG-NEXT: ;;#ASMSTART -; GFX8-SDAG-NEXT: ; use v[0:1] -; GFX8-SDAG-NEXT: ;;#ASMEND -; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-GISEL-LABEL: undef_hi2_v4f16: -; GFX8-GISEL: ; %bb.0: -; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 -; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX8-GISEL-NEXT: ;;#ASMSTART -; GFX8-GISEL-NEXT: ; use v[0:1] -; GFX8-GISEL-NEXT: ;;#ASMEND -; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX8-LABEL: undef_hi2_v4f16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use v[0:1] +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: undef_hi2_v4f16: ; GFX9: ; %bb.0: @@ -842,5 +818,3 @@ define void @undef_hi2_v4f16(<2 x half> %arg0) { call void asm sideeffect "; use $0", "v"(<4 x half> %undef.hi); ret void } -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; GFX8: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat-bf16.ll index c94cdadc8ca59..82e199b4969db 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat-bf16.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat-bf16.ll @@ -3,6 +3,8 @@ ; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFBFMIN ; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZFBFMIN-ZVFBFMIN ; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFBFMIN +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+experimental-zvfbfa -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFBFA +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+experimental-zvfbfa -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFBFA define <8 x bfloat> @splat_v8bf16(ptr %x, bfloat %y) { ; ZFBFMIN-ZVFBFMIN-LABEL: splat_v8bf16: @@ -18,6 +20,12 @@ define <8 x bfloat> @splat_v8bf16(ptr %x, bfloat %y) { ; ZVFBFMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFBFMIN-NEXT: vmv.v.x v8, a0 ; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: splat_v8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfmv.v.f v8, fa0 +; ZVFBFA-NEXT: ret %a = insertelement <8 x bfloat> poison, bfloat %y, i32 0 %b = shufflevector <8 x bfloat> %a, <8 x bfloat> poison, <8 x i32> zeroinitializer ret <8 x bfloat> %b @@ -37,6 +45,12 @@ define <16 x bfloat> @splat_16bf16(ptr %x, bfloat %y) { ; ZVFBFMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFBFMIN-NEXT: vmv.v.x v8, a0 ; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: splat_16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfmv.v.f v8, fa0 +; ZVFBFA-NEXT: ret %a = insertelement <16 x bfloat> poison, bfloat %y, i32 0 %b = shufflevector <16 x bfloat> %a, <16 x bfloat> poison, <16 x i32> zeroinitializer ret <16 x bfloat> %b @@ -58,6 +72,12 @@ define <64 x bfloat> @splat_64bf16(ptr %x, bfloat %y) { ; ZVFBFMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; ZVFBFMIN-NEXT: vmv.v.x v8, a0 ; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: splat_64bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfmv.v.f v8, fa0 +; ZVFBFA-NEXT: ret %a = insertelement <64 x bfloat> poison, bfloat %y, i32 0 %b = shufflevector <64 x bfloat> %a, <64 x bfloat> poison, <64 x i32> zeroinitializer ret <64 x bfloat> %b @@ -75,6 +95,12 @@ define <8 x bfloat> @splat_zero_v8bf16(ptr %x) { ; ZVFBFMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFBFMIN-NEXT: vmv.v.i v8, 0 ; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: splat_zero_v8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFBFA-NEXT: vmv.v.i v8, 0 +; ZVFBFA-NEXT: ret ret <8 x bfloat> splat (bfloat 0.0) } @@ -90,6 +116,12 @@ define <16 x bfloat> @splat_zero_16bf16(ptr %x) { ; ZVFBFMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFBFMIN-NEXT: vmv.v.i v8, 0 ; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: splat_zero_16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFBFA-NEXT: vmv.v.i v8, 0 +; ZVFBFA-NEXT: ret ret <16 x bfloat> splat (bfloat 0.0) } @@ -107,6 +139,13 @@ define <8 x bfloat> @splat_negzero_v8bf16(ptr %x) { ; ZVFBFMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; 
ZVFBFMIN-NEXT: vmv.v.x v8, a0 ; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: splat_negzero_v8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 1048568 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFBFA-NEXT: vmv.v.x v8, a0 +; ZVFBFA-NEXT: ret ret <8 x bfloat> splat (bfloat -0.0) } @@ -124,5 +163,12 @@ define <16 x bfloat> @splat_negzero_16bf16(ptr %x) { ; ZVFBFMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFBFMIN-NEXT: vmv.v.x v8, a0 ; ZVFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: splat_negzero_16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 1048568 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFBFA-NEXT: vmv.v.x v8, a0 +; ZVFBFA-NEXT: ret ret <16 x bfloat> splat (bfloat -0.0) } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splat.ll index 40e337c811e8b..7901f8c290543 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splat.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH_RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH_RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zvfh,+experimental-zvfbfa -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA,ZVFBFA_RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zvfh,+experimental-zvfbfa -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA,ZVFBFA_RV64 define <1 x i8> @vp_splat_v1i8(i8 %val, <1 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_splat_v1i8: @@ -183,97 +185,275 @@ define <16 x i32> @vp_splat_v16i32(i32 %val, <16 x i1> %m, i32 zeroext %evl) { } define <1 x i64> @vp_splat_v1i64(i64 %val, <1 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_splat_v1i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v8, (a0), zero -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: .cfi_def_cfa_offset 0 -; RV32-NEXT: ret +; ZVFH_RV32-LABEL: vp_splat_v1i64: +; ZVFH_RV32: # %bb.0: +; ZVFH_RV32-NEXT: addi sp, sp, -16 +; ZVFH_RV32-NEXT: .cfi_def_cfa_offset 16 +; ZVFH_RV32-NEXT: sw a0, 8(sp) +; ZVFH_RV32-NEXT: sw a1, 12(sp) +; ZVFH_RV32-NEXT: addi a0, sp, 8 +; ZVFH_RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; ZVFH_RV32-NEXT: vlse64.v v8, (a0), zero +; ZVFH_RV32-NEXT: addi sp, sp, 16 +; ZVFH_RV32-NEXT: .cfi_def_cfa_offset 0 +; ZVFH_RV32-NEXT: ret ; -; RV64-LABEL: vp_splat_v1i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; RV64-NEXT: vmv.v.x v8, a0 -; RV64-NEXT: ret +; ZVFH_RV64-LABEL: vp_splat_v1i64: +; ZVFH_RV64: # %bb.0: +; ZVFH_RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; ZVFH_RV64-NEXT: vmv.v.x v8, a0 +; ZVFH_RV64-NEXT: ret +; +; ZVFBFA_RV32-LABEL: vp_splat_v1i64: +; ZVFBFA_RV32: # %bb.0: +; ZVFBFA_RV32-NEXT: addi sp, sp, -16 +; ZVFBFA_RV32-NEXT: .cfi_def_cfa_offset 16 +; ZVFBFA_RV32-NEXT: sw a0, 8(sp) +; ZVFBFA_RV32-NEXT: sw a1, 12(sp) +; 
ZVFBFA_RV32-NEXT: addi a0, sp, 8 +; ZVFBFA_RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; ZVFBFA_RV32-NEXT: vlse64.v v8, (a0), zero +; ZVFBFA_RV32-NEXT: addi sp, sp, 16 +; ZVFBFA_RV32-NEXT: .cfi_def_cfa_offset 0 +; ZVFBFA_RV32-NEXT: ret +; +; ZVFBFA_RV64-LABEL: vp_splat_v1i64: +; ZVFBFA_RV64: # %bb.0: +; ZVFBFA_RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; ZVFBFA_RV64-NEXT: vmv.v.x v8, a0 +; ZVFBFA_RV64-NEXT: ret %splat = call <1 x i64> @llvm.experimental.vp.splat.v1i64(i64 %val, <1 x i1> %m, i32 %evl) ret <1 x i64> %splat } define <2 x i64> @vp_splat_v2i64(i64 %val, <2 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_splat_v2i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v8, (a0), zero -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: .cfi_def_cfa_offset 0 -; RV32-NEXT: ret +; ZVFH_RV32-LABEL: vp_splat_v2i64: +; ZVFH_RV32: # %bb.0: +; ZVFH_RV32-NEXT: addi sp, sp, -16 +; ZVFH_RV32-NEXT: .cfi_def_cfa_offset 16 +; ZVFH_RV32-NEXT: sw a0, 8(sp) +; ZVFH_RV32-NEXT: sw a1, 12(sp) +; ZVFH_RV32-NEXT: addi a0, sp, 8 +; ZVFH_RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; ZVFH_RV32-NEXT: vlse64.v v8, (a0), zero +; ZVFH_RV32-NEXT: addi sp, sp, 16 +; ZVFH_RV32-NEXT: .cfi_def_cfa_offset 0 +; ZVFH_RV32-NEXT: ret +; +; ZVFH_RV64-LABEL: vp_splat_v2i64: +; ZVFH_RV64: # %bb.0: +; ZVFH_RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; ZVFH_RV64-NEXT: vmv.v.x v8, a0 +; ZVFH_RV64-NEXT: ret +; +; ZVFBFA_RV32-LABEL: vp_splat_v2i64: +; ZVFBFA_RV32: # %bb.0: +; ZVFBFA_RV32-NEXT: addi sp, sp, -16 +; ZVFBFA_RV32-NEXT: .cfi_def_cfa_offset 16 +; ZVFBFA_RV32-NEXT: sw a0, 8(sp) +; ZVFBFA_RV32-NEXT: sw a1, 12(sp) +; ZVFBFA_RV32-NEXT: addi a0, sp, 8 +; ZVFBFA_RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; ZVFBFA_RV32-NEXT: vlse64.v v8, (a0), zero +; ZVFBFA_RV32-NEXT: addi sp, sp, 16 +; ZVFBFA_RV32-NEXT: .cfi_def_cfa_offset 0 +; ZVFBFA_RV32-NEXT: ret ; -; RV64-LABEL: vp_splat_v2i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; RV64-NEXT: vmv.v.x v8, a0 -; RV64-NEXT: ret +; ZVFBFA_RV64-LABEL: vp_splat_v2i64: +; ZVFBFA_RV64: # %bb.0: +; ZVFBFA_RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; ZVFBFA_RV64-NEXT: vmv.v.x v8, a0 +; ZVFBFA_RV64-NEXT: ret %splat = call <2 x i64> @llvm.experimental.vp.splat.v2i64(i64 %val, <2 x i1> %m, i32 %evl) ret <2 x i64> %splat } define <4 x i64> @vp_splat_v4i64(i64 %val, <4 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_splat_v4i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v8, (a0), zero -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: .cfi_def_cfa_offset 0 -; RV32-NEXT: ret +; ZVFH_RV32-LABEL: vp_splat_v4i64: +; ZVFH_RV32: # %bb.0: +; ZVFH_RV32-NEXT: addi sp, sp, -16 +; ZVFH_RV32-NEXT: .cfi_def_cfa_offset 16 +; ZVFH_RV32-NEXT: sw a0, 8(sp) +; ZVFH_RV32-NEXT: sw a1, 12(sp) +; ZVFH_RV32-NEXT: addi a0, sp, 8 +; ZVFH_RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZVFH_RV32-NEXT: vlse64.v v8, (a0), zero +; ZVFH_RV32-NEXT: addi sp, sp, 16 +; ZVFH_RV32-NEXT: .cfi_def_cfa_offset 0 +; ZVFH_RV32-NEXT: ret ; -; RV64-LABEL: vp_splat_v4i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; RV64-NEXT: vmv.v.x v8, a0 -; RV64-NEXT: ret +; ZVFH_RV64-LABEL: vp_splat_v4i64: +; ZVFH_RV64: 
# %bb.0: +; ZVFH_RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; ZVFH_RV64-NEXT: vmv.v.x v8, a0 +; ZVFH_RV64-NEXT: ret +; +; ZVFBFA_RV32-LABEL: vp_splat_v4i64: +; ZVFBFA_RV32: # %bb.0: +; ZVFBFA_RV32-NEXT: addi sp, sp, -16 +; ZVFBFA_RV32-NEXT: .cfi_def_cfa_offset 16 +; ZVFBFA_RV32-NEXT: sw a0, 8(sp) +; ZVFBFA_RV32-NEXT: sw a1, 12(sp) +; ZVFBFA_RV32-NEXT: addi a0, sp, 8 +; ZVFBFA_RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZVFBFA_RV32-NEXT: vlse64.v v8, (a0), zero +; ZVFBFA_RV32-NEXT: addi sp, sp, 16 +; ZVFBFA_RV32-NEXT: .cfi_def_cfa_offset 0 +; ZVFBFA_RV32-NEXT: ret +; +; ZVFBFA_RV64-LABEL: vp_splat_v4i64: +; ZVFBFA_RV64: # %bb.0: +; ZVFBFA_RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; ZVFBFA_RV64-NEXT: vmv.v.x v8, a0 +; ZVFBFA_RV64-NEXT: ret %splat = call <4 x i64> @llvm.experimental.vp.splat.v4i64(i64 %val, <4 x i1> %m, i32 %evl) ret <4 x i64> %splat } define <8 x i64> @vp_splat_v8i64(i64 %val, <8 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_splat_v8i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v8, (a0), zero -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: .cfi_def_cfa_offset 0 -; RV32-NEXT: ret +; ZVFH_RV32-LABEL: vp_splat_v8i64: +; ZVFH_RV32: # %bb.0: +; ZVFH_RV32-NEXT: addi sp, sp, -16 +; ZVFH_RV32-NEXT: .cfi_def_cfa_offset 16 +; ZVFH_RV32-NEXT: sw a0, 8(sp) +; ZVFH_RV32-NEXT: sw a1, 12(sp) +; ZVFH_RV32-NEXT: addi a0, sp, 8 +; ZVFH_RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; ZVFH_RV32-NEXT: vlse64.v v8, (a0), zero +; ZVFH_RV32-NEXT: addi sp, sp, 16 +; ZVFH_RV32-NEXT: .cfi_def_cfa_offset 0 +; ZVFH_RV32-NEXT: ret +; +; ZVFH_RV64-LABEL: vp_splat_v8i64: +; ZVFH_RV64: # %bb.0: +; ZVFH_RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; ZVFH_RV64-NEXT: vmv.v.x v8, a0 +; ZVFH_RV64-NEXT: ret +; +; ZVFBFA_RV32-LABEL: vp_splat_v8i64: +; ZVFBFA_RV32: # %bb.0: +; ZVFBFA_RV32-NEXT: addi sp, sp, -16 +; ZVFBFA_RV32-NEXT: .cfi_def_cfa_offset 16 +; ZVFBFA_RV32-NEXT: sw a0, 8(sp) +; ZVFBFA_RV32-NEXT: sw a1, 12(sp) +; ZVFBFA_RV32-NEXT: addi a0, sp, 8 +; ZVFBFA_RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; ZVFBFA_RV32-NEXT: vlse64.v v8, (a0), zero +; ZVFBFA_RV32-NEXT: addi sp, sp, 16 +; ZVFBFA_RV32-NEXT: .cfi_def_cfa_offset 0 +; ZVFBFA_RV32-NEXT: ret ; -; RV64-LABEL: vp_splat_v8i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmv.v.x v8, a0 -; RV64-NEXT: ret +; ZVFBFA_RV64-LABEL: vp_splat_v8i64: +; ZVFBFA_RV64: # %bb.0: +; ZVFBFA_RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; ZVFBFA_RV64-NEXT: vmv.v.x v8, a0 +; ZVFBFA_RV64-NEXT: ret %splat = call <8 x i64> @llvm.experimental.vp.splat.v8i64(i64 %val, <8 x i1> %m, i32 %evl) ret <8 x i64> %splat } +define <1 x bfloat> @vp_splat_v1bf16(bfloat %val, <1 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vp_splat_v1bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: fmv.x.w a1, fa0 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vmv.v.x v8, a1 +; ZVFH-NEXT: ret +; +; ZVFBFA-LABEL: vp_splat_v1bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfmv.v.f v8, fa0 +; ZVFBFA-NEXT: ret + %splat = call <1 x bfloat> @llvm.experimental.vp.splat.v1bf16(bfloat %val, <1 x i1> %m, i32 %evl) + ret <1 x bfloat> %splat +} + +define <2 x bfloat> @vp_splat_v2bf16(bfloat %val, <2 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vp_splat_v2bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: fmv.x.w a1, fa0 +; ZVFH-NEXT: 
vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vmv.v.x v8, a1 +; ZVFH-NEXT: ret +; +; ZVFBFA-LABEL: vp_splat_v2bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfmv.v.f v8, fa0 +; ZVFBFA-NEXT: ret + %splat = call <2 x bfloat> @llvm.experimental.vp.splat.v2bf16(bfloat %val, <2 x i1> %m, i32 %evl) + ret <2 x bfloat> %splat +} + +define <4 x bfloat> @vp_splat_v4bf16(bfloat %val, <4 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vp_splat_v4bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: fmv.x.w a1, fa0 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vmv.v.x v8, a1 +; ZVFH-NEXT: ret +; +; ZVFBFA-LABEL: vp_splat_v4bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfmv.v.f v8, fa0 +; ZVFBFA-NEXT: ret + %splat = call <4 x bfloat> @llvm.experimental.vp.splat.v4bf16(bfloat %val, <4 x i1> %m, i32 %evl) + ret <4 x bfloat> %splat +} + +define <8 x bfloat> @vp_splat_v8bf16(bfloat %val, <8 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vp_splat_v8bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: fmv.x.w a1, fa0 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vmv.v.x v8, a1 +; ZVFH-NEXT: ret +; +; ZVFBFA-LABEL: vp_splat_v8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfmv.v.f v8, fa0 +; ZVFBFA-NEXT: ret + %splat = call <8 x bfloat> @llvm.experimental.vp.splat.v8bf16(bfloat %val, <8 x i1> %m, i32 %evl) + ret <8 x bfloat> %splat +} + +define <16 x bfloat> @vp_splat_v16bf16(bfloat %val, <16 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vp_splat_v16bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: fmv.x.w a1, fa0 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vmv.v.x v8, a1 +; ZVFH-NEXT: ret +; +; ZVFBFA-LABEL: vp_splat_v16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfmv.v.f v8, fa0 +; ZVFBFA-NEXT: ret + %splat = call <16 x bfloat> @llvm.experimental.vp.splat.v16bf16(bfloat %val, <16 x i1> %m, i32 %evl) + ret <16 x bfloat> %splat +} + +define <32 x bfloat> @vp_splat_v32bf16(bfloat %val, <32 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vp_splat_v32bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: fmv.x.w a1, fa0 +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vmv.v.x v8, a1 +; ZVFH-NEXT: ret +; +; ZVFBFA-LABEL: vp_splat_v32bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfmv.v.f v8, fa0 +; ZVFBFA-NEXT: ret + %splat = call <32 x bfloat> @llvm.experimental.vp.splat.v32bf16(bfloat %val, <32 x i1> %m, i32 %evl) + ret <32 x bfloat> %splat +} + define <1 x half> @vp_splat_v1f16(half %val, <1 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_splat_v1f16: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll index abd00b647e374..c4232915895cf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll @@ -617,25 +617,22 @@ define @vfadd_vf_nxv32bf16( %va, bf ; ZVFBFA-NEXT: slli a0, a0, 3 ; ZVFBFA-NEXT: sub sp, sp, a0 ; ZVFBFA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFBFA-NEXT: fmv.x.h a0, fa0 -; ZVFBFA-NEXT: vsetvli a1, zero, e16alt, m4, ta, ma -; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFBFA-NEXT: addi a1, sp, 16 -; ZVFBFA-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFBFA-NEXT: vsetvli a1, zero, e16alt, m8, ta, ma -; 
ZVFBFA-NEXT: vmv.v.x v8, a0 +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfmv.v.f v16, fa0 ; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma -; ZVFBFA-NEXT: vfwcvt.f.f.v v0, v8 -; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFBFA-NEXT: addi a0, sp, 16 -; ZVFBFA-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; ZVFBFA-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill +; ZVFBFA-NEXT: vfwcvt.f.f.v v0, v12 +; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v16 +; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v20 +; ZVFBFA-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload ; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFBFA-NEXT: vfadd.vv v0, v8, v0 +; ZVFBFA-NEXT: vfadd.vv v16, v16, v8 ; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma -; ZVFBFA-NEXT: vfncvt.f.f.w v8, v0 +; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16 ; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFBFA-NEXT: vfadd.vv v16, v24, v16 +; ZVFBFA-NEXT: vfadd.vv v16, v0, v24 ; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma ; ZVFBFA-NEXT: vfncvt.f.f.w v12, v16 ; ZVFBFA-NEXT: csrr a0, vlenb diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll index 633a201c0131a..1ab2209647c80 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll @@ -120,9 +120,8 @@ define @vfadd_vf_nxv1bf16( %va, bfloa ; ; ZVFBFA-LABEL: vfadd_vf_nxv1bf16: ; ZVFBFA: # %bb.0: -; ZVFBFA-NEXT: fmv.x.h a1, fa0 ; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma -; ZVFBFA-NEXT: vmv.v.x v9, a1 +; ZVFBFA-NEXT: vfmv.v.f v9, fa0 ; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9, v0.t ; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma @@ -165,9 +164,8 @@ define @vfadd_vf_nxv1bf16_commute( %v ; ; ZVFBFA-LABEL: vfadd_vf_nxv1bf16_commute: ; ZVFBFA: # %bb.0: -; ZVFBFA-NEXT: fmv.x.h a1, fa0 ; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma -; ZVFBFA-NEXT: vmv.v.x v9, a1 +; ZVFBFA-NEXT: vfmv.v.f v9, fa0 ; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9, v0.t ; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma @@ -210,9 +208,8 @@ define @vfadd_vf_nxv1bf16_unmasked( % ; ; ZVFBFA-LABEL: vfadd_vf_nxv1bf16_unmasked: ; ZVFBFA: # %bb.0: -; ZVFBFA-NEXT: fmv.x.h a1, fa0 ; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma -; ZVFBFA-NEXT: vmv.v.x v9, a1 +; ZVFBFA-NEXT: vfmv.v.f v9, fa0 ; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma @@ -255,9 +252,8 @@ define @vfadd_vf_nxv1bf16_unmasked_commute( @vfadd_vf_nxv2bf16( %va, bfloa ; ; ZVFBFA-LABEL: vfadd_vf_nxv2bf16: ; ZVFBFA: # %bb.0: -; ZVFBFA-NEXT: fmv.x.h a1, fa0 ; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma -; ZVFBFA-NEXT: vmv.v.x v9, a1 +; ZVFBFA-NEXT: vfmv.v.f v9, fa0 ; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9, v0.t ; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma @@ -421,9 +416,8 @@ define @vfadd_vf_nxv2bf16_unmasked( % ; ; ZVFBFA-LABEL: vfadd_vf_nxv2bf16_unmasked: ; ZVFBFA: # %bb.0: -; ZVFBFA-NEXT: fmv.x.h a1, fa0 ; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma -; ZVFBFA-NEXT: vmv.v.x v9, a1 +; ZVFBFA-NEXT: vfmv.v.f v9, fa0 ; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma @@ -542,9 +536,8 @@ define @vfadd_vf_nxv4bf16( %va, bfloa ; ; ZVFBFA-LABEL: vfadd_vf_nxv4bf16: ; ZVFBFA: # %bb.0: -; ZVFBFA-NEXT: fmv.x.h a1, fa0 ; ZVFBFA-NEXT: vsetvli zero, a0, 
e16alt, m1, ta, ma -; ZVFBFA-NEXT: vmv.v.x v12, a1 +; ZVFBFA-NEXT: vfmv.v.f v12, fa0 ; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v12, v0.t ; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma @@ -587,9 +580,8 @@ define @vfadd_vf_nxv4bf16_unmasked( % ; ; ZVFBFA-LABEL: vfadd_vf_nxv4bf16_unmasked: ; ZVFBFA: # %bb.0: -; ZVFBFA-NEXT: fmv.x.h a1, fa0 ; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma -; ZVFBFA-NEXT: vmv.v.x v12, a1 +; ZVFBFA-NEXT: vfmv.v.f v12, fa0 ; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v12 ; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma @@ -708,9 +700,8 @@ define @vfadd_vf_nxv8bf16( %va, bfloa ; ; ZVFBFA-LABEL: vfadd_vf_nxv8bf16: ; ZVFBFA: # %bb.0: -; ZVFBFA-NEXT: fmv.x.h a1, fa0 ; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma -; ZVFBFA-NEXT: vmv.v.x v16, a1 +; ZVFBFA-NEXT: vfmv.v.f v16, fa0 ; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8, v0.t ; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v16, v0.t ; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma @@ -753,9 +744,8 @@ define @vfadd_vf_nxv8bf16_unmasked( % ; ; ZVFBFA-LABEL: vfadd_vf_nxv8bf16_unmasked: ; ZVFBFA: # %bb.0: -; ZVFBFA-NEXT: fmv.x.h a1, fa0 ; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma -; ZVFBFA-NEXT: vmv.v.x v16, a1 +; ZVFBFA-NEXT: vfmv.v.f v16, fa0 ; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v16 ; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma @@ -874,9 +864,8 @@ define @vfadd_vf_nxv16bf16( %va, bf ; ; ZVFBFA-LABEL: vfadd_vf_nxv16bf16: ; ZVFBFA: # %bb.0: -; ZVFBFA-NEXT: fmv.x.h a1, fa0 ; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma -; ZVFBFA-NEXT: vmv.v.x v24, a1 +; ZVFBFA-NEXT: vfmv.v.f v24, fa0 ; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8, v0.t ; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v24, v0.t ; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma @@ -919,9 +908,8 @@ define @vfadd_vf_nxv16bf16_unmasked( @vfadd_vf_nxv32bf16( %va, bf ; ZVFBFA-NEXT: addi sp, sp, -16 ; ZVFBFA-NEXT: .cfi_def_cfa_offset 16 ; ZVFBFA-NEXT: csrr a1, vlenb -; ZVFBFA-NEXT: slli a1, a1, 4 +; ZVFBFA-NEXT: slli a1, a1, 3 ; ZVFBFA-NEXT: sub sp, sp, a1 -; ZVFBFA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; ZVFBFA-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFBFA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; ZVFBFA-NEXT: vsetvli a1, zero, e16alt, m8, ta, ma ; ZVFBFA-NEXT: vmv1r.v v7, v0 -; ZVFBFA-NEXT: fmv.x.h a1, fa0 +; ZVFBFA-NEXT: vfmv.v.f v24, fa0 ; ZVFBFA-NEXT: csrr a2, vlenb -; ZVFBFA-NEXT: vmv.v.x v24, a1 ; ZVFBFA-NEXT: slli a1, a2, 1 ; ZVFBFA-NEXT: srli a2, a2, 2 ; ZVFBFA-NEXT: sub a3, a0, a1 -; ZVFBFA-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFBFA-NEXT: vsetvli a4, zero, e8alt, mf2, ta, ma ; ZVFBFA-NEXT: vslidedown.vx v0, v0, a2 ; ZVFBFA-NEXT: sltu a2, a0, a3 ; ZVFBFA-NEXT: addi a2, a2, -1 ; ZVFBFA-NEXT: and a2, a2, a3 -; ZVFBFA-NEXT: csrr a3, vlenb -; ZVFBFA-NEXT: slli a3, a3, 3 -; ZVFBFA-NEXT: add a3, sp, a3 -; ZVFBFA-NEXT: addi a3, a3, 16 +; ZVFBFA-NEXT: addi a3, sp, 16 ; ZVFBFA-NEXT: vs8r.v v24, (a3) # vscale x 64-byte Folded Spill ; ZVFBFA-NEXT: vsetvli zero, a2, e16alt, m4, ta, ma ; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v28, v0.t @@ -1402,24 +1386,17 @@ define @vfadd_vf_nxv32bf16( %va, bf ; ZVFBFA-NEXT: mv a0, a1 ; ZVFBFA-NEXT: .LBB24_2: ; ZVFBFA-NEXT: vmv1r.v v0, v7 +; ZVFBFA-NEXT: addi a1, sp, 16 +; ZVFBFA-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload ; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, 
ma -; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8, v0.t -; ZVFBFA-NEXT: addi a0, sp, 16 -; ZVFBFA-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; ZVFBFA-NEXT: csrr a0, vlenb -; ZVFBFA-NEXT: slli a0, a0, 3 -; ZVFBFA-NEXT: add a0, sp, a0 -; ZVFBFA-NEXT: addi a0, a0, 16 -; ZVFBFA-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v16, v0.t -; ZVFBFA-NEXT: addi a0, sp, 16 -; ZVFBFA-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v24, v0.t +; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v8, v0.t ; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFBFA-NEXT: vfadd.vv v16, v16, v24, v0.t +; ZVFBFA-NEXT: vfadd.vv v16, v24, v16, v0.t ; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma ; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFBFA-NEXT: csrr a0, vlenb -; ZVFBFA-NEXT: slli a0, a0, 4 +; ZVFBFA-NEXT: slli a0, a0, 3 ; ZVFBFA-NEXT: add sp, sp, a0 ; ZVFBFA-NEXT: .cfi_def_cfa sp, 16 ; ZVFBFA-NEXT: addi sp, sp, 16 @@ -1542,15 +1519,14 @@ define @vfadd_vf_nxv32bf16_unmasked( @vfadd_vf_nxv32bf16_unmasked( @vp_splat_nxv1i8(i8 %val, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_splat_nxv1i8: @@ -292,6 +294,12 @@ define @vp_splat_nxv1bf16(bfloat %val, % ; ZFMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZFMIN-NEXT: vmv.v.x v8, a1 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vp_splat_nxv1bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfmv.v.f v8, fa0 +; ZVFBFA-NEXT: ret %splat = call @llvm.experimental.vp.splat.nxv1bf16(bfloat %val, %m, i32 %evl) ret %splat } @@ -310,6 +318,12 @@ define @vp_splat_nxv2bf16(bfloat %val, % ; ZFMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZFMIN-NEXT: vmv.v.x v8, a1 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vp_splat_nxv2bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfmv.v.f v8, fa0 +; ZVFBFA-NEXT: ret %splat = call @llvm.experimental.vp.splat.nxv2bf16(bfloat %val, %m, i32 %evl) ret %splat } @@ -328,6 +342,12 @@ define @vp_splat_nxv4bf16(bfloat %val, % ; ZFMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZFMIN-NEXT: vmv.v.x v8, a1 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vp_splat_nxv4bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfmv.v.f v8, fa0 +; ZVFBFA-NEXT: ret %splat = call @llvm.experimental.vp.splat.nxv4bf16(bfloat %val, %m, i32 %evl) ret %splat } @@ -346,6 +366,12 @@ define @vp_splat_nxv8bf16(bfloat %val, % ; ZFMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZFMIN-NEXT: vmv.v.x v8, a1 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vp_splat_nxv8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfmv.v.f v8, fa0 +; ZVFBFA-NEXT: ret %splat = call @llvm.experimental.vp.splat.nxv8bf16(bfloat %val, %m, i32 %evl) ret %splat } @@ -364,6 +390,12 @@ define @vp_splat_nxv16bf16(bfloat %val, @llvm.experimental.vp.splat.nxv16bf16(bfloat %val, %m, i32 %evl) ret %splat } @@ -382,6 +414,12 @@ define @vp_splat_nxv32bf16(bfloat %val, @llvm.experimental.vp.splat.nxv32bf16(bfloat %val, %m, i32 %evl) ret %splat } @@ -406,6 +444,13 @@ define @vp_splat_nxv1f16(half %val, %m, i3 ; ZFMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZFMIN-NEXT: vmv.v.x v8, a1 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vp_splat_nxv1f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a1, fa0 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFBFA-NEXT: vmv.v.x v8, a1 +; ZVFBFA-NEXT: ret %splat = call @llvm.experimental.vp.splat.nxv1f16(half %val, %m, i32 %evl) ret %splat } @@ -430,6 +475,13 
@@ define @vp_splat_nxv2f16(half %val, %m, i3 ; ZFMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZFMIN-NEXT: vmv.v.x v8, a1 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vp_splat_nxv2f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a1, fa0 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFBFA-NEXT: vmv.v.x v8, a1 +; ZVFBFA-NEXT: ret %splat = call @llvm.experimental.vp.splat.nxv2f16(half %val, %m, i32 %evl) ret %splat } @@ -454,6 +506,13 @@ define @vp_splat_nxv4f16(half %val, %m, i3 ; ZFMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZFMIN-NEXT: vmv.v.x v8, a1 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vp_splat_nxv4f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a1, fa0 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFBFA-NEXT: vmv.v.x v8, a1 +; ZVFBFA-NEXT: ret %splat = call @llvm.experimental.vp.splat.nxv4f16(half %val, %m, i32 %evl) ret %splat } @@ -478,6 +537,13 @@ define @vp_splat_nxv8f16(half %val, %m, i3 ; ZFMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZFMIN-NEXT: vmv.v.x v8, a1 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vp_splat_nxv8f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a1, fa0 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFBFA-NEXT: vmv.v.x v8, a1 +; ZVFBFA-NEXT: ret %splat = call @llvm.experimental.vp.splat.nxv8f16(half %val, %m, i32 %evl) ret %splat } @@ -502,6 +568,13 @@ define @vp_splat_nxv16f16(half %val, %m, ; ZFMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZFMIN-NEXT: vmv.v.x v8, a1 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vp_splat_nxv16f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a1, fa0 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFBFA-NEXT: vmv.v.x v8, a1 +; ZVFBFA-NEXT: ret %splat = call @llvm.experimental.vp.splat.nxv16f16(half %val, %m, i32 %evl) ret %splat } @@ -526,6 +599,13 @@ define @vp_splat_nxv32f16(half %val, %m, ; ZFMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZFMIN-NEXT: vmv.v.x v8, a1 ; ZFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vp_splat_nxv32f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a1, fa0 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFBFA-NEXT: vmv.v.x v8, a1 +; ZVFBFA-NEXT: ret %splat = call @llvm.experimental.vp.splat.nxv32f16(half %val, %m, i32 %evl) ret %splat } diff --git a/llvm/test/CodeGen/RISCV/rvv/vsplats-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/vsplats-bf16.ll index af9881aca03bc..24ed31cc55225 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsplats-bf16.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsplats-bf16.ll @@ -7,6 +7,10 @@ ; RUN: | FileCheck %s --check-prefixes=NOZFBFMIN ; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+zvfbfmin,+v -target-abi lp64d -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=NOZFBFMIN +; RUN: llc -mtriple=riscv32 -mattr=+f,+d,+experimental-zvfbfa,+v -target-abi ilp32d -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=ZVFBFA +; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+experimental-zvfbfa,+v -target-abi lp64d -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=ZVFBFA define @vsplat_nxv8bf16(bfloat %f) { ; CHECK-LABEL: vsplat_nxv8bf16: @@ -22,6 +26,12 @@ define @vsplat_nxv8bf16(bfloat %f) { ; NOZFBFMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; NOZFBFMIN-NEXT: vmv.v.x v8, a0 ; NOZFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vsplat_nxv8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfmv.v.f v8, fa0 +; ZVFBFA-NEXT: ret %head = insertelement poison, bfloat %f, i32 0 %splat = shufflevector %head, poison, zeroinitializer ret %splat @@ -39,6 +49,12 @@ define @vsplat_zero_nxv8bf16() { ; NOZFBFMIN-NEXT: vsetvli a0, zero, 
e16, m2, ta, ma ; NOZFBFMIN-NEXT: vmv.v.i v8, 0 ; NOZFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vsplat_zero_nxv8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFBFA-NEXT: vmv.v.i v8, 0 +; ZVFBFA-NEXT: ret ret splat (bfloat zeroinitializer) } @@ -56,5 +72,12 @@ define @vsplat_negzero_nxv8bf16() { ; NOZFBFMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; NOZFBFMIN-NEXT: vmv.v.x v8, a0 ; NOZFBFMIN-NEXT: ret +; +; ZVFBFA-LABEL: vsplat_negzero_nxv8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 1048568 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFBFA-NEXT: vmv.v.x v8, a0 +; ZVFBFA-NEXT: ret ret splat (bfloat -0.0) } diff --git a/llvm/test/CodeGen/X86/bittest-big-integer.ll b/llvm/test/CodeGen/X86/bittest-big-integer.ll index 9b7569ff8b29f..b85a20b9d6b6e 100644 --- a/llvm/test/CodeGen/X86/bittest-big-integer.ll +++ b/llvm/test/CodeGen/X86/bittest-big-integer.ll @@ -1488,3 +1488,618 @@ define i1 @sequence_i128(ptr %word, i32 %pos0, i32 %pos1, i32 %pos2) nounwind { store i128 %res2, ptr %word ret i1 %cmp1 } + +define i32 @blsr_u512(ptr %word) nounwind { +; X86-LABEL: blsr_u512: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $240, %esp +; X86-NEXT: movl 8(%ebp), %ebx +; X86-NEXT: movl 12(%ebx), %esi +; X86-NEXT: movl 28(%ebx), %eax +; X86-NEXT: movl 60(%ebx), %ecx +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: movl 44(%ebx), %edx +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %esi, %ecx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl %edx, %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl 20(%ebx), %edx +; X86-NEXT: movl 52(%ebx), %eax +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl %eax, %edx +; X86-NEXT: movl 4(%ebx), %edi +; X86-NEXT: movl 36(%ebx), %esi +; X86-NEXT: movl %edi, %eax +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl %esi, %eax +; X86-NEXT: orl %edx, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: movl 24(%ebx), %edx +; X86-NEXT: movl 56(%ebx), %ecx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: movl 8(%ebx), %ecx +; X86-NEXT: movl 40(%ebx), %esi +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl %esi, %ecx +; X86-NEXT: orl %edx, %ecx +; X86-NEXT: movl 16(%ebx), %edx +; X86-NEXT: movl 48(%ebx), %esi +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl %esi, %edx +; X86-NEXT: movl (%ebx), %esi +; X86-NEXT: movl 32(%ebx), %ebx +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl %ebx, %esi +; X86-NEXT: orl %edx, %esi +; X86-NEXT: orl %ecx, %esi +; X86-NEXT: orl %eax, %esi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: je .LBB26_1 +; X86-NEXT: # %bb.2: # %cond.false +; X86-NEXT: testl %ebx, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: jne .LBB26_3 +; X86-NEXT: # %bb.4: # %cond.false +; X86-NEXT: rep 
bsfl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: addl $32, %eax +; X86-NEXT: jmp .LBB26_5 +; X86-NEXT: .LBB26_1: +; X86-NEXT: movl $512, %ecx # imm = 0x200 +; X86-NEXT: jmp .LBB26_41 +; X86-NEXT: .LBB26_3: +; X86-NEXT: rep bsfl %ebx, %eax +; X86-NEXT: .LBB26_5: # %cond.false +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: testl %ecx, %ecx +; X86-NEXT: jne .LBB26_6 +; X86-NEXT: # %bb.7: # %cond.false +; X86-NEXT: rep bsfl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: addl $32, %ecx +; X86-NEXT: jmp .LBB26_8 +; X86-NEXT: .LBB26_6: +; X86-NEXT: rep bsfl %ecx, %ecx +; X86-NEXT: .LBB26_8: # %cond.false +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: jne .LBB26_10 +; X86-NEXT: # %bb.9: # %cond.false +; X86-NEXT: addl $64, %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: .LBB26_10: # %cond.false +; X86-NEXT: testl %esi, %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: jne .LBB26_11 +; X86-NEXT: # %bb.12: # %cond.false +; X86-NEXT: rep bsfl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: addl $32, %ecx +; X86-NEXT: testl %edx, %edx +; X86-NEXT: je .LBB26_15 +; X86-NEXT: .LBB26_14: +; X86-NEXT: rep bsfl %edx, %edx +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: je .LBB26_17 +; X86-NEXT: jmp .LBB26_18 +; X86-NEXT: .LBB26_11: +; X86-NEXT: rep bsfl %esi, %ecx +; X86-NEXT: testl %edx, %edx +; X86-NEXT: jne .LBB26_14 +; X86-NEXT: .LBB26_15: # %cond.false +; X86-NEXT: rep bsfl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: addl $32, %edx +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: jne .LBB26_18 +; X86-NEXT: .LBB26_17: # %cond.false +; X86-NEXT: addl $64, %edx +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: .LBB26_18: # %cond.false +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: orl %edx, %esi +; X86-NEXT: jne .LBB26_20 +; X86-NEXT: # %bb.19: # %cond.false +; X86-NEXT: subl $-128, %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: .LBB26_20: # %cond.false +; X86-NEXT: addl $256, %eax # imm = 0x100 +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: testl %edx, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: jne .LBB26_21 +; X86-NEXT: # %bb.22: # %cond.false +; X86-NEXT: rep bsfl %edi, %ebx +; X86-NEXT: addl $32, %ebx +; X86-NEXT: jmp .LBB26_23 +; X86-NEXT: .LBB26_21: +; X86-NEXT: rep bsfl %edx, %ebx +; X86-NEXT: .LBB26_23: # %cond.false +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: testl %ecx, %ecx +; X86-NEXT: jne .LBB26_24 +; X86-NEXT: # %bb.25: # %cond.false +; X86-NEXT: rep bsfl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: addl $32, %ecx +; X86-NEXT: orl %edi, %edx +; X86-NEXT: je .LBB26_27 +; X86-NEXT: jmp .LBB26_28 +; X86-NEXT: .LBB26_24: +; X86-NEXT: rep bsfl %ecx, %ecx +; X86-NEXT: orl %edi, %edx +; X86-NEXT: jne .LBB26_28 +; X86-NEXT: .LBB26_27: # %cond.false +; X86-NEXT: addl $64, %ecx +; X86-NEXT: movl %ecx, %ebx +; X86-NEXT: .LBB26_28: # %cond.false +; X86-NEXT: testl %esi, %esi 
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: jne .LBB26_29 +; X86-NEXT: # %bb.30: # %cond.false +; X86-NEXT: rep bsfl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: addl $32, %ecx +; X86-NEXT: testl %edx, %edx +; X86-NEXT: je .LBB26_33 +; X86-NEXT: .LBB26_32: +; X86-NEXT: rep bsfl %edx, %edx +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: je .LBB26_35 +; X86-NEXT: jmp .LBB26_36 +; X86-NEXT: .LBB26_29: +; X86-NEXT: rep bsfl %esi, %ecx +; X86-NEXT: testl %edx, %edx +; X86-NEXT: jne .LBB26_32 +; X86-NEXT: .LBB26_33: # %cond.false +; X86-NEXT: rep bsfl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: addl $32, %edx +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: jne .LBB26_36 +; X86-NEXT: .LBB26_35: # %cond.false +; X86-NEXT: addl $64, %edx +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: .LBB26_36: # %cond.false +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl %edi, %esi +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: orl %edx, %esi +; X86-NEXT: jne .LBB26_38 +; X86-NEXT: # %bb.37: # %cond.false +; X86-NEXT: subl $-128, %ecx +; X86-NEXT: movl %ecx, %ebx +; X86-NEXT: .LBB26_38: # %cond.false +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: movl %edi, %ecx +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: orl %ecx, %esi +; X86-NEXT: orl %edx, %esi +; X86-NEXT: movl %ebx, %ecx +; X86-NEXT: jne .LBB26_40 +; X86-NEXT: # %bb.39: # %cond.false +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: .LBB26_40: # %cond.false +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: .LBB26_41: # %cond.end +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: shrl $3, %esi +; X86-NEXT: andl $60, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: leal {{[0-9]+}}(%esp), %edx +; X86-NEXT: subl %esi, %edx +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $1, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, 
{{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl $31, %ecx +; X86-NEXT: movl 56(%edx), %edi +; X86-NEXT: movl 60(%edx), %esi +; X86-NEXT: shldl %cl, %edi, %esi +; X86-NEXT: notl %esi +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 52(%edx), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %esi, %edi +; X86-NEXT: notl %edi +; X86-NEXT: andl %eax, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 40(%edx), %eax +; X86-NEXT: movl 44(%edx), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %eax, %esi +; X86-NEXT: notl %esi +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 36(%edx), %esi +; X86-NEXT: shldl %cl, %esi, %eax +; X86-NEXT: notl %eax +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 32(%edx), %eax +; X86-NEXT: shldl %cl, %eax, %esi +; X86-NEXT: notl %esi +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 28(%edx), %esi +; X86-NEXT: shldl %cl, %esi, %eax +; X86-NEXT: notl %eax +; X86-NEXT: andl %ebx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 24(%edx), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %eax, %esi +; X86-NEXT: notl %esi +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 4(%edx), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 8(%edx), %eax +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shldl %cl, %esi, %edx +; X86-NEXT: notl %edx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 12(%ebx), %edx +; X86-NEXT: movl %edx, %esi +; X86-NEXT: shldl %cl, %eax, %esi +; X86-NEXT: notl %esi +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 16(%ebx), %eax +; X86-NEXT: movl %eax, %esi +; X86-NEXT: shldl %cl, %edx, %esi +; X86-NEXT: notl %esi +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 20(%ebx), %edx +; X86-NEXT: movl %edx, %esi +; X86-NEXT: shldl %cl, %eax, %esi +; X86-NEXT: notl %esi +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: notl %eax +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: negl %eax +; X86-NEXT: movl 208(%esp,%eax), %edx +; X86-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: notl %eax +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl %cl, %eax, %edx +; X86-NEXT: notl %edx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl (%ebx), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shll %cl, %eax +; X86-NEXT: notl %eax +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: shldl %cl, %edi, %ebx +; X86-NEXT: notl %ebx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-NEXT: movl %ebx, %edi +; X86-NEXT: movl 8(%ebp), %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: movl %ebx, 24(%ecx) +; X86-NEXT: movl %esi, 20(%ecx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: movl %esi, 16(%ecx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: movl %esi, 12(%ecx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: movl %esi, 8(%ecx) +; X86-NEXT: movl %edi, 4(%ecx) +; X86-NEXT: movl %eax, (%ecx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 28(%ecx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 32(%ecx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 36(%ecx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 40(%ecx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 44(%ecx) +; X86-NEXT: movl %edx, 48(%ecx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 52(%ecx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 56(%ecx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 60(%ecx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; SSE-LABEL: blsr_u512: +; SSE: # %bb.0: +; SSE-NEXT: pushq %r15 +; SSE-NEXT: pushq %r14 +; SSE-NEXT: pushq %r12 +; SSE-NEXT: pushq %rbx +; SSE-NEXT: pushq %rax +; SSE-NEXT: movq 56(%rdi), %rcx +; SSE-NEXT: movq 48(%rdi), %rdx +; SSE-NEXT: movq 40(%rdi), %rsi +; SSE-NEXT: movq 32(%rdi), %r11 +; SSE-NEXT: movq 24(%rdi), %r8 +; SSE-NEXT: movq 16(%rdi), %r9 +; SSE-NEXT: movq (%rdi), %rax +; SSE-NEXT: movq 8(%rdi), %r10 +; SSE-NEXT: rep bsfq %rax, %rbx +; SSE-NEXT: rep bsfq %r10, %r14 +; SSE-NEXT: addq $64, %r14 +; SSE-NEXT: testq %rax, %rax +; SSE-NEXT: cmovneq %rbx, %r14 +; SSE-NEXT: rep bsfq %r9, %r15 +; SSE-NEXT: rep bsfq %r8, %rbx +; SSE-NEXT: addq $64, %rbx +; SSE-NEXT: testq %r9, %r9 +; SSE-NEXT: cmovneq %r15, %rbx +; SSE-NEXT: subq $-128, %rbx +; SSE-NEXT: movq %rax, %r15 +; SSE-NEXT: movq %rax, %r12 +; SSE-NEXT: orq %r10, %r12 +; SSE-NEXT: cmovneq %r14, %rbx +; SSE-NEXT: rep bsfq %r11, %r12 +; SSE-NEXT: rep bsfq %rsi, %r14 +; SSE-NEXT: addq $64, %r14 +; SSE-NEXT: testq %r11, %r11 +; 
SSE-NEXT: cmovneq %r12, %r14 +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; SSE-NEXT: rep bsfq %rdx, %r12 +; SSE-NEXT: movl $64, %eax +; SSE-NEXT: rep bsfq %rcx, %rax +; SSE-NEXT: addq $64, %rax +; SSE-NEXT: testq %rdx, %rdx +; SSE-NEXT: cmovneq %r12, %rax +; SSE-NEXT: subq $-128, %rax +; SSE-NEXT: movq %r11, -{{[0-9]+}}(%rsp) +; SSE-NEXT: orq %rsi, %r11 +; SSE-NEXT: cmovneq %r14, %rax +; SSE-NEXT: addq $256, %rax # imm = 0x100 +; SSE-NEXT: movq %r10, -{{[0-9]+}}(%rsp) +; SSE-NEXT: orq %r8, %r10 +; SSE-NEXT: orq %r9, %r15 +; SSE-NEXT: orq %r10, %r15 +; SSE-NEXT: cmovneq %rbx, %rax +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movq %r8, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movq %r9, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movl %eax, %ecx +; SSE-NEXT: andl $32, %ecx +; SSE-NEXT: movl %eax, %edx +; SSE-NEXT: andl $480, %edx # imm = 0x1E0 +; SSE-NEXT: shrl $3, %edx +; SSE-NEXT: movl %edx, %esi +; SSE-NEXT: andl $-8, %esi +; SSE-NEXT: movq -128(%rsp,%rsi), %r8 +; SSE-NEXT: shrq %cl, %r8 +; SSE-NEXT: movl -120(%rsp,%rsi), %esi +; SSE-NEXT: addl %esi, %esi +; SSE-NEXT: notl %ecx +; SSE-NEXT: # kill: def $cl killed $cl killed $ecx +; SSE-NEXT: shlq %cl, %rsi +; SSE-NEXT: orl %r8d, %esi +; SSE-NEXT: btrl %eax, %esi +; SSE-NEXT: movl %esi, (%rdi,%rdx) +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: addq $8, %rsp +; SSE-NEXT: popq %rbx +; SSE-NEXT: popq %r12 +; SSE-NEXT: popq %r14 +; SSE-NEXT: popq %r15 +; SSE-NEXT: retq +; +; AVX2-LABEL: blsr_u512: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %r15 +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %r13 +; AVX2-NEXT: pushq %r12 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: movq 56(%rdi), %rcx +; AVX2-NEXT: movq 40(%rdi), %rdx +; AVX2-NEXT: movq 32(%rdi), %r11 +; AVX2-NEXT: movq 24(%rdi), %rsi +; AVX2-NEXT: movq 16(%rdi), %r8 +; AVX2-NEXT: movq (%rdi), %r9 +; AVX2-NEXT: movq 8(%rdi), %r10 +; AVX2-NEXT: xorl %ebx, %ebx +; AVX2-NEXT: tzcntq %r9, %rbx +; AVX2-NEXT: tzcntq %r10, %rax +; AVX2-NEXT: addq $64, %rax +; AVX2-NEXT: testq %r9, %r9 +; AVX2-NEXT: cmovneq %rbx, %rax +; AVX2-NEXT: xorl %r14d, %r14d +; AVX2-NEXT: tzcntq %r8, %r14 +; AVX2-NEXT: xorl %ebx, %ebx +; AVX2-NEXT: tzcntq %rsi, %rbx +; AVX2-NEXT: addq $64, %rbx +; AVX2-NEXT: testq %r8, %r8 +; AVX2-NEXT: cmovneq %r14, %rbx +; AVX2-NEXT: subq $-128, %rbx +; AVX2-NEXT: movq %r9, %r14 +; AVX2-NEXT: movq %r9, %r15 +; AVX2-NEXT: orq %r10, %r15 +; AVX2-NEXT: cmovneq %rax, %rbx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %r11, %rax +; AVX2-NEXT: xorl %r12d, %r12d +; AVX2-NEXT: tzcntq %rdx, %r12 +; AVX2-NEXT: addq $64, %r12 +; AVX2-NEXT: testq %r11, %r11 +; AVX2-NEXT: cmovneq %rax, %r12 +; AVX2-NEXT: movq 48(%rdi), %r15 +; AVX2-NEXT: xorl %r13d, %r13d +; AVX2-NEXT: tzcntq %r15, %r13 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %rcx, %rax +; AVX2-NEXT: addq $64, %rax +; AVX2-NEXT: testq %r15, %r15 +; AVX2-NEXT: cmovneq %r13, %rax +; AVX2-NEXT: subq $-128, %rax +; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: movq %r11, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: orq %rdx, %r11 +; AVX2-NEXT: cmovneq %r12, %rax +; AVX2-NEXT: addq $256, %rax # imm = 0x100 +; AVX2-NEXT: movq %r10, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: orq %rsi, %r10 +; AVX2-NEXT: orq %r8, %r14 +; AVX2-NEXT: orq 
%r10, %r14 +; AVX2-NEXT: cmovneq %rbx, %rax +; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: movq %r9, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: movq %r15, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: movq %r8, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: movl %eax, %ecx +; AVX2-NEXT: andl $32, %ecx +; AVX2-NEXT: movl %eax, %edx +; AVX2-NEXT: andl $480, %edx # imm = 0x1E0 +; AVX2-NEXT: shrl $3, %edx +; AVX2-NEXT: movl %edx, %esi +; AVX2-NEXT: andl $-8, %esi +; AVX2-NEXT: shrxq %rcx, -128(%rsp,%rsi), %r8 +; AVX2-NEXT: notl %ecx +; AVX2-NEXT: movl -120(%rsp,%rsi), %esi +; AVX2-NEXT: addl %esi, %esi +; AVX2-NEXT: shlxq %rcx, %rsi, %rcx +; AVX2-NEXT: orl %r8d, %ecx +; AVX2-NEXT: btrl %eax, %ecx +; AVX2-NEXT: movl %ecx, (%rdi,%rdx) +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r12 +; AVX2-NEXT: popq %r13 +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: popq %r15 +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: blsr_u512: +; AVX512: # %bb.0: +; AVX512-NEXT: pushq %rax +; AVX512-NEXT: vmovups (%rdi), %ymm0 +; AVX512-NEXT: vmovups 32(%rdi), %ymm1 +; AVX512-NEXT: vmovdqu64 (%rdi), %zmm2 +; AVX512-NEXT: vpternlogd {{.*#+}} zmm3 = -1 +; AVX512-NEXT: vpaddq %zmm3, %zmm2, %zmm3 +; AVX512-NEXT: vpandnq %zmm3, %zmm2, %zmm3 +; AVX512-NEXT: vplzcntq %zmm3, %zmm3 +; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm4 = [64,128,192,256,320,384,448,512] +; AVX512-NEXT: vpsubq %zmm3, %zmm4, %zmm3 +; AVX512-NEXT: vptestmq %zmm2, %zmm2, %k1 +; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm2 = [512,512,512,512,512,512,512,512] +; AVX512-NEXT: vpcompressq %zmm3, %zmm2 {%k1} +; AVX512-NEXT: vmovq %xmm2, %rax +; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512-NEXT: vmovdqu %ymm2, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovdqu %ymm2, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: movl %eax, %ecx +; AVX512-NEXT: andl $32, %ecx +; AVX512-NEXT: movl %ecx, %edx +; AVX512-NEXT: notl %edx +; AVX512-NEXT: movl %eax, %esi +; AVX512-NEXT: shrl $3, %esi +; AVX512-NEXT: movl %esi, %r8d +; AVX512-NEXT: andl $56, %r8d +; AVX512-NEXT: movl -120(%rsp,%r8), %r9d +; AVX512-NEXT: addl %r9d, %r9d +; AVX512-NEXT: shlxq %rdx, %r9, %rdx +; AVX512-NEXT: shrl $3, %ecx +; AVX512-NEXT: addq %rsp, %r8 +; AVX512-NEXT: addq $-128, %r8 +; AVX512-NEXT: orl (%rcx,%r8), %edx +; AVX512-NEXT: btrl %eax, %edx +; AVX512-NEXT: andl $60, %esi +; AVX512-NEXT: movl %edx, (%rdi,%rsi) +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: popq %rcx +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %ld = load i512, ptr %word + %tz = tail call range(i512 0, 513) i512 @llvm.cttz.i512(i512 %ld, i1 false) + %tz.cast = trunc nuw nsw i512 %tz to i32 + %tz.mask = and i512 %tz, 511 + %mask = shl nuw i512 1, %tz.mask + %mask.not = xor i512 %mask, -1 + %blsr = and i512 %ld, %mask.not + store i512 %blsr, ptr %word + ret i32 %tz.cast +} diff --git a/llvm/test/ThinLTO/X86/dtlto/dtlto-cache.ll b/llvm/test/ThinLTO/X86/dtlto/dtlto-cache.ll new file mode 100644 index 0000000000000..df98c5e90b1ae --- /dev/null +++ b/llvm/test/ThinLTO/X86/dtlto/dtlto-cache.ll @@ -0,0 +1,74 @@ +; Test DTLTO output with llvm-lto2. + +RUN: rm -rf %t && split-file %s %t && cd %t + +; Generate bitcode files with summary. +RUN: opt -thinlto-bc t1.ll -o t1.bc +RUN: opt -thinlto-bc t2.ll -o t2.bc + +; Generate fake object files for mock.py to return. 
+RUN: touch t1.o t2.o
+
+; Create an empty subdirectory to avoid having to account for the input files.
+RUN: mkdir %t/out && cd %t/out
+
+; Define a substitution to share the common DTLTO arguments with caching enabled.
+DEFINE: %{command} = llvm-lto2 run ../t1.bc ../t2.bc -o t.o -cache-dir cache-dir \
+DEFINE: -dtlto-distributor=%python \
+DEFINE: -dtlto-distributor-arg=%llvm_src_root/utils/dtlto/mock.py,../t1.o,../t2.o \
+DEFINE: -r=../t1.bc,t1,px \
+DEFINE: -r=../t2.bc,t2,px
+
+; Perform out-of-process ThinLTO (DTLTO).
+; Note: mock.py does not do any compilation; instead, it simply writes
+; the contents of the object files supplied on the command line into the
+; output object files in job order.
+RUN: %{command}
+
+; Check that the expected output files have been created.
+RUN: ls | count 3
+; Check that two native object files have been created.
+RUN: ls | FileCheck %s --check-prefix=THINLTO
+; Check that the DTLTO cache directory has been created.
+RUN: ls cache-dir/* | count 2
+; Check that two cache entries have been created.
+RUN: ls cache-dir/llvmcache-* | count 2
+
+; llvm-lto2 ThinLTO output files.
+THINLTO-DAG: {{^}}t.o.1{{$}}
+THINLTO-DAG: {{^}}t.o.2{{$}}
+
+; Execute llvm-lto2 again and check that a fully populated cache is used
+; correctly, i.e., no additional cache entries are created for cache hits.
+RUN: %{command}
+
+; Check that the expected output files have been created.
+RUN: ls | count 3
+; Check that two native object files have been created.
+RUN: ls | FileCheck %s --check-prefix=THINLTO
+; Check that the DTLTO cache directory still contains only the original entries.
+RUN: ls cache-dir/* | count 2
+; Check that no additional cache entries have been created.
+RUN: ls cache-dir/llvmcache-* | count 2
+
+;--- t1.ll
+
+target triple = "x86_64-unknown-linux-gnu"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @t1() {
+  ret void
+}
+
+;--- t2.ll
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @t2() {
+  ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll index 0415b01d78b46..ac8095ae5c3e7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll @@ -14,8 +14,8 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[VAL]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv8i64() -; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP8]], splat (i64 1) -; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] +; CHECK-NEXT: [[TMP3:%.*]] = mul [[TMP8]], splat (i64 1) +; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP3]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP1]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -76,8 +76,8 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[VAL]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv8i64() -; 
CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP8]], splat (i64 1) -; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] +; CHECK-NEXT: [[TMP3:%.*]] = mul [[TMP8]], splat (i64 1) +; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP3]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP1]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index 2f7e3568d5654..cb4bd793013b1 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -1052,7 +1052,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) { ; DEFAULT-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 ; DEFAULT-NEXT: store i32 [[TMP2]], ptr [[DST]], align 4 ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) +; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD]], splat (i64 4) ; DEFAULT-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; DEFAULT-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll index a49f089bd2085..2180f18750bf2 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll @@ -35,7 +35,7 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) { ; CHECK-NEXT: [[TMP10]] = select <16 x i1> [[TMP17]], <16 x i8> [[VEC_IND]], <16 x i8> [[VEC_PHI]] ; CHECK-NEXT: [[TMP11]] = select <16 x i1> [[TMP23]], <16 x i8> [[STEP_ADD]], <16 x i8> [[VEC_PHI2]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i8> [[STEP_ADD]], splat (i8 16) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <16 x i8> [[STEP_ADD]], splat (i8 16) ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -48,7 +48,7 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) { ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: ; CHECK-NEXT: [[IND_END:%.*]] = trunc i32 [[N_VEC]] to i8 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_MOD_VF]], 8 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; CHECK: [[VEC_EPILOG_PH]]: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[TMP3]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] @@ -62,11 +62,11 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i8> [[DOTSPLATINSERT]], <8 x i8> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <8 x i8> poison, i8 
[[BC_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT11:%.*]] = shufflevector <8 x i8> [[DOTSPLATINSERT10]], <8 x i8> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i8> [[DOTSPLAT11]], +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <8 x i8> [[DOTSPLAT11]], ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX6:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT13:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND7:%.*]] = phi <8 x i8> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT8:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND7:%.*]] = phi <8 x i8> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT13:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI9:%.*]] = phi <8 x i8> [ [[DOTSPLAT]], %[[VEC_EPILOG_PH]] ], [ [[TMP20:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[IV:%.*]] = trunc i32 [[INDEX6]] to i8 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[A]], i8 [[IV]] @@ -74,9 +74,9 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) { ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD12]], splat (i8 3) ; CHECK-NEXT: [[TMP20]] = select <8 x i1> [[TMP19]], <8 x i8> [[VEC_IND7]], <8 x i8> [[VEC_PHI9]] ; CHECK-NEXT: [[INDEX_NEXT13]] = add nuw i32 [[INDEX6]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT8]] = add <8 x i8> [[VEC_IND7]], splat (i8 8) +; CHECK-NEXT: [[VEC_IND_NEXT13]] = add nuw nsw <8 x i8> [[VEC_IND7]], splat (i8 8) ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT13]], [[N_VEC5]] -; CHECK-NEXT: br i1 [[TMP21]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP21]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP22:%.*]] = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> [[TMP20]]) ; CHECK-NEXT: [[RDX_SELECT_CMP14:%.*]] = icmp ne i8 [[TMP22]], -128 @@ -96,7 +96,7 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) { ; CHECK-NEXT: [[SEL]] = select i1 [[C]], i8 [[IV1]], i8 [[RDX]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i8 [[IV1]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i8 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: [[SEL_LCSSA:%.*]] = phi i8 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_SELECT15]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i8 [[SEL_LCSSA]] @@ -158,7 +158,7 @@ define i32 @select_icmp_var_start_iv_trunc(i32 %N, i32 %start) #0 { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP3]], <4 x i32> [[TMP4]]) ; CHECK-NEXT: [[RDX_MINMAX5:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[RDX_MINMAX]], <4 x i32> [[TMP5]]) @@ -170,7 +170,7 @@ define i32 @select_icmp_var_start_iv_trunc(i32 %N, i32 
%start) #0 { ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF7:![0-9]+]] ; CHECK: [[VEC_EPILOG_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[FR]], %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] @@ -197,7 +197,7 @@ define i32 @select_icmp_var_start_iv_trunc(i32 %N, i32 %start) #0 { ; CHECK-NEXT: [[INDEX_NEXT17]] = add nuw i64 [[INDEX11]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT16]] = add <4 x i32> [[VEC_IND15]], splat (i32 4) ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT17]], [[N_VEC8]] -; CHECK-NEXT: br i1 [[TMP15]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP15]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP16:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP14]]) ; CHECK-NEXT: [[RDX_SELECT_CMP18:%.*]] = icmp ne i32 [[TMP16]], -2147483648 @@ -216,7 +216,7 @@ define i32 @select_icmp_var_start_iv_trunc(i32 %N, i32 %start) #0 { ; CHECK-NEXT: [[RED_NEXT]] = select i1 [[C]], i32 [[IV_TRUNC]], i32 [[RED]] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N_EXT]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i32 [ [[RED_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_SELECT19]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[RED_NEXT_LCSSA]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll index d23e3c29b59e5..3010a9d75d039 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll @@ -46,7 +46,7 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) { ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[IND_END4:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000 -; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000 @@ -128,7 +128,7 @@ define void @test_widen_induction(ptr %A, i64 %N) { ; CHECK-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP1]], align 4 ; CHECK-NEXT: store <2 x i64> [[STEP_ADD]], ptr [[TMP3]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 
[[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[STEP_ADD]], splat (i64 2) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: middle.block: @@ -136,14 +136,14 @@ define void @test_widen_induction(ptr %A, i64 %N) { ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 2 ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[VEC_EPILOG_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[DOTSPLAT]], +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i64> [[DOTSPLAT]], ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] @@ -151,7 +151,7 @@ define void @test_widen_induction(ptr %A, i64 %N) { ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX5]] ; CHECK-NEXT: store <2 x i64> [[VEC_IND6]], ptr [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[INDEX5]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT7]] = add <2 x i64> [[VEC_IND6]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT7]] = add nuw nsw <2 x i64> [[VEC_IND6]], splat (i64 2) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP8]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: vec.epilog.middle.block: @@ -200,7 +200,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) { ; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[START]], [[N_VEC]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[START]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[DOTSPLAT]], +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i64> [[DOTSPLAT]], ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -212,7 +212,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) { ; CHECK-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP2]], align 4 ; CHECK-NEXT: store <2 x i64> [[STEP_ADD]], ptr [[TMP4]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[STEP_ADD]], splat (i64 2) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 
[[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: middle.block: @@ -221,7 +221,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) { ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[IND_END5:%.*]] = add i64 [[START]], [[N_VEC]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] @@ -230,7 +230,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) { ; CHECK-NEXT: [[IND_END4:%.*]] = add i64 [[START]], [[N_VEC3]] ; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <2 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT8]], <2 x i64> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION10:%.*]] = add <2 x i64> [[DOTSPLAT9]], +; CHECK-NEXT: [[INDUCTION10:%.*]] = add nuw nsw <2 x i64> [[DOTSPLAT9]], ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] @@ -239,7 +239,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) { ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX13]] ; CHECK-NEXT: store <2 x i64> [[VEC_IND11]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX7]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT12]] = add <2 x i64> [[VEC_IND11]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT12]] = add nuw nsw <2 x i64> [[VEC_IND11]], splat (i64 2) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: vec.epilog.middle.block: @@ -296,7 +296,7 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) { ; CHECK-NEXT: store <2 x i64> [[TMP2]], ptr [[TMP1]], align 4 ; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[STEP_ADD]], splat (i64 2) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[IND_END4]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: middle.block: @@ -304,14 +304,14 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) { ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] ; 
CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 2 ; CHECK-NEXT: [[IND_END:%.*]] = sub i64 [[N]], [[N_MOD_VF2]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[VEC_EPILOG_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[DOTSPLAT]], +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i64> [[DOTSPLAT]], ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] @@ -320,7 +320,7 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) { ; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i64> [[VEC_IND8]], splat (i64 10) ; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[INDEX7]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT9]] = add <2 x i64> [[VEC_IND8]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT9]] = add nuw nsw <2 x i64> [[VEC_IND8]], splat (i64 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT10]], [[IND_END]] ; CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: vec.epilog.middle.block: @@ -410,19 +410,19 @@ define void @test_widen_truncated_induction(ptr %A) { ; CHECK-NEXT: store <2 x i8> [[VEC_IND]], ptr [[TMP1]], align 1 ; CHECK-NEXT: store <2 x i8> [[STEP_ADD]], ptr [[TMP3]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i8> [[STEP_ADD]], splat (i8 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i8> [[STEP_ADD]], splat (i8 2) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[VEC_EPILOG_RESUME_VAL]] to i8 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i8> poison, i8 [[TMP5]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT]], <2 x i8> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i8> [[DOTSPLAT]], +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i8> [[DOTSPLAT]], ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT5:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] @@ -430,7 +430,7 @@ define void @test_widen_truncated_induction(ptr %A) { ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX2]] ; CHECK-NEXT: store <2 x i8> [[VEC_IND3]], ptr [[TMP7]], align 1 ; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i64 [[INDEX2]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT4]] = 
add <2 x i8> [[VEC_IND3]], splat (i8 2) +; CHECK-NEXT: [[VEC_IND_NEXT4]] = add nuw nsw <2 x i8> [[VEC_IND3]], splat (i8 2) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT5]], 10000 ; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: vec.epilog.middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll index f50d0834c5dc8..75f256085a17a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll @@ -198,7 +198,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT: [[VECTOR_BODY]]: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE35:.*]] ] ; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <16 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE35]] ] -; DEFAULT-NEXT: [[VEC_IND1:%.*]] = phi <16 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], %[[PRED_STORE_CONTINUE35]] ] +; DEFAULT-NEXT: [[VEC_IND1:%.*]] = phi <16 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT36:%.*]], %[[PRED_STORE_CONTINUE35]] ] ; DEFAULT-NEXT: [[TMP0:%.*]] = icmp ule <16 x i8> [[VEC_IND]], splat (i8 14) ; DEFAULT-NEXT: [[TMP1:%.*]] = mul <16 x i8> [[BROADCAST_SPLAT]], [[VEC_IND1]] ; DEFAULT-NEXT: [[TMP2:%.*]] = lshr <16 x i8> [[VEC_IND1]], splat (i8 1) @@ -353,7 +353,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT: [[PRED_STORE_CONTINUE35]]: ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <16 x i8> [[VEC_IND]], splat (i8 16) -; DEFAULT-NEXT: [[VEC_IND_NEXT2]] = add <16 x i8> [[VEC_IND1]], splat (i8 16) +; DEFAULT-NEXT: [[VEC_IND_NEXT36]] = add nuw nsw <16 x i8> [[VEC_IND1]], splat (i8 16) ; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] @@ -454,8 +454,8 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i8 [[C]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; DEFAULT-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv16i8() -; DEFAULT-NEXT: [[TMP11:%.*]] = mul [[TMP10]], splat (i8 1) -; DEFAULT-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP11]] +; DEFAULT-NEXT: [[TMP11:%.*]] = mul nuw nsw [[TMP10]], splat (i8 1) +; DEFAULT-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP11]] ; DEFAULT-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP1]] to i8 ; DEFAULT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP12]], i64 0 ; DEFAULT-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer @@ -477,7 +477,7 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; DEFAULT-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; DEFAULT-NEXT: [[TMP24:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; DEFAULT-NEXT: [[TMP23:%.*]] = xor i1 [[TMP24]], true -; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; DEFAULT-NEXT: br i1 [[TMP23]], label 
%[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] @@ -504,8 +504,8 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; OPTSIZE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i8 [[C]], i64 0 ; OPTSIZE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; OPTSIZE-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv16i8() -; OPTSIZE-NEXT: [[TMP11:%.*]] = mul [[TMP10]], splat (i8 1) -; OPTSIZE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP11]] +; OPTSIZE-NEXT: [[TMP11:%.*]] = mul nuw nsw [[TMP10]], splat (i8 1) +; OPTSIZE-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP11]] ; OPTSIZE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP1]] to i8 ; OPTSIZE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP12]], i64 0 ; OPTSIZE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer @@ -527,7 +527,7 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; OPTSIZE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; OPTSIZE-NEXT: [[TMP24:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; OPTSIZE-NEXT: [[TMP23:%.*]] = xor i1 [[TMP24]], true -; OPTSIZE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; OPTSIZE-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; OPTSIZE-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; OPTSIZE: [[MIDDLE_BLOCK]]: ; OPTSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] @@ -554,8 +554,8 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; MINSIZE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i8 [[C]], i64 0 ; MINSIZE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; MINSIZE-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv16i8() -; MINSIZE-NEXT: [[TMP11:%.*]] = mul [[TMP10]], splat (i8 1) -; MINSIZE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP11]] +; MINSIZE-NEXT: [[TMP11:%.*]] = mul nuw nsw [[TMP10]], splat (i8 1) +; MINSIZE-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP11]] ; MINSIZE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP1]] to i8 ; MINSIZE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP12]], i64 0 ; MINSIZE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer @@ -577,7 +577,7 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; MINSIZE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; MINSIZE-NEXT: [[TMP24:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; MINSIZE-NEXT: [[TMP23:%.*]] = xor i1 [[TMP24]], true -; MINSIZE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; MINSIZE-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; MINSIZE-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; MINSIZE: [[MIDDLE_BLOCK]]: ; MINSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll index 5b61fba4ae994..5f8d7e7d24cc4 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll +++ 
b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll @@ -19,8 +19,8 @@ define void @foo() { ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP5:%.*]] = mul [[TMP4]], splat (i64 1) -; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP5]] +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw nsw [[TMP4]], splat (i64 1) +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP5]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -44,7 +44,7 @@ define void @foo() { ; CHECK-NEXT: [[TMP14:%.*]] = phi [ [[TMP10]], [[INNER_LOOP1]] ] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0( [[TMP14]], align 4 [[TMP6]], splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll index 6d0777e42ab0e..2edbec4681ab0 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll @@ -51,7 +51,7 @@ define void @foo_i32(i32 %n) { ; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_LATCH]], label %[[FOR_BODY31]] ; CHECK: [[VECTOR_LATCH]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 8 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -113,7 +113,7 @@ define void @foo_i64(i64 %n) { ; CHECK-NEXT: br i1 [[TMP5]], label %[[VECTOR_LATCH]], label %[[FOR_BODY31]] ; CHECK: [[VECTOR_LATCH]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 8 ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll index 1dcd665817196..f67a3d9be408a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll @@ -320,7 +320,7 @@ define void @srem_sdiv_without_tail_folding(i32 %d.0, i32 %d.1, ptr %dst, i32 %e ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE12]] ; CHECK: [[PRED_STORE_CONTINUE12]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = 
add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP42]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll index 2521ece2eea06..e338b828d2520 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll @@ -27,8 +27,8 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0 ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv2i32() ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[IDX]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = mul [[TMP8]], splat (i32 1) -; CHECK-NEXT: [[INDUCTION:%.*]] = add [[DOTSPLAT]], [[TMP10]] +; CHECK-NEXT: [[TMP9:%.*]] = mul nsw [[TMP8]], splat (i32 1) +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw [[DOTSPLAT]], [[TMP9]] ; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 ; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector [[DOTSPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -42,7 +42,7 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i64, ptr [[B:%.*]], i32 [[OFFSET_IDX]] ; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP18]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT2]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[DOTSPLAT2]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/streaming-compatible-sve-no-maximize-bandwidth.ll b/llvm/test/Transforms/LoopVectorize/AArch64/streaming-compatible-sve-no-maximize-bandwidth.ll index 1213d974e75ef..f8be8d5b62031 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/streaming-compatible-sve-no-maximize-bandwidth.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/streaming-compatible-sve-no-maximize-bandwidth.ll @@ -43,7 +43,7 @@ define i32 @foo(i32 noundef %n, i32 noundef %lag, i32 noundef %shift) vscale_ran ; SC_SVE-NEXT: [[TMP16:%.*]] = shl <4 x i32> [[TMP15]], [[BROADCAST_SPLAT]] ; SC_SVE-NEXT: [[TMP17]] = add <4 x i32> [[TMP16]], [[VEC_PHI]] ; SC_SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; SC_SVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; SC_SVE-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; SC_SVE-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SC_SVE-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SC_SVE: middle.block: @@ -114,7 +114,7 @@ define i32 @foo(i32 noundef %n, i32 noundef %lag, i32 noundef %shift) vscale_ran ; NO_SC_SVE-NEXT: [[TMP16:%.*]] = shl <8 x i32> [[TMP15]], [[BROADCAST_SPLAT]] ; NO_SC_SVE-NEXT: [[TMP17]] = add <8 x i32> [[TMP16]], [[VEC_PHI]] ; NO_SC_SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; 
NO_SC_SVE-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) +; NO_SC_SVE-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <8 x i32> [[VEC_IND]], splat (i32 8) ; NO_SC_SVE-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO_SC_SVE-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; NO_SC_SVE: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll index 3c0455938be80..337c097c85712 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll @@ -36,7 +36,7 @@ define void @cond_ind64(ptr noalias nocapture %a, ptr noalias nocapture readonly ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[WIDE_MASKED_LOAD]], ptr align 4 [[TMP11]], [[TMP9]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll index 28d2a278de498..e90f8d09fc7ab 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll @@ -106,7 +106,7 @@ define void @test_array_load2_i16_store2(i32 %C, i32 %D) #1 { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[D:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP3:%.*]] = shl [[TMP2]], splat (i64 1) +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw [[TMP2]], splat (i64 1) ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP0]], 3 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer @@ -128,7 +128,7 @@ define void @test_array_load2_i16_store2(i32 %C, i32 %D) #1 { ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv8i32( [[TMP10]], [[TMP12]]) ; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP14]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -190,7 +190,7 @@ define void @test_array_load2_store2_i16(i32 noundef %C, i32 noundef %D) #1 { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[D:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP3:%.*]] = shl [[TMP2]], splat (i64 1) +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw [[TMP2]], splat (i64 1) ; 
CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP0]], 3 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer @@ -214,7 +214,7 @@ define void @test_array_load2_store2_i16(i32 noundef %C, i32 noundef %D) #1 { ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i16, ptr @CD_i16, [[TMP9]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0( [[TMP14]], align 2 [[TMP15]], splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -291,7 +291,7 @@ define i32 @test_struct_load6(ptr %S) #1 { ; CHECK-NEXT: [[TMP15:%.*]] = add [[TMP14]], [[WIDE_MASKED_GATHER5]] ; CHECK-NEXT: [[TMP16]] = sub [[TMP12]], [[TMP15]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -391,7 +391,7 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv8i32( [[REVERSE2]], [[REVERSE3]]) ; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP19]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: @@ -595,7 +595,7 @@ define void @load_gap_reverse(ptr noalias nocapture readonly %P1, ptr noalias no ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0( [[TMP4]], align 8 [[TMP5]], splat (i1 true)) ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0( [[TMP7]], align 8 [[TMP6]], splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: @@ -821,7 +821,7 @@ define void @PR27626_0(ptr %p, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP15]], align 4 [[TMP13]], splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: @@ -894,7 +894,7 @@ define i32 
@PR27626_1(ptr %p, i64 %n) #1 { ; CHECK-NEXT: [[TMP16:%.*]] = extractvalue { , } [[STRIDED_VEC2]], 0 ; CHECK-NEXT: [[TMP17]] = add [[TMP16]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: middle.block: @@ -968,7 +968,7 @@ define void @PR27626_2(ptr %p, i64 %n, i32 %z) #1 { ; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP15]], align 4 [[TMP14]], splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: middle.block: @@ -1044,7 +1044,7 @@ define i32 @PR27626_3(ptr %p, i64 %n, i32 %z) #1 { ; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { , } [[STRIDED_VEC2]], 0 ; CHECK-NEXT: [[TMP18]] = add [[TMP17]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; CHECK: middle.block: @@ -1111,7 +1111,7 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[Z:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP9:%.*]] = shl [[TMP10]], splat (i64 1) +; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw [[TMP10]], splat (i64 1) ; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP6]], 3 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer @@ -1125,7 +1125,7 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv8i32( [[BROADCAST_SPLAT2]], [[BROADCAST_SPLAT4]]) ; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[P]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; CHECK: middle.block: @@ -1188,8 +1188,8 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[Z:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv4i64() -; CHECK-NEXT: 
[[TMP21:%.*]] = shl [[TMP10]], splat (i64 1) -; CHECK-NEXT: [[INDUCTION:%.*]] = add [[TMP21]], splat (i64 3) +; CHECK-NEXT: [[TMP19:%.*]] = shl nuw nsw [[TMP10]], splat (i64 1) +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw [[TMP19]], splat (i64 3) ; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[TMP7]], 3 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer @@ -1206,7 +1206,7 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[BROADCAST_SPLAT2]], align 4 [[TMP17]], splat (i1 true)) ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[BROADCAST_SPLAT4]], align 4 [[TMP15]], splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; CHECK: middle.block: @@ -1273,7 +1273,7 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 { ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP1]], [[DOTNOT]] ; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1 ; CHECK-NEXT: [[TMP14:%.*]] = call @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP15:%.*]] = shl [[TMP14]], splat (i64 1) +; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw [[TMP14]], splat (i64 1) ; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP9]], 3 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP17]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer @@ -1301,7 +1301,7 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 { ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: store [[TMP27]], ptr [[TMP28]], align 4, !alias.scope [[META37:![0-9]+]], !noalias [[META34]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]] ; CHECK: middle.block: @@ -1394,7 +1394,7 @@ define void @interleave_deinterleave_factor3(ptr writeonly noalias %dst, ptr rea ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw i8, [[TMP10]], i64 8 ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP17]], align 4 [[TMP25]], splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]] ; CHECK: middle.block: @@ -1590,7 +1590,7 @@ define void @interleave_deinterleave_reverse(ptr noalias nocapture readonly %A, ; CHECK-NEXT: [[INTERLEAVED_VEC11:%.*]] = call @llvm.vector.interleave4.nxv16i32( [[REVERSE6]], [[REVERSE7]], [[REVERSE8]], [[REVERSE9]]) ; CHECK-NEXT: store [[INTERLEAVED_VEC11]], ptr [[TMP26]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] -; 
CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll
index 02cc499f18827..82dff2f8aa2f7 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll
@@ -60,7 +60,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no
; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK3:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP4]], [[TMP4]])
; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv32i8.p0( [[INTERLEAVED_VEC]], ptr align 1 [[TMP12]], [[INTERLEAVED_MASK3]])
; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP2]]
-; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]]
+; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT2]]
; SCALAR_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; SCALAR_TAIL_FOLDING: middle.block:
@@ -198,7 +198,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no
; SCALAR_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP9]]
; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( splat (i8 2), align 1 [[TMP10]], [[TMP7]])
; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP2]]
-; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]]
+; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT2]]
; SCALAR_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; SCALAR_TAIL_FOLDING: middle.block:
@@ -328,7 +328,7 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no
; SCALAR_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP10]]
; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( splat (i8 2), align 1 [[TMP11]], [[TMP8]])
; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP2]]
-; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT4]]
+; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT4]]
; SCALAR_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; SCALAR_TAIL_FOLDING: middle.block:
@@ -483,7 +483,7 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p,
; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK3:%.*]] = call @llvm.vector.interleave4.nxv64i1( [[TMP4]], [[TMP4]], [[TMP4]], [[TMP4]])
; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv64i8.p0( [[INTERLEAVED_VEC]], ptr align 1 [[TMP17]], [[INTERLEAVED_MASK3]])
; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP2]]
-; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]]
+; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT2]]
; SCALAR_TAIL_FOLDING-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; SCALAR_TAIL_FOLDING: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
index 945d808d3fa3f..8cc9e431e6214 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
@@ -164,9 +164,9 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 {
; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i64 [[TMP10]], i64 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[TMP2]])
; CHECK-NEXT: [[TMP13:%.*]] = call @llvm.stepvector.nxv4i64()
-; CHECK-NEXT: [[TMP15:%.*]] = mul [[TMP13]], splat (i64 4)
-; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP15]]
-; CHECK-NEXT: [[TMP18:%.*]] = mul i64 4, [[TMP4]]
+; CHECK-NEXT: [[TMP14:%.*]] = mul nsw [[TMP13]], splat (i64 4)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw zeroinitializer, [[TMP14]]
+; CHECK-NEXT: [[TMP18:%.*]] = mul nsw i64 4, [[TMP4]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP18]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -182,7 +182,7 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 {
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP12]])
; CHECK-NEXT: [[TMP21:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0
; CHECK-NEXT: [[TMP22:%.*]] = xor i1 [[TMP21]], true
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-too-many-deps.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-too-many-deps.ll
index baf050c7facee..0617d2937f824 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-too-many-deps.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-too-many-deps.ll
@@ -90,7 +90,7 @@ define void @many_deps(ptr noalias %buckets, ptr %array, ptr %indices, ptr %othe
; NORMAL_DEP_LIMIT-NEXT: [[TMP15:%.*]] = add [[WIDE_LOAD10]], [[VEC_IND]]
; NORMAL_DEP_LIMIT-NEXT: store [[TMP15]], ptr [[TMP14]], align 4, !alias.scope [[META7]], !noalias [[META0]]
; NORMAL_DEP_LIMIT-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], [[TMP8]]
-; NORMAL_DEP_LIMIT-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]]
+; NORMAL_DEP_LIMIT-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]]
; NORMAL_DEP_LIMIT-NEXT: [[TMP16:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]]
; NORMAL_DEP_LIMIT-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; NORMAL_DEP_LIMIT: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll
index 871d9be609bd7..ca4faf4a0a1c9 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll
@@ -625,7 +625,7 @@ define void @simple_histogram_rtdepcheck(ptr noalias %buckets, ptr %array, ptr %
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[INDEX]]
; CHECK-NEXT: store [[VEC_IND]], ptr [[TMP15]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll
index 0b0e2d4154cb6..9f62c7dcda65a 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll
@@ -351,8 +351,8 @@ define void @test_stride_noninvar_4i32(ptr readonly %data, ptr noalias nocapture
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[INDEX]]
; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[TMP6]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
-; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], splat (i32 32)
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT: [[VEC_IND_NEXT3]] = add nuw nsw <4 x i32> [[VEC_IND2]], splat (i32 32)
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: middle.block:
@@ -452,9 +452,9 @@ define void @test_stride_noninvar3_4i32(ptr readonly %data, ptr noalias nocaptur
; CHECK-NEXT: [[IND_END:%.*]] = add i32 3, [[TMP0]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> , [[DOTSPLAT]]
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> splat (i32 3), [[TMP1]]
-; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[X]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw <4 x i32> , [[DOTSPLAT]]
+; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <4 x i32> splat (i32 3), [[TMP3]]
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[X]], 4
; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0
; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -470,8 +470,8 @@ define void @test_stride_noninvar3_4i32(ptr readonly %data, ptr noalias nocaptur
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[INDEX]]
; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP8]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
-; CHECK-NEXT: [[VEC_IND_NEXT5]] = add <4 x i32> [[VEC_IND4]], [[DOTSPLAT3]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT: [[VEC_IND_NEXT5]] = add nuw nsw <4 x i32> [[VEC_IND4]], [[DOTSPLAT3]]
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll
index 4af40b711726d..9ea95658818fe 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll
@@ -508,7 +508,7 @@ define dso_local void @select_not_allowed(ptr noalias nocapture %A, ptr noalias
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
; CHECK-NEXT: store <4 x i32> [[WIDE_MASKED_GATHER]], ptr [[TMP6]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll
index 2087218bf3ea3..adf443b74acf1 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll
@@ -14,21 +14,21 @@ define void @block_with_dead_inst_1(ptr %src, i64 %N) #0 {
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP11:%.*]] = call @llvm.stepvector.nxv8i64()
-; CHECK-NEXT: [[TMP13:%.*]] = mul [[TMP11]], splat (i64 3)
-; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP13]]
+; CHECK-NEXT: [[TMP4:%.*]] = mul nsw [[TMP11]], splat (i64 3)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw zeroinitializer, [[TMP4]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP2]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
; CHECK-NEXT: [[TMP17:%.*]] = zext i32 [[TMP12]] to i64
-; CHECK-NEXT: [[TMP16:%.*]] = mul i64 3, [[TMP17]]
+; CHECK-NEXT: [[TMP16:%.*]] = mul nsw i64 3, [[TMP17]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP16]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]]
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( zeroinitializer, align 2 [[TMP20]], splat (i1 true), i32 [[TMP12]])
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP17]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -69,21 +69,21 @@ define void @block_with_dead_inst_2(ptr %src) #0 {
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv8i64()
-; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP5]], splat (i64 3)
-; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]]
+; CHECK-NEXT: [[TMP1:%.*]] = mul nsw [[TMP5]], splat (i64 3)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw zeroinitializer, [[TMP1]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 333, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP9]] to i64
-; CHECK-NEXT: [[TMP13:%.*]] = mul i64 3, [[TMP7]]
+; CHECK-NEXT: [[TMP13:%.*]] = mul nsw i64 3, [[TMP7]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP13]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]]
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( zeroinitializer, align 2 [[TMP10]], splat (i1 true), i32 [[TMP9]])
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -124,21 +124,21 @@ define void @multiple_blocks_with_dead_insts_3(ptr %src) #0 {
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv8i64()
-; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP5]], splat (i64 3)
-; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]]
+; CHECK-NEXT: [[TMP1:%.*]] = mul nsw [[TMP5]], splat (i64 3)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw zeroinitializer, [[TMP1]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 333, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP9]] to i64
-; CHECK-NEXT: [[TMP13:%.*]] = mul i64 3, [[TMP7]]
+; CHECK-NEXT: [[TMP13:%.*]] = mul nsw i64 3, [[TMP7]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP13]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]]
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( zeroinitializer, align 2 [[TMP10]], splat (i1 true), i32 [[TMP9]])
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -186,21 +186,21 @@ define void @multiple_blocks_with_dead_insts_4(ptr %src, i64 %N) #0 {
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP11:%.*]] = call @llvm.stepvector.nxv8i64()
-; CHECK-NEXT: [[TMP13:%.*]] = mul [[TMP11]], splat (i64 3)
-; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP13]]
+; CHECK-NEXT: [[TMP4:%.*]] = mul nsw [[TMP11]], splat (i64 3)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw zeroinitializer, [[TMP4]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP2]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
; CHECK-NEXT: [[TMP17:%.*]] = zext i32 [[TMP12]] to i64
-; CHECK-NEXT: [[TMP16:%.*]] = mul i64 3, [[TMP17]]
+; CHECK-NEXT: [[TMP16:%.*]] = mul nsw i64 3, [[TMP17]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP16]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]]
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( zeroinitializer, align 2 [[TMP20]], splat (i1 true), i32 [[TMP12]])
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP17]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -248,21 +248,21 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_5(ptr %src) #0 {
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv8i64()
-; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP5]], splat (i64 3)
-; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]]
+; CHECK-NEXT: [[TMP1:%.*]] = mul nsw [[TMP5]], splat (i64 3)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw zeroinitializer, [[TMP1]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 333, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP9]] to i64
-; CHECK-NEXT: [[TMP13:%.*]] = mul i64 3, [[TMP7]]
+; CHECK-NEXT: [[TMP13:%.*]] = mul nsw i64 3, [[TMP7]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP13]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]]
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( zeroinitializer, align 2 [[TMP10]], splat (i1 true), i32 [[TMP9]])
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -319,15 +319,15 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 %
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = xor [[BROADCAST_SPLAT]], splat (i1 true)
; CHECK-NEXT: [[TMP11:%.*]] = call @llvm.stepvector.nxv8i64()
-; CHECK-NEXT: [[TMP13:%.*]] = mul [[TMP11]], splat (i64 3)
-; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP13]]
+; CHECK-NEXT: [[TMP5:%.*]] = mul nsw [[TMP11]], splat (i64 3)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw zeroinitializer, [[TMP5]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP2]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP27]] to i64
-; CHECK-NEXT: [[TMP16:%.*]] = mul i64 3, [[TMP12]]
+; CHECK-NEXT: [[TMP16:%.*]] = mul nsw i64 3, [[TMP12]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP16]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]]
@@ -340,7 +340,7 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 %
; CHECK-NEXT: [[TMP24:%.*]] = or [[TMP22]], [[TMP23]]
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( zeroinitializer, align 2 [[TMP20]], [[TMP24]], i32 [[TMP27]])
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP12]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
index b81637f50989d..9f6f79d9030ed 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
@@ -354,8 +354,8 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i64()
-; CHECK-NEXT: [[TMP11:%.*]] = mul [[TMP9]], splat (i64 2)
-; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP11]]
+; CHECK-NEXT: [[TMP4:%.*]] = mul nsw [[TMP9]], splat (i64 2)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw zeroinitializer, [[TMP4]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -363,7 +363,7 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP2]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP10]] to i64
-; CHECK-NEXT: [[TMP12:%.*]] = mul i64 2, [[TMP16]]
+; CHECK-NEXT: [[TMP12:%.*]] = mul nsw i64 2, [[TMP16]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[EVL_BASED_IV]], 2
@@ -378,7 +378,7 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s
; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0( [[TMP18]], align 4 [[TMP19]], splat (i1 true), i32 [[TMP10]])
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll b/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll
index 21272cb72f4d6..69d83db49fd18 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll
@@ -11,8 +11,8 @@ define void @test_wide_integer_induction(ptr noalias %a, i64 %N) {
; CHECK-NEXT: br label [[ENTRY:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv2i64()
-; CHECK-NEXT: [[TMP10:%.*]] = mul [[TMP9]], splat (i64 1)
-; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP10]]
+; CHECK-NEXT: [[TMP1:%.*]] = mul [[TMP9]], splat (i64 1)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP1]]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_EVL_NEXT:%.*]], [[FOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll
index 5c78cfd6daded..b62d6da2f2e2d 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll
@@ -44,7 +44,7 @@ define void @masked_strided_factor2(ptr noalias nocapture readonly %p, ptr noali
; SCALAR_EPILOGUE-NEXT: [[INTERLEAVED_MASK3:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP4]], [[TMP4]])
; SCALAR_EPILOGUE-NEXT: call void @llvm.masked.store.nxv32i8.p0( [[INTERLEAVED_VEC]], ptr align 1 [[TMP12]], [[INTERLEAVED_MASK3]])
; SCALAR_EPILOGUE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP2]]
-; SCALAR_EPILOGUE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]]
+; SCALAR_EPILOGUE-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT2]]
; SCALAR_EPILOGUE-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; SCALAR_EPILOGUE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; SCALAR_EPILOGUE: middle.block:
@@ -222,7 +222,7 @@ define void @masked_strided_factor4(ptr noalias nocapture readonly %p, ptr noali
; SCALAR_EPILOGUE-NEXT: [[INTERLEAVED_MASK3:%.*]] = call @llvm.vector.interleave4.nxv64i1( [[TMP4]], [[TMP4]], [[TMP4]], [[TMP4]])
; SCALAR_EPILOGUE-NEXT: call void @llvm.masked.store.nxv64i8.p0( [[INTERLEAVED_VEC]], ptr align 1 [[TMP17]], [[INTERLEAVED_MASK3]])
; SCALAR_EPILOGUE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP2]]
-; SCALAR_EPILOGUE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]]
+; SCALAR_EPILOGUE-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT2]]
; SCALAR_EPILOGUE-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; SCALAR_EPILOGUE-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; SCALAR_EPILOGUE: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-store-with-gap.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-store-with-gap.ll
index f36919f98dd00..b436a46842eab 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-store-with-gap.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-store-with-gap.ll
@@ -22,7 +22,7 @@ define void @store_factor_2_with_tail_gap(i64 %n, ptr %a) {
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i64> [[TMP2]], <32 x i64> poison, <32 x i32>
; CHECK-NEXT: call void @llvm.masked.store.v32i64.p0(<32 x i64> [[INTERLEAVED_VEC]], ptr align 8 [[TMP1]], <32 x i1> )
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16)
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <16 x i64> [[VEC_IND]], splat (i64 16)
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll
index 06b47aa6551a0..77ee3c08329d5 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll
@@ -16,8 +16,8 @@ define void @test(ptr noalias nocapture %a, ptr noalias nocapture %b, i32 %v) {
; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[V:%.*]], i64 0
; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
; VLENUNK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64()
-; VLENUNK-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 1)
-; VLENUNK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]]
+; VLENUNK-NEXT: [[TMP1:%.*]] = mul [[TMP6]], splat (i64 1)
+; VLENUNK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP1]]
; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]]
; VLENUNK: vector.body:
; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll
index 1cbec47d72203..f49f9284a1e93 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll
@@ -46,15 +46,15 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
; RV32-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; RV32: vector.ph:
; RV32-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv2i64()
-; RV32-NEXT: [[TMP9:%.*]] = mul [[TMP7]], splat (i64 16)
-; RV32-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP9]]
+; RV32-NEXT: [[TMP9:%.*]] = mul nuw nsw [[TMP7]], splat (i64 16)
+; RV32-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP9]]
; RV32-NEXT: br label [[VECTOR_BODY:%.*]]
; RV32: vector.body:
; RV32-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; RV32-NEXT: [[AVL:%.*]] = phi i64 [ 625, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; RV32-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; RV32-NEXT: [[TMP8:%.*]] = zext i32 [[TMP10]] to i64
-; RV32-NEXT: [[TMP11:%.*]] = mul i64 16, [[TMP8]]
+; RV32-NEXT: [[TMP11:%.*]] = mul nuw nsw i64 16, [[TMP8]]
; RV32-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0
; RV32-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
; RV32-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], [[VEC_IND]]
@@ -68,7 +68,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
; RV32-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]]
; RV32-NEXT: call void @llvm.vp.scatter.nxv2f64.nxv2p0( [[TMP18]], align 8 [[TMP19]], [[TMP14]], i32 [[TMP10]]), !alias.scope [[META3]], !noalias [[META5]]
; RV32-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]]
-; RV32-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]]
+; RV32-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]]
; RV32-NEXT: [[TMP24:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; RV32-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; RV32: middle.block:
@@ -115,15 +115,15 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
; RV64-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; RV64: vector.ph:
; RV64-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv2i64()
-; RV64-NEXT: [[TMP9:%.*]] = mul [[TMP7]], splat (i64 16)
-; RV64-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP9]]
+; RV64-NEXT: [[TMP1:%.*]] = mul nuw nsw [[TMP7]], splat (i64 16)
+; RV64-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP1]]
; RV64-NEXT: br label [[VECTOR_BODY:%.*]]
; RV64: vector.body:
; RV64-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; RV64-NEXT: [[AVL:%.*]] = phi i64 [ 625, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; RV64-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; RV64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP10]] to i64
-; RV64-NEXT: [[TMP11:%.*]] = mul i64 16, [[TMP8]]
+; RV64-NEXT: [[TMP11:%.*]] = mul nuw nsw i64 16, [[TMP8]]
; RV64-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0
; RV64-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
; RV64-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], [[VEC_IND]]
@@ -137,7 +137,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
; RV64-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]]
; RV64-NEXT: call void @llvm.vp.scatter.nxv2f64.nxv2p0( [[TMP18]], align 8 [[TMP19]], [[TMP14]], i32 [[TMP10]]), !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]]
; RV64-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]]
-; RV64-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]]
+; RV64-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]]
; RV64-NEXT: [[TMP24:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; RV64-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; RV64: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll
new file mode 100644
index 0000000000000..fa710cb8d65b1
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
+; RUN: opt -p loop-vectorize -S %s | FileCheck %s
+
+target triple = "riscv64-unknown-linux-gnu"
+
+define void @ptr_induction(ptr %p, ptr noalias %q, ptr noalias %p.end) #0 {
+; CHECK-LABEL: define void @ptr_induction(
+; CHECK-SAME: ptr [[P:%.*]], ptr noalias [[Q:%.*]], ptr noalias [[P_END:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[P2:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT: [[P_END1:%.*]] = ptrtoint ptr [[P_END]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[P_END1]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[P2]]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[Q]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, ptr [[P]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv2i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul [[TMP2]], splat (i64 1)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP3]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], %[[VECTOR_PH]] ], [ [[PTR_IND7:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP1]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement poison, i64 [[TMP5]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector [[BROADCAST_SPLATINSERT5]], poison, zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint [[VECTOR_GEP]] to
+; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i64.nxv2p0( [[TMP6]], align 8 [[BROADCAST_SPLAT]], splat (i1 true), i32 [[TMP4]])
+; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i64.nxv2p0( [[VEC_IND]], align 8 [[BROADCAST_SPLAT4]], splat (i1 true), i32 [[TMP4]])
+; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP5]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT6]]
+; CHECK-NEXT: [[PTR_IND7]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
+; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %ptr.ind = phi ptr [ %p, %entry ], [ %ptr.ind.next, %loop ]
+  %ptri64 = ptrtoint ptr %ptr.ind to i64
+  store i64 %ptri64, ptr %q
+  store i64 %iv, ptr %p
+  %iv.next = add i64 %iv, 1
+  %ptr.ind.next = getelementptr i8, ptr %ptr.ind, i64 1
+  %ec = icmp eq ptr %ptr.ind, %p.end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+attributes #0 = { "target-features"="+v" }
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll b/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll
index 8e562a97d51cf..02c363bb54457 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll
@@ -205,7 +205,7 @@ define void @safe_load_store_distance_not_pow_of_2(i64 %N) {
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr [10 x [12 x i16]], ptr @a, i64 0, i64 8, <8 x i64> [[VEC_IND]]
; CHECK-NEXT: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> zeroinitializer, <8 x ptr> align 2 [[TMP7]], <8 x i1> splat (i1 true))
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 24)
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <8 x i64> [[VEC_IND]], splat (i64 24)
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
index 723b5e9cc280d..414e5d9295554 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
@@ -115,15 +115,15 @@ define void @single_constant_stride_int_iv(ptr %p) {
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64()
-; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 64)
-; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]]
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw [[TMP6]], splat (i64 64)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP1]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP7]] to i64
-; CHECK-NEXT: [[TMP9:%.*]] = mul i64 64, [[TMP11]]
+; CHECK-NEXT: [[TMP9:%.*]] = mul nuw nsw i64 64, [[TMP11]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[P:%.*]], [[VEC_IND]]
@@ -131,7 +131,7 @@ define void @single_constant_stride_int_iv(ptr %p) {
; CHECK-NEXT: [[TMP13:%.*]] = add [[WIDE_MASKED_GATHER]], splat (i32 1)
; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0( [[TMP13]], align 4 [[TMP12]], splat (i1 true), i32 [[TMP7]])
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: middle.block:
@@ -156,8 +156,8 @@ define void @single_constant_stride_int_iv(ptr %p) {
; CHECK-UF2-NEXT: [[TMP5:%.*]] = mul i64 [[N_VEC]], 64
; CHECK-UF2-NEXT: [[TMP6:%.*]] = mul [[BROADCAST_SPLAT]], splat (i64 64)
; CHECK-UF2-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv4i64()
-; CHECK-UF2-NEXT: [[TMP8:%.*]] = mul [[TMP7]], splat (i64 64)
-; CHECK-UF2-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]]
+; CHECK-UF2-NEXT: [[TMP8:%.*]] = mul nuw nsw [[TMP7]], splat (i64 64)
+; CHECK-UF2-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP8]]
; CHECK-UF2-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-UF2: vector.body:
; CHECK-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -172,7 +172,7 @@ define void @single_constant_stride_int_iv(ptr %p) {
; CHECK-UF2-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP11]], align 4 [[TMP9]], splat (i1 true))
; CHECK-UF2-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP12]], align 4 [[TMP10]], splat (i1 true))
; CHECK-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
-; CHECK-UF2-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[TMP6]]
+; CHECK-UF2-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[STEP_ADD]], [[TMP6]]
; CHECK-UF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-UF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK-UF2: middle.block:
@@ -1339,8 +1339,8 @@ define void @constant_stride_reinterpret(ptr noalias %in, ptr noalias %out) {
; NOSTRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]]
; NOSTRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; NOSTRIDED-UF2-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64()
-; NOSTRIDED-UF2-NEXT: [[TMP6:%.*]] = mul [[TMP5]], splat (i64 1)
-; NOSTRIDED-UF2-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP6]]
+; NOSTRIDED-UF2-NEXT: [[TMP6:%.*]] = mul nuw nsw [[TMP5]], splat (i64 1)
+; NOSTRIDED-UF2-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP6]]
; NOSTRIDED-UF2-NEXT: br label [[VECTOR_BODY:%.*]]
; NOSTRIDED-UF2: vector.body:
; NOSTRIDED-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1357,7 +1357,7 @@ define void @constant_stride_reinterpret(ptr noalias %in, ptr noalias %out) {
; NOSTRIDED-UF2-NEXT: store [[WIDE_MASKED_GATHER]], ptr [[TMP9]], align 8
; NOSTRIDED-UF2-NEXT: store [[WIDE_MASKED_GATHER1]], ptr [[TMP12]], align 8
; NOSTRIDED-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
-; NOSTRIDED-UF2-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[BROADCAST_SPLAT]]
+; NOSTRIDED-UF2-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[STEP_ADD]], [[BROADCAST_SPLAT]]
; NOSTRIDED-UF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NOSTRIDED-UF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; NOSTRIDED-UF2: middle.block:
@@ -1423,8 +1423,8 @@ define void @constant_stride_reinterpret(ptr noalias %in, ptr noalias %out) {
; STRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]]
; STRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; STRIDED-UF2-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64()
-; STRIDED-UF2-NEXT: [[TMP6:%.*]] = mul [[TMP5]], splat (i64 1)
-; STRIDED-UF2-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP6]]
+; STRIDED-UF2-NEXT: [[TMP6:%.*]] = mul nuw nsw [[TMP5]], splat (i64 1)
+; STRIDED-UF2-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP6]]
; STRIDED-UF2-NEXT: br label [[VECTOR_BODY:%.*]]
; STRIDED-UF2: vector.body:
; STRIDED-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1441,7 +1441,7 @@ define void @constant_stride_reinterpret(ptr noalias %in, ptr noalias %out) {
; STRIDED-UF2-NEXT: store [[WIDE_MASKED_GATHER]], ptr [[TMP9]], align 8
; STRIDED-UF2-NEXT: store [[WIDE_MASKED_GATHER1]], ptr [[TMP12]], align 8
; STRIDED-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
-; STRIDED-UF2-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[BROADCAST_SPLAT]]
+; STRIDED-UF2-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[STEP_ADD]], [[BROADCAST_SPLAT]]
; STRIDED-UF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; STRIDED-UF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; STRIDED-UF2: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll
index 8ab0f6f4c14f1..317bcde2f4670 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll
@@ -1184,8 +1184,8 @@ define void @vp_ptrtoint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: br label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
; IF-EVL-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv2i64()
-; IF-EVL-NEXT: [[TMP10:%.*]] = mul [[TMP9]], splat (i64 1)
-; IF-EVL-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP10]]
+; IF-EVL-NEXT: [[TMP1:%.*]] = mul [[TMP9]], splat (i64 1)
+; IF-EVL-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP1]]
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -1222,8 +1222,8 @@ define void @vp_ptrtoint(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i64()
-; NO-VP-NEXT: [[TMP7:%.*]] = mul [[TMP6]], splat (i64 1)
-; NO-VP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]]
+; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw nsw [[TMP6]], splat (i64 1)
+; NO-VP-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP5]]
; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0
; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -1235,7 +1235,7 @@ define void @vp_ptrtoint(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
; NO-VP-NEXT: store [[TMP10]], ptr [[TMP11]], align 8
; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
-; NO-VP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]]
+; NO-VP-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT]]
; NO-VP-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP47:![0-9]+]]
; NO-VP: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll
index 34a82757eccc0..c7003560721bc 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll
@@ -380,8 +380,8 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP: vector.ph:
; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0
; IF-EVL-OUTLOOP-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv4i32()
-; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = mul [[TMP10]], splat (i32 1)
-; IF-EVL-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP11]]
+; IF-EVL-OUTLOOP-NEXT: [[TMP2:%.*]] = mul nuw nsw [[TMP10]], splat (i32 1)
+; IF-EVL-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP2]]
; IF-EVL-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL-OUTLOOP: vector.body:
; IF-EVL-OUTLOOP-NEXT: [[EVL_BASED_IV1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT1:%.*]], [[VECTOR_BODY]] ]
@@ -400,7 +400,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: [[TMP20:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT1]] = add i64 [[TMP20]], [[EVL_BASED_IV1]]
; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP20]]
-; IF-EVL-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]]
+; IF-EVL-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT]]
; IF-EVL-OUTLOOP-NEXT: [[TMP21:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; IF-EVL-OUTLOOP: middle.block:
@@ -415,8 +415,8 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: br label [[VECTOR_PH:%.*]]
; IF-EVL-INLOOP: vector.ph:
; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i32()
-; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = mul [[TMP9]], splat (i32 1)
-; IF-EVL-INLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP10]]
+; IF-EVL-INLOOP-NEXT: [[TMP1:%.*]] = mul nuw nsw [[TMP9]], splat (i32 1)
+; IF-EVL-INLOOP-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP1]]
; IF-EVL-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL-INLOOP: vector.body:
; IF-EVL-INLOOP-NEXT: [[EVL_BASED_IV1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT1:%.*]], [[VECTOR_BODY]] ]
@@ -435,7 +435,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT1]] = add i64 [[TMP19]], [[EVL_BASED_IV1]]
; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]]
-; IF-EVL-INLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]]
+; IF-EVL-INLOOP-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT]]
; IF-EVL-INLOOP-NEXT: [[TMP18:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; IF-EVL-INLOOP-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; IF-EVL-INLOOP: middle.block:
@@ -457,8 +457,8 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0
; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call @llvm.stepvector.nxv4i32()
-; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul [[TMP12]], splat (i32 1)
-; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP14]]
+; NO-VP-OUTLOOP-NEXT: [[TMP8:%.*]] = mul nuw nsw [[TMP12]], splat (i32 1)
+; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP8]]
; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP7]] to i32
; NO-VP-OUTLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP16]], i64 0
; NO-VP-OUTLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer
@@ -473,7 +473,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: [[TMP22:%.*]] = select [[TMP27]], [[WIDE_MASKED_LOAD]], zeroinitializer
; NO-VP-OUTLOOP-NEXT: [[TMP23]] = add [[TMP22]], [[VEC_PHI]]
; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
-; NO-VP-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]]
+; NO-VP-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]]
; NO-VP-OUTLOOP-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-OUTLOOP-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; NO-VP-OUTLOOP: middle.block:
@@ -513,8 +513,8 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
; NO-VP-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-INLOOP-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i32()
-; NO-VP-INLOOP-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i32 1)
-; NO-VP-INLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]]
+; NO-VP-INLOOP-NEXT: [[TMP5:%.*]] = mul nuw nsw [[TMP6]], splat (i32 1)
+; NO-VP-INLOOP-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP5]]
; NO-VP-INLOOP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP3]] to i32
; NO-VP-INLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP10]], i64 0
; NO-VP-INLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer
@@ -530,7 +530,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
; NO-VP-INLOOP-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP16]])
; NO-VP-INLOOP-NEXT: [[TMP18]] = add i32 [[VEC_PHI]], [[TMP17]]
; NO-VP-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
-; NO-VP-INLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]]
+; NO-VP-INLOOP-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]]
; NO-VP-INLOOP-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-INLOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; NO-VP-INLOOP: middle.block:
@@ -584,8 +584,8 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP: vector.ph:
; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0
; IF-EVL-OUTLOOP-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv4i32()
-; IF-EVL-OUTLOOP-NEXT: [[TMP13:%.*]] = mul [[TMP10]], splat (i32 1)
-; IF-EVL-OUTLOOP-NEXT: [[INDUCTION1:%.*]] = add zeroinitializer, [[TMP13]]
+; IF-EVL-OUTLOOP-NEXT: [[TMP2:%.*]] = mul nuw nsw [[TMP10]], splat (i32 1)
+; IF-EVL-OUTLOOP-NEXT: [[INDUCTION1:%.*]] = add nuw nsw zeroinitializer, [[TMP2]]
; IF-EVL-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL-OUTLOOP: vector.body:
; IF-EVL-OUTLOOP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -604,7 +604,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: [[TMP25:%.*]] = zext i32 [[TMP14]] to i64
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP25]], [[IV]]
; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP25]]
-; IF-EVL-OUTLOOP-NEXT: [[VEC_IND_NEXT7]] = add [[VEC_IND2]], [[BROADCAST_SPLAT2]]
+; IF-EVL-OUTLOOP-NEXT: [[VEC_IND_NEXT7]] = add nuw nsw [[VEC_IND2]], [[BROADCAST_SPLAT2]]
; IF-EVL-OUTLOOP-NEXT: [[TMP19:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK1:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; IF-EVL-OUTLOOP: middle.block:
@@ -619,8 +619,8 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: br label [[VECTOR_PH:%.*]]
; IF-EVL-INLOOP: vector.ph:
; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i32()
-; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = mul [[TMP9]], splat (i32 1)
-; IF-EVL-INLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP10]]
+; IF-EVL-INLOOP-NEXT: [[TMP1:%.*]] = mul nuw nsw [[TMP9]], splat (i32 1)
+; IF-EVL-INLOOP-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP1]]
; IF-EVL-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL-INLOOP: vector.body:
; IF-EVL-INLOOP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -638,7 +638,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[IV]]
; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]]
-; IF-EVL-INLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]]
+; IF-EVL-INLOOP-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT]]
; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; IF-EVL-INLOOP-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK1:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; IF-EVL-INLOOP: middle.block:
@@ -660,8 +660,8 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0
; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call @llvm.stepvector.nxv4i32()
-; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul [[TMP12]], splat (i32 1)
-; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP14]]
+; NO-VP-OUTLOOP-NEXT: [[TMP8:%.*]] = mul nuw nsw [[TMP12]], splat (i32 1)
+; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP8]]
; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP7]] to i32
; NO-VP-OUTLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP16]], i64 0
; NO-VP-OUTLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer
@@ -676,7 +676,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: [[TMP22:%.*]] = add [[VEC_PHI]], [[WIDE_MASKED_LOAD]]
; NO-VP-OUTLOOP-NEXT: [[PREDPHI]] = select [[TMP28]], [[TMP22]], [[VEC_PHI]]
; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
-; NO-VP-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]]
+; NO-VP-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]]
; NO-VP-OUTLOOP-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-OUTLOOP-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; NO-VP-OUTLOOP: middle.block:
@@ -720,8 +720,8 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; NO-VP-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-INLOOP-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i32()
-; NO-VP-INLOOP-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i32 1)
-; NO-VP-INLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]]
+; NO-VP-INLOOP-NEXT: [[TMP5:%.*]] = mul nuw nsw [[TMP6]], splat (i32 1)
+; NO-VP-INLOOP-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP5]]
; NO-VP-INLOOP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP3]] to i32
; NO-VP-INLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP10]], i64 0
; NO-VP-INLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer
@@ -737,7 +737,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; NO-VP-INLOOP-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP16]])
; NO-VP-INLOOP-NEXT: [[TMP18]] = add i32 [[VEC_PHI]], [[TMP17]]
; NO-VP-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
-; NO-VP-INLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]]
+; NO-VP-INLOOP-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[DOTSPLAT]]
; NO-VP-INLOOP-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-INLOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; NO-VP-INLOOP: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll
index c7ba826295de8..a3bec999425a3 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll
@@ -545,8 +545,8 @@ define void @first_order_recurrence_indvar(ptr noalias %A, i64 %TC) {
; IF-EVL-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP18]], 2
; IF-EVL-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
; IF-EVL-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i64()
-; IF-EVL-NEXT: [[TMP12:%.*]] = mul [[TMP6]], splat (i64 1)
-; IF-EVL-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP12]]
+; IF-EVL-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 1)
+; IF-EVL-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]]
; IF-EVL-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32()
; IF-EVL-NEXT: [[TMP19:%.*]] = mul nuw i32 [[TMP13]], 2
; IF-EVL-NEXT: [[TMP10:%.*]] = sub i32 [[TMP19]], 1
@@ -589,8 +589,8 @@ define void @first_order_recurrence_indvar(ptr noalias %A, i64 %TC) {
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TC]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[TC]], [[N_MOD_VF]]
; NO-VP-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i64()
-; NO-VP-NEXT: [[TMP7:%.*]] = mul [[TMP6]], splat (i64 1)
-; NO-VP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]]
+; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw nsw [[TMP6]], splat (i64 1)
+; NO-VP-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP5]]
; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0
; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
; NO-VP-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
@@ -607,7 +607,7 @@ define void @first_order_recurrence_indvar(ptr noalias %A, i64 %TC) {
; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i64, ptr [[A]], i64 [[INDEX]]
; NO-VP-NEXT: store [[TMP13]], ptr [[TMP11]], align 8
; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
-; NO-VP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]]
+; NO-VP-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT]]
; NO-VP-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; NO-VP: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll
index a07e031418762..b5662b0bd8d3b 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll
@@ -119,8 +119,8 @@ define i32 @load_factor_4_with_gap(i64 %n, ptr noalias %a) {
; IF-EVL-NEXT: br label [[VECTOR_PH:%.*]]
; IF-EVL: vector.ph:
; IF-EVL-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64()
-; IF-EVL-NEXT: [[TMP3:%.*]] = mul [[TMP2]], splat (i64 1)
-; IF-EVL-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP3]]
+; IF-EVL-NEXT: [[TMP1:%.*]] = mul [[TMP2]], splat (i64 1)
+; IF-EVL-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP1]]
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -163,8 +163,8 @@ define i32 @load_factor_4_with_gap(i64 %n, ptr noalias %a) {
; NO-VP-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 [[N_MOD_VF]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[TMP5]]
; NO-VP-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64()
-; NO-VP-NEXT: [[TMP7:%.*]] = mul [[TMP6]], splat (i64 1)
-; NO-VP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]]
+; NO-VP-NEXT: [[TMP7:%.*]] = mul nuw nsw [[TMP6]], splat (i64 1)
+; NO-VP-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP7]]
; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0
; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -181,7 +181,7 @@ define i32 @load_factor_4_with_gap(i64 %n, ptr noalias %a) {
; NO-VP-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( align 4 [[TMP12]], splat (i1 true), poison)
; NO-VP-NEXT: [[TMP13]] = add [[TMP11]], [[WIDE_MASKED_GATHER2]]
; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
-; NO-VP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]]
+; NO-VP-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT]]
; NO-VP-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; NO-VP: middle.block:
@@ -249,8 +249,8 @@ define void @store_factor_4_with_gap(i32 %n, ptr noalias %a) {
; IF-EVL-NEXT: br label [[VECTOR_PH:%.*]]
; IF-EVL: vector.ph:
; IF-EVL-NEXT: [[TMP4:%.*]] = call @llvm.stepvector.nxv4i32()
-; IF-EVL-NEXT: [[TMP5:%.*]] = mul [[TMP4]], splat (i32 1)
-; IF-EVL-NEXT: [[INDUCTION1:%.*]] = add zeroinitializer, [[TMP5]]
+; IF-EVL-NEXT: [[TMP1:%.*]] = mul [[TMP4]], splat (i32 1)
+; IF-EVL-NEXT: [[INDUCTION1:%.*]] = add zeroinitializer, [[TMP1]]
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[VEC_IND2:%.*]] = phi [ [[INDUCTION1]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ]
@@ -285,8 +285,8 @@ define void @store_factor_4_with_gap(i32 %n, ptr noalias %a) {
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP9]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv4i32()
-; NO-VP-NEXT: [[TMP8:%.*]] = mul [[TMP7]], splat (i32 1)
-; NO-VP-NEXT: [[INDUCTION1:%.*]] = add zeroinitializer, [[TMP8]]
+; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw nsw [[TMP7]], splat (i32 1)
+; NO-VP-NEXT: [[INDUCTION1:%.*]] = add nuw nsw zeroinitializer, [[TMP5]]
; NO-VP-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, i32 [[TMP9]], i64 0
; NO-VP-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector [[BROADCAST_SPLATINSERT2]], poison, zeroinitializer
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -300,7 +300,7 @@ define void @store_factor_4_with_gap(i32 %n, ptr noalias %a) {
; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], [[VEC_IND4]], i32 3
; NO-VP-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[VEC_IND4]], align 4 [[TMP13]], splat (i1 true))
; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP9]]
-; NO-VP-NEXT: [[VEC_IND_NEXT5]] = add [[VEC_IND4]], [[BROADCAST_SPLAT3]]
+; NO-VP-NEXT: [[VEC_IND_NEXT5]] = add nuw nsw [[VEC_IND4]], [[BROADCAST_SPLAT3]]
; NO-VP-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; NO-VP: middle.block:
@@ -359,8 +359,8 @@ define i32 @load_factor_4_with_tail_gap(i64 %n, ptr noalias %a) {
; IF-EVL-NEXT: br label [[VECTOR_PH:%.*]]
; IF-EVL: vector.ph:
; IF-EVL-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64()
-; IF-EVL-NEXT: [[TMP3:%.*]] = mul [[TMP2]], splat (i64 1)
-; IF-EVL-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP3]]
+; IF-EVL-NEXT: [[TMP1:%.*]] = mul [[TMP2]], splat (i64 1)
+; IF-EVL-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP1]]
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -403,8 +403,8 @@ define i32 @load_factor_4_with_tail_gap(i64 %n, ptr noalias %a) {
; NO-VP-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 [[N_MOD_VF]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[TMP5]]
; NO-VP-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64()
-; NO-VP-NEXT: [[TMP7:%.*]] = mul [[TMP6]], splat (i64 1)
-; NO-VP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]]
+; NO-VP-NEXT: [[TMP7:%.*]] = mul nuw nsw [[TMP6]], splat (i64 1)
+; NO-VP-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP7]]
; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0
; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -421,7 +421,7 @@ define i32 @load_factor_4_with_tail_gap(i64 %n, ptr noalias %a) {
; NO-VP-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( align 4 [[TMP12]], splat (i1 true), poison)
; NO-VP-NEXT: [[TMP13]] = add [[TMP11]], [[WIDE_MASKED_GATHER2]]
; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
-; NO-VP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]]
+; NO-VP-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT]]
; NO-VP-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; NO-VP: middle.block:
@@ -490,8 +490,8 @@ define void @store_factor_4_with_tail_gap(i32 %n, ptr noalias %a) {
; IF-EVL-NEXT: br label [[VECTOR_PH:%.*]]
; IF-EVL: vector.ph:
; IF-EVL-NEXT: [[TMP4:%.*]] = call @llvm.stepvector.nxv4i32()
-; IF-EVL-NEXT: [[TMP5:%.*]] = mul [[TMP4]], splat (i32 1)
-; IF-EVL-NEXT: [[INDUCTION1:%.*]] = add zeroinitializer, [[TMP5]]
+; IF-EVL-NEXT: [[TMP1:%.*]] = mul [[TMP4]], splat (i32 1)
+; IF-EVL-NEXT: [[INDUCTION1:%.*]] = add zeroinitializer, [[TMP1]]
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[VEC_IND2:%.*]] = phi [ [[INDUCTION1]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ]
@@ -526,8 +526,8 @@ define void @store_factor_4_with_tail_gap(i32 %n, ptr noalias %a) {
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP9]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv4i32()
-; NO-VP-NEXT: [[TMP8:%.*]] = mul [[TMP7]], splat (i32 1)
-; NO-VP-NEXT: [[INDUCTION1:%.*]] = add zeroinitializer, [[TMP8]]
+; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw nsw [[TMP7]], splat (i32 1)
+; NO-VP-NEXT: [[INDUCTION1:%.*]] = add nuw nsw zeroinitializer, [[TMP5]]
; NO-VP-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, i32 [[TMP9]], i64 0
; NO-VP-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector [[BROADCAST_SPLATINSERT2]], poison, zeroinitializer
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -541,7 +541,7 @@ define void @store_factor_4_with_tail_gap(i32 %n, ptr noalias %a) {
; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], [[VEC_IND4]], i32 2
; NO-VP-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[VEC_IND4]], align 4 [[TMP13]], splat (i1 true))
; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP9]]
-; NO-VP-NEXT: [[VEC_IND_NEXT5]] = add [[VEC_IND4]], [[BROADCAST_SPLAT3]]
+; NO-VP-NEXT: [[VEC_IND_NEXT5]] = add nuw nsw [[VEC_IND4]], [[BROADCAST_SPLAT3]]
; NO-VP-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; NO-VP: middle.block:
@@ -594,8 +594,8 @@ define i32 @load_factor_4_reverse(i64 %n, ptr noalias %a) {
; IF-EVL-NEXT: [[TMP4:%.*]] = call @llvm.stepvector.nxv4i64()
; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[N]], i64 0
; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
-; IF-EVL-NEXT: [[TMP5:%.*]] = mul [[TMP4]], splat (i64 -1)
-; IF-EVL-NEXT: [[INDUCTION:%.*]] = add [[BROADCAST_SPLAT]], [[TMP5]]
+; IF-EVL-NEXT: [[TMP3:%.*]] = mul nsw [[TMP4]], splat (i64 -1)
+; IF-EVL-NEXT: [[INDUCTION:%.*]] = add nsw [[BROADCAST_SPLAT]], [[TMP3]]
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -603,7 +603,7 @@ define i32 @load_factor_4_reverse(i64 %n, ptr noalias %a) {
; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP1]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 -1, [[TMP7]]
+; IF-EVL-NEXT: [[TMP8:%.*]] = mul nsw i64 -1, [[TMP7]]
; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[TMP8]], i64 0
; IF-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer
; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x i32], ptr [[A:%.*]], [[VEC_IND]], i32 0
@@ -619,7 +619,7 @@ define i32 @load_factor_4_reverse(i64 %n, ptr noalias %a) {
; IF-EVL-NEXT: [[TMP15:%.*]] = add [[TMP13]], [[WIDE_MASKED_GATHER5]]
; IF-EVL-NEXT: [[TMP16]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[TMP15]], [[VEC_PHI]], i32 [[TMP6]])
;
IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]] -; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] +; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[BROADCAST_SPLAT2]] ; IF-EVL-NEXT: [[TMP18:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; IF-EVL: middle.block: @@ -646,9 +646,9 @@ define i32 @load_factor_4_reverse(i64 %n, ptr noalias %a) { ; NO-VP-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv4i64() ; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[N]], i64 0 ; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; NO-VP-NEXT: [[TMP8:%.*]] = mul [[TMP7]], splat (i64 -1) -; NO-VP-NEXT: [[INDUCTION:%.*]] = add [[BROADCAST_SPLAT]], [[TMP8]] -; NO-VP-NEXT: [[TMP9:%.*]] = mul i64 -1, [[TMP5]] +; NO-VP-NEXT: [[TMP8:%.*]] = mul nsw [[TMP7]], splat (i64 -1) +; NO-VP-NEXT: [[INDUCTION:%.*]] = add nsw [[BROADCAST_SPLAT]], [[TMP8]] +; NO-VP-NEXT: [[TMP9:%.*]] = mul nsw i64 -1, [[TMP5]] ; NO-VP-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 ; NO-VP-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -668,7 +668,7 @@ define i32 @load_factor_4_reverse(i64 %n, ptr noalias %a) { ; NO-VP-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( align 4 [[TMP15]], splat (i1 true), poison) ; NO-VP-NEXT: [[TMP16]] = add [[TMP14]], [[WIDE_MASKED_GATHER5]] ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] +; NO-VP-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[BROADCAST_SPLAT2]] ; NO-VP-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; NO-VP: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index 1e21c753840e9..0375f0a8fd132 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -249,8 +249,8 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64() -; SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP6]], splat (i64 1) -; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] +; SCALABLE-NEXT: [[TMP1:%.*]] = mul [[TMP6]], splat (i64 1) +; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP1]] ; SCALABLE-NEXT: br label %[[VECTOR_BODY:.*]] ; SCALABLE: [[VECTOR_BODY]]: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -298,7 +298,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; FIXEDLEN-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP3]], align 8 ; FIXEDLEN-NEXT: store <4 x i64> [[PREDPHI2]], ptr [[TMP5]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) +; FIXEDLEN-NEXT: 
[[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD]], splat (i64 4)
; FIXEDLEN-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; FIXEDLEN-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; FIXEDLEN: [[MIDDLE_BLOCK]]:
@@ -330,8 +330,8 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[B]], i64 0
; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
-; TF-SCALABLE-NEXT: [[TMP6:%.*]] = mul <vscale x 4 x i64> [[TMP5]], splat (i64 1)
-; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP6]]
+; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul <vscale x 4 x i64> [[TMP5]], splat (i64 1)
+; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP1]]
; TF-SCALABLE-NEXT: br label %[[VECTOR_BODY:.*]]
; TF-SCALABLE: [[VECTOR_BODY]]:
; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -590,8 +590,8 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V]], i64 0
; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; SCALABLE-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
-; SCALABLE-NEXT: [[TMP13:%.*]] = mul <vscale x 2 x i64> [[TMP6]], splat (i64 1)
-; SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP13]]
+; SCALABLE-NEXT: [[TMP1:%.*]] = mul <vscale x 2 x i64> [[TMP6]], splat (i64 1)
+; SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP1]]
; SCALABLE-NEXT: br label %[[VECTOR_BODY:.*]]
; SCALABLE: [[VECTOR_BODY]]:
; SCALABLE-NEXT: [[TMP10:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -661,8 +661,8 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V]], i64 0
; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT2]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
-; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP5]], splat (i64 1)
-; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]]
+; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul <vscale x 2 x i64> [[TMP5]], splat (i64 1)
+; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP1]]
; TF-SCALABLE-NEXT: br label %[[VECTOR_BODY:.*]]
; TF-SCALABLE: [[VECTOR_BODY]]:
; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -712,8 +712,8 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B]], i64 0
; SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; SCALABLE-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
-; SCALABLE-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i64> [[TMP6]], splat (i64 1)
-; SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP8]]
+; SCALABLE-NEXT: [[TMP1:%.*]] = mul <vscale x 2 x i64> [[TMP6]], splat (i64 1)
+; SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP1]]
; SCALABLE-NEXT: br label %[[VECTOR_BODY:.*]]
; SCALABLE: [[VECTOR_BODY]]:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]],
%[[VECTOR_BODY]] ] @@ -760,7 +760,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) +; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD]], splat (i64 4) ; FIXEDLEN-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXEDLEN-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; FIXEDLEN: [[MIDDLE_BLOCK]]: @@ -793,8 +793,8 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, ptr [[B]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64() -; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP5]], splat (i64 1) -; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] +; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul [[TMP5]], splat (i64 1) +; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP1]] ; TF-SCALABLE-NEXT: br label %[[VECTOR_BODY:.*]] ; TF-SCALABLE: [[VECTOR_BODY]]: ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll index bb85b88f181f7..8536b3f0703cd 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll @@ -11,7 +11,7 @@ define void @wide_or_replaced_with_add_vpinstruction(ptr %src, ptr noalias %dst) ; CHECK: Cost of 0 for VF 2: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] ; CHECK: Cost of 1 for VF 2: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32 ; CHECK: Cost of 0 for VF 2: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next> -; CHECK: Cost of 0 for VF 2: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<%0> +; CHECK: Cost of 0 for VF 2: ir<%iv> = WIDEN-INDUCTION nuw nsw ir<0>, ir<1>, vp<%0> ; CHECK: Cost of 0 for VF 2: vp<%4> = SCALAR-STEPS vp<%3>, ir<1> ; CHECK: Cost of 0 for VF 2: CLONE ir<%g.src> = getelementptr inbounds ir<%src>, vp<%4> ; CHECK: Cost of 0 for VF 2: vp<%5> = vector-pointer ir<%g.src> @@ -29,7 +29,7 @@ define void @wide_or_replaced_with_add_vpinstruction(ptr %src, ptr noalias %dst) ; CHECK: Cost of 0 for VF 4: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] ; CHECK: Cost of 1 for VF 4: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32 ; CHECK: Cost of 0 for VF 4: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next> -; CHECK: Cost of 0 for VF 4: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<%0> +; CHECK: Cost of 0 for VF 4: ir<%iv> = WIDEN-INDUCTION nuw nsw ir<0>, ir<1>, vp<%0> ; CHECK: Cost of 0 for VF 4: vp<%4> = SCALAR-STEPS vp<%3>, ir<1> ; CHECK: Cost of 0 for VF 4: CLONE ir<%g.src> = getelementptr inbounds ir<%src>, vp<%4> ; CHECK: Cost of 0 for VF 4: vp<%5> = vector-pointer ir<%g.src> diff --git a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll 
b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
index 193b5d4a788dc..69505eb176f50 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
@@ -48,7 +48,7 @@ define void @PR31671(float %x, ptr %d) #0 {
; CHECK-NEXT: [[TMP4:%.*]] = fadd <16 x float> [[STRIDED_VEC2]], [[TMP1]]
; CHECK-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[TMP4]], <16 x ptr> align 4 [[TMP2]], <16 x i1> splat (i1 true))
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 80)
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <16 x i64> [[VEC_IND]], splat (i64 80)
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 6384
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll
index c5f581fad41f5..3165422dcc539 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll
@@ -34,7 +34,7 @@ define i1 @fn(ptr %nno) #0 {
; CHECK-NEXT: [[TMP11]] = or <4 x i32> [[PREDPHI]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP11]], <4 x i32> [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4)
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 -4)
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 12
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
index ed288d2f99a0b..fd76aa779bfb0 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
@@ -31,7 +31,7 @@ define void @test_pr59459(i64 %iv.start, ptr %arr) {
; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[IV_START]] to i32
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[TMP9]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i32> [[DOTSPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <16 x i32> [[DOTSPLAT]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <16 x i32> [[DOTSPLAT]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -46,7 +46,7 @@ define void @test_pr59459(i64 %iv.start, ptr %arr) {
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[ARR:%.*]], i64 [[TMP16]]
; CHECK-NEXT: store <16 x i16> [[TMP15]], ptr [[TMP17]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i32> [[VEC_IND]], splat (i32 16)
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <16 x i32> [[VEC_IND]], splat (i32 16)
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
@@ -55,7 +55,7 @@ define void @test_pr59459(i64 %iv.start, ptr %arr) {
; CHECK: vec.epilog.iter.check:
; CHECK-NEXT: [[IND_END6:%.*]] = add i64 [[IV_START]], [[N_VEC]]
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4
-; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IV_START]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
@@ -65,7 +65,7 @@ define void @test_pr59459(i64 %iv.start, ptr %arr) {
; CHECK-NEXT: [[TMP20:%.*]] = trunc i64 [[BC_RESUME_VAL]] to i32
; CHECK-NEXT: [[DOTSPLATINSERT9:%.*]] = insertelement <4 x i32> poison, i32 [[TMP20]], i64 0
; CHECK-NEXT: [[DOTSPLAT10:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT9]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[INDUCTION11:%.*]] = add <4 x i32> [[DOTSPLAT10]], <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[INDUCTION11:%.*]] = add nuw nsw <4 x i32> [[DOTSPLAT10]], <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX8:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT15:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
@@ -80,9 +80,9 @@ define void @test_pr59459(i64 %iv.start, ptr %arr) {
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[ARR]], i64 [[TMP27]]
; CHECK-NEXT: store <4 x i16> [[TMP26]], ptr [[TMP28]], align 2
; CHECK-NEXT: [[INDEX_NEXT15]] = add nuw i64 [[INDEX8]], 4
-; CHECK-NEXT: [[VEC_IND_NEXT13]] = add <4 x i32> [[VEC_IND12]], splat (i32 4)
+; CHECK-NEXT: [[VEC_IND_NEXT13]] = add nuw nsw <4 x i32> [[VEC_IND12]], splat (i32 4)
; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT15]], [[N_VEC4]]
-; CHECK-NEXT: br i1 [[TMP30]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP30]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: [[CMP_N16:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC4]]
; CHECK-NEXT: br i1 [[CMP_N16]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
@@ -101,7 +101,7 @@ define void @test_pr59459(i64 %iv.start, ptr %arr) {
; CHECK-NEXT: store i16 [[STORE_VAL]], ptr [[ADDR]], align 2
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp sgt i32 [[IV_TRUNC]], 91
-; CHECK-NEXT: br i1 [[LOOP_COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: br i1 [[LOOP_COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
@@ -179,7 +179,7 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l,
; CHECK-NEXT: [[DOTCAST9:%.*]] = trunc i64 [[N_VEC]] to i16
; CHECK-NEXT: [[IND_END10:%.*]] = mul i16 [[DOTCAST9]], [[TMP0]]
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8
-; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]],
!prof [[PROF7:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] @@ -208,7 +208,7 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l, ; CHECK-NEXT: [[INDEX_NEXT24]] = add nuw i64 [[INDEX12]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT21]] = add <8 x i16> [[VEC_IND20]], [[BROADCAST_SPLAT23]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT24]], [[N_VEC5]] -; CHECK-NEXT: br i1 [[TMP20]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP20]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N25:%.*]] = icmp eq i64 [[L]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[CMP_N25]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -224,7 +224,7 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l, ; CHECK-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX3]], align 2 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[L]] -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -265,12 +265,12 @@ define i8 @multiple_inductions_start_at_0() { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 128 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i8> [[STEP_ADD_3]], zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <32 x i8> [[STEP_ADD_3]], i32 31 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF11:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ 1024, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] @@ -285,7 +285,7 @@ define i8 @multiple_inductions_start_at_0() { ; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i32 [[INDEX1]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <4 x i8> [[VEC_IND2]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT3]], 1052 -; CHECK-NEXT: br i1 [[TMP3]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP3]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i8> [[VEC_IND2]], i32 3 ; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -300,7 +300,7 @@ define i8 @multiple_inductions_start_at_0() { ; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 
[[IV_2]] to i8 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp ugt i32 [[IV]], 1050 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: [[RES:%.*]] = phi i8 [ [[TRUNC]], [[LOOP]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ], [ [[TMP4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i8 [[RES]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll index 002d811d46992..12b8d1e15b523 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll @@ -222,7 +222,7 @@ define i64 @test_pr62954_scalar_epilogue_required(ptr %A, ptr noalias %B, ptr %C ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 ; CHECK-NEXT: store i64 [[TMP2]], ptr [[GEP]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 36 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll index db592f959bace..0bac10a41640e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll @@ -134,7 +134,7 @@ define void @foo2(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n ; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], <16 x i64> [[VEC_IND]] ; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[TMP3]], <16 x ptr> align 4 [[TMP4]], <16 x i1> [[TMP1]]) ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 16 -; AVX512-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 256) +; AVX512-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <16 x i64> [[VEC_IND]], splat (i64 256) ; AVX512-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; AVX512-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; AVX512: middle.block: @@ -180,7 +180,7 @@ define void @foo2(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE3]] ; FVW2: pred.store.continue3: ; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2 -; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 32) +; FVW2-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 32) ; FVW2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; FVW2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; FVW2: middle.block: @@ -249,7 +249,7 @@ define void @foo3(ptr noalias %in, ptr noalias %out, ptr noalias %trigger) { ; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_OUT:%.*]], ptr [[OUT:%.*]], <16 x i64> [[VEC_IND]], i32 1 ; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[TMP3]], <16 x ptr> align 4 [[TMP4]], <16 x i1> [[TMP1]]) ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; AVX512-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 
256) +; AVX512-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <16 x i64> [[VEC_IND]], splat (i64 256) ; AVX512-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; AVX512-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; AVX512: middle.block: @@ -295,7 +295,7 @@ define void @foo3(ptr noalias %in, ptr noalias %out, ptr noalias %trigger) { ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE2]] ; FVW2: pred.store.continue2: ; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 32) +; FVW2-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 32) ; FVW2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; FVW2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; FVW2: middle.block: @@ -351,7 +351,7 @@ define void @foo2_addrspace(ptr addrspace(1) noalias %in, ptr addrspace(1) noali ; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT:%.*]], <16 x i64> [[VEC_IND]] ; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p1(<16 x float> [[TMP3]], <16 x ptr addrspace(1)> align 4 [[TMP4]], <16 x i1> [[TMP1]]) ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 16 -; AVX512-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 256) +; AVX512-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <16 x i64> [[VEC_IND]], splat (i64 256) ; AVX512-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; AVX512-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; AVX512: middle.block: @@ -397,7 +397,7 @@ define void @foo2_addrspace(ptr addrspace(1) noalias %in, ptr addrspace(1) noali ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE3]] ; FVW2: pred.store.continue3: ; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2 -; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 32) +; FVW2-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 32) ; FVW2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; FVW2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; FVW2: middle.block: @@ -452,7 +452,7 @@ define void @foo2_addrspace2(ptr addrspace(1) noalias %in, ptr addrspace(0) noal ; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], <16 x i64> [[VEC_IND]] ; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[TMP3]], <16 x ptr> align 4 [[TMP4]], <16 x i1> [[TMP1]]) ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 16 -; AVX512-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 256) +; AVX512-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <16 x i64> [[VEC_IND]], splat (i64 256) ; AVX512-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; AVX512-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; AVX512: middle.block: @@ -498,7 +498,7 @@ define void @foo2_addrspace2(ptr addrspace(1) noalias %in, ptr addrspace(0) noal ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE3]] ; FVW2: pred.store.continue3: ; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2 -; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 32) +; FVW2-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 32) ; FVW2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; FVW2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; 
FVW2: middle.block: @@ -553,7 +553,7 @@ define void @foo2_addrspace3(ptr addrspace(0) noalias %in, ptr addrspace(1) noal ; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT:%.*]], <16 x i64> [[VEC_IND]] ; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p1(<16 x float> [[TMP3]], <16 x ptr addrspace(1)> align 4 [[TMP4]], <16 x i1> [[TMP1]]) ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 16 -; AVX512-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 256) +; AVX512-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <16 x i64> [[VEC_IND]], splat (i64 256) ; AVX512-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; AVX512-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; AVX512: middle.block: @@ -599,7 +599,7 @@ define void @foo2_addrspace3(ptr addrspace(0) noalias %in, ptr addrspace(1) noal ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE3]] ; FVW2: pred.store.continue3: ; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2 -; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 32) +; FVW2-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 32) ; FVW2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; FVW2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; FVW2: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll index 9240484c6998b..5eeebf2009a62 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll @@ -370,7 +370,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N) ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[TMP63]], <32 x i32> poison, <32 x i32> ; CHECK-NEXT: store <32 x i32> [[INTERLEAVED_VEC]], ptr [[TMP56]], align 4, !alias.scope [[META13]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 32) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 32) ; CHECK-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP64]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll index 0678e9eea4c35..b6d26b4ab46be 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll @@ -35,8 +35,8 @@ define void @avoid_sinking_store_across_load(ptr %arr) { ; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[STRIDED_VEC6]], [[STRIDED_VEC5]] ; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP9]], <4 x ptr> align 4 [[TMP4]], <4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) -; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 12) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 12) +; CHECK-NEXT: [[VEC_IND_NEXT3]] = add nuw nsw <4 x i64> [[VEC_IND2]], splat (i64 12) ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; 
CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll index 6558f761142f0..932153a23bdbd 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll @@ -1021,7 +1021,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[A]], <8 x i64> [[VEC_IND]] ; AVX512-NEXT: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> [[TMP5]], <8 x ptr> align 8 [[TMP6]], <8 x i1> [[TMP1]]), !alias.scope [[META29:![0-9]+]], !noalias [[META31:![0-9]+]] ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; AVX512-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 128) +; AVX512-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <8 x i64> [[VEC_IND]], splat (i64 128) ; AVX512-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 624 ; AVX512-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; AVX512: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll index 5e1850be132bd..a8c5bb0ee6f3b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll @@ -173,7 +173,7 @@ define void @scev4stride1(ptr noalias nocapture %a, ptr noalias nocapture readon ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDEX]] ; CHECK-NEXT: store <64 x i32> [[WIDE_MASKED_GATHER]], ptr [[TMP3]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 64 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <64 x i32> [[VEC_IND]], splat (i32 64) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <64 x i32> [[VEC_IND]], splat (i32 64) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -198,7 +198,7 @@ define void @scev4stride1(ptr noalias nocapture %a, ptr noalias nocapture readon ; AUTOVF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDEX]] ; AUTOVF-NEXT: store <8 x i32> [[WIDE_MASKED_GATHER]], ptr [[TMP3]], align 4 ; AUTOVF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; AUTOVF-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) +; AUTOVF-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <8 x i32> [[VEC_IND]], splat (i32 8) ; AUTOVF-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; AUTOVF-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; AUTOVF: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll b/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll index 113bb7a7f2aca..4eaadcb93e3de 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll @@ -50,7 +50,7 @@ define void @foo(i32 %n) { ; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_LATCH]], label %[[FOR_BODY31]] ; CHECK: [[VECTOR_LATCH]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) 
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 8
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll b/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll
index ff5020cd60138..1350e40c77e66 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll
@@ -29,7 +29,7 @@ define void @foo(ptr %ptr, ptr %ptr.2) {
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 [[INDEX]]
; CHECK-NEXT: store <4 x i64> [[VEC_IND]], ptr [[TMP6]], align 8, !alias.scope [[META3]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 80
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll b/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll
index e7f56a45ebdc6..34c54de2140cc 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll
@@ -38,7 +38,7 @@ define ptr @test(ptr noalias %src, ptr noalias %dst) {
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]]
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP14]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll b/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll
index e888ad3b8eb4e..fb5a5f9c068b9 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll
@@ -36,7 +36,7 @@ define void @test_pr55096(i64 %c, ptr %p) {
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
; CHECK: pred.store.continue2:
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 340
; CHECK-NEXT: br i1 [[TMP15]], label [[EXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: exit:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll
index ba7db65d745a9..08855fe9ecba5 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll
@@ -35,7 +35,7 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 {
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i1> [[TMP4]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr align 8 [[TMP8]], <4 x i1> [[REVERSE]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4)
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 -4)
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 12
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF0:![0-9]+]], !llvm.loop [[LOOP1:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
index e405fe7c6f764..9ef4e205a970d 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
@@ -59,8 +59,8 @@ define void @_Z3fn1v() #0 {
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP11]], <16 x i64> [[TMP15]], i64 0
; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 8), <16 x ptr> align 8 [[TMP16]], <16 x i1> splat (i1 true))
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 32)
-; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <16 x i64> [[VEC_IND3]], splat (i64 32)
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <16 x i64> [[VEC_IND]], splat (i64 32)
+; CHECK-NEXT: [[VEC_IND_NEXT3]] = add nuw nsw <16 x i64> [[VEC_IND3]], splat (i64 32)
; CHECK-NEXT: [[TMP63:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP63]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -83,15 +83,15 @@ define void @_Z3fn1v() #0 {
; CHECK-NEXT: [[IND_END11:%.*]] = mul i64 [[N_VEC7]], 2
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[DOTSPLAT]], <i64 0, i64 2, i64 4, i64 6, i64 8, i64 10, i64 12, i64 14>
+; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <8 x i64> [[DOTSPLAT]], <i64 0, i64 2, i64 4, i64 6, i64 8, i64 10, i64 12, i64 14>
; CHECK-NEXT: [[DOTSPLATINSERT17:%.*]] = insertelement <8 x i64> poison, i64 [[BC_RESUME_VAL5]], i64 0
; CHECK-NEXT: [[DOTSPLAT18:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT17]], <8 x i64> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[INDUCTION19:%.*]] = add <8 x i64> [[DOTSPLAT18]], <i64 0, i64 2, i64 4, i64 6, i64 8, i64 10, i64 12, i64 14>
+; CHECK-NEXT: [[INDUCTION9:%.*]] = add nuw nsw <8 x i64> [[DOTSPLAT18]], <i64 0, i64 2, i64 4, i64 6, i64 8, i64 10, i64 12, i64 14>
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX14:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT22:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND15:%.*]] = phi <8 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT16:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND20:%.*]] = phi <8 x i64> [ [[INDUCTION19]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT21:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND15:%.*]] = phi <8 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT14:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND20:%.*]] = phi <8 x i64> [ [[INDUCTION9]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT15:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP18:%.*]] = sub nsw <8 x i64> splat (i64 8), [[VEC_IND15]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, <8 x
i64> [[VEC_IND15]] ; CHECK-NEXT: [[TMP20:%.*]] = add nsw <8 x i64> [[TMP18]], [[VEC_IND20]] @@ -102,8 +102,8 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [10 x i32], <8 x ptr> [[TMP19]], <8 x i64> [[TMP23]], i64 0 ; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 8), <8 x ptr> align 8 [[TMP24]], <8 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT22]] = add nuw i64 [[INDEX14]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT16]] = add <8 x i64> [[VEC_IND15]], splat (i64 16) -; CHECK-NEXT: [[VEC_IND_NEXT21]] = add <8 x i64> [[VEC_IND20]], splat (i64 16) +; CHECK-NEXT: [[VEC_IND_NEXT14]] = add nuw nsw <8 x i64> [[VEC_IND15]], splat (i64 16) +; CHECK-NEXT: [[VEC_IND_NEXT15]] = add nuw nsw <8 x i64> [[VEC_IND20]], splat (i64 16) ; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT22]], [[N_VEC7]] ; CHECK-NEXT: br i1 [[TMP25]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: @@ -134,8 +134,8 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: br label %[[VECTOR_BODY30:.*]] ; CHECK: [[VECTOR_BODY30]]: ; CHECK-NEXT: [[INDEX34:%.*]] = phi i64 [ 0, %[[VECTOR_PH25]] ], [ [[INDEX_NEXT39:%.*]], %[[VECTOR_BODY30]] ] -; CHECK-NEXT: [[VEC_IND35:%.*]] = phi <16 x i64> [ , %[[VECTOR_PH25]] ], [ [[VEC_IND_NEXT36:%.*]], %[[VECTOR_BODY30]] ] -; CHECK-NEXT: [[VEC_IND37:%.*]] = phi <16 x i64> [ , %[[VECTOR_PH25]] ], [ [[VEC_IND_NEXT38:%.*]], %[[VECTOR_BODY30]] ] +; CHECK-NEXT: [[VEC_IND35:%.*]] = phi <16 x i64> [ , %[[VECTOR_PH25]] ], [ [[VEC_IND_NEXT35:%.*]], %[[VECTOR_BODY30]] ] +; CHECK-NEXT: [[VEC_IND37:%.*]] = phi <16 x i64> [ , %[[VECTOR_PH25]] ], [ [[VEC_IND_NEXT36:%.*]], %[[VECTOR_BODY30]] ] ; CHECK-NEXT: [[TMP30:%.*]] = sub nsw <16 x i64> splat (i64 8), [[VEC_IND35]] ; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, <16 x i64> [[VEC_IND35]] ; CHECK-NEXT: [[TMP32:%.*]] = add nsw <16 x i64> [[TMP30]], [[VEC_IND37]] @@ -148,8 +148,8 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 7), <16 x ptr> align 16 [[TMP33]], <16 x i1> [[BROADCAST_SPLAT]]) ; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 7), <16 x ptr> align 8 [[TMP37]], <16 x i1> [[BROADCAST_SPLAT]]) ; CHECK-NEXT: [[INDEX_NEXT39]] = add nuw i64 [[INDEX34]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT36]] = add <16 x i64> [[VEC_IND35]], splat (i64 32) -; CHECK-NEXT: [[VEC_IND_NEXT38]] = add <16 x i64> [[VEC_IND37]], splat (i64 32) +; CHECK-NEXT: [[VEC_IND_NEXT35]] = add nuw nsw <16 x i64> [[VEC_IND35]], splat (i64 32) +; CHECK-NEXT: [[VEC_IND_NEXT36]] = add nuw nsw <16 x i64> [[VEC_IND37]], splat (i64 32) ; CHECK-NEXT: [[TMP41:%.*]] = icmp eq i64 [[INDEX_NEXT39]], [[N_VEC32]] ; CHECK-NEXT: br i1 [[TMP41]], label %[[MIDDLE_BLOCK37:.*]], label %[[VECTOR_BODY30]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK37]]: @@ -175,15 +175,15 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[TMP48:%.*]] = xor <8 x i1> [[BROADCAST_SPLAT73]], splat (i1 true) ; CHECK-NEXT: [[DOTSPLATINSERT62:%.*]] = insertelement <8 x i64> poison, i64 [[BC_RESUME_VAL42]], i64 0 ; CHECK-NEXT: [[DOTSPLAT63:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT62]], <8 x i64> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION64:%.*]] = add <8 x i64> [[DOTSPLAT63]], +; CHECK-NEXT: [[INDUCTION52:%.*]] = add nuw nsw <8 x i64> [[DOTSPLAT63]], ; CHECK-NEXT: [[DOTSPLATINSERT67:%.*]] = insertelement <8 x i64> poison, i64 
[[BC_RESUME_VAL44]], i64 0 ; CHECK-NEXT: [[DOTSPLAT68:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT67]], <8 x i64> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION69:%.*]] = add <8 x i64> [[DOTSPLAT68]], +; CHECK-NEXT: [[INDUCTION55:%.*]] = add nuw nsw <8 x i64> [[DOTSPLAT68]], ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY56:.*]] ; CHECK: [[VEC_EPILOG_VECTOR_BODY56]]: ; CHECK-NEXT: [[INDEX61:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL39]], %[[VEC_EPILOG_PH45]] ], [ [[INDEX_NEXT74:%.*]], %[[VEC_EPILOG_VECTOR_BODY56]] ] -; CHECK-NEXT: [[VEC_IND65:%.*]] = phi <8 x i64> [ [[INDUCTION64]], %[[VEC_EPILOG_PH45]] ], [ [[VEC_IND_NEXT66:%.*]], %[[VEC_EPILOG_VECTOR_BODY56]] ] -; CHECK-NEXT: [[VEC_IND70:%.*]] = phi <8 x i64> [ [[INDUCTION69]], %[[VEC_EPILOG_PH45]] ], [ [[VEC_IND_NEXT71:%.*]], %[[VEC_EPILOG_VECTOR_BODY56]] ] +; CHECK-NEXT: [[VEC_IND65:%.*]] = phi <8 x i64> [ [[INDUCTION52]], %[[VEC_EPILOG_PH45]] ], [ [[VEC_IND_NEXT61:%.*]], %[[VEC_EPILOG_VECTOR_BODY56]] ] +; CHECK-NEXT: [[VEC_IND70:%.*]] = phi <8 x i64> [ [[INDUCTION55]], %[[VEC_EPILOG_PH45]] ], [ [[VEC_IND_NEXT62:%.*]], %[[VEC_EPILOG_VECTOR_BODY56]] ] ; CHECK-NEXT: [[TMP44:%.*]] = sub nsw <8 x i64> splat (i64 8), [[VEC_IND65]] ; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, <8 x i64> [[VEC_IND65]] ; CHECK-NEXT: [[TMP46:%.*]] = add nsw <8 x i64> [[TMP44]], [[VEC_IND70]] @@ -196,8 +196,8 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 7), <8 x ptr> align 16 [[TMP47]], <8 x i1> [[BROADCAST_SPLAT73]]) ; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 7), <8 x ptr> align 8 [[TMP51]], <8 x i1> [[BROADCAST_SPLAT73]]) ; CHECK-NEXT: [[INDEX_NEXT74]] = add nuw i64 [[INDEX61]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT66]] = add <8 x i64> [[VEC_IND65]], splat (i64 16) -; CHECK-NEXT: [[VEC_IND_NEXT71]] = add <8 x i64> [[VEC_IND70]], splat (i64 16) +; CHECK-NEXT: [[VEC_IND_NEXT61]] = add nuw nsw <8 x i64> [[VEC_IND65]], splat (i64 16) +; CHECK-NEXT: [[VEC_IND_NEXT62]] = add nuw nsw <8 x i64> [[VEC_IND70]], splat (i64 16) ; CHECK-NEXT: [[TMP55:%.*]] = icmp eq i64 [[INDEX_NEXT74]], [[N_VEC53]] ; CHECK-NEXT: br i1 [[TMP55]], label %[[VEC_EPILOG_MIDDLE_BLOCK63:.*]], label %[[VEC_EPILOG_VECTOR_BODY56]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK63]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/transform-narrow-interleave-to-widen-memory-gaps.ll b/llvm/test/Transforms/LoopVectorize/X86/transform-narrow-interleave-to-widen-memory-gaps.ll index eca70b3af159c..86ac78be5bda9 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/transform-narrow-interleave-to-widen-memory-gaps.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/transform-narrow-interleave-to-widen-memory-gaps.ll @@ -29,7 +29,7 @@ define void @load_store_interleave_group_with_gaps(ptr noalias %data, i64 nounde ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[DATA]], <4 x i64> [[TMP5]] ; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[STRIDED_VEC2]], <4 x ptr> align 8 [[TMP6]], <4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: 
[[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll b/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll index efc9a4fa57292..8184cad22ae8b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll @@ -44,7 +44,7 @@ define void @iv.4_used_as_vector_and_first_lane(ptr %src, ptr noalias %dst) { ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[TMP14]], ptr align 4 [[TMP34]], <4 x i1> [[TMP18]]) ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[TMP15]], ptr align 4 [[TMP35]], <4 x i1> [[TMP19]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD_3]], splat (i64 4) ; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll index f9570405ecabc..b5d42a8f71430 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll @@ -39,7 +39,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; DISABLED_MASKED_STRIDED: vector.body: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp samesign ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] @@ -124,7 +124,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]] ; DISABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP49]], ptr align 1 [[TMP50]], <8 x i1> [[TMP0]]) ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <8 x i32> [[VEC_IND]], splat (i32 8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP51]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -139,7 +139,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; ENABLED_MASKED_STRIDED: vector.body: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], 
[[VECTOR_BODY]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp samesign ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl i32 [[INDEX]], 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 [[TMP1]] ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <16 x i32> @@ -148,7 +148,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]] ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[STRIDED_VEC]], ptr align 1 [[TMP3]], <8 x i1> [[TMP0]]) ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <8 x i32> [[VEC_IND]], splat (i32 8) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1016 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP4]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.body: @@ -212,7 +212,7 @@ define dso_local void @masked_strided1_optsize(ptr noalias nocapture readonly %p ; DISABLED_MASKED_STRIDED: vector.body: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp samesign ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] @@ -297,7 +297,7 @@ define dso_local void @masked_strided1_optsize(ptr noalias nocapture readonly %p ; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]] ; DISABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP49]], ptr align 1 [[TMP50]], <8 x i1> [[TMP0]]) ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <8 x i32> [[VEC_IND]], splat (i32 8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP51]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -312,7 +312,7 @@ define dso_local void @masked_strided1_optsize(ptr noalias nocapture readonly %p ; ENABLED_MASKED_STRIDED: vector.body: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[ENTRY]] ], [ 
[[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp samesign ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl i32 [[INDEX]], 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 [[TMP1]] ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <16 x i32> @@ -322,7 +322,7 @@ define dso_local void @masked_strided1_optsize(ptr noalias nocapture readonly %p ; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]] ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[STRIDED_VEC]], ptr align 1 [[TMP4]], <8 x i1> [[TMP0]]) ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <8 x i32> [[VEC_IND]], splat (i32 8) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.end: @@ -903,7 +903,7 @@ define dso_local void @unconditional_strided1_optsize(ptr noalias nocapture read ; DISABLED_MASKED_STRIDED-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[Q:%.*]], i32 [[INDEX]] ; DISABLED_MASKED_STRIDED-NEXT: store <8 x i8> [[TMP32]], ptr [[TMP33]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <8 x i32> [[VEC_IND]], splat (i32 8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP34:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP34]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -1162,7 +1162,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no ; DISABLED_MASKED_STRIDED: vector.body: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE60:%.*]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE60]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp samesign ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] @@ -1472,7 +1472,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]] ; DISABLED_MASKED_STRIDED: pred.store.continue60: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <8 x i32> [[VEC_IND]], splat (i32 8) ; 
DISABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP165]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -1487,7 +1487,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no ; ENABLED_MASKED_STRIDED: vector.body: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp samesign ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl i32 [[INDEX]], 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 [[TMP1]] ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <16 x i32> @@ -1500,7 +1500,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <16 x i32> ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[INTERLEAVED_VEC]], ptr align 1 [[TMP6]], <16 x i1> [[INTERLEAVED_MASK]]) ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <8 x i32> [[VEC_IND]], splat (i32 8) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP8]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.end: @@ -1877,7 +1877,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]] ; DISABLED_MASKED_STRIDED: pred.store.continue60: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 -8) +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <8 x i32> [[VEC_IND]], splat (i32 -8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP165]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -2202,7 +2202,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]] ; ENABLED_MASKED_STRIDED: pred.store.continue60: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 -8) +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <8 x i32> [[VEC_IND]], splat (i32 -8) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP165]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.end: diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll 
b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll index 500e60372ac13..cfc588a7f5d89 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll @@ -64,7 +64,7 @@ define dso_local void @test1(ptr noalias nocapture %points, ptr noalias nocaptur ; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP26]], ptr [[TMP21]], align 2 ; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP27]], ptr [[TMP23]], align 2 ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP28]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -352,7 +352,7 @@ define dso_local void @test(ptr noalias nocapture %points, ptr noalias nocapture ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE6]] ; DISABLED_MASKED_STRIDED: pred.store.continue6: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP19]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -405,7 +405,7 @@ define dso_local void @test(ptr noalias nocapture %points, ptr noalias nocapture ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE6]] ; ENABLED_MASKED_STRIDED: pred.store.continue6: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP19]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.end: diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll index 67fe87a328976..5999a3581e467 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll @@ -339,7 +339,7 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; SINK-GATHER-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP1]], <8 x i32> [[TMP64]], <8 x i32> [[BROADCAST_SPLAT16]] ; SINK-GATHER-NEXT: [[TMP66]] = add <8 x i32> [[VEC_PHI]], [[PREDPHI]] ; SINK-GATHER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; SINK-GATHER-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) +; SINK-GATHER-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <8 x i64> [[VEC_IND]], splat (i64 8) ; SINK-GATHER-NEXT: [[TMP67:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SINK-GATHER-NEXT: br i1 [[TMP67]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; SINK-GATHER: middle.block: diff --git 
a/llvm/test/Transforms/LoopVectorize/assume.ll b/llvm/test/Transforms/LoopVectorize/assume.ll index b41ddebc0c362..a9a0b33f542af 100644 --- a/llvm/test/Transforms/LoopVectorize/assume.ll +++ b/llvm/test/Transforms/LoopVectorize/assume.ll @@ -161,7 +161,7 @@ define void @predicated_assume(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-NEXT: store <2 x float> [[TMP5]], ptr [[TMP7]], align 4 ; CHECK-NEXT: store <2 x float> [[TMP6]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[STEP_ADD]], splat (i64 2) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/check-prof-info.ll b/llvm/test/Transforms/LoopVectorize/check-prof-info.ll index b59ad8481597c..af72f3641a635 100644 --- a/llvm/test/Transforms/LoopVectorize/check-prof-info.ll +++ b/llvm/test/Transforms/LoopVectorize/check-prof-info.ll @@ -17,7 +17,7 @@ define void @_Z3foov() { ; CHECK: vector.ph: ; CHECK: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK: br i1 [[TMP6:%.*]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: br i1 [[TMP4:%.*]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF0:![0-9]+]], !llvm.loop [[LOOP1:![0-9]+]] ; CHECK: middle.block: ; CHECK: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: @@ -28,7 +28,7 @@ define void @_Z3foov() { ; CHECK-MASKED: vector.ph: ; CHECK-MASKED: br label [[VECTOR_BODY:%.*]] ; CHECK-MASKED: vector.body: -; CHECK-MASKED: br i1 [[TMP18:%.*]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-MASKED: br i1 [[TMP16:%.*]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF0:![0-9]+]], !llvm.loop [[LOOP1:![0-9]+]] ; CHECK-MASKED: middle.block: ; CHECK-MASKED: br label [[FOR_BODY:%.*]] ; CHECK-MASKED: for.cond.cleanup: @@ -41,15 +41,15 @@ define void @_Z3foov() { ; CHECK-SCALABLE: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-SCALABLE: br label [[VECTOR_BODY:%.*]] ; CHECK-SCALABLE: vector.body: -; CHECK-SCALABLE: [[VEC_IND_NEXT:%.*]] = add [[VEC_IND:%.*]], [[BROADCAST_SPLAT]] -; CHECK-SCALABLE: br i1 [[TMP16:%.*]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-SCALABLE: [[VEC_IND_NEXT:%.*]] = add nuw nsw [[VEC_IND:%.*]], [[BROADCAST_SPLAT]] +; CHECK-SCALABLE: br i1 [[TMP11:%.*]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK-SCALABLE: middle.block: -; CHECK-SCALABLE: br i1 [[CMP_N:%.*]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]], !prof [[PROF5:![0-9]+]] +; CHECK-SCALABLE: br i1 [[CMP_N:%.*]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]], !prof [[PROF6:![0-9]+]] ; CHECK-SCALABLE: scalar.ph: ; CHECK-SCALABLE: br label [[FOR_BODY:%.*]] ; CHECK-SCALABLE: for.cond.cleanup: ; CHECK-SCALABLE: for.body: -; CHECK-SCALABLE: br i1 [[EXITCOND:%.*]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-SCALABLE: br i1 [[EXITCOND:%.*]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !prof [[PROF7:![0-9]+]], 
!llvm.loop [[LOOP8:![0-9]+]] ; entry: br label %for.body @@ -80,14 +80,14 @@ define void @_Z3foo2v() { ; CHECK: vector.ph: ; CHECK: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK: br i1 [[TMP6:%.*]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF1]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: br i1 [[TMP4:%.*]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF0]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK: br label [[SCALAR_PH:%.+]] +; CHECK: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: ; CHECK: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK: for.body: -; CHECK: br i1 [[EXITCOND:%.*]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]], !prof [[PROF8:![0-9]+]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK: br i1 [[EXITCOND:%.*]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]] ; ; CHECK-MASKED-LABEL: @_Z3foo2v( ; CHECK-MASKED: entry: @@ -95,14 +95,14 @@ define void @_Z3foo2v() { ; CHECK-MASKED: vector.ph: ; CHECK-MASKED: br label [[VECTOR_BODY:%.*]] ; CHECK-MASKED: vector.body: -; CHECK-MASKED: br i1 [[TMP18:%.*]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF1]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-MASKED: br i1 [[TMP16:%.*]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF0]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK-MASKED: middle.block: -; CHECK-MASKED: br label [[SCALAR_PH:%.+]] +; CHECK-MASKED: br label [[SCALAR_PH:%.*]] ; CHECK-MASKED: scalar.ph: ; CHECK-MASKED: br label [[FOR_BODY:%.*]] ; CHECK-MASKED: for.cond.cleanup: ; CHECK-MASKED: for.body: -; CHECK-MASKED: br i1 [[EXITCOND:%.*]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]], !prof [[PROF8:![0-9]+]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-MASKED: br i1 [[EXITCOND:%.*]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]] ; ; CHECK-SCALABLE-LABEL: @_Z3foo2v( ; CHECK-SCALABLE: entry: @@ -112,15 +112,15 @@ define void @_Z3foo2v() { ; CHECK-SCALABLE: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-SCALABLE: br label [[VECTOR_BODY:%.*]] ; CHECK-SCALABLE: vector.body: -; CHECK-SCALABLE: [[VEC_IND_NEXT:%.*]] = add [[VEC_IND:%.*]], [[BROADCAST_SPLAT]] -; CHECK-SCALABLE: br i1 [[TMP16:%.*]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF1]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-SCALABLE: [[VEC_IND_NEXT:%.*]] = add nuw nsw [[VEC_IND:%.*]], [[BROADCAST_SPLAT]] +; CHECK-SCALABLE: br i1 [[TMP11:%.*]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF1]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-SCALABLE: middle.block: -; CHECK-SCALABLE: br i1 [[CMP_N:%.*]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]], !prof [[PROF5]] +; CHECK-SCALABLE: br i1 [[CMP_N:%.*]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]], !prof [[PROF6]] ; CHECK-SCALABLE: scalar.ph: ; CHECK-SCALABLE: br label [[FOR_BODY:%.*]] ; CHECK-SCALABLE: for.cond.cleanup: ; CHECK-SCALABLE: for.body: -; CHECK-SCALABLE: br i1 [[EXITCOND:%.*]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-SCALABLE: br i1 [[EXITCOND:%.*]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !prof [[PROF11:![0-9]+]], !llvm.loop [[LOOP12:![0-9]+]] ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll index 
45405511abe90..1fe3962dfd072 100644 --- a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll @@ -1612,7 +1612,7 @@ define void @pr61396_pointer_used_as_both_stored_value_and_pointer_operand_by_st ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 0 ; CHECK-NEXT: store <4 x ptr> [[TMP0]], ptr [[TMP1]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240 ; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -1642,7 +1642,7 @@ define void @pr61396_pointer_used_as_both_stored_value_and_pointer_operand_by_st ; INTER-NEXT: [[TMP1:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 0 ; INTER-NEXT: store <4 x ptr> [[TMP0]], ptr [[TMP1]], align 8 ; INTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; INTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; INTER-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; INTER-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240 ; INTER-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; INTER: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/cse-gep-source-element-type.ll b/llvm/test/Transforms/LoopVectorize/cse-gep-source-element-type.ll index 49eb8b349a274..5d92c127aff93 100644 --- a/llvm/test/Transforms/LoopVectorize/cse-gep-source-element-type.ll +++ b/llvm/test/Transforms/LoopVectorize/cse-gep-source-element-type.ll @@ -87,7 +87,7 @@ define void @cse_wide_gep(ptr noalias %A, ptr noalias %B, ptr noalias %C, i64 %n ; CHECK-NEXT: store <4 x ptr> [[TMP2]], ptr [[TMP6]], align 8 ; CHECK-NEXT: store <4 x ptr> [[TMP3]], ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll index 5177d7b6e0090..1773b2aab7d87 100644 --- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll +++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll @@ -43,12 +43,13 @@ define void @deref_assumption_in_header_constant_trip_count(ptr noalias noundef ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP30]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP32]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; 
CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop.header @@ -102,8 +103,9 @@ define void @align_deref_assumption_in_header_constant_trip_count_loop_invariant ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 4) ] @@ -172,12 +174,13 @@ define void @deref_assumption_too_small_in_header_constant_trip_count(ptr noalia ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP30]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP32]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop.header @@ -247,12 +250,13 @@ define void @deref_assumption_in_header_constant_trip_count_align_1(ptr noalias ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP18]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop.header @@ -322,12 +326,13 @@ define void @deref_assumption_in_header_constant_trip_count_align_via_arg_attrib ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP18]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop.header @@ -397,12 +402,13 @@ define void @deref_assumption_in_header_constant_trip_count_align_not_known(ptr ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP18]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 
[[TMP0]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop.header @@ -468,12 +474,13 @@ define void @deref_assumption_in_then_constant_trip_count(ptr noalias noundef %a ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP26]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop.header @@ -543,12 +550,13 @@ define void @deref_assumption_in_latch_constant_trip_count(ptr noalias noundef % ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP30]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP32]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop.header @@ -621,7 +629,7 @@ define void @deref_assumption_in_header_variable_trip_count(ptr noalias noundef ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP30]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP32]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -679,8 +687,9 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_1(ptr noali ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %a, i64 4000) ] @@ -750,8 +759,9 @@ define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_1 ; CHECK-NEXT: 
[[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %a, i64 3999) ] @@ -803,8 +813,9 @@ define void @align_and_deref_assumption_in_preheader_constant_trip_count_align_4 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 4000) ] @@ -857,8 +868,9 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_4_known_via ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %a, i64 4000) ] @@ -928,8 +940,9 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_4_not_known ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %a, i64 4000) ] @@ -999,8 +1012,9 @@ define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_4 ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %a, i64 3999) ] @@ -1068,8 +1082,9 @@ define void @may_free_align_deref_assumption_in_header_constant_trip_count_loop_ ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 4) ] @@ -1138,8 +1153,9 @@ define void @may_free_local_ptr_align_deref_assumption_in_header_constant_trip_c ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; 
CHECK-NEXT: ret void ; entry: %a = call ptr @get_ptr() @@ -1195,8 +1211,9 @@ define void @deref_assumption_in_header_constant_trip_count_nofree_via_context(p ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 4000) ] @@ -1267,8 +1284,9 @@ define void @deref_assumption_in_header_constant_trip_count_may_free(ptr noalias ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 4000) ] @@ -1339,8 +1357,9 @@ define void @deref_assumption_in_header_constant_trip_count_nofree_via_context_b ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 4000) ] @@ -1418,8 +1437,9 @@ define void @deref_assumption_in_header_constant_trip_count_multiple_loop_predec ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 4000) ] diff --git a/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll index 5e3a70222d7bb..6c9e3694b2a34 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll @@ -23,7 +23,7 @@ define i64 @select_icmp_const(ptr %a, i64 %n) { ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 3) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -46,18 +46,18 @@ define i64 @select_icmp_const(ptr %a, i64 %n) { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0 ; CHECK-NEXT: 
[[DOTSPLAT9:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT8]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[DOTSPLAT9]], <i64 0, i64 1, i64 2, i64 3> +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <4 x i64> [[DOTSPLAT9]], <i64 0, i64 1, i64 2, i64 3> ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: ; CHECK-NEXT: [[TMP7:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND5:%.*]] = phi <4 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND5:%.*]] = phi <4 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i64> [ [[DOTSPLAT]], %[[VEC_EPILOG_PH]] ], [ [[TMP11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, ptr [[TMP8]], align 8 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD8]], splat (i64 3) ; CHECK-NEXT: [[TMP11]] = select <4 x i1> [[TMP10]], <4 x i64> [[VEC_IND5]], <4 x i64> [[VEC_PHI7]] ; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[TMP7]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT6]] = add <4 x i64> [[VEC_IND5]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT11]] = add nuw nsw <4 x i64> [[VEC_IND5]], splat (i64 4) ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP12]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: @@ -124,7 +124,7 @@ define i64 @select_fcmp_const_fast(ptr %a, i64 %n) { ; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -147,18 +147,18 @@ define i64 @select_fcmp_const_fast(ptr %a, i64 %n) { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT8]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[DOTSPLAT9]], <i64 0, i64 1, i64 2, i64 3> +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <4 x i64> [[DOTSPLAT9]], <i64 0, i64 1, i64 2, i64 3> ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: ; CHECK-NEXT: [[TMP7:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND5:%.*]] = phi <4 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND5:%.*]] = phi <4 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i64> [ [[DOTSPLAT]], %[[VEC_EPILOG_PH]] ], [ [[TMP11:%.*]],
%[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP7]] ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[TMP10:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD8]], splat (float 3.000000e+00) ; CHECK-NEXT: [[TMP11]] = select <4 x i1> [[TMP10]], <4 x i64> [[VEC_IND5]], <4 x i64> [[VEC_PHI7]] ; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[TMP7]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT6]] = add <4 x i64> [[VEC_IND5]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT11]] = add nuw nsw <4 x i64> [[VEC_IND5]], splat (i64 4) ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP12]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: @@ -231,7 +231,7 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) { ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], splat (i8 3) ; CHECK-NEXT: [[TMP8]] = select <4 x i1> [[TMP7]], <4 x i8> [[VEC_IND]], <4 x i8> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i8> [[VEC_IND]], splat (i8 4) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -257,11 +257,11 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT8]], <4 x i8> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i8> [[DOTSPLAT9]], <i8 0, i8 1, i8 2, i8 3> +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <4 x i8> [[DOTSPLAT9]], <i8 0, i8 1, i8 2, i8 3> ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX4:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND5:%.*]] = phi <4 x i8> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND5:%.*]] = phi <4 x i8> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i8> [ [[DOTSPLAT]], %[[VEC_EPILOG_PH]] ], [ [[TMP17:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX4]] to i8 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[A]], i8 [[OFFSET_IDX]] @@ -269,7 +269,7 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) { ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD10]], splat (i8 3) ; CHECK-NEXT: [[TMP17]] = select <4 x i1> [[TMP16]], <4 x i8> [[VEC_IND5]], <4 x i8> [[VEC_PHI7]] ; CHECK-NEXT: [[INDEX_NEXT11]] = add nuw i32 [[INDEX4]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT6]] = add <4 x i8> [[VEC_IND5]], splat (i8 4) +; CHECK-NEXT: [[VEC_IND_NEXT11]] = add nuw nsw <4 x i8> [[VEC_IND5]], splat (i8 4) ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT11]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP18]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK:
[[VEC_EPILOG_MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll index 15daf90ad770c..813c040bd78f7 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll @@ -495,7 +495,7 @@ define i64 @test_reduction_with_widen_induction_order_1(ptr %A, i64 %N) { ; CHECK-NEXT: [[TMP1]] = add <4 x i64> [[VEC_PHI]], [[WIDE_LOAD]] ; CHECK-NEXT: store <4 x i64> [[VEC_IND]], ptr [[TMP0]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: @@ -513,7 +513,7 @@ define i64 @test_reduction_with_widen_induction_order_1(ptr %A, i64 %N) { ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <4 x i64> [[BROADCAST_SPLAT]], ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] @@ -524,7 +524,7 @@ define i64 @test_reduction_with_widen_induction_order_1(ptr %A, i64 %N) { ; CHECK-NEXT: [[TMP6]] = add <4 x i64> [[VEC_PHI6]], [[WIDE_LOAD7]] ; CHECK-NEXT: store <4 x i64> [[VEC_IND5]], ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[INDEX4]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT9]] = add <4 x i64> [[VEC_IND5]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT9]] = add nuw nsw <4 x i64> [[VEC_IND5]], splat (i64 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP7]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: vec.epilog.middle.block: @@ -589,7 +589,7 @@ define i64 @test_reduction_with_widen_induction_order_2(ptr %A, i64 %N) { ; CHECK-NEXT: [[TMP1]] = add <4 x i64> [[VEC_PHI]], [[WIDE_LOAD]] ; CHECK-NEXT: store <4 x i64> [[VEC_IND]], ptr [[TMP0]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: middle.block: @@ -607,7 +607,7 @@ define i64 @test_reduction_with_widen_induction_order_2(ptr %A, i64 %N) { ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 
x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <4 x i64> [[BROADCAST_SPLAT]], ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] @@ -618,7 +618,7 @@ define i64 @test_reduction_with_widen_induction_order_2(ptr %A, i64 %N) { ; CHECK-NEXT: [[TMP6]] = add <4 x i64> [[VEC_PHI5]], [[WIDE_LOAD7]] ; CHECK-NEXT: store <4 x i64> [[VEC_IND6]], ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[INDEX4]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT9]] = add <4 x i64> [[VEC_IND6]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT9]] = add nuw nsw <4 x i64> [[VEC_IND6]], splat (i64 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP7]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: vec.epilog.middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll index d3c8c1304b588..2f05435bc75ba 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll @@ -689,6 +689,17 @@ define double @test_chained_first_order_recurrence_sink_users_1(ptr %ptr, i64 %n ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ , %[[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x double> [ , %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = add i64 1, [[INDEX1]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[PTR]], i64 [[OFFSET_IDX1]] +; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x double>, ptr [[TMP3]], align 8 +; CHECK-NEXT: [[TMP2]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[WIDE_LOAD]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR1]], <4 x double> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x double> splat (double 1.000000e+01), [[TMP4]] +; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x double> [[TMP5]], [[TMP2]] +; CHECK-NEXT: store <4 x double> [[TMP8]], ptr [[TMP3]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 4 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[INDEX]] +; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x double> [[TMP2]], i32 3 @@ -801,7 +812,7 @@ define i64 @test_first_order_recurrences_and_induction(ptr %ptr, i64 %n) { ; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[TMP1]], splat (i64 10) ; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], 
!llvm.loop [[LOOP20:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -865,7 +876,7 @@ define i64 @test_first_order_recurrences_and_induction2(ptr %ptr, i64 %n) { ; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[TMP1]], splat (i64 10) ; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index bd0c098d335a2..cebd52fa7f866 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -875,14 +875,14 @@ define i32 @PR27246() { ; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = sub i32 [[I_016]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[I_016]], i64 0 ; UNROLL-NO-IC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], <i32 0, i32 -1, i32 -2, i32 -3> +; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add nsw <4 x i32> [[DOTSPLAT]], <i32 0, i32 -1, i32 -2, i32 -3> ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 -4) ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 -4) +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i32> [[STEP_ADD]], splat (i32 -4) ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -965,13 +965,13 @@ define i32 @PR27246() { ; SINK-AFTER-NEXT: [[IND_END:%.*]] = sub i32 [[I_016]], [[N_VEC]] ; SINK-AFTER-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[I_016]], i64 0 ; SINK-AFTER-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; SINK-AFTER-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], <i32 0, i32 -1, i32 -2, i32 -3> +; SINK-AFTER-NEXT: [[INDUCTION:%.*]] = add nsw <4 x i32> [[DOTSPLAT]], <i32 0, i32 -1, i32 -2, i32 -3> ; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]] ; SINK-AFTER: vector.body: ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 -4) +; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i32> [[VEC_IND]], splat (i32 -4) ; SINK-AFTER-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ;
SINK-AFTER: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/induction-step.ll b/llvm/test/Transforms/LoopVectorize/induction-step.ll index 53d5ac472c892..1c7ec9b28bd71 100644 --- a/llvm/test/Transforms/LoopVectorize/induction-step.ll +++ b/llvm/test/Transforms/LoopVectorize/induction-step.ll @@ -37,19 +37,19 @@ define void @induction_with_global(i32 %init, ptr noalias nocapture %A, i32 %N) ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[TMP0]], i64 0 ; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = mul <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[DOTSPLAT3]] -; CHECK-NEXT: [[INDUCTION4:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP0]], 8 +; CHECK-NEXT: [[TMP6:%.*]] = mul nsw <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[DOTSPLAT3]] +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <8 x i32> [[DOTSPLAT]], [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = mul nsw i32 [[TMP0]], 8 ; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> poison, i32 [[TMP7]], i64 0 ; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[TMP8:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ [[INDUCTION4]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP8]] ; CHECK-NEXT: store <8 x i32> [[VEC_IND]], ptr [[TMP9]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP8]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], [[DOTSPLAT6]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <8 x i32> [[VEC_IND]], [[DOTSPLAT6]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -126,19 +126,19 @@ define i32 @induction_with_loop_inv(i32 %init, ptr noalias nocapture %A, i32 %N, ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[J_012]], i64 0 ; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[DOTSPLAT3]] -; CHECK-NEXT: [[INDUCTION4:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP4]] -; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[J_012]], 8 +; CHECK-NEXT: [[TMP8:%.*]] = mul nsw <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[DOTSPLAT3]] +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <8 x i32> [[DOTSPLAT]], [[TMP8]] +; CHECK-NEXT: [[TMP5:%.*]] = mul nsw i32 [[J_012]], 8 ; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> poison, i32 [[TMP5]], i64 0 ; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[TMP6:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] =
phi <8 x i32> [ [[INDUCTION4]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]] ; CHECK-NEXT: store <8 x i32> [[VEC_IND]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP6]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], [[DOTSPLAT6]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <8 x i32> [[VEC_IND]], [[DOTSPLAT6]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -219,19 +219,19 @@ define void @non_primary_iv_loop_inv_trunc(ptr %a, i64 %n, i64 %step) { ; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[STEP]] to i32 ; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> poison, i32 [[TMP3]], i64 0 ; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[DOTSPLAT6]] -; CHECK-NEXT: [[INDUCTION7:%.*]] = add <8 x i32> zeroinitializer, [[TMP4]] -; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP3]], 8 +; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[DOTSPLAT6]] +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <8 x i32> zeroinitializer, [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw nsw i32 [[TMP3]], 8 ; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <8 x i32> poison, i32 [[TMP5]], i64 0 ; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT8]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[TMP6:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND10:%.*]] = phi <8 x i32> [ [[INDUCTION7]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND10:%.*]] = phi <8 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]] ; CHECK-NEXT: store <8 x i32> [[VEC_IND10]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP6]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT11]] = add <8 x i32> [[VEC_IND10]], [[DOTSPLAT9]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <8 x i32> [[VEC_IND10]], [[DOTSPLAT9]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index 66e4de5da7955..76fa6bdb543a7 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -29,7 +29,7 @@ define void @multi_int_induction(ptr %A, i32 %N) { ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP4]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label 
[[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -70,7 +70,7 @@ define void @multi_int_induction(ptr %A, i32 %N) { ; IND-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; IND-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP3]], align 4 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; IND-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; IND-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IND: middle.block: @@ -159,7 +159,7 @@ define void @multi_int_induction(ptr %A, i32 %N) { ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP4]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP6]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[STEP_ADD]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -1578,7 +1578,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; CHECK-NEXT: store i32 [[TMP16]], ptr [[TMP18]], align 1, !alias.scope [[META20:![0-9]+]], !noalias [[META17]] ; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP19]], align 1, !alias.scope [[META20]], !noalias [[META17]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: @@ -1631,18 +1631,18 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; IND-NEXT: [[TMP10:%.*]] = shl nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; IND-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP10]], i64 0 ; IND-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP10]], i64 1 -; IND-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]] -; IND-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]] -; IND-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 1, !alias.scope [[META17:![0-9]+]] -; IND-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 1, !alias.scope [[META17]] +; IND-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP11]] +; IND-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP13]] +; IND-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP12]], align 1, !alias.scope [[META17:![0-9]+]] +; IND-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP24]], align 1, !alias.scope [[META17]] ; IND-NEXT: [[DOTSPLIT:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P]], i64 [[INDEX]] ; IND-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[DOTSPLIT]], i64 4 ; IND-NEXT: [[TMP17:%.*]] = getelementptr [[PAIR_I32]], ptr [[P]], i64 [[INDEX]] ; IND-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP17]], i64 12 -; IND-NEXT: store i32 [[TMP25]], ptr [[TMP16]], align 1, !alias.scope [[META20:![0-9]+]], !noalias 
[[META17]] +; IND-NEXT: store i32 [[TMP14]], ptr [[TMP16]], align 1, !alias.scope [[META20:![0-9]+]], !noalias [[META17]] ; IND-NEXT: store i32 [[TMP15]], ptr [[TMP18]], align 1, !alias.scope [[META20]], !noalias [[META17]] ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; IND-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; IND-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; IND: middle.block: @@ -1801,7 +1801,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; UNROLL-NO-IC-NEXT: store i32 [[TMP25]], ptr [[TMP29]], align 1, !alias.scope [[META20]], !noalias [[META17]] ; UNROLL-NO-IC-NEXT: store i32 [[TMP26]], ptr [[TMP30]], align 1, !alias.scope [[META20]], !noalias [[META17]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[STEP_ADD]], splat (i64 2) ; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -2439,7 +2439,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) { ; CHECK-NEXT: store i16 [[TMP9]], ptr [[TMP7]], align 2 ; CHECK-NEXT: store i16 [[TMP10]], ptr [[TMP8]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: middle.block: @@ -2488,7 +2488,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) { ; IND-NEXT: store i16 [[TMP8]], ptr [[TMP6]], align 2 ; IND-NEXT: store i16 [[TMP9]], ptr [[TMP7]], align 2 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; IND-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; IND-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; IND: middle.block: @@ -2611,7 +2611,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) { ; UNROLL-NO-IC-NEXT: store i16 [[TMP17]], ptr [[TMP13]], align 2 ; UNROLL-NO-IC-NEXT: store i16 [[TMP18]], ptr [[TMP14]], align 2 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[STEP_ADD]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -4048,7 +4048,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]] ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; CHECK-NEXT: 
[[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; CHECK: middle.block: @@ -4081,7 +4081,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; IND-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] ; IND-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; IND-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; IND-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; IND: middle.block: @@ -4155,7 +4155,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP3]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[STEP_ADD]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -4249,7 +4249,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP5]] ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] ; CHECK: middle.block: @@ -4287,7 +4287,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; IND-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]] ; IND-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; IND-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; IND-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] ; IND: middle.block: @@ -4377,7 +4377,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP7]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP9]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[STEP_ADD]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; 
UNROLL-NO-IC-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -4470,7 +4470,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[I]], [[N_VEC]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[I]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1> +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT]], <i32 0, i32 1> ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -4479,7 +4479,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[OFFSET_IDX]] ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] ; CHECK: middle.block: @@ -4508,7 +4508,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; IND-NEXT: [[IND_END:%.*]] = add i32 [[I]], [[N_VEC]] ; IND-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[I]], i64 0 ; IND-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; IND-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1> +; IND-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT]], <i32 0, i32 1> ; IND-NEXT: br label [[VECTOR_BODY:%.*]] ; IND: vector.body: ; IND-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -4518,7 +4518,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; IND-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP1]] ; IND-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; IND-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; IND-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] ; IND: middle.block: @@ -4548,7 +4548,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; UNROLL-NEXT: [[IND_END:%.*]] = add i32 [[I]], [[N_VEC]] ; UNROLL-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[I]], i64 0 ; UNROLL-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; UNROLL-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1> +; UNROLL-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT]], <i32 0, i32 1> ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -4592,7 +4592,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] 
= add i32 [[I]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[I]], i64 0 ; UNROLL-NO-IC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1> +; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT]], <i32 0, i32 1> ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -4604,7 +4604,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP4]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[STEP_ADD]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -4633,7 +4633,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; INTERLEAVE-NEXT: [[IND_END:%.*]] = add i32 [[I]], [[N_VEC]] ; INTERLEAVE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[I]], i64 0 ; INTERLEAVE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; INTERLEAVE-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], <i32 0, i32 1, i32 2, i32 3> +; INTERLEAVE-NEXT: [[INDUCTION:%.*]] = add nuw nsw <4 x i32> [[DOTSPLAT]], <i32 0, i32 1, i32 2, i32 3> ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -4698,7 +4698,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] ; CHECK: middle.block: @@ -4736,7 +4736,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) { ; IND-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; IND-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP0]], align 4 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) +; IND-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 4) ; IND-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] ; IND: middle.block: @@ -4819,7 +4819,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) { ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP3]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: 
[[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 4) +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[STEP_ADD]], splat (i32 4) ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -4943,7 +4943,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; CHECK-NEXT: [[TMP14:%.*]] = sext <2 x i16> [[PREDPHI]] to <2 x i32> ; CHECK-NEXT: [[TMP15]] = or <2 x i32> [[VEC_PHI]], [[TMP14]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i16> [[VEC_IND]], splat (i16 2) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] ; CHECK: middle.block: @@ -4986,7 +4986,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; IND-NEXT: [[TMP13:%.*]] = sext <2 x i16> [[PREDPHI]] to <2 x i32> ; IND-NEXT: [[TMP14]] = or <2 x i32> [[VEC_PHI]], [[TMP13]] ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) +; IND-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i16> [[VEC_IND]], splat (i16 2) ; IND-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20 ; IND-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] ; IND: middle.block: @@ -5121,7 +5121,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; UNROLL-NO-IC-NEXT: [[TMP28]] = or <2 x i32> [[VEC_PHI]], [[TMP26]] ; UNROLL-NO-IC-NEXT: [[TMP29]] = or <2 x i32> [[VEC_PHI1]], [[TMP27]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[STEP_ADD]], splat (i16 2) +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i16> [[STEP_ADD]], splat (i16 2) ; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20 ; UNROLL-NO-IC-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -5309,9 +5309,9 @@ define i64 @trunc_with_first_order_recurrence() { ; CHECK-NEXT: [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64> ; CHECK-NEXT: [[TMP10]] = add <2 x i64> [[TMP6]], [[TMP9]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) -; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], splat (i32 2) -; CHECK-NEXT: [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND4]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT3]] = add nuw nsw <2 x i32> [[VEC_IND2]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT5]] = add nuw nsw <2 x i32> [[VEC_IND4]], splat (i32 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]] ; CHECK: middle.block: @@ -5360,18 +5360,18 @@ define i64 @trunc_with_first_order_recurrence() { ; IND-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND2]], <2 x i32> <i32 1, i32 2> ; IND-NEXT: [[TMP1:%.*]] = mul <2 x i32> [[VEC_IND]], [[VEC_IND2]] ; IND-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], splat (i32 42) -; IND-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP0]], [[VEC_IND2]] +; IND-NEXT: [[TMP3:%.*]] = add nuw <2 x 
i32> [[TMP0]], [[VEC_IND2]] ; IND-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP3]], [[TMP2]] ; IND-NEXT: [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64> ; IND-NEXT: [[TMP6:%.*]] = add <2 x i64> [[VEC_PHI]], [[TMP5]] -; IND-NEXT: [[TMP7:%.*]] = shl <2 x i32> [[VEC_IND4]], splat (i32 1) +; IND-NEXT: [[TMP7:%.*]] = shl nuw <2 x i32> [[VEC_IND4]], splat (i32 1) ; IND-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP2]], [[TMP7]] ; IND-NEXT: [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64> ; IND-NEXT: [[TMP10]] = add <2 x i64> [[TMP6]], [[TMP9]] ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) -; IND-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], splat (i32 2) -; IND-NEXT: [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND4]], splat (i32 2) +; IND-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) +; IND-NEXT: [[VEC_IND_NEXT3]] = add nuw nsw <2 x i32> [[VEC_IND2]], splat (i32 2) +; IND-NEXT: [[VEC_IND_NEXT5]] = add nuw nsw <2 x i32> [[VEC_IND4]], splat (i32 2) ; IND-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112 ; IND-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]] ; IND: middle.block: @@ -5518,9 +5518,9 @@ define i64 @trunc_with_first_order_recurrence() { ; UNROLL-NO-IC-NEXT: [[TMP20]] = add <2 x i64> [[TMP12]], [[TMP18]] ; UNROLL-NO-IC-NEXT: [[TMP21]] = add <2 x i64> [[TMP13]], [[TMP19]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT4]] = add <2 x i32> [[STEP_ADD7]], splat (i32 2) -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT6]] = add <2 x i32> [[STEP_ADD8]], splat (i32 2) +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[STEP_ADD]], splat (i32 2) +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT4]] = add nuw nsw <2 x i32> [[STEP_ADD7]], splat (i32 2) +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT6]] = add nuw nsw <2 x i32> [[STEP_ADD8]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112 ; UNROLL-NO-IC-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -5683,7 +5683,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[VEC_IND]], [[TMP4]] ; CHECK-NEXT: store <2 x i32> [[TMP6]], ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP47:![0-9]+]] ; CHECK: middle.block: @@ -5711,7 +5711,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; IND-NEXT: [[TMP5:%.*]] = add <2 x i32> [[VEC_IND]], [[TMP2]] ; IND-NEXT: store <2 x i32> [[TMP5]], ptr [[TMP4]], align 4 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; IND-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; IND-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; IND-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP47:![0-9]+]] ; IND: middle.block: @@ -5778,7 +5778,7 
@@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP8]], ptr [[TMP7]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP9]], ptr [[TMP11]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[STEP_ADD]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; UNROLL-NO-IC-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP47:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -5900,9 +5900,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; CHECK-NEXT: [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[STEP]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = mul <2 x i32> <i32 0, i32 1>, [[DOTSPLAT]] -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> zeroinitializer, [[TMP17]] -; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[STEP]], 2 +; CHECK-NEXT: [[TMP19:%.*]] = mul nsw <2 x i32> <i32 0, i32 1>, [[DOTSPLAT]] +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <2 x i32> zeroinitializer, [[TMP19]] +; CHECK-NEXT: [[TMP18:%.*]] = mul nsw i32 [[STEP]], 2 ; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i64 0 ; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT2]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -5914,7 +5914,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store <2 x i32> [[TMP20]], ptr [[TMP21]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], [[DOTSPLAT3]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[VEC_IND]], [[DOTSPLAT3]] ; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] ; CHECK: middle.block: @@ -5972,8 +5972,8 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; IND-NEXT: [[IND_END:%.*]] = mul i32 [[STEP]], [[DOTCAST]] ; IND-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0 ; IND-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; IND-NEXT: [[TMP15:%.*]] = mul nuw <2 x i32> [[DOTSPLAT]], <i32 0, i32 1> -; IND-NEXT: [[TMP16:%.*]] = shl i32 [[STEP]], 1 +; IND-NEXT: [[TMP15:%.*]] = mul nuw nsw <2 x i32> [[DOTSPLAT]], <i32 0, i32 1> +; IND-NEXT: [[TMP16:%.*]] = shl nsw i32 [[STEP]], 1 ; IND-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP16]], i64 0 ; IND-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT2]], <2 x i32> poison, <2 x i32> zeroinitializer ; IND-NEXT: br label [[VECTOR_BODY:%.*]] @@ -5985,7 +5985,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; IND-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[INDEX]] ; IND-NEXT: store <2 x i32> [[TMP17]], ptr [[TMP18]], align 4 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; IND-NEXT: 
[[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], [[DOTSPLAT3]] +; IND-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[VEC_IND]], [[DOTSPLAT3]] ; IND-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] ; IND: middle.block: @@ -6044,7 +6044,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 ; UNROLL-NEXT: [[IND_END:%.*]] = mul i32 [[STEP]], [[DOTCAST]] ; UNROLL-NEXT: [[TMP16:%.*]] = shl <2 x i32> [[DOTSPLAT]], splat (i32 1) -; UNROLL-NEXT: [[TMP17:%.*]] = mul nuw <2 x i32> [[DOTSPLAT]], <i32 0, i32 1> +; UNROLL-NEXT: [[TMP17:%.*]] = mul nuw nsw <2 x i32> [[DOTSPLAT]], <i32 0, i32 1> ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -6058,7 +6058,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NEXT: store <2 x i32> [[TMP18]], ptr [[TMP20]], align 4 ; UNROLL-NEXT: store <2 x i32> [[TMP19]], ptr [[TMP21]], align 4 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], [[TMP16]] +; UNROLL-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[STEP_ADD]], [[TMP16]] ; UNROLL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] ; UNROLL: middle.block: @@ -6120,8 +6120,8 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NO-IC-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 ; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[STEP]] ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = mul <2 x i32> splat (i32 2), [[BROADCAST_SPLAT]] -; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = mul <2 x i32> <i32 0, i32 1>, [[BROADCAST_SPLAT]] -; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add <2 x i32> zeroinitializer, [[TMP18]] +; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = mul nsw <2 x i32> <i32 0, i32 1>, [[BROADCAST_SPLAT]] +; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add nsw <2 x i32> zeroinitializer, [[TMP19]] ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -6135,7 +6135,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP20]], ptr [[TMP22]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP21]], ptr [[TMP24]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], [[TMP17]] +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[STEP_ADD]], [[TMP17]] ; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -6194,7 +6194,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; INTERLEAVE-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 ; INTERLEAVE-NEXT: [[IND_END:%.*]] = mul i32 [[STEP]], [[DOTCAST]] ; INTERLEAVE-NEXT: [[TMP16:%.*]] = shl <4 x i32> [[DOTSPLAT]], splat (i32 2) -; INTERLEAVE-NEXT: [[TMP17:%.*]] = mul <4 x i32> [[DOTSPLAT]], <i32 0, i32 1, i32 2, i32 3> +; INTERLEAVE-NEXT: [[TMP17:%.*]] = mul nsw <4 x i32> [[DOTSPLAT]], <i32 0, i32 1, i32 2, i32 3> ; 
INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -6208,7 +6208,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; INTERLEAVE-NEXT: store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4 ; INTERLEAVE-NEXT: store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], [[TMP16]] +; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i32> [[STEP_ADD]], [[TMP16]] ; INTERLEAVE-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] ; INTERLEAVE: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll b/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll index 8975c058c6b79..da8efa77bf5e6 100644 --- a/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll @@ -105,7 +105,7 @@ define i32 @cond_branch(i32 %a, ptr %src) { ; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[TMP3]], <4 x i32> splat (i32 10) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -152,7 +152,7 @@ define i32 @optimizable_trunc_used_outside() { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-gep-nowrap-flags.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-gep-nowrap-flags.ll index d6a6fded712a5..7224e26d6a0e6 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-gep-nowrap-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-gep-nowrap-flags.ll @@ -33,12 +33,13 @@ define void @nusw_preservation(ptr noalias %A, ptr %B) { ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> ; CHECK-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 -4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i32> [[VEC_IND]], splat (i32 -4) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], 
!llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop @@ -94,12 +95,13 @@ define void @inbounds_preservation(ptr noalias %A, ptr %B) { ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> ; CHECK-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 -4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i32> [[VEC_IND]], splat (i32 -4) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop @@ -155,12 +157,13 @@ define void @nuw_drop(ptr noalias %A, ptr %B) { ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> ; CHECK-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 -4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i32> [[VEC_IND]], splat (i32 -4) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop @@ -209,8 +212,9 @@ define void @nusw_preservation_2(ptr %src, ptr noalias %dst) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop @@ -258,8 +262,9 @@ define void @inbounds_preservation_2(ptr %src, ptr noalias %dst) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop @@ -307,8 +312,9 @@ define void @nuw_drop_2(ptr %src, ptr noalias %dst) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll index fc2e2337e0569..de079374eb401 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ 
-345,7 +345,7 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i32> [[REVERSE3]], <4 x i32> [[REVERSE4]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> ; CHECK-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 -4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i32> [[VEC_IND]], splat (i32 -4) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -595,7 +595,7 @@ define void @load_gap_reverse(ptr noalias nocapture %P1, ptr noalias nocapture % ; CHECK-NEXT: store i64 [[TMP27]], ptr [[TMP10]], align 8 ; CHECK-NEXT: store i64 [[TMP28]], ptr [[TMP11]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 -4) ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: @@ -721,7 +721,7 @@ define void @mixed_load3_store3(ptr nocapture %A) { ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11> ; CHECK-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: middle.block: @@ -1157,13 +1157,13 @@ define i32 @PR27626_3(ptr %p, i64 %n, i32 %z) { ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[DOTSPLIT:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[DOTSPLIT]], i64 4 -; CHECK-NEXT: [[DOTSPLIT3:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP5]] +; CHECK-NEXT: [[DOTSPLIT3:%.*]] = getelementptr inbounds nuw [[PAIR_I32]], ptr [[P]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[DOTSPLIT3]], i64 4 -; CHECK-NEXT: [[DOTSPLIT4:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP7]] +; CHECK-NEXT: [[DOTSPLIT4:%.*]] = getelementptr inbounds nuw [[PAIR_I32]], ptr [[P]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[DOTSPLIT4]], i64 4 -; CHECK-NEXT: [[DOTSPLIT5:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP9]] +; CHECK-NEXT: [[DOTSPLIT5:%.*]] = getelementptr inbounds nuw [[PAIR_I32]], ptr [[P]], i64 [[TMP9]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[DOTSPLIT5]], i64 4 -; CHECK-NEXT: [[DOTSPLIT6:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP11]] +; CHECK-NEXT: [[DOTSPLIT6:%.*]] = getelementptr inbounds nuw [[PAIR_I32]], ptr [[P]], i64 [[TMP11]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[DOTSPLIT6]], i64 4 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4 ; CHECK-NEXT: 
[[TMP13:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i64 0 @@ -1178,7 +1178,7 @@ define i32 @PR27626_3(ptr %p, i64 %n, i32 %z) { ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> ; CHECK-NEXT: [[TMP17]] = add <4 x i32> [[STRIDED_VEC2]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: middle.block: @@ -1346,12 +1346,12 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 5, i64 7, i64 9>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -1) +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 -1) ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i64> [[TMP7]], i64 0 ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i64> [[TMP7]], i64 1 ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i64> [[TMP7]], i64 2 ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i64> [[TMP7]], i64 3 -; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -3) +; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 -3) ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP8]], i64 0 ; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP8]], i64 1 ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP8]], i64 2 @@ -1385,7 +1385,7 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) { ; CHECK-NEXT: store i32 [[Z]], ptr [[TMP11]], align 4 ; CHECK-NEXT: store i32 [[Z]], ptr [[TMP13]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 8) ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll index 70b1ea13677b8..69d2aa4c620c1 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll @@ -23,7 +23,7 @@ define i64 @select_decreasing_induction_icmp_const_start(ptr %a) { ; IC1VF4-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[REVERSE]], splat (i64 3) ; IC1VF4-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; IC1VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4) +; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 -4) ; IC1VF4-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000 ; IC1VF4-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IC1VF4: [[MIDDLE_BLOCK]]: @@ -77,7 +77,7 @@ define i64 @select_decreasing_induction_icmp_const_start(ptr 
%a) { ; IC4VF4-NEXT: [[TMP15]] = select <4 x i1> [[TMP11]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]] ; IC4VF4-NEXT: [[TMP16]] = select <4 x i1> [[TMP12]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]] ; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 -4) +; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[STEP_ADD_3]], splat (i64 -4) ; IC4VF4-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000 ; IC4VF4-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IC4VF4: [[MIDDLE_BLOCK]]: @@ -180,7 +180,7 @@ define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) { ; IC1VF4-NEXT: [[TMP4:%.*]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -1) ; IC1VF4-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP4]], <4 x i16> [[VEC_PHI]] ; IC1VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 -4) +; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -4) ; IC1VF4-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12 ; IC1VF4-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; IC1VF4: [[MIDDLE_BLOCK]]: @@ -398,7 +398,7 @@ define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) { ; IC4VF4-NEXT: [[TMP114:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> [[TMP110]], <4 x i16> [[VEC_PHI2]] ; IC4VF4-NEXT: [[TMP115:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP111]], <4 x i16> [[VEC_PHI3]] ; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 -; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD_3]], splat (i16 -4) +; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i16> [[STEP_ADD_3]], splat (i16 -4) ; IC4VF4-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; IC4VF4: [[MIDDLE_BLOCK]]: ; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP112]], <4 x i16> [[TMP113]]) @@ -506,7 +506,7 @@ define i16 @select_decreasing_induction_icmp_table_half(half noundef %val) { ; IC1VF4-NEXT: [[TMP4:%.*]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -1) ; IC1VF4-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP4]], <4 x i16> [[VEC_PHI]] ; IC1VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 -4) +; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -4) ; IC1VF4-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12 ; IC1VF4-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IC1VF4: [[MIDDLE_BLOCK]]: @@ -724,7 +724,7 @@ define i16 @select_decreasing_induction_icmp_table_half(half noundef %val) { ; IC4VF4-NEXT: [[TMP114:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> [[TMP110]], <4 x i16> [[VEC_PHI2]] ; IC4VF4-NEXT: [[TMP115:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP111]], <4 x i16> [[VEC_PHI3]] ; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 -; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD_3]], splat (i16 -4) +; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i16> [[STEP_ADD_3]], splat (i16 -4) ; IC4VF4-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IC4VF4: [[MIDDLE_BLOCK]]: ; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP112]], <4 x i16> [[TMP113]]) @@ 
-829,7 +829,7 @@ define i64 @select_decreasing_induction_icmp_iv_unsigned(ptr %a) { ; IC1VF4-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[REVERSE]], splat (i64 3) ; IC1VF4-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; IC1VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4) +; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 -4) ; IC1VF4-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], -9223372036854775808 ; IC1VF4-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; IC1VF4: [[MIDDLE_BLOCK]]: @@ -883,7 +883,7 @@ define i64 @select_decreasing_induction_icmp_iv_unsigned(ptr %a) { ; IC4VF4-NEXT: [[TMP15]] = select <4 x i1> [[TMP11]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]] ; IC4VF4-NEXT: [[TMP16]] = select <4 x i1> [[TMP12]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]] ; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 -4) +; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[STEP_ADD_3]], splat (i64 -4) ; IC4VF4-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], -9223372036854775808 ; IC4VF4-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; IC4VF4: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll index c958ea7b9b88e..21ef1885b75b9 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll @@ -22,7 +22,7 @@ define i64 @select_icmp_nuw_nsw(ptr %a, ptr %b, i64 %ii, i64 %n) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP6]] = select <4 x i1> [[TMP5]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP9]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -92,7 +92,7 @@ define i64 @select_icmp_nsw(ptr %a, ptr %b, i64 %ii, i64 %n) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP6]] = select <4 x i1> [[TMP5]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP9]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll index b991d58eb2b8d..c55b089043e25 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll @@ -30,7 +30,7 @@ define i32 @select_icmp_const_truncated_iv_widened_exit(ptr %a, i32 %n) { ; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], splat 
(i64 3) ; CHECK-VF4IC1-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] ; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: @@ -101,7 +101,7 @@ define i32 @select_icmp_const_truncated_iv_widened_exit(ptr %a, i32 %n) { ; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI2]] ; CHECK-VF4IC4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI3]] ; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4) +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[STEP_ADD_3]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: @@ -253,7 +253,7 @@ define i32 @select_icmp_const_truncated_iv_const_exit(ptr %a) { ; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], splat (i64 3) ; CHECK-VF4IC1-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] ; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000 ; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: @@ -297,7 +297,7 @@ define i32 @select_icmp_const_truncated_iv_const_exit(ptr %a) { ; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI2]] ; CHECK-VF4IC4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI3]] ; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4) +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[STEP_ADD_3]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000 ; CHECK-VF4IC4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: @@ -396,7 +396,7 @@ define i32 @select_fcmp_max_valid_const_ub(ptr %a) { ; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD]], zeroinitializer ; CHECK-VF4IC1-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] ; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2147483648 ; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; 
CHECK-VF4IC1: [[MIDDLE_BLOCK]]: @@ -440,7 +440,7 @@ define i32 @select_fcmp_max_valid_const_ub(ptr %a) { ; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI2]] ; CHECK-VF4IC4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI3]] ; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4) +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[STEP_ADD_3]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2147483648 ; CHECK-VF4IC4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: @@ -544,7 +544,7 @@ define i32 @select_icmp_truncated_unsigned_iv_range(ptr %a) { ; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], splat (i32 3) ; CHECK-VF4IC1-NEXT: [[TMP3]] = select <4 x i1> [[TMP2]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] ; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-VF4IC1-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2147483648 ; CHECK-VF4IC1-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: @@ -589,7 +589,7 @@ define i32 @select_icmp_truncated_unsigned_iv_range(ptr %a) { ; CHECK-VF4IC4-NEXT: [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI2]] ; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI3]] ; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4) +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[STEP_ADD_3]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2147483648 ; CHECK-VF4IC4-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll index 91c83103bf8f2..8d3bd267b9482 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll @@ -22,7 +22,7 @@ define i64 @select_icmp_const_1(ptr %a, i64 %n) { ; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 3) ; CHECK-VF4IC1-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: @@ -85,7 +85,7 @@ define i64 @select_icmp_const_1(ptr %a, i64 %n) { ; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]] ; CHECK-VF4IC4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i64> [[STEP_ADD_3]], <4 x i64> 
[[VEC_PHI3]] ; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD_3]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: @@ -215,7 +215,7 @@ define i64 @select_icmp_const_2(ptr %a, i64 %n) { ; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 3) ; CHECK-VF4IC1-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_PHI]], <4 x i64> [[VEC_IND]] ; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: @@ -278,7 +278,7 @@ define i64 @select_icmp_const_2(ptr %a, i64 %n) { ; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i64> [[VEC_PHI2]], <4 x i64> [[STEP_ADD_2]] ; CHECK-VF4IC4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i64> [[VEC_PHI3]], <4 x i64> [[STEP_ADD_3]] ; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD_3]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: @@ -408,7 +408,7 @@ define i64 @select_icmp_const_3_variable_rdx_start(ptr %a, i64 %rdx.start, i64 % ; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 3) ; CHECK-VF4IC1-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: @@ -471,7 +471,7 @@ define i64 @select_icmp_const_3_variable_rdx_start(ptr %a, i64 %rdx.start, i64 % ; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]] ; CHECK-VF4IC4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]] ; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD_3]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: @@ -601,7 +601,7 @@ define i64 @select_fcmp_const_fast(ptr %a, 
i64 %n) { ; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) ; CHECK-VF4IC1-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: @@ -664,7 +664,7 @@ define i64 @select_fcmp_const_fast(ptr %a, i64 %n) { ; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]] ; CHECK-VF4IC4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]] ; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD_3]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: @@ -794,7 +794,7 @@ define i64 @select_fcmp_const(ptr %a, i64 %n) { ; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = fcmp ueq <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) ; CHECK-VF4IC1-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: @@ -857,7 +857,7 @@ define i64 @select_fcmp_const(ptr %a, i64 %n) { ; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]] ; CHECK-VF4IC4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]] ; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD_3]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: @@ -989,7 +989,7 @@ define i64 @select_icmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) { ; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]] ; CHECK-VF4IC1-NEXT: [[TMP6]] = select <4 x i1> [[TMP5]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC1-NEXT: br i1 [[TMP7]], label 
%[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: @@ -1062,7 +1062,7 @@ define i64 @select_icmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) { ; CHECK-VF4IC4-NEXT: [[TMP17]] = select <4 x i1> [[TMP13]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]] ; CHECK-VF4IC4-NEXT: [[TMP18]] = select <4 x i1> [[TMP14]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]] ; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD_3]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC4-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: @@ -1208,7 +1208,7 @@ define i64 @select_fcmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) { ; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] ; CHECK-VF4IC1-NEXT: [[TMP6]] = select <4 x i1> [[TMP5]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC1-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: @@ -1281,7 +1281,7 @@ define i64 @select_fcmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) { ; CHECK-VF4IC4-NEXT: [[TMP17]] = select <4 x i1> [[TMP13]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]] ; CHECK-VF4IC4-NEXT: [[TMP18]] = select <4 x i1> [[TMP14]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]] ; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD_3]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC4-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: @@ -1428,7 +1428,7 @@ define i64 @select_icmp_min_valid_iv_start(ptr %a, ptr %b, i64 %rdx.start, i64 % ; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD2]] ; CHECK-VF4IC1-NEXT: [[TMP6]] = select <4 x i1> [[TMP5]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC1-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: @@ -1505,7 +1505,7 @@ define i64 @select_icmp_min_valid_iv_start(ptr %a, ptr %b, i64 %rdx.start, i64 % ; CHECK-VF4IC4-NEXT: [[TMP17]] = select <4 x i1> [[TMP13]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI3]] ; CHECK-VF4IC4-NEXT: [[TMP18]] = select <4 x i1> [[TMP14]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI4]] ; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; 
CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[STEP_ADD_3]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC4-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: @@ -1661,7 +1661,7 @@ define i64 @select_icmp_unsigned_iv_range(ptr %a, ptr %b, i64 %rdx.start) { ; CHECK-VF4IC1-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]] ; CHECK-VF4IC1-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9223372036854775804 ; CHECK-VF4IC1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: @@ -1730,7 +1730,7 @@ define i64 @select_icmp_unsigned_iv_range(ptr %a, ptr %b, i64 %rdx.start) { ; CHECK-VF4IC4-NEXT: [[TMP16]] = select <4 x i1> [[TMP12]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]] ; CHECK-VF4IC4-NEXT: [[TMP17]] = select <4 x i1> [[TMP13]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]] ; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[STEP_ADD_3]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9223372036854775792 ; CHECK-VF4IC4-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll index 86515ebe25637..162803a377bc0 100644 --- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll @@ -388,7 +388,7 @@ define i64 @iv_scalar_steps_and_outside_users(ptr %ptr) { ; VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 [[INDEX]] ; VEC-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP1]], align 4 ; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VEC-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; VEC-NEXT: [[VEC_IND_NEXT]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2) ; VEC-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1002 ; VEC-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; VEC: [[MIDDLE_BLOCK]]: @@ -448,7 +448,7 @@ define i32 @iv_2_dead_in_loop_only_used_outside(ptr %ptr) { ; VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 [[INDEX]] ; VEC-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP1]], align 4 ; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VEC-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; VEC-NEXT: [[VEC_IND_NEXT]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2) ; VEC-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1002 ; VEC-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; VEC: [[MIDDLE_BLOCK]]: @@ -597,7 +597,7 @@ define i32 @postinc_not_iv_backedge_value(i32 %k) { ; VEC-NEXT: 
[[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VEC-NEXT: [[TMP0:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; VEC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; VEC-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; VEC-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; VEC-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; VEC: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll index 00256a5c4a456..4b3f370f4cdaf 100644 --- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll +++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll @@ -418,7 +418,7 @@ define void @test_rev_loops_non_deref_loads(ptr nocapture noundef writeonly %des ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] ; CHECK: pred.store.continue2: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 -2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 -2) ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: @@ -577,7 +577,7 @@ define void @test_rev_loops_strided_deref_loads(ptr nocapture noundef writeonly ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] ; CHECK: pred.store.continue2: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 -2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 -2) ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: @@ -684,7 +684,7 @@ define void @adding_offset_overflows(i32 %n, ptr %A) { ; CHECK: pred.store.continue4: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -707,7 +707,7 @@ define void @adding_offset_overflows(i32 %n, ptr %A) { ; CHECK: loop.latch: ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[EC]], label [[EXIT_LOOPEXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT_LOOPEXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: diff --git a/llvm/test/Transforms/LoopVectorize/loop-scalars.ll b/llvm/test/Transforms/LoopVectorize/loop-scalars.ll index f8ddd344f5587..a598f154ef54b 100644 --- a/llvm/test/Transforms/LoopVectorize/loop-scalars.ll +++ b/llvm/test/Transforms/LoopVectorize/loop-scalars.ll @@ -21,7 +21,7 @@ define void @vector_gep(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr
inbounds ptr, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store <2 x ptr> [[TMP0]], ptr [[TMP1]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/no_outside_user.ll b/llvm/test/Transforms/LoopVectorize/no_outside_user.ll index 1e4a98d22bf17..9c73d85bd58ff 100644 --- a/llvm/test/Transforms/LoopVectorize/no_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/no_outside_user.ll @@ -23,14 +23,14 @@ define i32 @test1() { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I1:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[B_PROMOTED]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1> +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <2 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1> ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -38,18 +38,18 @@ define i32 @test1() { ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] -; CHECK: [[_LR_PH_I]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I1]] +; CHECK: [[_LR_PH_I1]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I1:.*:]] -; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] +; CHECK: [[_LR_PH_I:.*:]] +; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I1]] ] ; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10 ; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]],
label %[[UNNAMEDBB10:.*]] ; CHECK: [[UNNAMEDBB10]]: @@ -96,14 +96,14 @@ define i32 @test2() { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I1:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[B_PROMOTED]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1> +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <2 x i32> [[DOTSPLAT]], <i32 0, i32 1> ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -111,18 +111,18 @@ define i32 @test2() { ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> splat (i32 1), <2 x i32> [[VEC_IND]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] -; CHECK: [[_LR_PH_I]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I1]] +; CHECK: [[_LR_PH_I1]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I1:.*:]] -; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] +; CHECK: [[_LR_PH_I:.*:]] +; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I1]] ] ; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10 ; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]] ; CHECK: [[UNNAMEDBB10]]: @@ -169,7 +169,7 @@ define i32 @test3(i32 %N) { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I1:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -178,7 +178,7 @@ define i32 @test3(i32 %N) { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32>
zeroinitializer ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[B_PROMOTED]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1> +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <2 x i32> [[DOTSPLAT]], <i32 0, i32 1> ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -189,18 +189,18 @@ define i32 @test3(i32 %N) { ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> zeroinitializer, <2 x i32> splat (i32 2) ; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[PREDPHI]], <2 x i32> splat (i32 1) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[PREDPHI1]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I1]] -; CHECK: [[_LR_PH_I1]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] +; CHECK: [[_LR_PH_I]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I:.*:]] -; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I1]] ] +; CHECK: [[_LR_PH_I1:.*:]] +; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] ; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10 ; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]] ; CHECK: [[UNNAMEDBB10]]: @@ -257,14 +257,14 @@ define i32 @test4(i32 %N) { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I1:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[B_PROMOTED]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1> +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <2 x i32> [[DOTSPLAT]], <i32 0, i32 1> ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -272,18 +272,18 @@ define i32 @test4(i32 %N) { ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32>
splat (i32 1), <2 x i32> zeroinitializer ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] -; CHECK: [[_LR_PH_I]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT_LOOPEXIT:.*]], label %[[_LR_PH_I1]] +; CHECK: [[_LR_PH_I1]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[DOTLR_PH_I_PREHEADER]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I1:.*:]] -; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] +; CHECK: [[_LR_PH_I:.*:]] +; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I1]] ] ; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10 ; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]] ; CHECK: [[UNNAMEDBB10]]: @@ -519,14 +519,14 @@ define i8 @outside_user_non_phi() { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I1:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[B_PROMOTED]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1> +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <2 x i32> [[DOTSPLAT]], <i32 0, i32 1> ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -535,18 +535,18 @@ define i8 @outside_user_non_phi() { ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i32> [[PREDPHI]] to <2 x i8> ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[TMP4]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]],
label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I1]] +; CHECK: [[_LR_PH_I1]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I1:.*:]] -; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] +; CHECK: [[_LR_PH_I:.*:]] +; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I1]] ] ; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10 ; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]] ; CHECK: [[UNNAMEDBB10]]: @@ -650,14 +650,14 @@ define i32 @sum_arrays_outside_use(ptr %B, ptr %A, ptr %C, i32 %N) { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N]], i32 [[TMP0]]) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_MEMCHECK:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I1:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[C1]], [[B2]] ; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP2]], 8 ; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[C1]], [[A3]] ; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i32 [[TMP3]], 8 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] -; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[_LR_PH_I]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[_LR_PH_I1]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -680,12 +680,12 @@ define i32 @sum_arrays_outside_use(ptr %B, ptr %A, ptr %C, i32 %N) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[TMP11]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] -; CHECK: [[_LR_PH_I]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I1]] +; CHECK: [[_LR_PH_I1]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ], [ [[B_PROMOTED]], %[[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I1:.*:]] -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IVNEXT:%.*]], %[[DOTLR_PH_I]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] +; CHECK: [[_LR_PH_I:.*:]] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IVNEXT:%.*]], %[[DOTLR_PH_I]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I1]] ] ; CHECK-NEXT: [[INDVARS_IV:%.*]] = sext i32 [[IV]] to i64 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[BLOAD:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 @@ -742,7 +742,7 @@ define i32 @non_uniform_live_out() { ; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i8> [[WIDE_LOAD]], splat (i8 1) ; CHECK-NEXT: store <2 x i8> [[TMP4]], ptr [[TMP2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20000 ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop 
[[LOOP14:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll b/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll index 481fa04cf7164..30d01e8b790a7 100644 --- a/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll +++ b/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll @@ -175,7 +175,7 @@ define void @predicated_noalias_scope_decl(ptr noalias nocapture readonly %a, pt ; CHECK-NEXT: store <4 x float> [[TMP5]], ptr [[TMP7]], align 4 ; CHECK-NEXT: store <4 x float> [[TMP6]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll index bfc7feecafbc4..81095290bcc92 100644 --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll @@ -654,7 +654,7 @@ define void @f4(ptr noalias %A, i32 signext %n) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x i8> [[TMP1]], ptr [[TMP2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -670,16 +670,16 @@ define void @f4(ptr noalias %A, i32 signext %n) { ; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[VEC_EPILOG_RESUME_VAL]] to i32 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <4 x i32> [[DOTSPLAT]], <i32 0, i32 1, i32 2, i32 3> ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND7:%.*]] = phi <4 x i32> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT8:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND7:%.*]] = phi <4 x i32> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT7:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = trunc <4 x i32> [[VEC_IND7]] to <4 x i8> ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX6]] ; CHECK-NEXT: store <4 x i8> [[TMP7]], ptr [[TMP8]], align 1 ; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT8]] = add <4 x i32> [[VEC_IND7]], splat (i32 4) +; CHECK-NEXT: [[VEC_IND_NEXT7]] = add nuw nsw <4 x i32> [[VEC_IND7]], splat (i32 4) ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP10]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label
%[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: @@ -720,7 +720,7 @@ define void @f4(ptr noalias %A, i32 signext %n) { ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store <4 x i8> [[TMP1]], ptr [[TMP2]], align 1 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK-PROFITABLE-BY-DEFAULT: [[MIDDLE_BLOCK]]: @@ -736,16 +736,16 @@ define void @f4(ptr noalias %A, i32 signext %n) { ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP5:%.*]] = trunc i64 [[VEC_EPILOG_RESUME_VAL]] to i32 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i64 0 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1> +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT]], <i32 0, i32 1> ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK-PROFITABLE-BY-DEFAULT: [[VEC_EPILOG_VECTOR_BODY]]: ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND7:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT8:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND7:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT7:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP7:%.*]] = trunc <2 x i32> [[VEC_IND7]] to <2 x i8> ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX6]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store <2 x i8> [[TMP7]], ptr [[TMP8]], align 1 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2 -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND_NEXT8]] = add <2 x i32> [[VEC_IND7]], splat (i32 2) +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND_NEXT7]] = add nuw nsw <2 x i32> [[VEC_IND7]], splat (i32 2) ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC3]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[TMP10]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-PROFITABLE-BY-DEFAULT: [[VEC_EPILOG_MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/optsize.ll b/llvm/test/Transforms/LoopVectorize/optsize.ll index f9f7feb7bdfbc..f57041257c919 100644 --- a/llvm/test/Transforms/LoopVectorize/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/optsize.ll @@ -571,7 +571,7 @@ define i32 @pr45526_pgso() !prof !14 { ; NPGSO-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; NPGSO-NEXT: [[TMP0:%.*]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 1) ; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32
[[INDEX]], 4 -; NPGSO-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; NPGSO-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; NPGSO-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 508 ; NPGSO-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; NPGSO: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll b/llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll index 8a77d14b9d3ba..f7986d8f1f928 100644 --- a/llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll +++ b/llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll @@ -40,7 +40,7 @@ define void @inner_latch_header_first_successor(i64 %N, i32 %c, i64 %M) { ; CHECK-NEXT: [[TMP10:%.*]] = phi <4 x i64> [ [[TMP3]], %[[INNER3]] ] ; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[TMP10]], <4 x ptr> align 4 [[TMP0]], <4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -142,7 +142,7 @@ define void @inner_latch_header_second_successor(i64 %N, i32 %c, i64 %M) { ; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i64> [ [[TMP3]], %[[INNER3]] ] ; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[TMP9]], <4 x ptr> align 4 [[TMP0]], <4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll b/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll index 2e17f7adca279..32dbc273cb6b6 100644 --- a/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll +++ b/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll @@ -42,7 +42,7 @@ define void @test(ptr %src, i64 %n) { ; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP_1_LATCH5]], label [[LOOP_2_HEADER1]] ; CHECK: vector.latch: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/outer-loop-wide-phis.ll b/llvm/test/Transforms/LoopVectorize/outer-loop-wide-phis.ll index 59e3d71f27a38..afd5000818bfc 100644 --- a/llvm/test/Transforms/LoopVectorize/outer-loop-wide-phis.ll +++ b/llvm/test/Transforms/LoopVectorize/outer-loop-wide-phis.ll @@ -36,7 +36,7 @@ define void @wide_phi_2_predecessors(ptr noalias %A, ptr noalias %B, i32 %c, i1 ; CHECK-NEXT: [[TMP10:%.*]] = phi <4 x i64> [ 
[[TMP3]], %[[INNER_LATCH3]] ] ; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[TMP10]], <4 x ptr> align 8 [[TMP0]], <4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_hcfg_construction.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_hcfg_construction.ll index b2f1954ca989b..020f84209b23b 100644 --- a/llvm/test/Transforms/LoopVectorize/outer_loop_hcfg_construction.ll +++ b/llvm/test/Transforms/LoopVectorize/outer_loop_hcfg_construction.ll @@ -53,7 +53,7 @@ define void @non_outermost_loop_hcfg_construction(i64 %n, ptr %a) { ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_LOOP_LATCH4]], label [[INNERMOST_LOOP3]] ; CHECK: vector.latch: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -196,7 +196,7 @@ define void @non_outermost_loop_hcfg_construction_other_loops_at_same_level(i64 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_LOOP_J0_CLEANUP4]], label [[INNERMOST_LOOP3]] ; CHECK: vector.latch: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll index 47743753349ed..b224a5a86b83d 100644 --- a/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll @@ -25,8 +25,8 @@ define void @foo() { ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul <vscale x 4 x i64> [[TMP4]], splat (i64 1) -; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP6]] +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw nsw
<vscale x 4 x i64> [[TMP4]], splat (i64 1) +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <vscale x 4 x i64> zeroinitializer, [[TMP5]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP3]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -50,7 +50,7 @@ define void @foo() { ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <vscale x 4 x float> [ [[TMP12]], [[INNER_LOOP1]] ] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[VEC_PHI5]], <vscale x 4 x ptr> align 4 [[TMP10]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll index 4086c79082cce..0a19e0ec55a4b 100644 --- a/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll +++ b/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll @@ -48,7 +48,7 @@ define void @foo(i32 %n) { ; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_LATCH]], label %[[FOR_BODY31]] ; CHECK: [[VECTOR_LATCH]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 8 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll index fb9b1c7d62e3e..5fcd6db2eb28a 100644 --- a/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll +++ b/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll @@ -50,7 +50,7 @@ ; CHECK: [[ForInc]]: ; CHECK: %[[IndNext]] = add nuw i64 %[[Ind]], 4 -; CHECK: %[[VecIndNext]] = add <4 x i64> %[[VecInd]], splat (i64 4) +; CHECK: %[[VecIndNext]] = add nuw nsw <4 x i64> %[[VecInd]], splat (i64 4) ; CHECK: %[[Cmp:.*]] = icmp eq i64 %[[IndNext]], {{.*}} ; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body diff --git a/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll b/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll index c491477c4d2be..623a9435edec1 100644 --- a/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll +++ b/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll @@ -71,9 +71,9 @@ define void @doit1(i32 %n, i32 %step) local_unnamed_addr { ; CHECK-NEXT: [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[STEP]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[STEP]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = mul <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]] -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> zeroinitializer, [[TMP17]] -; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[STEP]], 4 +; CHECK-NEXT: [[TMP19:%.*]] = mul nsw <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]] +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP19]] +; CHECK-NEXT: [[TMP18:%.*]] = mul nsw i32 [[STEP]], 4 ; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32>
poison, i32 [[TMP18]], i64 0 ; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -83,7 +83,7 @@ define void @doit1(i32 %n, i32 %step) local_unnamed_addr { ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [250 x i32], ptr @a, i64 0, i64 [[INDEX]] ; CHECK-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP20]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], [[DOTSPLAT3]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i32> [[VEC_IND]], [[DOTSPLAT3]] ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -192,9 +192,9 @@ define void @doit2(i32 %n, i32 %step) local_unnamed_addr { ; CHECK-NEXT: [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[STEP]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[STEP]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP16:%.*]] = mul <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]] -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> zeroinitializer, [[TMP16]] -; CHECK-NEXT: [[TMP17:%.*]] = mul i32 [[STEP]], 4 +; CHECK-NEXT: [[TMP18:%.*]] = mul nsw <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]] +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP18]] +; CHECK-NEXT: [[TMP17:%.*]] = mul nsw i32 [[STEP]], 4 ; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP17]], i64 0 ; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -204,7 +204,7 @@ define void @doit2(i32 %n, i32 %step) local_unnamed_addr { ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [250 x i32], ptr @a, i64 0, i64 [[INDEX]] ; CHECK-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP19]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], [[DOTSPLAT3]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i32> [[VEC_IND]], [[DOTSPLAT3]] ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -385,9 +385,9 @@ define void @doit4(i32 %n, i8 signext %cstep) local_unnamed_addr { ; CHECK-NEXT: [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[CONV]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[CONV]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP14:%.*]] = mul <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]] -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> zeroinitializer, [[TMP14]] -; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[CONV]], 4 +; CHECK-NEXT: [[TMP16:%.*]] = mul nsw <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]] +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP16]] +; CHECK-NEXT: [[TMP15:%.*]] = mul nsw i32 [[CONV]], 4 ; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP15]], i64 0 ; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -397,7 +397,7 @@ define void @doit4(i32 %n, i8
signext %cstep) local_unnamed_addr { ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [250 x i32], ptr @a, i64 0, i64 [[INDEX]] ; CHECK-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP17]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], [[DOTSPLAT3]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i32> [[VEC_IND]], [[DOTSPLAT3]] ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/pr34681.ll b/llvm/test/Transforms/LoopVectorize/pr34681.ll index 0f509a5c4eeb3..a04a4e9eea6fe 100644 --- a/llvm/test/Transforms/LoopVectorize/pr34681.ll +++ b/llvm/test/Transforms/LoopVectorize/pr34681.ll @@ -80,7 +80,7 @@ define i32 @foo1(i32 %N, ptr nocapture readnone %A, ptr nocapture readonly %B, i ; CHECK-NEXT: [[TMP28:%.*]] = sext <4 x i16> [[TMP27]] to <4 x i32> ; CHECK-NEXT: [[TMP29]] = add <4 x i32> [[VEC_PHI]], [[TMP28]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP30]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -185,7 +185,7 @@ define i32 @foo2(i16 zeroext %N, ptr nocapture readnone %A, ptr nocapture readon ; CHECK-NEXT: [[TMP25:%.*]] = sext <4 x i16> [[TMP24]] to <4 x i32> ; CHECK-NEXT: [[TMP26]] = add <4 x i32> [[VEC_PHI]], [[TMP25]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP27]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/pr35773.ll b/llvm/test/Transforms/LoopVectorize/pr35773.ll index b7165156e28fd..00b9e9133af9b 100644 --- a/llvm/test/Transforms/LoopVectorize/pr35773.ll +++ b/llvm/test/Transforms/LoopVectorize/pr35773.ll @@ -17,8 +17,8 @@ define void @doit1(ptr %ptr) { ; CHECK-NEXT: store <4 x i32> [[I32_IV]], ptr [[GEP1]], align 4 ; CHECK-NEXT: [[MAIN_IV_NEXT]] = add nuw i32 [[MAIN_IV]], 4 -; CHECK-NEXT: [[I32_IV_NEXT]] = add <4 x i32> [[I32_IV]], splat (i32 36) -; CHECK-NEXT: [[IV_FROM_TRUNC_NEXT]] = add <4 x i8> [[IV_FROM_TRUNC]], splat (i8 36) +; CHECK-NEXT: [[I32_IV_NEXT]] = add nuw nsw <4 x i32> [[I32_IV]], splat (i32 36) +; CHECK-NEXT: [[IV_FROM_TRUNC_NEXT]] = add nuw nsw <4 x i8> [[IV_FROM_TRUNC]], splat (i8 36) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[MAIN_IV_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 ; diff --git a/llvm/test/Transforms/LoopVectorize/pr36983-multiple-lcssa.ll b/llvm/test/Transforms/LoopVectorize/pr36983-multiple-lcssa.ll index 98963a72c5ad0..310c7729a6b63 100644 --- a/llvm/test/Transforms/LoopVectorize/pr36983-multiple-lcssa.ll +++ b/llvm/test/Transforms/LoopVectorize/pr36983-multiple-lcssa.ll @@ -14,7 +14,7 @@ define i16 @duplicate_lcssa(i16 %val) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 -1, i16 -2, i16 -3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]],
%[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = sub nsw <4 x i16> [[VEC_IND]], splat (i16 1) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 -4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -4) ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 65536 ; CHECK-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll b/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll index 1bb6454cdeea2..c4a5a8b18dd5d 100644 --- a/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll +++ b/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll @@ -72,12 +72,13 @@ define void @scev4stride1(ptr noalias nocapture %a, ptr noalias nocapture readon ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDEX]] ; CHECK-NEXT: store <4 x i32> [[TMP20]], ptr [[TMP21]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll index ebd532aa5032c..f17866718a0ee 100644 --- a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll +++ b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll @@ -27,7 +27,7 @@ define i32 @test(i32 %a, i1 %c.1, i1 %c.2 ) #0 { ; CHECK-NEXT: [[PREDPHI6:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> [[TMP0]], <2 x i32> [[TMP3]] ; CHECK-NEXT: [[PREDPHI7]] = select i1 [[C_2]], <2 x i32> [[VEC_PHI]], <2 x i32> [[PREDPHI6]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 176 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll index cf973affae5f2..5654dbd727f85 100644 --- a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll +++ b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll @@ -224,7 +224,7 @@ define void @predicated_phi_dbg(i64 %n, ptr %x) { ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i64, ptr [[X]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP21]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP22]], label 
%[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -304,7 +304,7 @@ define void @predicated_phi_dbg(i64 %n, ptr %x) { ; DEBUGLOC-NEXT: [[TMP21:%.*]] = getelementptr i64, ptr [[X]], i64 [[INDEX]], !dbg [[DBG57:![0-9]+]] ; DEBUGLOC-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP21]], align 8, !dbg [[DBG58:![0-9]+]] ; DEBUGLOC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4, !dbg [[DBG53]] -; DEBUGLOC-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4), !dbg [[DBG53]] +; DEBUGLOC-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4), !dbg [[DBG53]] ; DEBUGLOC-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]], !dbg [[DBG59:![0-9]+]] ; DEBUGLOC-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !dbg [[DBG59]], !llvm.loop [[LOOP60:![0-9]+]] ; DEBUGLOC: [[MIDDLE_BLOCK]]: @@ -385,7 +385,7 @@ define void @scalar_cast_dbg(ptr nocapture %a, i32 %start, i64 %k) { ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP5]] ; CHECK-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -428,7 +428,7 @@ define void @scalar_cast_dbg(ptr nocapture %a, i32 %start, i64 %k) { ; DEBUGLOC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP5]], !dbg [[DBG77:![0-9]+]] ; DEBUGLOC-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP6]], align 4, !dbg [[DBG78:![0-9]+]] ; DEBUGLOC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4, !dbg [[DBG75]] -; DEBUGLOC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4), !dbg [[DBG76]] +; DEBUGLOC-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4), !dbg [[DBG76]] ; DEBUGLOC-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]], !dbg [[DBG79:![0-9]+]] ; DEBUGLOC-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !dbg [[DBG79]], !llvm.loop [[LOOP80:![0-9]+]] ; DEBUGLOC: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll index 5f54b0ac7834a..caf1a934ed70a 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll @@ -28,7 +28,7 @@ define i8 @PR34687(i1 %c, i32 %x, i32 %n, i32 %divisor) { ; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i8> ; CHECK-NEXT: [[TMP4]] = zext <4 x i8> [[TMP3]] to <4 x i32> ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll index c708715c623e6..dfdf1100eb57b 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll +++ 
b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll @@ -365,7 +365,7 @@ define void @reduc_store_inside_unrolled(ptr %dst, ptr readonly %src) { ; CHECK-NEXT: [[TMP33:%.*]] = insertelement <4 x i32> [[TMP32]], i32 [[TMP29]], i32 3 ; CHECK-NEXT: [[TMP34]] = add <4 x i32> [[TMP33]], [[TMP16]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 8) ; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; CHECK-NEXT: br i1 [[TMP35]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -571,7 +571,7 @@ define void @reduc_store_middle_store_predicated(ptr %dst, ptr readonly %src) { ; CHECK-NEXT: [[TMP33:%.*]] = insertelement <4 x i32> [[TMP32]], i32 [[TMP29]], i32 3 ; CHECK-NEXT: [[TMP34]] = add <4 x i32> [[TMP33]], [[TMP16]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 8) ; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; CHECK-NEXT: br i1 [[TMP35]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/scalable-assume.ll b/llvm/test/Transforms/LoopVectorize/scalable-assume.ll index c858f201e01fa..d140bc09fe731 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-assume.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-assume.ll @@ -161,8 +161,8 @@ define void @predicated_assume(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64() -; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i64> [[TMP7]], splat (i64 1) -; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP8]] +; CHECK-NEXT: [[TMP8:%.*]] = mul nuw nsw <vscale x 2 x i64> [[TMP7]], splat (i64 1) +; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <vscale x 2 x i64> zeroinitializer, [[TMP8]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -187,7 +187,7 @@ define void @predicated_assume(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-NEXT: store <vscale x 2 x float> [[TMP15]], ptr [[TMP17]], align 4 ; CHECK-NEXT: store <vscale x 2 x float> [[TMP16]], ptr [[TMP20]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <vscale x 2 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll index 359132f7904cc..d87d39e684993 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll @@ -43,7 +43,7 @@ define void @add_ind64_unrolled(ptr noalias nocapture %a, ptr noalias nocapture ; CHECK-NEXT: store <vscale x 2 x i64> [[TMP12]], ptr [[TMP14]], align 8 ; CHECK-NEXT: store <vscale x 2 x i64> [[TMP13]], ptr [[TMP16]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] =
add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[STEP_ADD]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <vscale x 2 x i64> [[STEP_ADD]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -119,7 +119,7 @@ define void @add_ind64_unrolled_nxv1i64(ptr noalias nocapture %a, ptr noalias no ; CHECK-NEXT: store <vscale x 1 x i64> [[TMP11]], ptr [[TMP13]], align 8 ; CHECK-NEXT: store <vscale x 1 x i64> [[TMP12]], ptr [[TMP15]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 1 x i64> [[STEP_ADD]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <vscale x 1 x i64> [[STEP_ADD]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: @@ -184,9 +184,9 @@ define void @add_unique_ind32(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 ; CHECK-NEXT: [[IND_END:%.*]] = shl i32 [[DOTCAST]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32() -; CHECK-NEXT: [[TMP7:%.*]] = shl <vscale x 4 x i32> [[TMP6]], splat (i32 1) +; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw <vscale x 4 x i32> [[TMP6]], splat (i32 1) ; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP3]] to i32 -; CHECK-NEXT: [[TMP9:%.*]] = shl i32 [[TMP8]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i32 [[TMP8]], 1 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP9]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -196,7 +196,7 @@ define void @add_unique_ind32(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store <vscale x 4 x i32> [[VEC_IND]], ptr [[TMP10]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll index cde2de73b7bfd..70127a6762423 100644 --- a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll +++ b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll @@ -25,7 +25,7 @@ define void @single_incoming_phi_no_blend_mask(i64 %a, i64 %b) { ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[INDEX]] ; CHECK-NEXT: store <2 x i16> [[PREDPHI]], ptr [[TMP6]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -87,7 +87,7 @@ define void @single_incoming_phi_with_blend_mask(i64 %a, i64 %b) { ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[INDEX]] ; CHECK-NEXT: store <2 x i16>
[[PREDPHI1]], ptr [[TMP9]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: @@ -152,9 +152,9 @@ define void @multiple_incoming_phi_with_blend_mask(i64 %a, ptr noalias %dst) { ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[DST:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store <2 x i16> [[TMP9]], ptr [[TMP10]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) -; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], splat (i16 2) -; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <2 x i16> [[VEC_IND3]], splat (i16 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT2]] = add nuw nsw <2 x i16> [[VEC_IND1]], splat (i16 2) +; CHECK-NEXT: [[VEC_IND_NEXT4]] = add nuw nsw <2 x i16> [[VEC_IND3]], splat (i16 2) ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -231,7 +231,7 @@ define void @single_incoming_needs_predication(i64 %a, i64 %b) { ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[INDEX]] ; CHECK-NEXT: store <2 x i16> [[PREDPHI3]], ptr [[TMP18]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: @@ -283,7 +283,7 @@ define void @duplicated_incoming_blocks_blend(i32 %x, ptr %ptr) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i32 [[INDEX]] ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/uitofp-preserve-nneg.ll b/llvm/test/Transforms/LoopVectorize/uitofp-preserve-nneg.ll index b85f2746a0b14..98203a245f863 100644 --- a/llvm/test/Transforms/LoopVectorize/uitofp-preserve-nneg.ll +++ b/llvm/test/Transforms/LoopVectorize/uitofp-preserve-nneg.ll @@ -18,7 +18,7 @@ define void @uitofp_preserve_nneg(ptr %result, i32 %size, float %y) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[RESULT:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX1]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; 
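The common pattern across these hunks: the vector induction update (add <VF x iN> [[VEC_IND]], splat (...)) now carries the no-wrap flags that were proven for the scalar induction, so a forward IV bounded by the trip count becomes "add nuw nsw", while an IV that can wrap in one sense keeps only the flag that still holds (pr36983's negative-step IV above gets nsw alone, pr34681's foo1 gets nuw alone). A self-contained C++ sketch of that flag decision, using a hypothetical helper name (not the patch's actual LoopVectorize code) and assuming start, step, and step count are known:

#include <cstdint>
#include <cstdio>

struct WrapFlags { bool NUW = true; bool NSW = true; };

// Walk an induction <Start, +Step> through Steps updates and clear the
// corresponding flag for any update that wraps unsigned/signed.
WrapFlags inductionWrapFlags(int64_t Start, int64_t Step, uint64_t Steps) {
  WrapFlags F;
  uint64_t V = static_cast<uint64_t>(Start);
  const uint64_t S = static_cast<uint64_t>(Step);
  for (uint64_t I = 0; I < Steps; ++I) {
    uint64_t Next = V + S;              // two's complement, wraps mod 2^64
    if (Next < V)                       // carry out: unsigned wrap, no nuw
      F.NUW = false;
    if ((~(V ^ S) & (V ^ Next)) >> 63)  // same-sign inputs, flipped sign: no nsw
      F.NSW = false;
    V = Next;
  }
  return F;
}

int main() {
  WrapFlags Up = inductionWrapFlags(0, 4, 250);    // stays in [0, 1000]
  WrapFlags Down = inductionWrapFlags(0, -4, 250); // unsigned value wraps
  std::printf("up: nuw=%d nsw=%d, down: nuw=%d nsw=%d\n",
              Up.NUW, Up.NSW, Down.NUW, Down.NSW);
}

Under these inputs the forward IV keeps both flags, matching the "add nuw nsw ... splat (i32 4)" updates, and the decreasing IV keeps only nsw, matching the "add nsw ... splat (i16 -4)" update in pr36983.
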
CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll index 3b515a2acb1a7..24dc182fe24a1 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll @@ -114,7 +114,7 @@ define void @blend_chain_iv(i1 %c) { ; CHECK-NEXT: store i16 0, ptr [[TMP6]], align 2 ; CHECK-NEXT: store i16 0, ptr [[TMP8]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[PREDPHI1]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[PREDPHI1]], splat (i64 4) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll index 927fefc73ceea..f80d7b695e2af 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll @@ -112,7 +112,7 @@ define void @ld_div3_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr [[TMP10]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -168,7 +168,7 @@ define void @ld_div1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: @@ -272,7 +272,7 @@ define void @ld_div3_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: @@ -328,7 +328,7 @@ define void @ld_div1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; CHECK-NEXT: 
[[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -383,7 +383,7 @@ define void @ld_div2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: @@ -520,7 +520,7 @@ define void @ld_div2_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr [[TMP10]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: @@ -569,7 +569,7 @@ define void @ld_div3_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr [[TMP10]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: @@ -625,7 +625,7 @@ define void @ld_div1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: @@ -729,7 +729,7 @@ define void @ld_div3_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: middle.block: @@ -785,7 +785,7 @@ define void @ld_div1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], 
align 8 ; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: middle.block: @@ -841,7 +841,7 @@ define void @ld_div2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; CHECK: middle.block: @@ -937,7 +937,7 @@ define void @test_step_is_not_invariant(ptr %A) { ; CHECK-NEXT: store i16 [[TMP1]], ptr [[TMP9]], align 2 ; CHECK-NEXT: store i16 [[TMP2]], ptr [[TMP10]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], 56 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll index d6277d657ea7e..0f191b2d8a278 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll @@ -112,7 +112,7 @@ define void @ld_and_neg3_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr [[TMP10]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -168,7 +168,7 @@ define void @ld_and_neg1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: @@ -270,7 +270,7 @@ define void @ld_and_neg1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add 
nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: @@ -325,7 +325,7 @@ define void @ld_and_neg2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: @@ -374,7 +374,7 @@ define void @ld_and_neg2_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr [[TMP10]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: @@ -430,7 +430,7 @@ define void @ld_and_neg2_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: middle.block: @@ -486,7 +486,7 @@ define void @ld_and_neg2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: middle.block: @@ -542,7 +542,7 @@ define void @ld_and_neg3_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: middle.block: diff --git 
a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll index edf04bbcbcdff..7ff10c544f72a 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll @@ -53,7 +53,7 @@ define void @ld_div2_urem3_1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: store <8 x i64> [[TMP34]], ptr [[TMP35]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <8 x i64> [[VEC_IND]], splat (i64 8) ; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -129,7 +129,7 @@ define void @ld_div2_urem3_2(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: store <8 x i64> [[TMP35]], ptr [[TMP36]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <8 x i64> [[VEC_IND]], splat (i64 8) ; CHECK-NEXT: [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: @@ -203,7 +203,7 @@ define void @ld_div4(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: store <8 x i64> [[TMP33]], ptr [[TMP34]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <8 x i64> [[VEC_IND]], splat (i64 8) ; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll index 32873a4e90e81..a5bb07f1fd4ef 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll @@ -124,7 +124,7 @@ define void @ld_lshr1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] ; VF4-NEXT: store <4 x i64> [[TMP17]], ptr [[TMP18]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; VF4-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; VF4-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; VF4: middle.block: @@ -248,7 +248,7 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x 
i64> [[VEC_IND]], splat (i64 4) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) ; VF2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; VF2: middle.block: @@ -301,7 +301,7 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8 ; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 8) ; VF4-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; VF4: middle.block: @@ -442,7 +442,7 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) ; VF2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; VF2: middle.block: @@ -494,7 +494,7 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8 ; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12) ; VF4-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; VF4: middle.block: @@ -549,7 +549,7 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) ; VF2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; VF2: middle.block: @@ -601,7 +601,7 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8 ; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12) ; VF4-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; VF4: middle.block: @@ -651,7 +651,7 @@ define void @ld_lshr1_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: store <2 x i64> [[TMP9]], ptr [[TMP10]], align 8 ; 
VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; VF2-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 ; VF2-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; VF2: middle.block: @@ -689,7 +689,7 @@ define void @ld_lshr1_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: store <4 x i64> [[TMP17]], ptr [[TMP18]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; VF4-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996 ; VF4-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; VF4: middle.block: @@ -830,7 +830,7 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) ; VF2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; VF2: middle.block: @@ -883,7 +883,7 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8 ; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12) ; VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; VF4: middle.block: @@ -939,7 +939,7 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) ; VF2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; VF2: middle.block: @@ -992,7 +992,7 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8 ; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12) ; VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; VF4: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll 
b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll index 607d1365098f2..fb962c017156d 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll @@ -29,8 +29,8 @@ define void @ld_div1_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] ; VF2-NEXT: store <2 x i64> [[TMP11]], ptr [[TMP12]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; VF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF2: middle.block: @@ -71,8 +71,8 @@ define void @ld_div1_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] ; VF4-NEXT: store <4 x i64> [[TMP19]], ptr [[TMP20]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; VF4-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF4: middle.block: @@ -162,8 +162,8 @@ define void @ld_div2_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] ; VF4-NEXT: store <4 x i64> [[TMP19]], ptr [[TMP20]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; VF4-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; VF4: middle.block: @@ -219,8 +219,8 @@ define void @ld_div3_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] ; VF2-NEXT: store <2 x i64> [[TMP11]], ptr [[TMP12]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; VF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VF2: middle.block: @@ -261,8 +261,8 @@ define void @ld_div3_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[B]], 
i64 [[INDEX]] ; VF4-NEXT: store <4 x i64> [[TMP19]], ptr [[TMP20]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; VF4-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VF4: middle.block: @@ -325,8 +325,8 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; VF2: middle.block: @@ -382,8 +382,8 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8 ; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 8) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; VF4: middle.block: @@ -446,8 +446,8 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VF2: middle.block: @@ -503,8 +503,8 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8 ; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 8) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; VF4-NEXT: br i1 [[TMP32]], label 
[[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VF4: middle.block: @@ -567,8 +567,8 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; VF2: middle.block: @@ -624,8 +624,8 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8 ; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 8) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; VF4: middle.block: @@ -688,8 +688,8 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; VF2: middle.block: @@ -744,8 +744,8 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8 ; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; VF4: middle.block: @@ -807,8 +807,8 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add 
nsw <2 x i64> [[VEC_IND]], splat (i64 6) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; VF2: middle.block: @@ -863,8 +863,8 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8 ; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; VF4: middle.block: @@ -926,8 +926,8 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; VF2: middle.block: @@ -982,8 +982,8 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8 ; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; VF4: middle.block: @@ -1039,8 +1039,8 @@ define void @ld_div1_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: store <2 x i64> [[TMP11]], ptr [[TMP12]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 ; VF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; VF2: middle.block: @@ -1081,8 +1081,8 @@ define void @ld_div1_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; 
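These ind2 tests track two inductions advancing at different rates. Lane I of a starting vector IV is Start + I * Step, and the in-loop update then advances every lane by VF * Step at once, which is where the differing splat constants come from. A small illustrative C++ sketch of that lane math (hypothetical helper name, with VF and steps chosen to match the VF2 checks above):

#include <array>
#include <cstdint>
#include <cstdio>

// Lane I of the initial vector IV holds Start + I * Step; each vector
// iteration then adds splat(VF * Step), e.g. "add nsw <2 x i64> %vec.ind,
// splat (i64 4)" for a scalar step of 2 at VF 2.
template <unsigned VF>
std::array<int64_t, VF> vectorIVStart(int64_t Start, int64_t Step) {
  std::array<int64_t, VF> Lanes{};
  for (unsigned I = 0; I < VF; ++I)
    Lanes[I] = Start + static_cast<int64_t>(I) * Step;
  return Lanes;
}

int main() {
  auto IV = vectorIVStart<2>(0, 2);  // <0, 2>; update adds splat (i64 4)
  auto IV1 = vectorIVStart<2>(0, 1); // <0, 1>; update adds splat (i64 2)
  std::printf("vec.ind = <%lld, %lld>, vec.ind1 = <%lld, %lld>\n",
              (long long)IV[0], (long long)IV[1],
              (long long)IV1[0], (long long)IV1[1]);
}
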
VF4-NEXT: store <4 x i64> [[TMP19]], ptr [[TMP20]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996 ; VF4-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; VF4: middle.block: @@ -1138,8 +1138,8 @@ define void @ld_div2_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: store <2 x i64> [[TMP11]], ptr [[TMP12]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 ; VF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; VF2: middle.block: @@ -1180,8 +1180,8 @@ define void @ld_div2_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: store <4 x i64> [[TMP19]], ptr [[TMP20]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996 ; VF4-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; VF4: middle.block: @@ -1237,8 +1237,8 @@ define void @ld_div3_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: store <2 x i64> [[TMP11]], ptr [[TMP12]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 ; VF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; VF2: middle.block: @@ -1279,8 +1279,8 @@ define void @ld_div3_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: store <4 x i64> [[TMP19]], ptr [[TMP20]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x 
i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996 ; VF4-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; VF4: middle.block: @@ -1343,8 +1343,8 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 ; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; VF2: middle.block: @@ -1400,8 +1400,8 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 ; VF4-NEXT: store i64 [[TMP28]], ptr [[TMP32]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 8) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496 ; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; VF4: middle.block: @@ -1464,8 +1464,8 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 ; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; VF2: middle.block: @@ -1521,8 +1521,8 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 ; VF4-NEXT: store i64 [[TMP28]], ptr [[TMP32]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 8) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496 ; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; VF4: middle.block: @@ -1585,8 +1585,8 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> 
[[VEC_IND]], splat (i64 4) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 ; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; VF2: middle.block: @@ -1642,8 +1642,8 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 ; VF4-NEXT: store i64 [[TMP28]], ptr [[TMP32]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 8) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496 ; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; VF4: middle.block: @@ -1706,8 +1706,8 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; VF2: middle.block: @@ -1763,8 +1763,8 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 ; VF4-NEXT: store i64 [[TMP28]], ptr [[TMP32]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; VF4: middle.block: @@ -1827,8 +1827,8 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; VF2: middle.block: @@ -1884,8 +1884,8 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, 
ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 ; VF4-NEXT: store i64 [[TMP28]], ptr [[TMP32]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; VF4: middle.block: @@ -1948,8 +1948,8 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) -; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) +; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; VF2: middle.block: @@ -2005,8 +2005,8 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 ; VF4-NEXT: store i64 [[TMP28]], ptr [[TMP32]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) -; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12) +; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; VF4: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/vector-geps.ll b/llvm/test/Transforms/LoopVectorize/vector-geps.ll index 94bc32205ec11..90b4be0f96dce 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-geps.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-geps.ll @@ -20,7 +20,7 @@ define void @vector_gep_stored(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x ptr> [[TMP0]], ptr [[TMP1]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll index ef678ff759943..1668fd49a22a3 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll @@ -223,7 +223,7 @@ define i64 @find_last_iv(ptr %a, i64 %n, i64 %start) { ; CHECK: vector loop: { ; CHECK-NEXT: vector.body: ; 
CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> -; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<{{.+}}> +; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nuw nsw ir<0>, ir<1>, vp<{{.+}}> ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%rdx> = phi ir<-9223372036854775808>, ir<%cond> ; CHECK-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr inbounds ir<%a>, vp<[[SCALAR_STEPS]]> diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index 84c6cc2675a80..754aee99ed865 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -165,7 +165,7 @@ define void @print_replicate_predicated_phi(i64 %n, ptr %x) { ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> -; CHECK-NEXT: ir<%i> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]> +; CHECK-NEXT: ir<%i> = WIDEN-INDUCTION nuw nsw ir<0>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: WIDEN ir<%cmp> = icmp ult ir<%i>, ir<5> ; CHECK-NEXT: Successor(s): pred.udiv @@ -534,7 +534,7 @@ define i32 @print_exit_value(ptr %ptr, i32 %off) { ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> -; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]> +; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%gep> = getelementptr inbounds ir<%ptr>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN ir<%add> = add ir<%iv>, ir<%off> diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll index 3161a0d5e6f5e..88dead4418628 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -276,7 +276,7 @@ define void @uniform_gep(i64 %k, ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION -; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<21>, ir<1>, vp<[[VF]]> +; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<21>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<21> + vp<[[CAN_IV]]> * ir<1> ; CHECK-NEXT: EMIT vp<[[WIDE_CAN_IV:%.+]]> = WIDEN-CANONICAL-INDUCTION vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule vp<[[WIDE_CAN_IV]]>, vp<[[BTC]]> @@ -1060,7 +1060,7 @@ define void @merge_with_dead_gep_between_regions(i32 %n, i32 %k, ptr noalias %sr ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION -; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<%n>, ir<-1>, vp<[[VF]]> +; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<%n>, ir<-1>, vp<[[VF]]> ; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<%n> + vp<[[CAN_IV]]> * ir<-1> ; CHECK-NEXT: WIDEN ir<%cond> = icmp ult ir<%iv>, ir<%k> ; CHECK-NEXT: Successor(s): pred.store diff --git a/llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll b/llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll index eaebfebf533ea..909ca14f21639 100644 --- 
a/llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll @@ -41,7 +41,7 @@ define void @inner_loop_reduction(ptr noalias nocapture readonly %a.in, ptr noal ; CHECK-NEXT: %[[C_PTR:.*]] = getelementptr inbounds double, ptr %c.out, <4 x i64> %[[VEC_INDEX]] ; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %[[REDUCTION]], <4 x ptr> align 8 %[[C_PTR]], <4 x i1> splat (i1 true)) ; CHECK-NEXT: %[[FOR1_INDEX_NEXT:.*]] = add nuw i64 %[[FOR1_INDEX]], 4 -; CHECK-NEXT: %{{.*}} = add <4 x i64> %[[VEC_INDEX]], splat (i64 4) +; CHECK-NEXT: %{{.*}} = add nuw nsw <4 x i64> %[[VEC_INDEX]], splat (i64 4) ; CHECK-NEXT: %[[EXIT_COND:.*]] = icmp eq i64 %[[FOR1_INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 %[[EXIT_COND]], label %{{.*}}, label %vector.body diff --git a/llvm/test/Transforms/LoopVectorize/vplan-widen-call-instruction.ll b/llvm/test/Transforms/LoopVectorize/vplan-widen-call-instruction.ll index 180fd84c14450..72ba993defb5a 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-widen-call-instruction.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-widen-call-instruction.ll @@ -34,7 +34,7 @@ define void @widen_call_instruction(ptr noalias nocapture readonly %a.in, ptr no ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[C_OUT]], <4 x i64> [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP9]], <4 x ptr> align 8 [[TMP7]], <4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll b/llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll index 48a11fa9182e7..429e964890103 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll @@ -36,7 +36,7 @@ define void @loop_invariant_select(ptr noalias nocapture %out, i1 %select, doubl ; CHECK-NEXT: br i1 [[TMP5]], label %[[VECTOR_LATCH]], label %[[FOR2_HEADER3]] ; CHECK: [[VECTOR_LATCH]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -96,7 +96,7 @@ define void @outer_loop_dependant_select(ptr noalias nocapture %out, double %a, ; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_LATCH]], label %[[FOR2_HEADER3]] ; CHECK: [[VECTOR_LATCH]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -157,7 +157,7 @@ define void 
@inner_loop_dependant_select(ptr noalias nocapture %out, double %a, ; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_LATCH]], label %[[FOR2_HEADER3]] ; CHECK: [[VECTOR_LATCH]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -219,7 +219,7 @@ define void @outer_and_inner_loop_dependant_select(ptr noalias nocapture %out, d ; CHECK-NEXT: br i1 [[TMP7]], label %[[VECTOR_LATCH]], label %[[FOR2_HEADER3]] ; CHECK: [[VECTOR_LATCH]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll b/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll index d08ca8c99e8ba..9bb010c0431d8 100644 --- a/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll +++ b/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll @@ -102,7 +102,7 @@ define void @wide_gep_multiple_indices_some_invariant(ptr noalias %dst, ptr noal ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x ptr> [[TMP1]], ptr [[TMP2]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td index 45cb67f0eee4a..4820b7a747ac2 100644 --- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td +++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td @@ -127,7 +127,7 @@ def AMDGPU_ScaledExtPacked816Op FixedVectorOfShapeAndType<[4], F8E8M0FNU>:$scale, ConfinedAttr:$blockSize, ConfinedAttr<I32Attr, [IntMinValue<0>, IntMaxValue<1>]>:$firstScaleLane, - ConfinedAttr<I32Attr, [IntMinValue<0>, IntMaxValue<2>]>:$firstScaleByte)>, + ConfinedAttr<I32Attr, [IntMinValue<0>, IntMaxValue<3>]>:$firstScaleByte)>, Results<( outs AnyTypeOf<[FixedVectorOfShapeAndType<[8], F32>, FixedVectorOfShapeAndType<[8], F16>, @@ -139,17 +139,21 @@ def AMDGPU_ScaledExtPacked816Op let summary = "Extend a vector of packed floating point values"; let description = [{ - The scales applied to the input microfloats are stored in two bytes which + The scales applied to the input microfloats are stored in bytes which come from the `scales` input provided in a *half* of the wave identified - by `firstScaleLane`. The pair of bytes used is selected by - `firstScaleByte`. The 16 vectors in consecutive lanes starting from + by `firstScaleLane`. The bytes used are selected by `firstScaleByte` and depend + on the type of `source`. 
The 16 vectors in consecutive lanes starting from `firstScaleLane` (which we'll call the scale vectors) will be used by both - halves of the wave (with lane L reading from L % 16'th scale vector), but - each half will use a different byte. + halves of the wave (with lane L reading from L % 16'th scale vector). + + When `source` is F4E2M1FN, F6E2M3FN, or F6E3M2FN, each half of the + wave will use a different byte: the first half uses `firstScaleByte` and + the second half uses `firstScaleByte` + 1. When the block size is 32, + `firstScaleByte` can be either 0 or 2, selecting halves of the scale vectors. + Lanes 0-15 will read from `firstScaleByte` and lanes 16-31 will read + from `firstScaleByte` + 1. + - When the block size is 32, `firstScaleByte` can be either 0 or 2, - selecting halves of the scale vectors. Lanes 0-15 will read from - `firstScaleByte` and lanes 16-31 will read from `firstScaleByte` + 1. For example: ```mlir // Input: 8-element vector of F8E4M3FN, converting to F32 @@ -165,7 +169,8 @@ def AMDGPU_ScaledExtPacked816Op : vector<16xf6E2M3FN>, vector<4xf8E8M0FNU> -> vector<16xf16> ``` - However, when the block size is 16, `firstScaleByte` can be 0 or 1. + When `source` is F4E2M1FN, F6E2M3FN, or F6E3M2FN and + the block size is 16, `firstScaleByte` can be 0 or 1. Lanes 0-15 read from the `firstScaleByte`th element of the scale vectors, while lanes 16-31 read from `firstScaleByte` + 2. For example: @@ -187,6 +192,16 @@ def AMDGPU_ScaledExtPacked816Op instructions use for matrix scales. These selection operands allow one to choose portions of the matrix to convert. + When `source` is either F8E4M3FN or F8E5M2 and `blockSize` is 32, + the same byte will be used by both halves of the wave. + In this case, `firstScaleByte` can be any value from 0 to 3. + + When `source` is either F8E4M3FN or F8E5M2 and `blockSize` is 16, + the following combinations are allowed: + * `firstScaleLane(0), firstScaleByte(0)` + * `firstScaleLane(1), firstScaleByte(2)` + All other combinations are reserved. + Available on gfx1250+. 
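For example (an illustrative sketch of the f8 case described above, in the style of the earlier examples; `%v` and `%scale` are placeholder values):

```mlir
// Input: 8-element vector of f8E5M2 with blockSize(32): both halves of the
// wave read the same scale byte, so firstScaleByte may be any of 0 through 3.
%ret = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(3) : vector<8xf8E5M2>, vector<4xf8E8M0FNU> -> vector<8xf16>
```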
}]; diff --git a/mlir/include/mlir/IR/SymbolInterfaces.td b/mlir/include/mlir/IR/SymbolInterfaces.td index bbfa30815bd4a..b3aafe063d376 100644 --- a/mlir/include/mlir/IR/SymbolInterfaces.td +++ b/mlir/include/mlir/IR/SymbolInterfaces.td @@ -171,6 +171,8 @@ def Symbol : OpInterface<"SymbolOpInterface"> { if (concreteOp.isDeclaration() && concreteOp.isPublic()) return concreteOp.emitOpError("symbol declaration cannot have public " "visibility"); + if ($_op->getNumResults() != 0) + return concreteOp.emitOpError("symbols must not have results"); auto parent = $_op->getParentOp(); if (parent && !parent->hasTrait<OpTrait::SymbolTable>() && parent->isRegistered()) { return concreteOp.emitOpError("symbol's parent must have the SymbolTable " diff --git a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp index df955fc90b45f..5c35823678576 100644 --- a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp +++ b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp @@ -344,14 +344,27 @@ void RawBufferAtomicCmpswapOp::getCanonicalizationPatterns( LogicalResult ScaledExtPacked816Op::verify() { int blockSize = getBlockSize(); assert((blockSize == 16 || blockSize == 32) && "invalid block size"); + int firstScaleByte = getFirstScaleByte(); - if (blockSize == 16 && !llvm::is_contained({0, 1}, firstScaleByte)) { - return emitOpError( - "blockSize of 16 can only have firstScaleByte be 0 or 1."); + auto sourceType = cast<VectorType>(getSource().getType()); + Type elementType = sourceType.getElementType(); + auto floatType = cast<FloatType>(elementType); + int bitWidth = floatType.getWidth(); + + if (llvm::is_contained({4, 6}, bitWidth) && blockSize == 16 && + !llvm::is_contained({0, 1}, firstScaleByte)) { + return emitOpError("blockSize of 16 can only have firstScaleByte be 0 or 1 " + "for f4 and f6."); + } + if (llvm::is_contained({4, 6}, bitWidth) && blockSize == 32 && + !llvm::is_contained({0, 2}, firstScaleByte)) { + return emitOpError("blockSize of 32 can only have firstScaleByte be 0 or 2 " + "for f4 and f6."); } - if (blockSize == 32 && !llvm::is_contained({0, 2}, firstScaleByte)) { + if (bitWidth == 8 && blockSize == 16 && + !llvm::is_contained({0, 2}, firstScaleByte)) { return emitOpError( - "blockSize of 32 can only have firstScaleByte be 0 or 2."); + "blockSize of 16 can only have firstScaleByte be 0 or 2 for f8."); } return success(); diff --git a/mlir/lib/Query/Matcher/Parser.cpp b/mlir/lib/Query/Matcher/Parser.cpp index e392a885c511b..7bfe03d02a7fa 100644 --- a/mlir/lib/Query/Matcher/Parser.cpp +++ b/mlir/lib/Query/Matcher/Parser.cpp @@ -27,7 +27,7 @@ struct Parser::TokenInfo { } // Known identifiers. 
- static const char *const ID_Extract; + static const char *const idExtract; llvm::StringRef text; TokenKind kind = TokenKind::Eof; @@ -35,7 +35,7 @@ struct Parser::TokenInfo { VariantValue value; }; -const char *const Parser::TokenInfo::ID_Extract = "extract"; +const char *const Parser::TokenInfo::idExtract = "extract"; class Parser::CodeTokenizer { public: @@ -452,13 +452,13 @@ bool Parser::parseMatcherExpressionImpl(const TokenInfo &nameToken, } if (chainCallToken.kind != TokenKind::Ident || - chainCallToken.text != TokenInfo::ID_Extract) { + chainCallToken.text != TokenInfo::idExtract) { error->addError(chainCallToken.range, ErrorType::ParserMalformedChainedExpr); return false; } - if (chainCallToken.text == TokenInfo::ID_Extract && + if (chainCallToken.text == TokenInfo::idExtract && !parseChainedExpression(functionName)) return false; } diff --git a/mlir/lib/Target/Cpp/TranslateToCpp.cpp b/mlir/lib/Target/Cpp/TranslateToCpp.cpp index 2e827fe16c77c..8d6d0764bae65 100644 --- a/mlir/lib/Target/Cpp/TranslateToCpp.cpp +++ b/mlir/lib/Target/Cpp/TranslateToCpp.cpp @@ -173,8 +173,11 @@ struct CppEmitter { /// Emits the operands of the operation. All operands are emitted in order. LogicalResult emitOperands(Operation &op); - /// Emits value as an operands of an operation - LogicalResult emitOperand(Value value); + /// Emits value as an operand of some operation. Unless \p isInBrackets is + /// true, operands emitted as sub-expressions will be parenthesized if needed + /// in order to enforce correct evaluation based on precedence and + /// associativity. + LogicalResult emitOperand(Value value, bool isInBrackets = false); /// Emit an expression as a C expression. LogicalResult emitExpression(ExpressionOp expressionOp); @@ -1579,7 +1582,7 @@ LogicalResult CppEmitter::emitExpression(ExpressionOp expressionOp) { return success(); } -LogicalResult CppEmitter::emitOperand(Value value) { +LogicalResult CppEmitter::emitOperand(Value value, bool isInBrackets) { if (isPartOfCurrentExpression(value)) { Operation *def = value.getDefiningOp(); assert(def && "Expected operand to be defined by an operation"); @@ -1587,10 +1590,12 @@ LogicalResult CppEmitter::emitOperand(Value value) { if (failed(precedence)) return failure(); - // Sub-expressions with equal or lower precedence need to be parenthesized, - // as they might be evaluated in the wrong order depending on the shape of - // the expression tree. - bool encloseInParenthesis = precedence.value() <= getExpressionPrecedence(); + // Unless already in brackets, sub-expressions with equal or lower + // precedence need to be parenthesized as they might be evaluated in the + // wrong order depending on the shape of the expression tree. + bool encloseInParenthesis = + !isInBrackets && precedence.value() <= getExpressionPrecedence(); + if (encloseInParenthesis) os << "("; pushExpressionPrecedence(precedence.value()); @@ -1629,15 +1634,9 @@ LogicalResult CppEmitter::emitOperand(Value value) { LogicalResult CppEmitter::emitOperands(Operation &op) { return interleaveCommaWithError(op.getOperands(), os, [&](Value operand) { - // If an expression is being emitted, push lowest precedence as these - // operands are either wrapped by parenthesis. 
- if (getEmittedExpression()) - pushExpressionPrecedence(lowestPrecedence()); - if (failed(emitOperand(operand))) - return failure(); - if (getEmittedExpression()) - popExpressionPrecedence(); - return success(); + // Emit the operand under the guarantee that, if it is part of an expression, + // it is being emitted within brackets. + return emitOperand(operand, /*isInBrackets=*/true); }); } diff --git a/mlir/test/Dialect/AMDGPU/invalid.mlir b/mlir/test/Dialect/AMDGPU/invalid.mlir index 4c6f62a045405..5c8cc8b67c4b3 100644 --- a/mlir/test/Dialect/AMDGPU/invalid.mlir +++ b/mlir/test/Dialect/AMDGPU/invalid.mlir @@ -333,17 +333,25 @@ func.func @gather_to_lds_non_lds(%idx1 : index, %mem1 : memref<32xf16>, %mem2 : // ----- -func.func @amdgpu.scaled_ext_packed816_invalid_block_size_and_first_scale_byte_16(%v: vector<8xf8E5M2>, %scale: vector<4xf8E8M0FNU>) { - // expected-error@+1 {{'amdgpu.scaled_ext_packed816' op blockSize of 16 can only have firstScaleByte be 0 or 1.}} - %ret0 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(16) firstScaleLane(0) firstScaleByte(2) : vector<8xf8E5M2>, vector<4xf8E8M0FNU> -> vector<8xf16> +func.func @amdgpu.scaled_ext_packed816_invalid_block_size_and_first_scale_byte_16(%v: vector<8xf4E2M1FN>, %scale: vector<4xf8E8M0FNU>) { + // expected-error@+1 {{'amdgpu.scaled_ext_packed816' op blockSize of 16 can only have firstScaleByte be 0 or 1 for f4 and f6}} + %ret0 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(16) firstScaleLane(0) firstScaleByte(2) : vector<8xf4E2M1FN>, vector<4xf8E8M0FNU> -> vector<8xf16> func.return } // ----- -func.func @amdgpu.scaled_ext_packed816_invalid_block_size_and_first_scale_byte_32(%v: vector<8xf8E5M2>, %scale: vector<4xf8E8M0FNU>) { - // expected-error@+1 {{'amdgpu.scaled_ext_packed816' op blockSize of 32 can only have firstScaleByte be 0 or 2.}} - %ret0 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(1) : vector<8xf8E5M2>, vector<4xf8E8M0FNU> -> vector<8xf16> +func.func @amdgpu.scaled_ext_packed816_invalid_block_size_and_first_scale_byte_32(%v: vector<8xf4E2M1FN>, %scale: vector<4xf8E8M0FNU>) { + // expected-error@+1 {{'amdgpu.scaled_ext_packed816' op blockSize of 32 can only have firstScaleByte be 0 or 2 for f4 and f6.}} + %ret0 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(1) : vector<8xf4E2M1FN>, vector<4xf8E8M0FNU> -> vector<8xf16> + func.return +} + +// ----- + +func.func @amdgpu.scaled_ext_packed816_invalid_attributes_for_f8(%v: vector<8xf8E5M2>, %scale: vector<4xf8E8M0FNU>) { + // expected-error@+1 {{'amdgpu.scaled_ext_packed816' op blockSize of 16 can only have firstScaleByte be 0 or 2 for f8.}} + %ret0 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(16) firstScaleLane(0) firstScaleByte(1) : vector<8xf8E5M2>, vector<4xf8E8M0FNU> -> vector<8xf16> func.return } diff --git a/mlir/test/IR/invalid-ops.mlir b/mlir/test/IR/invalid-ops.mlir index 0c5fec8c4055a..2f5dd28b51911 100644 --- a/mlir/test/IR/invalid-ops.mlir +++ b/mlir/test/IR/invalid-ops.mlir @@ -145,3 +145,11 @@ func.func @verify_fail_3() { %r = "arith.constant"() {value = -3 : si32} : () -> si32 return } + +// ----- + +// Verify that symbols with results are rejected. +module { + // expected-error@+1 {{'test.symbol_with_result' op symbols must not have results}} + %0 = "test.symbol_with_result"() <{sym_name = "test_symbol"}> : () -> i32 +} diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index 275025978a784..670223984fd95 
100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -120,6 +120,13 @@ def SymbolOp : TEST_Op<"symbol", [NoMemoryEffect, Symbol]> { OptionalAttr<StrAttr>:$sym_visibility); } +def SymbolWithResultOp : TEST_Op<"symbol_with_result", [Symbol]> { + let summary = "invalid symbol operation that produces an SSA result"; + let arguments = (ins StrAttr:$sym_name, + OptionalAttr<StrAttr>:$sym_visibility); + let results = (outs AnyType:$result); +} + def OverriddenSymbolVisibilityOp : TEST_Op<"overridden_symbol_visibility", [ DeclareOpInterfaceMethods, ]> { diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index effcd615786bf..153c7eeedd0ab 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -9860,6 +9860,7 @@ cc_library( deps = [ ":SparseTensorEnums", ":SparseTensorRuntime", + ":mlir_apfloat_utils", ":mlir_float16_utils", "//llvm:Support", ],
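For contrast with the invalid `test.symbol_with_result` case above, a minimal well-formed counterpart (a sketch; it reuses the pre-existing `test.symbol` op, which carries its name in the `sym_name` attribute and defines no SSA values):

```mlir
// Valid: the symbol is identified by its sym_name attribute and produces no
// SSA results, so the new "symbols must not have results" check is satisfied.
module {
  "test.symbol"() <{sym_name = "ok_symbol"}> : () -> ()
}
```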