From 996639d6ebb86ff15a8c99b67f1c2e2117636ae7 Mon Sep 17 00:00:00 2001 From: Veera <32646674+veera-sivarajan@users.noreply.github.com> Date: Sun, 9 Nov 2025 21:08:22 -0800 Subject: [PATCH 01/24] [MLIR][BufferResultsToOutParamsPass] Add Option to Modify Public Function's Signature (#167248) Since https://github.com/llvm/llvm-project/pull/162441, `buffer-results-to-out-params` transforms `private` functions only. But, as mentioned in https://github.com/llvm/llvm-project/pull/162441#issuecomment-3404195242, this is a breaking change for pipelines handling C code. Our pipeline @EfficientComputer is also affected by this breaking change. Therefore, this PR adds an opt-in flag to allow `public` functions to be transformed by `BufferResultsToOutParamsPass`. --- .../Dialect/Bufferization/Transforms/Passes.h | 3 ++ .../Bufferization/Transforms/Passes.td | 3 ++ .../Transforms/BufferResultsToOutParams.cpp | 10 ++++- ...to-out-params-modify-public-functions.mlir | 40 +++++++++++++++++++ 4 files changed, 54 insertions(+), 2 deletions(-) create mode 100644 mlir/test/Transforms/buffer-results-to-out-params-modify-public-functions.mlir diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h index 67ac487d8226d..ea158914e445b 100644 --- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h @@ -171,6 +171,9 @@ struct BufferResultsToOutParamsOpts { /// If true, the pass eliminates the memref.alloc and memcpy if the returned /// memref is allocated in the current function and has dynamic shape. bool hoistDynamicAllocs = false; + + /// If true, the pass modifies the function signatures of public functions. + bool modifyPublicFunctions = false; }; /// Replace buffers that are returned from a function with an out parameter. diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td index cad44cb15f479..1eb692586bcfc 100644 --- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td @@ -258,6 +258,9 @@ def BufferResultsToOutParamsPass /*default=*/"false", "Hoist static allocations to call sites.">, Option<"hoistDynamicAllocs", "hoist-dynamic-allocs", "bool", /*default=*/"false", "Hoist dynamic allocations to call sites.">, + Option<"modifyPublicFunctions", "modify-public-functions", "bool", + /*default=*/"false", "Modify function signatures of public " + "functions.">, ]; let dependentDialects = ["memref::MemRefDialect"]; } diff --git a/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp b/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp index b9ee0a4d401f3..d0742ec27ed60 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp @@ -217,7 +217,9 @@ updateCalls(ModuleOp module, const AllocDynamicSizesMap &map, } if (!options.filterFn(&callee)) return; - if (callee.isExternal() || callee.isPublic()) + if (callee.isPublic() && !options.modifyPublicFunctions) + return; + if (callee.isExternal()) return; SmallVector replaceWithNewCallResults; @@ -295,7 +297,9 @@ LogicalResult mlir::bufferization::promoteBufferResultsToOutParams( // function. 
AllocDynamicSizesMap map; for (auto func : module.getOps()) { - if (func.isExternal() || func.isPublic()) + if (func.isPublic() && !options.modifyPublicFunctions) + continue; + if (func.isExternal()) continue; if (!options.filterFn(&func)) continue; @@ -326,6 +330,8 @@ struct BufferResultsToOutParamsPass options.hoistStaticAllocs = true; if (hoistDynamicAllocs) options.hoistDynamicAllocs = true; + if (modifyPublicFunctions) + options.modifyPublicFunctions = true; if (failed(bufferization::promoteBufferResultsToOutParams(getOperation(), options))) diff --git a/mlir/test/Transforms/buffer-results-to-out-params-modify-public-functions.mlir b/mlir/test/Transforms/buffer-results-to-out-params-modify-public-functions.mlir new file mode 100644 index 0000000000000..c99bde3f34986 --- /dev/null +++ b/mlir/test/Transforms/buffer-results-to-out-params-modify-public-functions.mlir @@ -0,0 +1,40 @@ +// RUN: mlir-opt -p 'builtin.module(buffer-results-to-out-params{modify-public-functions})' %s | FileCheck %s + +// Test if `public` functions' return values are transformed into out parameters +// when `buffer-results-to-out-params` is invoked with `modifyPublicFunctions`. + +// CHECK-LABEL: func.func @basic( +// CHECK-SAME: %[[ARG0:.*]]: memref) { +// CHECK: %[[VAL_0:.*]] = "test.source"() : () -> memref +// CHECK: memref.copy %[[VAL_0]], %[[ARG0]] : memref to memref +// CHECK: return +// CHECK: } +func.func @basic() -> (memref) { + %0 = "test.source"() : () -> (memref) + return %0 : memref +} + +// CHECK-LABEL: func.func @presence_of_existing_arguments( +// CHECK-SAME: %[[ARG0:.*]]: memref<1xf32>, +// CHECK-SAME: %[[ARG1:.*]]: memref<2xf32>) { +// CHECK: %[[VAL_0:.*]] = "test.source"() : () -> memref<2xf32> +// CHECK: memref.copy %[[VAL_0]], %[[ARG1]] : memref<2xf32> to memref<2xf32> +// CHECK: return +// CHECK: } +func.func @presence_of_existing_arguments(%arg0: memref<1xf32>) -> (memref<2xf32>) { + %0 = "test.source"() : () -> (memref<2xf32>) + return %0 : memref<2xf32> +} + +// CHECK-LABEL: func.func @multiple_results( +// CHECK-SAME: %[[ARG0:.*]]: memref<1xf32>, +// CHECK-SAME: %[[ARG1:.*]]: memref<2xf32>) { +// CHECK: %[[VAL_0:.*]]:2 = "test.source"() : () -> (memref<1xf32>, memref<2xf32>) +// CHECK: memref.copy %[[VAL_0]]#0, %[[ARG0]] : memref<1xf32> to memref<1xf32> +// CHECK: memref.copy %[[VAL_0]]#1, %[[ARG1]] : memref<2xf32> to memref<2xf32> +// CHECK: return +// CHECK: } +func.func @multiple_results() -> (memref<1xf32>, memref<2xf32>) { + %0, %1 = "test.source"() : () -> (memref<1xf32>, memref<2xf32>) + return %0, %1 : memref<1xf32>, memref<2xf32> +} From a8a0ffba739d247e24faaf612ac8f2d8faf1de3c Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Mon, 10 Nov 2025 08:17:08 +0100 Subject: [PATCH 02/24] [clang][bytecode] Check source pointer for bitcast validity (#166907) Unfortunately this is more dynamic than anticipated. 
Fixes https://github.com/llvm/llvm-project/issues/165006 --- clang/lib/AST/ByteCode/Compiler.cpp | 22 ++++---------- clang/lib/AST/ByteCode/Compiler.h | 2 -- clang/lib/AST/ByteCode/Interp.h | 46 +++++++++++++++++++++++++++++ clang/lib/AST/ByteCode/Opcodes.td | 2 +- clang/test/AST/ByteCode/cxx11.cpp | 11 +++++++ clang/test/AST/ByteCode/invalid.cpp | 18 +++++++++++ 6 files changed, 81 insertions(+), 20 deletions(-) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 20836f663cdf8..f68422c6eb01d 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -208,19 +208,6 @@ template class LocOverrideScope final { } // namespace interp } // namespace clang -template -bool Compiler::isValidBitCast(const CastExpr *E) { - QualType FromTy = E->getSubExpr()->getType()->getPointeeType(); - QualType ToTy = E->getType()->getPointeeType(); - - if (classify(FromTy) == classify(ToTy)) - return true; - - if (FromTy->isVoidType() || ToTy->isVoidType()) - return true; - return false; -} - template bool Compiler::VisitCastExpr(const CastExpr *CE) { const Expr *SubExpr = CE->getSubExpr(); @@ -506,12 +493,9 @@ bool Compiler::VisitCastExpr(const CastExpr *CE) { if (!FromT || !ToT) return false; - if (!this->isValidBitCast(CE) && - !this->emitInvalidCast(CastKind::ReinterpretLike, /*Fatal=*/false, CE)) - return false; - assert(isPtrType(*FromT)); assert(isPtrType(*ToT)); + bool SrcIsVoidPtr = SubExprTy->isVoidPointerType(); if (FromT == ToT) { if (CE->getType()->isVoidPointerType() && !SubExprTy->isFunctionPointerType()) { @@ -520,6 +504,10 @@ bool Compiler::VisitCastExpr(const CastExpr *CE) { if (!this->visit(SubExpr)) return false; + if (!this->emitCheckBitCast(CETy->getPointeeType().getTypePtr(), + SrcIsVoidPtr, CE)) + return false; + if (CE->getType()->isFunctionPointerType() || SubExprTy->isFunctionPointerType()) { return this->emitFnPtrCast(CE); diff --git a/clang/lib/AST/ByteCode/Compiler.h b/clang/lib/AST/ByteCode/Compiler.h index fac0a7f4e1886..5c46f75af4da3 100644 --- a/clang/lib/AST/ByteCode/Compiler.h +++ b/clang/lib/AST/ByteCode/Compiler.h @@ -425,8 +425,6 @@ class Compiler : public ConstStmtVisitor, bool>, bool refersToUnion(const Expr *E); - bool isValidBitCast(const CastExpr *E); - protected: /// Variable to storage mapping. 
llvm::DenseMap Locals; diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h index c16408cef1fde..cbd60c9f2b37c 100644 --- a/clang/lib/AST/ByteCode/Interp.h +++ b/clang/lib/AST/ByteCode/Interp.h @@ -3290,6 +3290,52 @@ inline bool SideEffect(InterpState &S, CodePtr OpPC) { return S.noteSideEffect(); } +inline bool CheckBitCast(InterpState &S, CodePtr OpPC, const Type *TargetType, + bool SrcIsVoidPtr) { + const auto &Ptr = S.Stk.peek(); + if (Ptr.isZero()) + return true; + if (!Ptr.isBlockPointer()) + return true; + + if (TargetType->isIntegerType()) + return true; + + if (SrcIsVoidPtr && S.getLangOpts().CPlusPlus) { + bool HasValidResult = !Ptr.isZero(); + + if (HasValidResult) { + if (S.getStdAllocatorCaller("allocate")) + return true; + + const auto &E = cast(S.Current->getExpr(OpPC)); + if (S.getLangOpts().CPlusPlus26 && + S.getASTContext().hasSimilarType(Ptr.getType(), + QualType(TargetType, 0))) + return true; + + S.CCEDiag(E, diag::note_constexpr_invalid_void_star_cast) + << E->getSubExpr()->getType() << S.getLangOpts().CPlusPlus26 + << Ptr.getType().getCanonicalType() << E->getType()->getPointeeType(); + } else if (!S.getLangOpts().CPlusPlus26) { + const SourceInfo &E = S.Current->getSource(OpPC); + S.CCEDiag(E, diag::note_constexpr_invalid_cast) + << diag::ConstexprInvalidCastKind::CastFrom << "'void *'" + << S.Current->getRange(OpPC); + } + } + + QualType PtrType = Ptr.getType(); + if (PtrType->isRecordType() && + PtrType->getAsRecordDecl() != TargetType->getAsRecordDecl()) { + S.CCEDiag(S.Current->getSource(OpPC), diag::note_constexpr_invalid_cast) + << diag::ConstexprInvalidCastKind::ThisConversionOrReinterpret + << S.getLangOpts().CPlusPlus << S.Current->getRange(OpPC); + return false; + } + return true; +} + /// Same here, but only for casts. inline bool InvalidCast(InterpState &S, CodePtr OpPC, CastKind Kind, bool Fatal) { diff --git a/clang/lib/AST/ByteCode/Opcodes.td b/clang/lib/AST/ByteCode/Opcodes.td index a2eaa61ea4306..1785fcf4a7b20 100644 --- a/clang/lib/AST/ByteCode/Opcodes.td +++ b/clang/lib/AST/ByteCode/Opcodes.td @@ -422,8 +422,8 @@ def CheckLiteralType : Opcode { } def CheckArraySize : Opcode { let Args = [ArgUint64]; } - def CheckFunctionDecl : Opcode { let Args = [ArgFunctionDecl]; } +def CheckBitCast : Opcode { let Args = [ArgTypePtr, ArgBool]; } // [] -> [Value] def GetGlobal : AccessOpcode; diff --git a/clang/test/AST/ByteCode/cxx11.cpp b/clang/test/AST/ByteCode/cxx11.cpp index 753e51dfbfc1c..95615350f5142 100644 --- a/clang/test/AST/ByteCode/cxx11.cpp +++ b/clang/test/AST/ByteCode/cxx11.cpp @@ -387,3 +387,14 @@ struct Counter { // Passing an lvalue by value makes a non-elidable copy. constexpr int PassByValue(Counter c) { return c.copies; } static_assert(PassByValue(Counter(0)) == 0, "expect no copies"); + +namespace PointerCast { + /// The two interpreters disagree here. 
+ struct S { int x, y; } s; + constexpr S* sptr = &s; + struct U {}; + struct Str { + int e : (Str*)(sptr) == (Str*)(sptr); // expected-error {{not an integral constant expression}} \ + // expected-note {{cast that performs the conversions of a reinterpret_cast}} + }; +} diff --git a/clang/test/AST/ByteCode/invalid.cpp b/clang/test/AST/ByteCode/invalid.cpp index 1f2d6bc1d48eb..115c8663079a1 100644 --- a/clang/test/AST/ByteCode/invalid.cpp +++ b/clang/test/AST/ByteCode/invalid.cpp @@ -88,4 +88,22 @@ namespace InvalidBitCast { // both-note {{in call to}} + struct sockaddr + { + char sa_data[8]; + }; + struct in_addr + { + unsigned int s_addr; + }; + struct sockaddr_in + { + unsigned short int sin_port; + struct in_addr sin_addr; + }; + /// Bitcast from sockaddr to sockaddr_in. Used to crash. + unsigned int get_addr(sockaddr addr) { + return ((sockaddr_in *)&addr)->sin_addr.s_addr; + } + } From d10a85167a26e9489f9daf20acc0092d55687b15 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Mon, 10 Nov 2025 08:07:16 +0000 Subject: [PATCH 03/24] [WebAssembly] Implement more of getCastInstrCost (#164612) Fill out more information for sign and zero extend and add some truncate information; however, the primary change is to int/fp conversions. In particular, fp to (narrow) int appears to be relatively expensive. --- .../WebAssemblyTargetTransformInfo.cpp | 66 ++++- .../CodeGen/WebAssembly/memory-interleave.ll | 278 +----------------- .../WebAssembly/memory-interleave.ll | 82 +++--- 3 files changed, 112 insertions(+), 314 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp index 92a9812df2127..70f7b889551a4 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -119,18 +119,82 @@ InstructionCost WebAssemblyTTIImpl::getCastInstrCost( } } - // extend_low static constexpr TypeConversionCostTblEntry ConversionTbl[] = { + // extend_low {ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1}, {ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1}, {ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1}, {ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1}, {ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1}, {ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1}, + // 2 x extend_low {ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i16, 2}, {ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i16, 2}, {ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 2}, {ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2}, + // extend_low, extend_high + {ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2}, + {ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2}, + {ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2}, + {ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2}, + {ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2}, + {ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2}, + // 2x extend_low, extend_high + {ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, 4}, + {ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i32, 4}, + {ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 4}, + {ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 4}, + // shuffle + {ISD::TRUNCATE, MVT::v2i16, MVT::v2i32, 2}, + {ISD::TRUNCATE, MVT::v2i8, MVT::v2i32, 4}, + {ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 2}, + {ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 4}, + // narrow, and + {ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 2}, + {ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2}, + // narrow, 2x and + {ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3}, + // 3x narrow, 4x and + {ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 7}, + {ISD::TRUNCATE, MVT::v16i8, 
MVT::v16i32, 7}, + // 7x narrow, 8x and + {ISD::TRUNCATE, MVT::v16i8, MVT::v16i64, 15}, + // convert_i32x4 + {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1}, + {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1}, + {ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1}, + {ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1}, + // extend_low, convert + {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2}, + {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2}, + {ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2}, + {ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2}, + // extend_low x 2, convert + {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3}, + {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3}, + {ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3}, + {ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3}, + // several shuffles + {ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10}, + {ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10}, + {ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 10}, + {ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10}, + /// trunc_sat, const, and, 3x narrow + {ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 6}, + {ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 6}, + {ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 6}, + {ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 6}, + /// trunc_sat, const, and, narrow + {ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 4}, + {ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 4}, + {ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 4}, + {ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 4}, + // 2x trunc_sat, const, 2x and, 3x narrow + {ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 8}, + {ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f32, 8}, + // 2x trunc_sat, const, 2x and, narrow + {ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 6}, + {ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 6}, }; if (const auto *Entry = diff --git a/llvm/test/CodeGen/WebAssembly/memory-interleave.ll b/llvm/test/CodeGen/WebAssembly/memory-interleave.ll index 404db23ba7329..5d58ae223da6f 100644 --- a/llvm/test/CodeGen/WebAssembly/memory-interleave.ll +++ b/llvm/test/CodeGen/WebAssembly/memory-interleave.ll @@ -1720,28 +1720,7 @@ for.body: ; preds = %entry, %for.body } ; CHECK-LABEL: two_floats_two_bytes_same_op: -; CHECK: loop -; CHECK: v128.load -; CHECK: v128.load -; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 -; CHECK: v128.load -; CHECK: v128.load -; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 -; CHECK: f32x4.mul -; CHECK: i32x4.trunc_sat_f32x4_s -; CHECK: v128.const 255, 255, 255, 255 -; CHECK: v128.and -; CHECK: i16x8.narrow_i32x4_u -; CHECK: i8x16.narrow_i16x8_u -; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 -; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 -; CHECK: f32x4.mul -; CHECK: i32x4.trunc_sat_f32x4_s -; CHECK: v128.and -; CHECK: i16x8.narrow_i32x4_u -; CHECK: i8x16.narrow_i16x8_u -; CHECK: i8x16.shuffle 0, 16, 1, 17, 2, 18, 3, 19, 0, 0, 0, 0, 0, 0, 0, 0 -; CHECK: v128.store64_lane +; CHECK-NOT: v128.load define hidden void @two_floats_two_bytes_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp22.not = icmp eq i32 %N, 0 @@ -1774,28 +1753,7 @@ for.body: ; preds = %entry, %for.body } ; CHECK-LABEL: two_floats_two_bytes_vary_op: -; CHECK: loop -; CHECK: v128.load -; CHECK: v128.load -; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 -; CHECK: v128.load -; CHECK: v128.load -; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 -; CHECK: 
f32x4.add -; CHECK: i32x4.trunc_sat_f32x4_s -; CHECK: v128.const 255, 255, 255, 255 -; CHECK: v128.and -; CHECK: i16x8.narrow_i32x4_u -; CHECK: i8x16.narrow_i16x8_u -; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 -; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 -; CHECK: f32x4.sub -; CHECK: i32x4.trunc_sat_f32x4_s -; CHECK: v128.and -; CHECK: i16x8.narrow_i32x4_u -; CHECK: i8x16.narrow_i16x8_u -; CHECK: i8x16.shuffle 0, 16, 1, 17, 2, 18, 3, 19, 0, 0, 0, 0, 0, 0, 0, 0 -; CHECK: v128.store64_lane +; CHECK-NOT: v128.load define hidden void @two_floats_two_bytes_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp21.not = icmp eq i32 %N, 0 @@ -2347,64 +2305,7 @@ for.body: ; preds = %entry, %for.body } ; CHECK-LABEL: four_floats_four_bytes_same_op: -; CHECK: loop -; CHECK: v128.load -; CHECK: v128.load -; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: v128.load -; CHECK: v128.load -; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: v128.load -; CHECK: v128.load -; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: v128.load -; CHECK: v128.load -; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: f32x4.mul -; CHECK: i32x4.trunc_sat_f32x4_s -; CHECK: v128.const 255, 255, 255, 255 -; CHECK: v128.and -; CHECK: i16x8.narrow_i32x4_u -; CHECK: i8x16.narrow_i16x8_u -; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: f32x4.mul -; CHECK: i32x4.trunc_sat_f32x4_s -; CHECK: v128.and -; CHECK: i16x8.narrow_i32x4_u -; CHECK: i8x16.narrow_i16x8_u -; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 0, 0, 0, 0, 0, 0, 0 -; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: f32x4.mul -; CHECK: i32x4.trunc_sat_f32x4_s -; CHECK: v128.and -; CHECK: i16x8.narrow_i32x4_u -; CHECK: i8x16.narrow_i16x8_u -; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: f32x4.mul -; CHECK: i32x4.trunc_sat_f32x4_s -; CHECK: v128.and 
-; CHECK: i16x8.narrow_i32x4_u -; CHECK: i8x16.narrow_i16x8_u -; CHECK: i8x16.shuffle 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 16, 17, 18, 19 -; CHECK: i8x16.shuffle 0, 4, 24, 28, 1, 5, 25, 29, 2, 6, 26, 30, 3, 7, 27, 31 -; CHECK: v128.store +; CHECK-NOT: v128.load define hidden void @four_floats_four_bytes_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp48.not = icmp eq i32 %N, 0 @@ -2453,64 +2354,7 @@ for.body: ; preds = %entry, %for.body } ; CHECK-LABEL: four_floats_four_bytes_vary_op: -; CHECK: loop -; CHECK: v128.load -; CHECK: v128.load -; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: v128.load -; CHECK: v128.load -; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: v128.load -; CHECK: v128.load -; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: v128.load -; CHECK: v128.load -; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: f32x4.mul -; CHECK: i32x4.trunc_sat_f32x4_s -; CHECK: v128.const 255, 255, 255, 255 -; CHECK: v128.and -; CHECK: i16x8.narrow_i32x4_u -; CHECK: i8x16.narrow_i16x8_u -; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: f32x4.add -; CHECK: i32x4.trunc_sat_f32x4_s -; CHECK: v128.and -; CHECK: i16x8.narrow_i32x4_u -; CHECK: i8x16.narrow_i16x8_u -; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 0, 0, 0, 0, 0, 0, 0 -; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: f32x4.div -; CHECK: i32x4.trunc_sat_f32x4_s -; CHECK: v128.and -; CHECK: i16x8.narrow_i32x4_u -; CHECK: i8x16.narrow_i16x8_u -; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: f32x4.sub -; CHECK: i32x4.trunc_sat_f32x4_s -; CHECK: v128.and -; CHECK: i16x8.narrow_i32x4_u -; CHECK: i8x16.narrow_i16x8_u -; CHECK: i8x16.shuffle 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 16, 17, 18, 19 -; CHECK: i8x16.shuffle 0, 4, 24, 28, 1, 5, 25, 29, 2, 6, 26, 30, 3, 7, 27, 31 -; CHECK: v128.store +; CHECK-NOT: v128.load define hidden void @four_floats_four_bytes_vary_op(ptr noundef 
readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp45.not = icmp eq i32 %N, 0 @@ -2757,62 +2601,7 @@ for.body: ; preds = %entry, %for.body } ; CHECK-LABEL: four_floats_four_shorts_same_op: -; CHECK: loop -; CHECK: v128.load -; CHECK: v128.load -; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: v128.load -; CHECK: v128.load -; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: v128.load -; CHECK: v128.load -; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: v128.load -; CHECK: v128.load -; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: f32x4.mul -; CHECK: i32x4.trunc_sat_f32x4_s -; CHECK: v128.const 65535, 65535, 65535, 65535 -; CHECK: v128.and -; CHECK: i16x8.narrow_i32x4_u -; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: f32x4.mul -; CHECK: i32x4.trunc_sat_f32x4_s -; CHECK: v128.and -; CHECK: i16x8.narrow_i32x4_u -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 -; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: f32x4.mul -; CHECK: i32x4.trunc_sat_f32x4_s -; CHECK: v128.and -; CHECK: i16x8.narrow_i32x4_u -; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: f32x4.mul -; CHECK: i32x4.trunc_sat_f32x4_s -; CHECK: v128.and -; CHECK: i16x8.narrow_i32x4_u -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 -; CHECK: i8x16.shuffle 4, 5, 12, 13, 20, 21, 28, 29, 6, 7, 14, 15, 22, 23, 30, 31 -; CHECK: v128.store -; CHECK: i8x16.shuffle 0, 1, 8, 9, 16, 17, 24, 25, 2, 3, 10, 11, 18, 19, 26, 27 -; CHECK: v128.store +; CHECK-NOT: v128.load define hidden void @four_floats_four_shorts_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp48.not = icmp eq i32 %N, 0 @@ -2861,62 +2650,7 @@ for.body: ; preds = %entry, %for.body } ; CHECK-LABEL: four_floats_four_shorts_vary_op: -; CHECK: loop -; CHECK: v128.load -; CHECK: 
v128.load -; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: v128.load -; CHECK: v128.load -; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: v128.load -; CHECK: v128.load -; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: v128.load -; CHECK: v128.load -; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: f32x4.mul -; CHECK: i32x4.trunc_sat_f32x4_s -; CHECK: v128.const 65535, 65535, 65535, 65535 -; CHECK: v128.and -; CHECK: i16x8.narrow_i32x4_u -; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: f32x4.add -; CHECK: i32x4.trunc_sat_f32x4_s -; CHECK: v128.and -; CHECK: i16x8.narrow_i32x4_u -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 -; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: f32x4.div -; CHECK: i32x4.trunc_sat_f32x4_s -; CHECK: v128.and -; CHECK: i16x8.narrow_i32x4_u -; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: f32x4.sub -; CHECK: i32x4.trunc_sat_f32x4_s -; CHECK: v128.and -; CHECK: i16x8.narrow_i32x4_u -; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 -; CHECK: i8x16.shuffle 4, 5, 12, 13, 20, 21, 28, 29, 6, 7, 14, 15, 22, 23, 30, 31 -; CHECK: v128.store -; CHECK: i8x16.shuffle 0, 1, 8, 9, 16, 17, 24, 25, 2, 3, 10, 11, 18, 19, 26, 27 -; CHECK: v128.store +; CHECK-NOT: v128.load define hidden void @four_floats_four_shorts_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp45.not = icmp eq i32 %N, 0 diff --git a/llvm/test/Transforms/LoopVectorize/WebAssembly/memory-interleave.ll b/llvm/test/Transforms/LoopVectorize/WebAssembly/memory-interleave.ll index b26e9cf55ddbf..718e03cfa0c67 100644 --- a/llvm/test/Transforms/LoopVectorize/WebAssembly/memory-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/WebAssembly/memory-interleave.ll @@ -1231,7 +1231,7 @@ define hidden void @scale_uv_row_down2(ptr nocapture noundef readonly %0, i32 no ; CHECK: LV: Found an estimated cost of 
26 for VF 8 For instruction: %14 = load i8 ; CHECK: LV: Found an estimated cost of 26 for VF 8 For instruction: %20 = load i8 ; CHECK: LV: Found an estimated cost of 7 for VF 8 For instruction: store i8 %48 -; CHECK: LV: Vector loop of width 8 costs: 10. +; CHECK: LV: Vector loop of width 8 costs: 11. ; CHECK: LV: Found an estimated cost of 132 for VF 16 For instruction: %14 = load i8 ; CHECK: LV: Found an estimated cost of 132 for VF 16 For instruction: %20 = load i8 ; CHECK: LV: Found an estimated cost of 6 for VF 16 For instruction: store i8 %48 @@ -1442,8 +1442,8 @@ for.body: ; preds = %entry, %for.body ; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: LV: Scalar loop costs: 18 -; CHECK: LV: Vector loop of width 2 costs: 23 -; CHECK: LV: Vector loop of width 4 costs: 13 +; CHECK: LV: Vector loop of width 2 costs: 27 +; CHECK: LV: Vector loop of width 4 costs: 15 ; CHECK: LV: Selecting VF: 4. define hidden void @two_bytes_two_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -1484,8 +1484,8 @@ for.body: ; preds = %entry, %for.body ; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: LV: Scalar loop costs: 18 -; CHECK: LV: Vector loop of width 2 costs: 23 -; CHECK: LV: Vector loop of width 4 costs: 13 +; CHECK: LV: Vector loop of width 2 costs: 27 +; CHECK: LV: Vector loop of width 4 costs: 15 ; CHECK: LV: Selecting VF: 4. define hidden void @two_bytes_two_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -1526,9 +1526,9 @@ for.body: ; preds = %entry, %for.body ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: LV: Scalar loop costs: 16 -; CHECK: LV: Vector loop of width 2 costs: 21 -; CHECK: LV: Vector loop of width 4 costs: 14. -; CHECK: LV: Selecting VF: 4. +; CHECK: LV: Vector loop of width 2 costs: 26 +; CHECK: LV: Vector loop of width 4 costs: 16. +; CHECK: LV: Selecting VF: 1. define hidden void @two_floats_two_bytes_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp22.not = icmp eq i32 %N, 0 @@ -1566,9 +1566,9 @@ for.body: ; preds = %entry, %for.body ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: LV: Scalar loop costs: 16 -; CHECK: LV: Vector loop of width 2 costs: 21 -; CHECK: LV: Vector loop of width 4 costs: 14. -; CHECK: LV: Selecting VF: 4. +; CHECK: LV: Vector loop of width 2 costs: 26 +; CHECK: LV: Vector loop of width 4 costs: 16. +; CHECK: LV: Selecting VF: 1. define hidden void @two_floats_two_bytes_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp21.not = icmp eq i32 %N, 0 @@ -1608,8 +1608,8 @@ for.body: ; preds = %entry, %for.body ; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: LV: Scalar loop costs: 18 -; CHECK: LV: Vector loop of width 2 costs: 22 -; CHECK: LV: Vector loop of width 4 costs: 11. 
+; CHECK: LV: Vector loop of width 2 costs: 24 +; CHECK: LV: Vector loop of width 4 costs: 12 ; CHECK: LV: Selecting VF: 4. define hidden void @two_shorts_two_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -1652,8 +1652,8 @@ for.body: ; preds = %entry, %for.body ; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: LV: Scalar loop costs: 18 -; CHECK: LV: Vector loop of width 2 costs: 22 -; CHECK: LV: Vector loop of width 4 costs: 11. +; CHECK: LV: Vector loop of width 2 costs: 24 +; CHECK: LV: Vector loop of width 4 costs: 12 ; CHECK: LV: Selecting VF: 4. define hidden void @two_shorts_two_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -1696,9 +1696,9 @@ for.body: ; preds = %entry, %for.body ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: LV: Scalar loop costs: 16 -; CHECK: LV: Vector loop of width 2 costs: 20 -; CHECK: LV: Vector loop of width 4 costs: 13. -; CHECK: LV: Selecting VF: 4. +; CHECK: LV: Vector loop of width 2 costs: 23 +; CHECK: LV: Vector loop of width 4 costs: 14 +; CHECK: LV: Selecting VF: 4 define hidden void @two_floats_two_shorts_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp22.not = icmp eq i32 %N, 0 @@ -1738,9 +1738,9 @@ for.body: ; preds = %entry, %for.body ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: LV: Scalar loop costs: 16 -; CHECK: LV: Vector loop of width 2 costs: 20 -; CHECK: LV: Vector loop of width 4 costs: 13. -; CHECK: LV: Selecting VF: 4. 
+; CHECK: LV: Vector loop of width 2 costs: 23 +; CHECK: LV: Vector loop of width 4 costs: 14 +; CHECK: LV: Selecting VF: 4 define hidden void @two_floats_two_shorts_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp21.not = icmp eq i32 %N, 0 @@ -1883,8 +1883,8 @@ for.body: ; preds = %entry, %for.body ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: LV: Scalar loop costs: 32 -; CHECK: LV: Vector loop of width 2 costs: 43 -; CHECK: LV: Vector loop of width 4 costs: 23 +; CHECK: LV: Vector loop of width 2 costs: 51 +; CHECK: LV: Vector loop of width 4 costs: 27 ; CHECK: LV: Selecting VF: 4 define hidden void @four_bytes_four_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -1943,8 +1943,8 @@ for.body: ; preds = %entry, %for.body ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: LV: Scalar loop costs: 32 -; CHECK: LV: Vector loop of width 2 costs: 43 -; CHECK: LV: Vector loop of width 4 costs: 23 +; CHECK: LV: Vector loop of width 2 costs: 51 +; CHECK: LV: Vector loop of width 4 costs: 27 ; CHECK: LV: Selecting VF: 4 define hidden void @four_bytes_four_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -2004,9 +2004,9 @@ for.body: ; preds = %entry, %for.body ; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: LV: Scalar loop costs: 28 -; CHECK: LV: Vector loop of width 2 costs: 38 -; CHECK: LV: Vector loop of width 4 costs: 26 -; CHECK: LV: Selecting VF: 4 +; CHECK: LV: Vector loop of width 2 costs: 48 +; CHECK: LV: Vector loop of width 4 costs: 31 +; CHECK: LV: Selecting VF: 1 define hidden void @four_floats_four_bytes_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp48.not = icmp eq i32 %N, 0 @@ -2061,9 +2061,9 @@ for.body: ; preds = %entry, %for.body ; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: LV: Scalar loop costs: 28 -; CHECK: LV: Vector loop of width 2 costs: 38 -; CHECK: LV: Vector loop of width 4 costs: 26 -; CHECK: LV: Selecting VF: 4 +; CHECK: LV: Vector loop of width 2 costs: 48 +; CHECK: LV: Vector loop of width 4 costs: 31 +; CHECK: LV: Selecting VF: 1 define hidden void @four_floats_four_bytes_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp45.not = icmp eq i32 %N, 0 @@ -2119,8 +2119,8 @@ for.body: ; preds = %entry, %for.body ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: LV: Scalar loop costs: 32 -; CHECK: LV: Vector loop of width 2 costs: 37 -; CHECK: LV: Vector loop of width 4 costs: 23 +; CHECK: LV: Vector loop of width 2 costs: 41 +; CHECK: LV: Vector loop of width 4 costs: 25 ; CHECK: LV: Selecting VF: 4 define hidden void @four_shorts_four_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef 
readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -2181,8 +2181,8 @@ for.body: ; preds = %entry, %for.body ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: LV: Scalar loop costs: 32 -; CHECK: LV: Vector loop of width 2 costs: 37 -; CHECK: LV: Vector loop of width 4 costs: 23 +; CHECK: LV: Vector loop of width 2 costs: 41 +; CHECK: LV: Vector loop of width 4 costs: 25 ; CHECK: LV: Selecting VF: 4 define hidden void @four_shorts_four_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -2243,9 +2243,9 @@ for.body: ; preds = %entry, %for.body ; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: LV: Scalar loop costs: 28 -; CHECK: LV: Vector loop of width 2 costs: 35 -; CHECK: LV: Vector loop of width 4 costs: 26 -; CHECK: LV: Selecting VF: 4 +; CHECK: LV: Vector loop of width 2 costs: 41 +; CHECK: LV: Vector loop of width 4 costs: 29 +; CHECK: LV: Selecting VF: 1 define hidden void @four_floats_four_shorts_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp48.not = icmp eq i32 %N, 0 @@ -2301,9 +2301,9 @@ for.body: ; preds = %entry, %for.body ; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: LV: Scalar loop costs: 28 -; CHECK: LV: Vector loop of width 2 costs: 35 -; CHECK: LV: Vector loop of width 4 costs: 26 -; CHECK: LV: Selecting VF: 4 +; CHECK: LV: Vector loop of width 2 costs: 41 +; CHECK: LV: Vector loop of width 4 costs: 29 +; CHECK: LV: Selecting VF: 1 define hidden void @four_floats_four_shorts_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp45.not = icmp eq i32 %N, 0 From f6138015ef17b0c522a1b60299659ef1587bb8d0 Mon Sep 17 00:00:00 2001 From: Brandon Wu Date: Mon, 10 Nov 2025 16:09:57 +0800 Subject: [PATCH 04/24] [RISCV][llvm] Support Smpmpmt version 0.6 (#166322) spec: https://github.com/riscv/riscv-isa-manual/blob/smpmpmt/src/smpmpmt.adoc Co-Authored-by: Jesse Huang --- clang/test/Driver/print-supported-extensions-riscv.c | 1 + clang/test/Preprocessor/riscv-target-features.c | 9 +++++++++ llvm/docs/RISCVUsage.rst | 3 +++ llvm/lib/Target/RISCV/RISCVFeatures.td | 3 +++ llvm/test/CodeGen/RISCV/attributes.ll | 4 ++++ llvm/test/CodeGen/RISCV/features-info.ll | 1 + llvm/test/MC/RISCV/attribute-arch.s | 3 +++ llvm/unittests/TargetParser/RISCVISAInfoTest.cpp | 1 + 8 files changed, 25 insertions(+) diff --git a/clang/test/Driver/print-supported-extensions-riscv.c b/clang/test/Driver/print-supported-extensions-riscv.c index cb812736786a9..681c912bd1612 100644 --- a/clang/test/Driver/print-supported-extensions-riscv.c +++ b/clang/test/Driver/print-supported-extensions-riscv.c @@ -227,6 +227,7 @@ // CHECK-NEXT: zvfofp8min 0.2 'Zvfofp8min' (Vector OFP8 Converts) // CHECK-NEXT: zvkgs 0.7 'Zvkgs' (Vector-Scalar GCM instructions for Cryptography) // CHECK-NEXT: zvqdotq 0.0 'Zvqdotq' (Vector quad widening 4D Dot Product) +// CHECK-NEXT: smpmpmt 0.6 'Smpmpmt' (PMP-based Memory Types Extension) // CHECK-NEXT: svukte 0.3 'Svukte' (Address-Independent Latency of User-Mode Faults to 
Supervisor Addresses) // CHECK-NEXT: xqccmp 0.3 'Xqccmp' (Qualcomm 16-bit Push/Pop and Double Moves) // CHECK-NEXT: xqcia 0.7 'Xqcia' (Qualcomm uC Arithmetic Extension) diff --git a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c index 77731a9776be8..56c738bc007fb 100644 --- a/clang/test/Preprocessor/riscv-target-features.c +++ b/clang/test/Preprocessor/riscv-target-features.c @@ -40,6 +40,7 @@ // CHECK-NOT: __riscv_smepmp {{.*$}} // CHECK-NOT: __riscv_smmpm{{.*$}} // CHECK-NOT: __riscv_smnpm{{.*$}} +// CHECK-NOT: __riscv_smpmpmt {{.*$}} // CHECK-NOT: __riscv_smrnmi {{.*$}} // CHECK-NOT: __riscv_smstateen {{.*$}} // CHECK-NOT: __riscv_ssaia {{.*$}} @@ -1333,6 +1334,14 @@ // RUN: -o - | FileCheck --check-prefix=CHECK-SMEPMP-EXT %s // CHECK-SMEPMP-EXT: __riscv_smepmp 1000000{{$}} +// RUN: %clang --target=riscv32 -menable-experimental-extensions \ +// RUN: -march=rv32ismpmpmt0p6 -x c -E -dM %s \ +// RUN: -o - | FileCheck --check-prefix=CHECK-SMPMPMT %s +// RUN: %clang --target=riscv64 -menable-experimental-extensions \ +// RUN: -march=rv64ismpmpmt0p6 -x c -E -dM %s \ +// RUN: -o - | FileCheck --check-prefix=CHECK-SMPMPMT %s +// CHECK-SMPMPMT: __riscv_smpmpmt 6000{{$}} + // RUN: %clang --target=riscv32 \ // RUN: -march=rv32ismrnmi1p0 -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-SMRNMI-EXT %s diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst index d03f383a92b3b..a21f03d389444 100644 --- a/llvm/docs/RISCVUsage.rst +++ b/llvm/docs/RISCVUsage.rst @@ -351,6 +351,9 @@ The primary goal of experimental support is to assist in the process of ratifica ``experimental-zvqdotq`` LLVM implements the `0.0.1 draft specification `__. +``experimental-smpmpmt`` + LLVM implements the `0.6 draft specification `__. + To use an experimental extension from `clang`, you must add `-menable-experimental-extensions` to the command line, and specify the exact version of the experimental extension you are using. To use an experimental extension with LLVM's internal developer tools (e.g. `llc`, `llvm-objdump`, `llvm-mc`), you must prefix the extension name with `experimental-`. Note that you don't need to specify the version with internal tools, and shouldn't include the `experimental-` prefix with `clang`. 
Vendor Extensions diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 5b72334f58d45..0b964c4808d8a 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -956,6 +956,9 @@ def FeatureStdExtSsdbltrp def FeatureStdExtSmepmp : RISCVExtension<1, 0, "Enhanced Physical Memory Protection">; +def FeatureStdExtSmpmpmt + : RISCVExperimentalExtension<0, 6, "PMP-based Memory Types Extension">; + def FeatureStdExtSmrnmi : RISCVExtension<1, 0, "Resumable Non-Maskable Interrupts">; def HasStdExtSmrnmi : Predicate<"Subtarget->hasStdExtSmrnmi()">, diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index 22c2d8102b5ca..f26d4f09c92fb 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -125,6 +125,7 @@ ; RUN: llc -mtriple=riscv32 -mattr=+smcdeleg %s -o - | FileCheck --check-prefixes=CHECK,RV32SMCDELEG %s ; RUN: llc -mtriple=riscv32 -mattr=+smcntrpmf %s -o - | FileCheck --check-prefixes=CHECK,RV32SMCNTRPMF %s ; RUN: llc -mtriple=riscv32 -mattr=+smepmp %s -o - | FileCheck --check-prefixes=CHECK,RV32SMEPMP %s +; RUN: llc -mtriple=riscv32 -mattr=+experimental-smpmpmt %s -o - | FileCheck --check-prefixes=CHECK,RV32SMPMPMT %s ; RUN: llc -mtriple=riscv32 -mattr=+smrnmi %s -o - | FileCheck --check-prefixes=CHECK,RV32SMRNMI %s ; RUN: llc -mtriple=riscv32 -mattr=+zfbfmin %s -o - | FileCheck --check-prefixes=CHECK,RV32ZFBFMIN %s ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zvfbfa %s -o - | FileCheck --check-prefixes=CHECK,RV32ZVFBFA %s @@ -275,6 +276,7 @@ ; RUN: llc -mtriple=riscv64 -mattr=+smcdeleg %s -o - | FileCheck --check-prefixes=CHECK,RV64SMCDELEG %s ; RUN: llc -mtriple=riscv64 -mattr=+smcntrpmf %s -o - | FileCheck --check-prefixes=CHECK,RV64SMCNTRPMF %s ; RUN: llc -mtriple=riscv64 -mattr=+smepmp %s -o - | FileCheck --check-prefixes=CHECK,RV64SMEPMP %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-smpmpmt %s -o - | FileCheck --check-prefixes=CHECK,RV64SMPMPMT %s ; RUN: llc -mtriple=riscv64 -mattr=+smrnmi %s -o - | FileCheck --check-prefixes=CHECK,RV64SMRNMI %s ; RUN: llc -mtriple=riscv64 -mattr=+zfbfmin %s -o - | FileCheck --check-prefixes=CHECK,RV64ZFBFMIN %s ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zvfbfa %s -o - | FileCheck --check-prefixes=CHECK,RV64ZVFBFA %s @@ -439,6 +441,7 @@ ; RV32SMCDELEG: .attribute 5, "rv32i2p1_smcdeleg1p0" ; RV32SMCNTRPMF: .attribute 5, "rv32i2p1_smcntrpmf1p0" ; RV32SMEPMP: .attribute 5, "rv32i2p1_smepmp1p0" +; RV32SMPMPMT: .attribute 5, "rv32i2p1_smpmpmt0p6" ; RV32SMRNMI: .attribute 5, "rv32i2p1_smrnmi1p0" ; RV32ZFBFMIN: .attribute 5, "rv32i2p1_f2p2_zicsr2p0_zfbfmin1p0" ; RV32ZVFBFA: .attribute 5, "rv32i2p1_f2p2_zicsr2p0_zfbfmin1p0_zve32f1p0_zve32x1p0_zvfbfa0p1_zvl32b1p0" @@ -587,6 +590,7 @@ ; RV64SMCDELEG: .attribute 5, "rv64i2p1_smcdeleg1p0" ; RV64SMCNTRPMF: .attribute 5, "rv64i2p1_smcntrpmf1p0" ; RV64SMEPMP: .attribute 5, "rv64i2p1_smepmp1p0" +; RV64SMPMPMT: .attribute 5, "rv64i2p1_smpmpmt0p6" ; RV64SMRNMI: .attribute 5, "rv64i2p1_smrnmi1p0" ; RV64ZFBFMIN: .attribute 5, "rv64i2p1_f2p2_zicsr2p0_zfbfmin1p0" ; RV64ZVFBFA: .attribute 5, "rv64i2p1_f2p2_zicsr2p0_zfbfmin1p0_zve32f1p0_zve32x1p0_zvfbfa0p1_zvl32b1p0" diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll index cf44af608542c..3d9906fdcbeb3 100644 --- a/llvm/test/CodeGen/RISCV/features-info.ll +++ b/llvm/test/CodeGen/RISCV/features-info.ll @@ -27,6 +27,7 @@ ; CHECK-NEXT: experimental - 
Experimental intrinsics. ; CHECK-NEXT: experimental-p - 'P' ('Base P' (Packed SIMD)). ; CHECK-NEXT: experimental-rvm23u32 - RISC-V experimental-rvm23u32 profile. +; CHECK-NEXT: experimental-smpmpmt - 'Smpmpmt' (PMP-based Memory Types Extension). ; CHECK-NEXT: experimental-svukte - 'Svukte' (Address-Independent Latency of User-Mode Faults to Supervisor Addresses). ; CHECK-NEXT: experimental-xqccmp - 'Xqccmp' (Qualcomm 16-bit Push/Pop and Double Moves). ; CHECK-NEXT: experimental-xqcia - 'Xqcia' (Qualcomm uC Arithmetic Extension). diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s index 111616df254d3..e41c9eac982a7 100644 --- a/llvm/test/MC/RISCV/attribute-arch.s +++ b/llvm/test/MC/RISCV/attribute-arch.s @@ -348,6 +348,9 @@ .attribute arch, "rv32i_smepmp1p0" # CHECK: attribute 5, "rv32i2p1_smepmp1p0" +.attribute arch, "rv32i_smpmpmt0p6" +# CHECK: attribute 5, "rv32i2p1_smpmpmt0p6" + .attribute arch, "rv32i_smrnmi1p0" # CHECK: attribute 5, "rv32i2p1_smrnmi1p0" diff --git a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp index bfc127530570d..c55cd94048cc5 100644 --- a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp +++ b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp @@ -1204,6 +1204,7 @@ Experimental extensions zvfofp8min 0.2 zvkgs 0.7 zvqdotq 0.0 + smpmpmt 0.6 svukte 0.3 xqccmp 0.3 xqcia 0.7 From 6408703de5a523e331ee47bbb6bea5a13b1a2758 Mon Sep 17 00:00:00 2001 From: Karthika Devi C Date: Mon, 10 Nov 2025 13:48:15 +0530 Subject: [PATCH 05/24] [Polly] Retain vectorization for fallback loop when RTC is unsatisfiable (#165525) When Polly generates a false runtime condition (RTC), the associated Polly generated loop is never executed and is eventually eliminated. As a result, the fallback loop becomes the default execution path. Disabling vectorization for this fallback loop will be counterproductive. This patch ensures that vectorization is only disabled when the RTC is not false (no Codegen failure). --- polly/lib/CodeGen/CodeGeneration.cpp | 24 ++++++++++------ .../CodeGen/Metadata/fallback_vec_annotate.ll | 28 +++++++++++++++++++ 2 files changed, 43 insertions(+), 9 deletions(-) create mode 100644 polly/test/CodeGen/Metadata/fallback_vec_annotate.ll diff --git a/polly/lib/CodeGen/CodeGeneration.cpp b/polly/lib/CodeGen/CodeGeneration.cpp index 2d8b393cc039c..062cdfbcfe3b5 100644 --- a/polly/lib/CodeGen/CodeGeneration.cpp +++ b/polly/lib/CodeGen/CodeGeneration.cpp @@ -235,15 +235,6 @@ static bool generateCode(Scop &S, IslAstInfo &AI, LoopInfo &LI, NodeBuilder.allocateNewArrays(StartExitBlocks); Annotator.buildAliasScopes(S); - // The code below annotates the "llvm.loop.vectorize.enable" to false - // for the code flow taken when RTCs fail. Because we don't want the - // Loop Vectorizer to come in later and vectorize the original fall back - // loop when Polly is enabled. - for (Loop *L : LI.getLoopsInPreorder()) { - if (S.contains(L)) - addStringMetadataToLoop(L, "llvm.loop.vectorize.enable", 0); - } - if (PerfMonitoring) { PerfMonitor P(S, EnteringBB->getParent()->getParent()); P.initialize(); @@ -285,6 +276,21 @@ static bool generateCode(Scop &S, IslAstInfo &AI, LoopInfo &LI, Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC); + auto *CI = dyn_cast(RTC); + // The code below annotates the "llvm.loop.vectorize.enable" to false + // for the code flow taken when RTCs fail. 
Because we don't want the + // Loop Vectorizer to come in later and vectorize the original fall back + // loop when Polly is enabled. This avoids loop versioning on fallback + // loop by Loop Vectorizer. Don't do this when Polly's RTC value is + // false (due to code generation failure), as we are left with only one + // version of Loop. + if (!(CI && CI->isZero())) { + for (Loop *L : LI.getLoopsInPreorder()) { + if (S.contains(L)) + addStringMetadataToLoop(L, "llvm.loop.vectorize.enable", 0); + } + } + // Explicitly set the insert point to the end of the block to avoid that a // split at the builder's current // insert position would move the malloc calls to the wrong BasicBlock. diff --git a/polly/test/CodeGen/Metadata/fallback_vec_annotate.ll b/polly/test/CodeGen/Metadata/fallback_vec_annotate.ll new file mode 100644 index 0000000000000..317d30649ab1d --- /dev/null +++ b/polly/test/CodeGen/Metadata/fallback_vec_annotate.ll @@ -0,0 +1,28 @@ +; RUN: opt %loadNPMPolly -S -passes=polly-codegen -polly-annotate-metadata-vectorize < %s | FileCheck %s +; RUN: opt %loadNPMPolly -S -passes=polly-codegen < %s | FileCheck %s + +; Verify vectorization is not disabled when RTC of Polly is false + +; CHECK: attributes {{.*}} = { "polly-optimized" } +; CHECK-NOT: {{.*}} = !{!"llvm.loop.vectorize.enable", i32 0} + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32" +target triple = "aarch64-unknown-linux-android10000" + +define void @ham(i64 %arg) { +bb: + br label %bb1 + +bb1: ; preds = %bb3, %bb + %phi = phi ptr [ %getelementptr4, %bb3 ], [ null, %bb ] + br label %bb2 + +bb2: ; preds = %bb2, %bb1 + %getelementptr = getelementptr i8, ptr %phi, i64 1 + store i8 0, ptr %getelementptr, align 1 + br i1 false, label %bb2, label %bb3 + +bb3: ; preds = %bb2 + %getelementptr4 = getelementptr i8, ptr %phi, i64 %arg + br label %bb1 +} From 4b433cbdeec7870fac318222d280dd0294ee34e3 Mon Sep 17 00:00:00 2001 From: Baranov Victor Date: Mon, 10 Nov 2025 11:26:38 +0300 Subject: [PATCH 06/24] [clang-tidy] Rename 'cert-err60-cpp' to 'bugprone-exception-copy-constructor-throws' (#164061) Closes https://github.com/llvm/llvm-project/issues/157299. 
--------- Co-authored-by: Victor Chernyakin --- .../bugprone/BugproneTidyModule.cpp | 3 ++ .../clang-tidy/bugprone/CMakeLists.txt | 1 + .../ExceptionCopyConstructorThrowsCheck.cpp} | 12 ++++--- .../ExceptionCopyConstructorThrowsCheck.h} | 16 +++++----- .../clang-tidy/cert/CERTTidyModule.cpp | 5 +-- .../clang-tidy/cert/CMakeLists.txt | 1 - clang-tools-extra/docs/ReleaseNotes.rst | 4 +++ .../exception-copy-constructor-throws.rst | 31 +++++++++++++++++++ .../docs/clang-tidy/checks/cert/err60-cpp.rst | 9 ++++-- .../docs/clang-tidy/checks/list.rst | 3 ++ .../exception-copy-constructor-throws.cpp} | 4 +-- 11 files changed, 68 insertions(+), 21 deletions(-) rename clang-tools-extra/clang-tidy/{cert/ThrownExceptionTypeCheck.cpp => bugprone/ExceptionCopyConstructorThrowsCheck.cpp} (75%) rename clang-tools-extra/clang-tidy/{cert/ThrownExceptionTypeCheck.h => bugprone/ExceptionCopyConstructorThrowsCheck.h} (58%) create mode 100644 clang-tools-extra/docs/clang-tidy/checks/bugprone/exception-copy-constructor-throws.rst rename clang-tools-extra/test/clang-tidy/checkers/{cert/throw-exception-type.cpp => bugprone/exception-copy-constructor-throws.cpp} (93%) diff --git a/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp b/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp index baea231f6e060..3ba1532334e4a 100644 --- a/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp @@ -30,6 +30,7 @@ #include "DynamicStaticInitializersCheck.h" #include "EasilySwappableParametersCheck.h" #include "EmptyCatchCheck.h" +#include "ExceptionCopyConstructorThrowsCheck.h" #include "ExceptionEscapeCheck.h" #include "FloatLoopCounterCheck.h" #include "FoldInitTypeCheck.h" @@ -155,6 +156,8 @@ class BugproneModule : public ClangTidyModule { CheckFactories.registerCheck( "bugprone-easily-swappable-parameters"); CheckFactories.registerCheck("bugprone-empty-catch"); + CheckFactories.registerCheck( + "bugprone-exception-copy-constructor-throws"); CheckFactories.registerCheck( "bugprone-exception-escape"); CheckFactories.registerCheck( diff --git a/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt b/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt index aacaa61888147..49c467aa5090c 100644 --- a/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt @@ -26,6 +26,7 @@ add_clang_library(clangTidyBugproneModule STATIC DynamicStaticInitializersCheck.cpp EasilySwappableParametersCheck.cpp EmptyCatchCheck.cpp + ExceptionCopyConstructorThrowsCheck.cpp ExceptionEscapeCheck.cpp FloatLoopCounterCheck.cpp FoldInitTypeCheck.cpp diff --git a/clang-tools-extra/clang-tidy/cert/ThrownExceptionTypeCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ExceptionCopyConstructorThrowsCheck.cpp similarity index 75% rename from clang-tools-extra/clang-tidy/cert/ThrownExceptionTypeCheck.cpp rename to clang-tools-extra/clang-tidy/bugprone/ExceptionCopyConstructorThrowsCheck.cpp index 2225a90aeece1..73658459b8e26 100644 --- a/clang-tools-extra/clang-tidy/cert/ThrownExceptionTypeCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/ExceptionCopyConstructorThrowsCheck.cpp @@ -6,15 +6,16 @@ // //===----------------------------------------------------------------------===// -#include "ThrownExceptionTypeCheck.h" +#include "ExceptionCopyConstructorThrowsCheck.h" #include "clang/AST/ASTContext.h" #include "clang/ASTMatchers/ASTMatchFinder.h" using namespace clang::ast_matchers; -namespace clang::tidy::cert { 
+namespace clang::tidy::bugprone { -void ThrownExceptionTypeCheck::registerMatchers(MatchFinder *Finder) { +void ExceptionCopyConstructorThrowsCheck::registerMatchers( + MatchFinder *Finder) { Finder->addMatcher( traverse( TK_AsIs, @@ -25,10 +26,11 @@ void ThrownExceptionTypeCheck::registerMatchers(MatchFinder *Finder) { this); } -void ThrownExceptionTypeCheck::check(const MatchFinder::MatchResult &Result) { +void ExceptionCopyConstructorThrowsCheck::check( + const MatchFinder::MatchResult &Result) { const auto *E = Result.Nodes.getNodeAs("expr"); diag(E->getExprLoc(), "thrown exception type is not nothrow copy constructible"); } -} // namespace clang::tidy::cert +} // namespace clang::tidy::bugprone diff --git a/clang-tools-extra/clang-tidy/cert/ThrownExceptionTypeCheck.h b/clang-tools-extra/clang-tidy/bugprone/ExceptionCopyConstructorThrowsCheck.h similarity index 58% rename from clang-tools-extra/clang-tidy/cert/ThrownExceptionTypeCheck.h rename to clang-tools-extra/clang-tidy/bugprone/ExceptionCopyConstructorThrowsCheck.h index 41a5145209686..f1d7cca0e5bad 100644 --- a/clang-tools-extra/clang-tidy/cert/ThrownExceptionTypeCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/ExceptionCopyConstructorThrowsCheck.h @@ -6,20 +6,20 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CERT_THROWNEXCEPTIONTYPECHECK_H -#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CERT_THROWNEXCEPTIONTYPECHECK_H +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_EXCEPTIONCOPYCONSTRUCTORTHROWSCHECK_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_EXCEPTIONCOPYCONSTRUCTORTHROWSCHECK_H #include "../ClangTidyCheck.h" -namespace clang::tidy::cert { +namespace clang::tidy::bugprone { /// Checks whether a thrown object is nothrow copy constructible. 
/// /// For the user-facing documentation see: -/// https://clang.llvm.org/extra/clang-tidy/checks/cert/err60-cpp.html -class ThrownExceptionTypeCheck : public ClangTidyCheck { +/// https://clang.llvm.org/extra/clang-tidy/checks/bugprone/exception-copy-constructor-throws.html +class ExceptionCopyConstructorThrowsCheck : public ClangTidyCheck { public: - ThrownExceptionTypeCheck(StringRef Name, ClangTidyContext *Context) + ExceptionCopyConstructorThrowsCheck(StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context) {} bool isLanguageVersionSupported(const LangOptions &LangOpts) const override { return LangOpts.CPlusPlus; @@ -28,6 +28,6 @@ class ThrownExceptionTypeCheck : public ClangTidyCheck { void check(const ast_matchers::MatchFinder::MatchResult &Result) override; }; -} // namespace clang::tidy::cert +} // namespace clang::tidy::bugprone -#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CERT_THROWNEXCEPTIONTYPECHECK_H +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_EXCEPTIONCOPYCONSTRUCTORTHROWSCHECK_H diff --git a/clang-tools-extra/clang-tidy/cert/CERTTidyModule.cpp b/clang-tools-extra/clang-tidy/cert/CERTTidyModule.cpp index 2f6fc4db46545..6dbcecee1e023 100644 --- a/clang-tools-extra/clang-tidy/cert/CERTTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/cert/CERTTidyModule.cpp @@ -13,6 +13,7 @@ #include "../bugprone/CommandProcessorCheck.h" #include "../bugprone/CopyConstructorMutatesArgumentCheck.h" #include "../bugprone/DefaultOperatorNewOnOveralignedTypeCheck.h" +#include "../bugprone/ExceptionCopyConstructorThrowsCheck.h" #include "../bugprone/FloatLoopCounterCheck.h" #include "../bugprone/PointerArithmeticOnPolymorphicObjectCheck.h" #include "../bugprone/RawMemoryCallOnNonTrivialTypeCheck.h" @@ -41,7 +42,6 @@ #include "../readability/UppercaseLiteralSuffixCheck.h" #include "LimitedRandomnessCheck.h" #include "ProperlySeededRandomGeneratorCheck.h" -#include "ThrownExceptionTypeCheck.h" namespace { @@ -262,7 +262,8 @@ class CERTModule : public ClangTidyModule { "cert-err52-cpp"); CheckFactories.registerCheck( "cert-err58-cpp"); - CheckFactories.registerCheck("cert-err60-cpp"); + CheckFactories.registerCheck( + "cert-err60-cpp"); CheckFactories.registerCheck( "cert-err61-cpp"); // MEM diff --git a/clang-tools-extra/clang-tidy/cert/CMakeLists.txt b/clang-tools-extra/clang-tidy/cert/CMakeLists.txt index 5abb47277e78f..81015a02023ba 100644 --- a/clang-tools-extra/clang-tidy/cert/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/cert/CMakeLists.txt @@ -7,7 +7,6 @@ add_clang_library(clangTidyCERTModule STATIC CERTTidyModule.cpp LimitedRandomnessCheck.cpp ProperlySeededRandomGeneratorCheck.cpp - ThrownExceptionTypeCheck.cpp LINK_LIBS clangTidy diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 48a2a1f5d39d5..c233301a08f36 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -269,6 +269,10 @@ New check aliases ` keeping initial check as an alias to the new one. 
+- Renamed :doc:`cert-err60-cpp ` to + :doc:`bugprone-exception-copy-constructor-throws + ` + - Renamed :doc:`cert-flp30-c ` to :doc:`bugprone-float-loop-counter ` diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/exception-copy-constructor-throws.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/exception-copy-constructor-throws.rst new file mode 100644 index 0000000000000..8c3becf80a541 --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/exception-copy-constructor-throws.rst @@ -0,0 +1,31 @@ +.. title:: clang-tidy - bugprone-exception-copy-constructor-throws + +bugprone-exception-copy-constructor-throws +========================================== + +Checks whether a thrown object's copy constructor can throw. + +Exception objects are required to be copy constructible in C++. However, an +exception's copy constructor should not throw to avoid potential issues when +unwinding the stack. If an exception is thrown during stack unwinding (such +as from a copy constructor of an exception object), the program will +terminate via ``std::terminate``. + +.. code-block:: c++ + + class SomeException { + public: + SomeException() = default; + SomeException(const SomeException&) { /* may throw */ } + }; + + void f() { + throw SomeException(); // warning: thrown exception type's copy constructor can throw + } + +References +---------- + +This check corresponds to the CERT C++ Coding Standard rule +`ERR60-CPP. Exception objects must be nothrow copy constructible +`_. \ No newline at end of file diff --git a/clang-tools-extra/docs/clang-tidy/checks/cert/err60-cpp.rst b/clang-tools-extra/docs/clang-tidy/checks/cert/err60-cpp.rst index 9fcb840fc06f8..8d6dd1bf4b9b7 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/cert/err60-cpp.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/cert/err60-cpp.rst @@ -1,11 +1,14 @@ .. title:: clang-tidy - cert-err60-cpp +.. meta:: + :http-equiv=refresh: 5;URL=../bugprone/exception-copy-constructor-throws.html cert-err60-cpp ============== -This check flags all throw expressions where the exception object is not nothrow -copy constructible. +The `cert-err60-cpp` check is an alias, please see +`bugprone-exception-copy-constructor-throws <../bugprone/exception-copy-constructor-throws.html>`_ +for more information. This check corresponds to the CERT C++ Coding Standard rule `ERR60-CPP. Exception objects must be nothrow copy constructible -`_. +`_. 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst index e2875604af72b..3b0ff3ef33365 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/list.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst @@ -98,6 +98,7 @@ Clang-Tidy Checks :doc:`bugprone-dynamic-static-initializers `, :doc:`bugprone-easily-swappable-parameters `, :doc:`bugprone-empty-catch `, + :doc:`bugprone-exception-copy-constructor-throws `, :doc:`bugprone-exception-escape `, :doc:`bugprone-float-loop-counter `, :doc:`bugprone-fold-init-type `, @@ -180,6 +181,7 @@ Clang-Tidy Checks :doc:`bugprone-virtual-near-miss `, "Yes" :doc:`cert-err33-c `, :doc:`cert-err60-cpp `, + :doc:`cert-flp30-c `, :doc:`cert-msc50-cpp `, :doc:`cert-msc51-cpp `, :doc:`cert-oop58-cpp `, @@ -449,6 +451,7 @@ Check aliases :doc:`cert-err34-c `, :doc:`bugprone-unchecked-string-to-number-conversion `, :doc:`cert-err52-cpp `, :doc:`modernize-avoid-setjmp-longjmp `, :doc:`cert-err58-cpp `, :doc:`bugprone-throwing-static-initialization `, + :doc:`cert-err60-cpp `, :doc:`bugprone-exception-copy-constructor-throws `, :doc:`cert-err61-cpp `, :doc:`misc-throw-by-value-catch-by-reference `, :doc:`cert-exp42-c `, :doc:`bugprone-suspicious-memory-comparison `, :doc:`cert-fio38-c `, :doc:`misc-non-copyable-objects `, diff --git a/clang-tools-extra/test/clang-tidy/checkers/cert/throw-exception-type.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-copy-constructor-throws.cpp similarity index 93% rename from clang-tools-extra/test/clang-tidy/checkers/cert/throw-exception-type.cpp rename to clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-copy-constructor-throws.cpp index 34ca83795c397..7e2d586175c1b 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/cert/throw-exception-type.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-copy-constructor-throws.cpp @@ -1,4 +1,4 @@ -// RUN: %check_clang_tidy -std=c++11,c++14 %s cert-err60-cpp %t -- -- -fcxx-exceptions +// RUN: %check_clang_tidy -std=c++11,c++14 %s bugprone-exception-copy-constructor-throws %t -- -- -fcxx-exceptions // FIXME: Split off parts of this test that rely on dynamic exception // specifications, and run this test in all language modes. // FIXME: Fix the checker to work in C++17 or later mode. 
@@ -92,7 +92,7 @@ void f() { throw U(); // ok throw V(); // ok throw W(); // match, noexcept(false) - // CHECK-MESSAGES: :[[@LINE-1]]:9: warning: thrown exception type is not nothrow copy constructible [cert-err60-cpp] + // CHECK-MESSAGES: :[[@LINE-1]]:9: warning: thrown exception type is not nothrow copy constructible [bugprone-exception-copy-constructor-throws] throw X(); // match, no noexcept clause, nontrivial // CHECK-MESSAGES: :[[@LINE-1]]:9: warning: thrown exception type is not nothrow copy constructible throw Y(); // ok From abdb9a0ec57e84ad9f09642ac6c910184aab31bd Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 10 Nov 2025 08:26:46 +0000 Subject: [PATCH 07/24] [gn build] Port 4b433cbdeec7 --- .../gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn | 1 + .../gn/secondary/clang-tools-extra/clang-tidy/cert/BUILD.gn | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn index b01cfb9f4c915..de812cd7d5561 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn @@ -38,6 +38,7 @@ static_library("bugprone") { "DynamicStaticInitializersCheck.cpp", "EasilySwappableParametersCheck.cpp", "EmptyCatchCheck.cpp", + "ExceptionCopyConstructorThrowsCheck.cpp", "ExceptionEscapeCheck.cpp", "FloatLoopCounterCheck.cpp", "FoldInitTypeCheck.cpp", diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cert/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cert/BUILD.gn index 18708f68b59c5..65c149b9d9360 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cert/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cert/BUILD.gn @@ -18,6 +18,5 @@ static_library("cert") { "CERTTidyModule.cpp", "LimitedRandomnessCheck.cpp", "ProperlySeededRandomGeneratorCheck.cpp", - "ThrownExceptionTypeCheck.cpp", ] } From d0081aa92923cf5ee1d2ec4c6191a492c5a8cf25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= Date: Mon, 10 Nov 2025 09:38:15 +0100 Subject: [PATCH 08/24] [NFC][SPIRV] Make the zero-length-array.ll test explicit about what is generated (#166910) This patch doesn't change anything. Just adds more explicit checks to verify what is generated in this case when an alloca has a zero-sized array. I'd expect an `OpRuntimeArray`, but nothing is generated. --- llvm/test/CodeGen/SPIRV/zero-length-array.ll | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/llvm/test/CodeGen/SPIRV/zero-length-array.ll b/llvm/test/CodeGen/SPIRV/zero-length-array.ll index 666176c87adb6..5fd94d25dfd87 100644 --- a/llvm/test/CodeGen/SPIRV/zero-length-array.ll +++ b/llvm/test/CodeGen/SPIRV/zero-length-array.ll @@ -1,10 +1,17 @@ ; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} -; CHECK: %[[#type:]] = OpTypeInt 32 0 -; CHECK: %[[#ext:]] = OpConstant %[[#type]] 0 +; Nothing is generated, but compilation doesn't crash. 
+; CHECK: OpName %[[#FOO:]] "foo" +; CHECK: OpName %[[#RTM:]] "reg2mem alloca point" +; CHECK: %[[#INT:]] = OpTypeInt 32 0 +; CHECK: %[[#RTM]] = OpConstant %[[#INT]] 0 +; CHECK: %[[#FOO]] = OpFunction +; CHECK-NEXT: = OpLabel +; CHECK-NEXT: OpReturn +; CHECK-NEXT: OpFunctionEnd -define spir_func void @_Z3foov() { +define spir_func void @foo() { entry: %i = alloca [0 x i32], align 4 ret void From eaa889ab1791618eddc6a22d777750ac936b65a1 Mon Sep 17 00:00:00 2001 From: Baranov Victor Date: Mon, 10 Nov 2025 11:41:21 +0300 Subject: [PATCH 09/24] [clang-tidy] Add fine-graded configuration for 'bugprone-exception-escape' (#164081) Need these options to complete https://github.com/llvm/llvm-project/issues/160825, but I think it's generally beneficial to fine-tune this check. --------- Co-authored-by: EugeneZelenko Co-authored-by: Victor Chernyakin --- .../bugprone/ExceptionEscapeCheck.cpp | 42 +++++++++++++---- .../bugprone/ExceptionEscapeCheck.h | 7 +++ clang-tools-extra/docs/ReleaseNotes.rst | 4 +- .../checks/bugprone/exception-escape.rst | 25 ++++++++++ .../bugprone/exception-escape-options.cpp | 47 +++++++++++++++++++ 5 files changed, 114 insertions(+), 11 deletions(-) create mode 100644 clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-escape-options.cpp diff --git a/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.cpp index 837a86ff8655e..b7de8395ffa05 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.cpp @@ -36,13 +36,22 @@ ExceptionEscapeCheck::ExceptionEscapeCheck(StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), RawFunctionsThatShouldNotThrow(Options.get( "FunctionsThatShouldNotThrow", "")), - RawIgnoredExceptions(Options.get("IgnoredExceptions", "")) { + RawIgnoredExceptions(Options.get("IgnoredExceptions", "")), + RawCheckedSwapFunctions( + Options.get("CheckedSwapFunctions", "swap,iter_swap,iter_move")), + CheckDestructors(Options.get("CheckDestructors", true)), + CheckMoveMemberFunctions(Options.get("CheckMoveMemberFunctions", true)), + CheckMain(Options.get("CheckMain", true)), + CheckNothrowFunctions(Options.get("CheckNothrowFunctions", true)) { llvm::SmallVector FunctionsThatShouldNotThrowVec, - IgnoredExceptionsVec; + IgnoredExceptionsVec, CheckedSwapFunctionsVec; RawFunctionsThatShouldNotThrow.split(FunctionsThatShouldNotThrowVec, ",", -1, false); FunctionsThatShouldNotThrow.insert_range(FunctionsThatShouldNotThrowVec); + RawCheckedSwapFunctions.split(CheckedSwapFunctionsVec, ",", -1, false); + CheckedSwapFunctions.insert_range(CheckedSwapFunctionsVec); + llvm::StringSet<> IgnoredExceptions; RawIgnoredExceptions.split(IgnoredExceptionsVec, ",", -1, false); IgnoredExceptions.insert_range(IgnoredExceptionsVec); @@ -54,20 +63,33 @@ void ExceptionEscapeCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { Options.store(Opts, "FunctionsThatShouldNotThrow", RawFunctionsThatShouldNotThrow); Options.store(Opts, "IgnoredExceptions", RawIgnoredExceptions); + Options.store(Opts, "CheckedSwapFunctions", RawCheckedSwapFunctions); + Options.store(Opts, "CheckDestructors", CheckDestructors); + Options.store(Opts, "CheckMoveMemberFunctions", CheckMoveMemberFunctions); + Options.store(Opts, "CheckMain", CheckMain); + Options.store(Opts, "CheckNothrowFunctions", CheckNothrowFunctions); } void ExceptionEscapeCheck::registerMatchers(MatchFinder *Finder) { + auto MatchIf = [](bool Enabled, const 
auto &Matcher) { + ast_matchers::internal::Matcher Nothing = unless(anything()); + return Enabled ? Matcher : Nothing; + }; Finder->addMatcher( functionDecl( isDefinition(), - anyOf(isNoThrow(), - allOf(anyOf(cxxDestructorDecl(), - cxxConstructorDecl(isMoveConstructor()), - cxxMethodDecl(isMoveAssignmentOperator()), isMain(), - allOf(hasAnyName("swap", "iter_swap", "iter_move"), - hasAtLeastOneParameter())), - unless(isExplicitThrow())), - isEnabled(FunctionsThatShouldNotThrow))) + anyOf( + MatchIf(CheckNothrowFunctions, isNoThrow()), + allOf(anyOf(MatchIf(CheckDestructors, cxxDestructorDecl()), + MatchIf( + CheckMoveMemberFunctions, + anyOf(cxxConstructorDecl(isMoveConstructor()), + cxxMethodDecl(isMoveAssignmentOperator()))), + MatchIf(CheckMain, isMain()), + allOf(isEnabled(CheckedSwapFunctions), + hasAtLeastOneParameter())), + unless(isExplicitThrow())), + isEnabled(FunctionsThatShouldNotThrow))) .bind("thrower"), this); } diff --git a/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.h b/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.h index 31d9e85082c52..c3bf4a4335273 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.h @@ -35,8 +35,15 @@ class ExceptionEscapeCheck : public ClangTidyCheck { private: StringRef RawFunctionsThatShouldNotThrow; StringRef RawIgnoredExceptions; + StringRef RawCheckedSwapFunctions; + + const bool CheckDestructors; + const bool CheckMoveMemberFunctions; + const bool CheckMain; + const bool CheckNothrowFunctions; llvm::StringSet<> FunctionsThatShouldNotThrow; + llvm::StringSet<> CheckedSwapFunctions; utils::ExceptionAnalyzer Tracer; }; diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index c233301a08f36..f3d5b6f43a227 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -306,7 +306,9 @@ Changes in existing checks exceptions from captures are now diagnosed, exceptions in the bodies of lambdas that aren't actually invoked are not. Additionally, fixed an issue where the check wouldn't diagnose throws in arguments to functions or - constructors. + constructors. Added fine-grained configuration via options + `CheckDestructors`, `CheckMoveMemberFunctions`, `CheckMain`, + `CheckedSwapFunctions`, and `CheckNothrowFunctions`. - Improved :doc:`bugprone-infinite-loop ` check by adding detection for diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/exception-escape.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/exception-escape.rst index 182fade7f47a0..7eaa333d5403a 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/exception-escape.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/exception-escape.rst @@ -35,6 +35,31 @@ WARNING! This check may be expensive on large source files. Options ------- +.. option:: CheckDestructors + + When `true`, destructors are analyzed to not throw exceptions. + Default value is `true`. + +.. option:: CheckMoveMemberFunctions + + When `true`, move constructors and move assignment operators are analyzed + to not throw exceptions. Default value is `true`. + +.. option:: CheckMain + + When `true`, the ``main()`` function is analyzed to not throw exceptions. + Default value is `true`. + +.. option:: CheckNothrowFunctions + + When `true`, functions marked with ``noexcept`` or ``throw()`` exception + specifications are analyzed to not throw exceptions. Default value is `true`. + +.. 
option:: CheckedSwapFunctions + + Comma-separated list of swap function names which should not throw exceptions. + Default value is `swap,iter_swap,iter_move`. + .. option:: FunctionsThatShouldNotThrow Comma separated list containing function names which should not throw. An diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-escape-options.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-escape-options.cpp new file mode 100644 index 0000000000000..48c9bacd1b2e5 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-escape-options.cpp @@ -0,0 +1,47 @@ +// RUN: %check_clang_tidy -std=c++11-or-later %s bugprone-exception-escape %t -- \ +// RUN: -config="{CheckOptions: { \ +// RUN: bugprone-exception-escape.CheckDestructors: false, \ +// RUN: bugprone-exception-escape.CheckMoveMemberFunctions: false, \ +// RUN: bugprone-exception-escape.CheckMain: false, \ +// RUN: bugprone-exception-escape.CheckedSwapFunctions: '', \ +// RUN: bugprone-exception-escape.CheckNothrowFunctions: false \ +// RUN: }}" \ +// RUN: -- -fexceptions + +// CHECK-MESSAGES-NOT: warning: + +struct destructor { + ~destructor() { + throw 1; + } +}; + +struct move { + move(move&&) { throw 42; } + move& operator=(move&&) { throw 42; } +}; + +void swap(int&, int&) { + throw 1; +} + +void iter_swap(int&, int&) { + throw 1; +} + +void iter_move(int&) { + throw 1; +} + +void nothrow_func() throw() { + throw 1; +} + +void noexcept_func() noexcept { + throw 1; +} + +int main() { + throw 1; + return 0; +} From 3637f66b4d819c1cef6a3b58466fc8b1f983cfe0 Mon Sep 17 00:00:00 2001 From: Elvis Wang Date: Mon, 10 Nov 2025 17:01:43 +0800 Subject: [PATCH 10/24] [RISCV][TTI] Fix crash of non-built-in vector type cost quering. (#167258) For the non-built-in vector type, the RISCV cost model cannot handle this properly. So fall back to the BasicTTI for this situation. Fixes: #166732 --- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 3 ++- llvm/test/Analysis/CostModel/RISCV/cast.ll | 10 ++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 332433b4e530b..3d8eb4097604a 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -1683,7 +1683,8 @@ InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, !TypeSize::isKnownLE(DL.getTypeSizeInBits(Src), SrcLT.second.getSizeInBits()) || !TypeSize::isKnownLE(DL.getTypeSizeInBits(Dst), - DstLT.second.getSizeInBits())) + DstLT.second.getSizeInBits()) || + SrcLT.first > 1 || DstLT.first > 1) return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); // The split cost is handled by the base getCastInstrCost diff --git a/llvm/test/Analysis/CostModel/RISCV/cast.ll b/llvm/test/Analysis/CostModel/RISCV/cast.ll index e64bce2d9c9e5..6dacd59f07fde 100644 --- a/llvm/test/Analysis/CostModel/RISCV/cast.ll +++ b/llvm/test/Analysis/CostModel/RISCV/cast.ll @@ -6239,3 +6239,13 @@ define void @legalization_crash() { fptoui <192 x float> undef to <192 x i1> ret void } + +; Test that types that need to be split go through BasicTTIImpl. 
+define void @BitInt_crash() { +; ZVE64X-LABEL: 'BitInt_crash' +; ZVE64X-NEXT: Cost Model: Found an estimated cost of 2043 for instruction: %1 = bitcast <16 x i64> poison to <512 x i2> +; ZVE64X-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + bitcast <16 x i64> poison to <512 x i2> + ret void +} From 152bda726958c45be709270bc8a5e1fda642f375 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Mon, 10 Nov 2025 10:09:14 +0100 Subject: [PATCH 11/24] [libc++] Replace the last uses of __tuple_types with __type_list (#167214) `__tuple_types` is at this point just a `__type_list` with a weird name, so we can just replace the few places it's still used. --- libcxx/include/CMakeLists.txt | 1 - libcxx/include/__tuple/tuple_size.h | 1 - libcxx/include/__tuple/tuple_types.h | 25 ------------------------- libcxx/include/module.modulemap.in | 1 - libcxx/include/tuple | 14 +++++++------- 5 files changed, 7 insertions(+), 35 deletions(-) delete mode 100644 libcxx/include/__tuple/tuple_types.h diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index 46e17b584432e..09d4552664dd7 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -786,7 +786,6 @@ set(files __tuple/tuple_like.h __tuple/tuple_like_no_subrange.h __tuple/tuple_size.h - __tuple/tuple_types.h __type_traits/add_cv_quals.h __type_traits/add_pointer.h __type_traits/add_reference.h diff --git a/libcxx/include/__tuple/tuple_size.h b/libcxx/include/__tuple/tuple_size.h index 60f2a667a1ba3..719edc0e342c0 100644 --- a/libcxx/include/__tuple/tuple_size.h +++ b/libcxx/include/__tuple/tuple_size.h @@ -12,7 +12,6 @@ #include <__config> #include <__cstddef/size_t.h> #include <__fwd/tuple.h> -#include <__tuple/tuple_types.h> #include <__type_traits/enable_if.h> #include <__type_traits/integral_constant.h> #include <__type_traits/is_const.h> diff --git a/libcxx/include/__tuple/tuple_types.h b/libcxx/include/__tuple/tuple_types.h deleted file mode 100644 index 7e1256cf8790e..0000000000000 --- a/libcxx/include/__tuple/tuple_types.h +++ /dev/null @@ -1,25 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef _LIBCPP___TUPLE_TUPLE_TYPES_H -#define _LIBCPP___TUPLE_TUPLE_TYPES_H - -#include <__config> - -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif - -_LIBCPP_BEGIN_NAMESPACE_STD - -template -struct __tuple_types {}; - -_LIBCPP_END_NAMESPACE_STD - -#endif // _LIBCPP___TUPLE_TUPLE_TYPES_H diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in index f77c885da5b6a..2266a1d1d4c1c 100644 --- a/libcxx/include/module.modulemap.in +++ b/libcxx/include/module.modulemap.in @@ -2124,7 +2124,6 @@ module std [system] { module tuple_like_no_subrange { header "__tuple/tuple_like_no_subrange.h" } module tuple_like { header "__tuple/tuple_like.h" } module tuple_size { header "__tuple/tuple_size.h" } - module tuple_types { header "__tuple/tuple_types.h" } header "tuple" export * diff --git a/libcxx/include/tuple b/libcxx/include/tuple index 3c5330dd6e14e..a960b64a71763 100644 --- a/libcxx/include/tuple +++ b/libcxx/include/tuple @@ -235,7 +235,6 @@ template # include <__tuple/tuple_element.h> # include <__tuple/tuple_like.h> # include <__tuple/tuple_size.h> -# include <__tuple/tuple_types.h> # include <__type_traits/common_reference.h> # include <__type_traits/common_type.h> # include <__type_traits/conditional.h> @@ -265,6 +264,7 @@ template # include <__type_traits/remove_cv.h> # include <__type_traits/remove_cvref.h> # include <__type_traits/remove_reference.h> +# include <__type_traits/type_list.h> # include <__type_traits/unwrap_ref.h> # include <__utility/declval.h> # include <__utility/forward.h> @@ -571,7 +571,7 @@ __memberwise_copy_assign(_Dest& __dest, _Source const& __source, __index_sequenc template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void -__memberwise_forward_assign(_Dest& __dest, _Source&& __source, __tuple_types<_Up...>, __index_sequence<_Np...>) { +__memberwise_forward_assign(_Dest& __dest, _Source&& __source, __type_list<_Up...>, __index_sequence<_Np...>) { std::__swallow(((std::get<_Np>(__dest) = std::forward<_Up>(std::get<_Np>(__source))), void(), 0)...); } @@ -876,7 +876,7 @@ public: requires(_And...>::value) { std::__memberwise_forward_assign( - *this, std::move(__tuple), __tuple_types<_Tp...>(), __make_index_sequence()); + *this, std::move(__tuple), __type_list<_Tp...>(), __make_index_sequence()); return *this; } # endif // _LIBCPP_STD_VER >= 23 @@ -885,7 +885,7 @@ public: operator=(_If<_And...>::value, tuple, __nat>&& __tuple) noexcept( _And...>::value) { std::__memberwise_forward_assign( - *this, std::move(__tuple), __tuple_types<_Tp...>(), __make_index_sequence()); + *this, std::move(__tuple), __type_list<_Tp...>(), __make_index_sequence()); return *this; } @@ -905,7 +905,7 @@ public: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 tuple& operator=(tuple<_Up...>&& __tuple) noexcept(_And...>::value) { std::__memberwise_forward_assign( - *this, std::move(__tuple), __tuple_types<_Up...>(), __make_index_sequence()); + *this, std::move(__tuple), __type_list<_Up...>(), __make_index_sequence()); return *this; } @@ -922,7 +922,7 @@ public: enable_if_t< _And<_BoolConstant, is_assignable...>::value>* = nullptr> _LIBCPP_HIDE_FROM_ABI constexpr const tuple& operator=(tuple<_UTypes...>&& __u) const { - std::__memberwise_forward_assign(*this, __u, __tuple_types<_UTypes...>(), __make_index_sequence()); + std::__memberwise_forward_assign(*this, __u, 
__type_list<_UTypes...>(), __make_index_sequence()); return *this; } # endif // _LIBCPP_STD_VER >= 23 @@ -1000,7 +1000,7 @@ public: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 tuple& operator=(array<_Up, _Np>&& __array) noexcept(_And...>::value) { std::__memberwise_forward_assign( - *this, std::move(__array), __tuple_types<_If...>(), __make_index_sequence()); + *this, std::move(__array), __type_list<_If...>(), __make_index_sequence()); return *this; } From 6ef174c44c9bd2847aef06f1dcdabdd1fb5834c5 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 10 Nov 2025 09:09:37 +0000 Subject: [PATCH 12/24] [gn build] Port 152bda726958 --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 27bd2ce9849f6..66531c706ac7b 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -1432,7 +1432,6 @@ if (current_toolchain == default_toolchain) { "__tuple/tuple_like.h", "__tuple/tuple_like_no_subrange.h", "__tuple/tuple_size.h", - "__tuple/tuple_types.h", "__type_traits/add_cv_quals.h", "__type_traits/add_pointer.h", "__type_traits/add_reference.h", From 471dbb90cb35e45cb33512b5022b4d02f319b54e Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Mon, 10 Nov 2025 11:00:05 +0100 Subject: [PATCH 13/24] [libc++] Replace __libcpp_is_final with a variable template (#167137) --- libcxx/include/__exception/nested_exception.h | 2 +- libcxx/include/__memory/compressed_pair.h | 2 +- libcxx/include/__type_traits/is_final.h | 2 +- libcxx/include/tuple | 2 +- .../util.smartptr.shared/libcxx.control_block_layout.pass.cpp | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/libcxx/include/__exception/nested_exception.h b/libcxx/include/__exception/nested_exception.h index 90b14158d57a2..dc3266a27cdfd 100644 --- a/libcxx/include/__exception/nested_exception.h +++ b/libcxx/include/__exception/nested_exception.h @@ -73,7 +73,7 @@ template __throw_with_nested<_Tp, _Up, is_class<_Up>::value && !is_base_of::value && - !__libcpp_is_final<_Up>::value>::__do_throw(std::forward<_Tp>(__t)); + !__is_final_v<_Up> >::__do_throw(std::forward<_Tp>(__t)); #else ((void)__t); // FIXME: Make this abort diff --git a/libcxx/include/__memory/compressed_pair.h b/libcxx/include/__memory/compressed_pair.h index 0388d752ccc8b..f1f1c920453cf 100644 --- a/libcxx/include/__memory/compressed_pair.h +++ b/libcxx/include/__memory/compressed_pair.h @@ -67,7 +67,7 @@ inline const size_t __compressed_pair_alignment<_Tp&> = _LIBCPP_ALIGNOF(void*); template inline const bool __is_reference_or_unpadded_object = - (is_empty<_ToPad>::value && !__libcpp_is_final<_ToPad>::value) || sizeof(_ToPad) == __datasizeof_v<_ToPad>; + (is_empty<_ToPad>::value && !__is_final_v<_ToPad>) || sizeof(_ToPad) == __datasizeof_v<_ToPad>; template inline const bool __is_reference_or_unpadded_object<_Tp&> = true; diff --git a/libcxx/include/__type_traits/is_final.h b/libcxx/include/__type_traits/is_final.h index e9ef1425c9760..ab1cace52c4f6 100644 --- a/libcxx/include/__type_traits/is_final.h +++ b/libcxx/include/__type_traits/is_final.h @@ -19,7 +19,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template -struct __libcpp_is_final : integral_constant {}; +inline const bool __is_final_v = __is_final(_Tp); #if _LIBCPP_STD_VER >= 14 template diff --git a/libcxx/include/tuple b/libcxx/include/tuple index a960b64a71763..0cfcd9a4fd9c5 100644 --- a/libcxx/include/tuple 
+++ b/libcxx/include/tuple @@ -347,7 +347,7 @@ using __tuple_common_comparison_category _LIBCPP_NODEBUG = // __tuple_leaf -template ::value && !__libcpp_is_final<_Hp>::value > +template ::value && !__is_final_v<_Hp> > class __tuple_leaf; template diff --git a/libcxx/test/libcxx/utilities/memory/util.smartptr/util.smartptr.shared/libcxx.control_block_layout.pass.cpp b/libcxx/test/libcxx/utilities/memory/util.smartptr/util.smartptr.shared/libcxx.control_block_layout.pass.cpp index 0b48bc92f02af..9cb5b2ffbae97 100644 --- a/libcxx/test/libcxx/utilities/memory/util.smartptr/util.smartptr.shared/libcxx.control_block_layout.pass.cpp +++ b/libcxx/test/libcxx/utilities/memory/util.smartptr/util.smartptr.shared/libcxx.control_block_layout.pass.cpp @@ -30,7 +30,7 @@ struct value_init_tag {}; -template ::value && !std::__libcpp_is_final::value> +template ::value && !std::__is_final_v> struct compressed_pair_elem { explicit compressed_pair_elem(value_init_tag) : value_() {} From 0b52b829552f9f5cc9712c2f5047c3ebedccacb7 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Mon, 10 Nov 2025 11:00:59 +0100 Subject: [PATCH 14/24] [libc++] Merge insert/emplace(const_iterator, Args...) implementations (#166470) --- libcxx/include/deque | 116 +++++-------------------------------------- 1 file changed, 12 insertions(+), 104 deletions(-) diff --git a/libcxx/include/deque b/libcxx/include/deque index ab41b9db9de26..cbf4b98e07a5b 100644 --- a/libcxx/include/deque +++ b/libcxx/include/deque @@ -779,6 +779,10 @@ public: // 23.2.2.3 modifiers: _LIBCPP_HIDE_FROM_ABI void push_front(const value_type& __v); _LIBCPP_HIDE_FROM_ABI void push_back(const value_type& __v); + + template + _LIBCPP_HIDE_FROM_ABI iterator __emplace(const_iterator __p, _Args&&... __args); + # ifndef _LIBCPP_CXX03_LANG # if _LIBCPP_STD_VER >= 17 template @@ -791,8 +795,11 @@ public: template _LIBCPP_HIDE_FROM_ABI void emplace_back(_Args&&... __args); # endif + template - _LIBCPP_HIDE_FROM_ABI iterator emplace(const_iterator __p, _Args&&... __args); + _LIBCPP_HIDE_FROM_ABI iterator emplace(const_iterator __p, _Args&&... __args) { + return __emplace(__p, std::forward<_Args>(__args)...); + } _LIBCPP_HIDE_FROM_ABI void push_front(value_type&& __v); _LIBCPP_HIDE_FROM_ABI void push_back(value_type&& __v); @@ -809,13 +816,13 @@ public: } # endif - _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __p, value_type&& __v); + _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __p, value_type&& __v) { return __emplace(__p, std::move(__v)); } _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __p, initializer_list __il) { return insert(__p, __il.begin(), __il.end()); } # endif // _LIBCPP_CXX03_LANG - _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __p, const value_type& __v); + _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __p, const value_type& __v) { return __emplace(__p, __v); } _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __p, size_type __n, const value_type& __v); template ::value, int> = 0> _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __p, _InputIter __f, _InputIter __l); @@ -1661,56 +1668,11 @@ deque<_Tp, _Allocator>::emplace_front(_Args&&... 
__args) { return *begin(); # endif } - -template -typename deque<_Tp, _Allocator>::iterator deque<_Tp, _Allocator>::insert(const_iterator __p, value_type&& __v) { - size_type __pos = __p - begin(); - size_type __to_end = size() - __pos; - allocator_type& __a = __alloc(); - if (__pos < __to_end) { // insert by shifting things backward - if (__front_spare() == 0) - __add_front_capacity(); - // __front_spare() >= 1 - __annotate_increase_front(1); - if (__pos == 0) { - __alloc_traits::construct(__a, std::addressof(*--begin()), std::move(__v)); - --__start_; - ++__size(); - } else { - iterator __b = begin(); - iterator __bm1 = std::prev(__b); - __alloc_traits::construct(__a, std::addressof(*__bm1), std::move(*__b)); - --__start_; - ++__size(); - if (__pos > 1) - __b = std::move(std::next(__b), __b + __pos, __b); - *__b = std::move(__v); - } - } else { // insert by shifting things forward - if (__back_spare() == 0) - __add_back_capacity(); - // __back_capacity >= 1 - __annotate_increase_back(1); - size_type __de = size() - __pos; - if (__de == 0) { - __alloc_traits::construct(__a, std::addressof(*end()), std::move(__v)); - ++__size(); - } else { - iterator __e = end(); - iterator __em1 = std::prev(__e); - __alloc_traits::construct(__a, std::addressof(*__e), std::move(*__em1)); - ++__size(); - if (__de > 1) - __e = std::move_backward(__e - __de, __em1, __e); - *--__e = std::move(__v); - } - } - return begin() + __pos; -} +# endif // _LIBCPP_CXX03_LANG template template -typename deque<_Tp, _Allocator>::iterator deque<_Tp, _Allocator>::emplace(const_iterator __p, _Args&&... __args) { +typename deque<_Tp, _Allocator>::iterator deque<_Tp, _Allocator>::__emplace(const_iterator __p, _Args&&... __args) { size_type __pos = __p - begin(); size_type __to_end = size() - __pos; allocator_type& __a = __alloc(); @@ -1757,60 +1719,6 @@ typename deque<_Tp, _Allocator>::iterator deque<_Tp, _Allocator>::emplace(const_ return begin() + __pos; } -# endif // _LIBCPP_CXX03_LANG - -template -typename deque<_Tp, _Allocator>::iterator deque<_Tp, _Allocator>::insert(const_iterator __p, const value_type& __v) { - size_type __pos = __p - begin(); - size_type __to_end = size() - __pos; - allocator_type& __a = __alloc(); - if (__pos < __to_end) { // insert by shifting things backward - if (__front_spare() == 0) - __add_front_capacity(); - // __front_spare() >= 1 - __annotate_increase_front(1); - if (__pos == 0) { - __alloc_traits::construct(__a, std::addressof(*--begin()), __v); - --__start_; - ++__size(); - } else { - const_pointer __vt = pointer_traits::pointer_to(__v); - iterator __b = begin(); - iterator __bm1 = std::prev(__b); - if (__vt == pointer_traits::pointer_to(*__b)) - __vt = pointer_traits::pointer_to(*__bm1); - __alloc_traits::construct(__a, std::addressof(*__bm1), std::move(*__b)); - --__start_; - ++__size(); - if (__pos > 1) - __b = __move_and_check(std::next(__b), __b + __pos, __b, __vt); - *__b = *__vt; - } - } else { // insert by shifting things forward - if (__back_spare() == 0) - __add_back_capacity(); - // __back_capacity >= 1 - __annotate_increase_back(1); - size_type __de = size() - __pos; - if (__de == 0) { - __alloc_traits::construct(__a, std::addressof(*end()), __v); - ++__size(); - } else { - const_pointer __vt = pointer_traits::pointer_to(__v); - iterator __e = end(); - iterator __em1 = std::prev(__e); - if (__vt == pointer_traits::pointer_to(*__em1)) - __vt = pointer_traits::pointer_to(*__e); - __alloc_traits::construct(__a, std::addressof(*__e), std::move(*__em1)); - ++__size(); - if (__de > 1) 
- __e = __move_backward_and_check(__e - __de, __em1, __e, __vt); - *--__e = *__vt; - } - } - return begin() + __pos; -} - template typename deque<_Tp, _Allocator>::iterator deque<_Tp, _Allocator>::insert(const_iterator __p, size_type __n, const value_type& __v) { From 57dad86cb3dbbaf0da5aac6b3d3c5d5e1bae64fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= Date: Mon, 10 Nov 2025 11:02:53 +0100 Subject: [PATCH 15/24] [SPIRV] Fix failing assertion in SPIRVAsmPrinter (#166909) With `+SPV_KHR_float_controls2` and when there is a non-int `OpConstantNull` we would call `MI.getOperand(1).getImm()` when `MI` was not an `OpTypeInt` (the associated test has an `OpTypeArray` zeroinitialized). Under this conditions an assertion is triggered. This patch adds the missing condition. --- llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp | 23 +++++++++-------- .../CodeGen/SPIRV/non_int_constant_null.ll | 25 +++++++++++++++++++ 2 files changed, 37 insertions(+), 11 deletions(-) create mode 100644 llvm/test/CodeGen/SPIRV/non_int_constant_null.ll diff --git a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp index 0175f2fb3698b..970b83de5ee33 100644 --- a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp @@ -612,13 +612,10 @@ void SPIRVAsmPrinter::outputExecutionMode(const Module &M) { // Collect the SPIRVTypes for fp16, fp32, and fp64 and the constant of // type int32 with 0 value to represent the FP Fast Math Mode. std::vector SPIRVFloatTypes; - const MachineInstr *ConstZero = nullptr; + const MachineInstr *ConstZeroInt32 = nullptr; for (const MachineInstr *MI : MAI->getMSInstrs(SPIRV::MB_TypeConstVars)) { - // Skip if the instruction is not OpTypeFloat or OpConstant. unsigned OpCode = MI->getOpcode(); - if (OpCode != SPIRV::OpTypeFloat && OpCode != SPIRV::OpConstantNull) - continue; // Collect the SPIRV type if it's a float. if (OpCode == SPIRV::OpTypeFloat) { @@ -629,14 +626,18 @@ void SPIRVAsmPrinter::outputExecutionMode(const Module &M) { continue; } SPIRVFloatTypes.push_back(MI); - } else { + continue; + } + + if (OpCode == SPIRV::OpConstantNull) { // Check if the constant is int32, if not skip it. 
const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo(); MachineInstr *TypeMI = MRI.getVRegDef(MI->getOperand(1).getReg()); - if (!TypeMI || TypeMI->getOperand(1).getImm() != 32) - continue; - - ConstZero = MI; + bool IsInt32Ty = TypeMI && + TypeMI->getOpcode() == SPIRV::OpTypeInt && + TypeMI->getOperand(1).getImm() == 32; + if (IsInt32Ty) + ConstZeroInt32 = MI; } } @@ -657,9 +658,9 @@ void SPIRVAsmPrinter::outputExecutionMode(const Module &M) { MCRegister TypeReg = MAI->getRegisterAlias(MF, MI->getOperand(0).getReg()); Inst.addOperand(MCOperand::createReg(TypeReg)); - assert(ConstZero && "There should be a constant zero."); + assert(ConstZeroInt32 && "There should be a constant zero."); MCRegister ConstReg = MAI->getRegisterAlias( - ConstZero->getMF(), ConstZero->getOperand(0).getReg()); + ConstZeroInt32->getMF(), ConstZeroInt32->getOperand(0).getReg()); Inst.addOperand(MCOperand::createReg(ConstReg)); outputMCInst(Inst); } diff --git a/llvm/test/CodeGen/SPIRV/non_int_constant_null.ll b/llvm/test/CodeGen/SPIRV/non_int_constant_null.ll new file mode 100644 index 0000000000000..0ba016aaa30aa --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/non_int_constant_null.ll @@ -0,0 +1,25 @@ +; RUN: llc -mtriple spirv64-unknown-unknown %s --spirv-ext=+SPV_KHR_float_controls2 -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -mtriple spirv64-unknown-unknown %s --spirv-ext=+SPV_KHR_float_controls2 -o - -filetype=obj | spirv-val %} + +@A = addrspace(1) constant [1 x i8] zeroinitializer + +; CHECK: OpName %[[#FOO:]] "foo" +; CHECK: OpName %[[#A:]] "A" +; CHECK: OpDecorate %[[#A]] Constant +; CHECK: OpDecorate %[[#A]] LinkageAttributes "A" Export +; CHECK: %[[#INT8:]] = OpTypeInt 8 0 +; CHECK: %[[#INT32:]] = OpTypeInt 32 0 +; CHECK: %[[#ONE:]] = OpConstant %[[#INT32]] 1 +; CHECK: %[[#ARR_INT8:]] = OpTypeArray %[[#INT8]] %7 +; CHECK: %[[#ARR_INT8_PTR:]] = OpTypePointer CrossWorkgroup %[[#ARR_INT8]] +; CHECK: %[[#ARR_INT8_ZERO:]] = OpConstantNull %[[#ARR_INT8]] +; CHECK: %13 = OpVariable %[[#ARR_INT8_PTR]] CrossWorkgroup %[[#ARR_INT8_ZERO]] +; CHECK: %[[#FOO]] = OpFunction +; CHECK: = OpLabel +; CHECK: OpReturn +; CHECK: OpFunctionEnd + +define spir_kernel void @foo() { +entry: + ret void +} From d84a911e7e752e6337d1c0113818942b5b9350f5 Mon Sep 17 00:00:00 2001 From: Ricardo Jesus Date: Mon, 10 Nov 2025 10:07:15 +0000 Subject: [PATCH 16/24] [AArch64][SVE] Avoid redundant extend of unsigned i8/i16 extracts. (#165863) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extracts of unsigned i8 or i16 elements from the bottom 128 bits of a scalable register lead to the implied zero-extend being transformed to an AND mask. The mask is redundant since UMOV already zeroes the high bits of the destination register. 
For example: ```c int foo(svuint8_t x) {   return x[3]; } ``` Currently: ```gas foo:   umov    w8, v0.b[3]   and     w0, w8, #0xff   ret ``` Becomes: ```gas foo:   umov    w0, v0.b[3]   ret ``` --- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 12 ++ .../CodeGen/AArch64/sve-extract-element.ll | 132 ++++++++++++++++++ 2 files changed, 144 insertions(+) diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index e1f43867bbe5b..65b6077894673 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -3597,6 +3597,18 @@ let Predicates = [HasSVE_or_SME] in { def : Pat<(sext (i32 (vector_extract nxv4i32:$vec, VectorIndexS:$index))), (SMOVvi32to64 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index)>; + + // Extracts of ``unsigned'' i8 or i16 elements lead to the zero-extend being + // transformed to an AND mask. The mask is redundant since UMOV already zeroes + // the high bits of the destination register. + def : Pat<(i32 (and (vector_extract nxv16i8:$vec, VectorIndexB:$index), 0xff)), + (UMOVvi8 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index)>; + def : Pat<(i32 (and (vector_extract nxv8i16:$vec, VectorIndexH:$index), 0xffff)), + (UMOVvi16 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index)>; + def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract nxv16i8:$vec, VectorIndexB:$index)))), (i64 0xff))), + (SUBREG_TO_REG (i64 0), (i32 (UMOVvi8 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index)), sub_32)>; + def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract nxv8i16:$vec, VectorIndexH:$index)))), (i64 0xffff))), + (SUBREG_TO_REG (i64 0), (i32 (UMOVvi16 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index)), sub_32)>; } // End HasNEON // Extract first element from vector. diff --git a/llvm/test/CodeGen/AArch64/sve-extract-element.ll b/llvm/test/CodeGen/AArch64/sve-extract-element.ll index c340df1385124..0cc2e04bfb315 100644 --- a/llvm/test/CodeGen/AArch64/sve-extract-element.ll +++ b/llvm/test/CodeGen/AArch64/sve-extract-element.ll @@ -12,6 +12,26 @@ define i8 @test_lane0_16xi8( %a) #0 { ret i8 %b } +define i32 @test_lane0_16xi8_zext_i32( %a) #0 { +; CHECK-LABEL: test_lane0_16xi8_zext_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: umov w0, v0.b[0] +; CHECK-NEXT: ret + %b = extractelement %a, i32 0 + %c = zext i8 %b to i32 + ret i32 %c +} + +define i64 @test_lane0_16xi8_zext_i64( %a) #0 { +; CHECK-LABEL: test_lane0_16xi8_zext_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: umov w0, v0.b[0] +; CHECK-NEXT: ret + %b = extractelement %a, i32 0 + %c = zext i8 %b to i64 + ret i64 %c +} + define i8 @test_lane15_16xi8( %a) #0 { ; CHECK-LABEL: test_lane15_16xi8: ; CHECK: // %bb.0: @@ -21,6 +41,26 @@ define i8 @test_lane15_16xi8( %a) #0 { ret i8 %b } +define i32 @test_lane15_16xi8_zext_i32( %a) #0 { +; CHECK-LABEL: test_lane15_16xi8_zext_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: umov w0, v0.b[15] +; CHECK-NEXT: ret + %b = extractelement %a, i32 15 + %c = zext i8 %b to i32 + ret i32 %c +} + +define i64 @test_lane15_16xi8_zext_i64( %a) #0 { +; CHECK-LABEL: test_lane15_16xi8_zext_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: umov w0, v0.b[15] +; CHECK-NEXT: ret + %b = extractelement %a, i32 15 + %c = zext i8 %b to i64 + ret i64 %c +} + define i8 @test_lane16_16xi8( %a) #0 { ; CHECK-LABEL: test_lane16_16xi8: ; CHECK: // %bb.0: @@ -31,6 +71,32 @@ define i8 @test_lane16_16xi8( %a) #0 { ret i8 %b } +; FIXME: FMOV+AND -> UMOV. 
+define i32 @test_lane16_16xi8_zext_i32( %a) #0 { +; CHECK-LABEL: test_lane16_16xi8_zext_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.b, z0.b[16] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: and w0, w8, #0xff +; CHECK-NEXT: ret + %b = extractelement %a, i32 16 + %c = zext i8 %b to i32 + ret i32 %c +} + +; FIXME: FMOV+AND -> UMOV. +define i64 @test_lane16_16xi8_zext_i64( %a) #0 { +; CHECK-LABEL: test_lane16_16xi8_zext_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.b, z0.b[16] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: and x0, x8, #0xff +; CHECK-NEXT: ret + %b = extractelement %a, i32 16 + %c = zext i8 %b to i64 + ret i64 %c +} + define i16 @test_lane0_8xi16( %a) #0 { ; CHECK-LABEL: test_lane0_8xi16: ; CHECK: // %bb.0: @@ -40,6 +106,26 @@ define i16 @test_lane0_8xi16( %a) #0 { ret i16 %b } +define i32 @test_lane0_8xi16_zext_i32( %a) #0 { +; CHECK-LABEL: test_lane0_8xi16_zext_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: umov w0, v0.h[0] +; CHECK-NEXT: ret + %b = extractelement %a, i32 0 + %c = zext i16 %b to i32 + ret i32 %c +} + +define i64 @test_lane0_8xi16_zext_i64( %a) #0 { +; CHECK-LABEL: test_lane0_8xi16_zext_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: umov w0, v0.h[0] +; CHECK-NEXT: ret + %b = extractelement %a, i32 0 + %c = zext i16 %b to i64 + ret i64 %c +} + define i16 @test_lane7_8xi16( %a) #0 { ; CHECK-LABEL: test_lane7_8xi16: ; CHECK: // %bb.0: @@ -49,6 +135,26 @@ define i16 @test_lane7_8xi16( %a) #0 { ret i16 %b } +define i32 @test_lane7_8xi16_zext_i32( %a) #0 { +; CHECK-LABEL: test_lane7_8xi16_zext_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: umov w0, v0.h[7] +; CHECK-NEXT: ret + %b = extractelement %a, i32 7 + %c = zext i16 %b to i32 + ret i32 %c +} + +define i64 @test_lane7_8xi16_zext_i64( %a) #0 { +; CHECK-LABEL: test_lane7_8xi16_zext_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: umov w0, v0.h[7] +; CHECK-NEXT: ret + %b = extractelement %a, i32 7 + %c = zext i16 %b to i64 + ret i64 %c +} + define i16 @test_lane8_8xi16( %a) #0 { ; CHECK-LABEL: test_lane8_8xi16: ; CHECK: // %bb.0: @@ -59,6 +165,32 @@ define i16 @test_lane8_8xi16( %a) #0 { ret i16 %b } +; FIXME: FMOV+AND -> UMOV. +define i32 @test_lane8_8xi16_zext_i32( %a) #0 { +; CHECK-LABEL: test_lane8_8xi16_zext_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, z0.h[8] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: and w0, w8, #0xffff +; CHECK-NEXT: ret + %b = extractelement %a, i32 8 + %c = zext i16 %b to i32 + ret i32 %c +} + +; FIXME: FMOV+AND -> UMOV. +define i64 @test_lane8_8xi16_zext_i64( %a) #0 { +; CHECK-LABEL: test_lane8_8xi16_zext_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, z0.h[8] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: and x0, x8, #0xffff +; CHECK-NEXT: ret + %b = extractelement %a, i32 8 + %c = zext i16 %b to i64 + ret i64 %c +} + define i32 @test_lane0_4xi32( %a) #0 { ; CHECK-LABEL: test_lane0_4xi32: ; CHECK: // %bb.0: From ff1efe9e7310fb57a96bb27caddc185779120f43 Mon Sep 17 00:00:00 2001 From: Ricardo Jesus Date: Mon, 10 Nov 2025 10:26:05 +0000 Subject: [PATCH 17/24] [AArch64] Combine subtract with borrow to SBC. (#165271) Specifically, this patch adds the following combines: SUB x, (CSET LO, (CMP a, b)) -> SBC x, 0, (CMP a, b) SUB (SUB x, y), (CSET LO, (CMP a, b)) -> SBC x, y, (CMP a, b) The CSET may be preceded by a ZEXT. Fixes #164748. 
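As a rough C-level illustration (assumed, not taken from this PR; the authoritative cases are the IR tests in `sbc.ll` below), a two-limb subtraction produces the second pattern above, where the borrow out of the low limb feeds the high-limb subtract:

```c
// Hypothetical example: 128-bit subtraction split into two 64-bit limbs.
// The borrow out of the low limb is an unsigned compare (CSET LO after
// CMP alo, blo), and subtracting it from the high-limb difference matches
// SUB (SUB ahi, bhi), (CSET LO, (CMP alo, blo)), which this combine
// should now select as a single SBC instead of CSET + SUB.
typedef unsigned long long u64;

void sub128(u64 alo, u64 ahi, u64 blo, u64 bhi, u64 *rlo, u64 *rhi) {
  u64 lo = alo - blo;        // low limb difference (wraps on borrow)
  u64 borrow = alo < blo;    // 1 if the low limb borrowed, else 0
  *rlo = lo;
  *rhi = ahi - bhi - borrow; // high limb difference minus the borrow
}
```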
--- .../Target/AArch64/AArch64ISelLowering.cpp | 33 ++ llvm/test/CodeGen/AArch64/sbc.ll | 392 ++++++++++++++++++ 2 files changed, 425 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/sbc.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 76a790dc2dbc9..8457f6178fdc2 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -22308,6 +22308,37 @@ static SDValue performExtBinopLoadFold(SDNode *N, SelectionDAG &DAG) { return DAG.getNode(N->getOpcode(), DL, VT, Ext0, NShift); } +// Attempt to combine the following patterns: +// SUB x, (CSET LO, (CMP a, b)) -> SBC x, 0, (CMP a, b) +// SUB (SUB x, y), (CSET LO, (CMP a, b)) -> SBC x, y, (CMP a, b) +// The CSET may be preceded by a ZEXT. +static SDValue performSubWithBorrowCombine(SDNode *N, SelectionDAG &DAG) { + if (N->getOpcode() != ISD::SUB) + return SDValue(); + + EVT VT = N->getValueType(0); + if (VT != MVT::i32 && VT != MVT::i64) + return SDValue(); + + SDValue N1 = N->getOperand(1); + if (N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse()) + N1 = N1.getOperand(0); + if (!N1.hasOneUse() || getCSETCondCode(N1) != AArch64CC::LO) + return SDValue(); + + SDValue Flags = N1.getOperand(3); + if (Flags.getOpcode() != AArch64ISD::SUBS) + return SDValue(); + + SDLoc DL(N); + SDValue N0 = N->getOperand(0); + if (N0->getOpcode() == ISD::SUB) + return DAG.getNode(AArch64ISD::SBC, DL, VT, N0.getOperand(0), + N0.getOperand(1), Flags); + return DAG.getNode(AArch64ISD::SBC, DL, VT, N0, DAG.getConstant(0, DL, VT), + Flags); +} + static SDValue performAddSubCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { // Try to change sum of two reductions. @@ -22329,6 +22360,8 @@ static SDValue performAddSubCombine(SDNode *N, return Val; if (SDValue Val = performAddSubIntoVectorOp(N, DCI.DAG)) return Val; + if (SDValue Val = performSubWithBorrowCombine(N, DCI.DAG)) + return Val; if (SDValue Val = performExtBinopLoadFold(N, DCI.DAG)) return Val; diff --git a/llvm/test/CodeGen/AArch64/sbc.ll b/llvm/test/CodeGen/AArch64/sbc.ll new file mode 100644 index 0000000000000..fff63c1709218 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sbc.ll @@ -0,0 +1,392 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck --check-prefixes=CHECK,CHECK-SD %s +; RUN: llc < %s -global-isel | FileCheck --check-prefixes=CHECK,CHECK-GI %s + +target triple = "aarch64-none-linux-gnu" + +define i32 @test_basic_i32(i32 %a, i32 %b, i32 %x, i32 %y) { +; CHECK-SD-LABEL: test_basic_i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: sbc w0, w2, w3 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_basic_i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: sub w9, w2, w3 +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: sub w0, w9, w8 +; CHECK-GI-NEXT: ret + %cc = icmp ult i32 %a, %b + %carry = zext i1 %cc to i32 + %sub = sub i32 %x, %y + %res = sub i32 %sub, %carry + ret i32 %res +} + +define i64 @test_basic_i64(i64 %a, i64 %b, i64 %x, i64 %y) { +; CHECK-SD-LABEL: test_basic_i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp x0, x1 +; CHECK-SD-NEXT: sbc x0, x2, x3 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_basic_i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp x0, x1 +; CHECK-GI-NEXT: sub x9, x2, x3 +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: sub x0, x9, x8 +; CHECK-GI-NEXT: ret + %cc = icmp ult i64 %a, %b + %carry = zext i1 %cc to i64 + %sub = sub i64 
%x, %y + %res = sub i64 %sub, %carry + ret i64 %res +} + +define i64 @test_mixed_i32_i64(i32 %a, i32 %b, i64 %x, i64 %y) { +; CHECK-SD-LABEL: test_mixed_i32_i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: sbc x0, x2, x3 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_mixed_i32_i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: sub x9, x2, x3 +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: sub x0, x9, x8 +; CHECK-GI-NEXT: ret + %cc = icmp ult i32 %a, %b + %carry = zext i1 %cc to i64 + %sub = sub i64 %x, %y + %res = sub i64 %sub, %carry + ret i64 %res +} + +define i32 @test_mixed_i64_i32(i64 %a, i64 %b, i32 %x, i32 %y) { +; CHECK-SD-LABEL: test_mixed_i64_i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp x0, x1 +; CHECK-SD-NEXT: sbc w0, w2, w3 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_mixed_i64_i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp x0, x1 +; CHECK-GI-NEXT: sub w9, w2, w3 +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: sub w0, w9, w8 +; CHECK-GI-NEXT: ret + %cc = icmp ult i64 %a, %b + %carry = zext i1 %cc to i32 + %sub = sub i32 %x, %y + %res = sub i32 %sub, %carry + ret i32 %res +} + +define i32 @test_only_borrow(i32 %a, i32 %b, i32 %x) { +; CHECK-SD-LABEL: test_only_borrow: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: sbc w0, w2, wzr +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_only_borrow: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: sub w0, w2, w8 +; CHECK-GI-NEXT: ret + %cc = icmp ult i32 %a, %b + %carry = zext i1 %cc to i32 + %res = sub i32 %x, %carry + ret i32 %res +} + +define i32 @test_sext_add(i32 %a, i32 %b, i32 %x, i32 %y) { +; CHECK-SD-LABEL: test_sext_add: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: sbc w0, w2, w3 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_sext_add: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: sub w9, w2, w3 +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: sbfx w8, w8, #0, #1 +; CHECK-GI-NEXT: add w0, w9, w8 +; CHECK-GI-NEXT: ret + %cc = icmp ult i32 %a, %b + %carry = sext i1 %cc to i32 + %sub = sub i32 %x, %y + %res = add i32 %sub, %carry + ret i32 %res +} + +; FIXME: This case could be supported with reversed operands to the CMP. 
+define i32 @test_ugt(i32 %a, i32 %b, i32 %x, i32 %y) { +; CHECK-SD-LABEL: test_ugt: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: sub w8, w2, w3 +; CHECK-SD-NEXT: cset w9, hi +; CHECK-SD-NEXT: sub w0, w8, w9 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_ugt: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: sub w9, w2, w3 +; CHECK-GI-NEXT: cset w8, hi +; CHECK-GI-NEXT: sub w0, w9, w8 +; CHECK-GI-NEXT: ret + %cc = icmp ugt i32 %a, %b + %carry = zext i1 %cc to i32 + %sub = sub i32 %x, %y + %res = sub i32 %sub, %carry + ret i32 %res +} + +define i32 @test_unsupported_cc_slt(i32 %a, i32 %b, i32 %x, i32 %y) { +; CHECK-SD-LABEL: test_unsupported_cc_slt: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: sub w8, w2, w3 +; CHECK-SD-NEXT: cset w9, lt +; CHECK-SD-NEXT: sub w0, w8, w9 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_unsupported_cc_slt: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: sub w9, w2, w3 +; CHECK-GI-NEXT: cset w8, lt +; CHECK-GI-NEXT: sub w0, w9, w8 +; CHECK-GI-NEXT: ret + %cc = icmp slt i32 %a, %b + %carry = zext i1 %cc to i32 + %sub = sub i32 %x, %y + %res = sub i32 %sub, %carry + ret i32 %res +} + +define i32 @test_unsupported_cc_sgt(i32 %a, i32 %b, i32 %x, i32 %y) { +; CHECK-SD-LABEL: test_unsupported_cc_sgt: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: sub w8, w2, w3 +; CHECK-SD-NEXT: cset w9, gt +; CHECK-SD-NEXT: sub w0, w8, w9 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_unsupported_cc_sgt: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: sub w9, w2, w3 +; CHECK-GI-NEXT: cset w8, gt +; CHECK-GI-NEXT: sub w0, w9, w8 +; CHECK-GI-NEXT: ret + %cc = icmp sgt i32 %a, %b + %carry = zext i1 %cc to i32 + %sub = sub i32 %x, %y + %res = sub i32 %sub, %carry + ret i32 %res +} + +define i32 @test_multiple_setcc_uses(i32 %a, i32 %b, i32 %x) { +; CHECK-SD-LABEL: test_multiple_setcc_uses: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: .cfi_offset w19, -8 +; CHECK-SD-NEXT: .cfi_offset w30, -16 +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: cset w0, lo +; CHECK-SD-NEXT: sub w19, w2, w0 +; CHECK-SD-NEXT: bl use +; CHECK-SD-NEXT: mov w0, w19 +; CHECK-SD-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_multiple_setcc_uses: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w20, -16 +; CHECK-GI-NEXT: .cfi_offset w30, -32 +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: mov w19, w2 +; CHECK-GI-NEXT: cset w20, lo +; CHECK-GI-NEXT: mov w0, w20 +; CHECK-GI-NEXT: bl use +; CHECK-GI-NEXT: sub w0, w19, w20 +; CHECK-GI-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %cc = icmp ult i32 %a, %b + %carry = zext i1 %cc to i32 + %res = sub i32 %x, %carry + tail call void @use(i1 %cc) + ret i32 %res +} + +define i32 @test_multiple_carry_uses(i32 %a, i32 %b, i32 %x) { +; CHECK-SD-LABEL: test_multiple_carry_uses: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: stp x30, x19, [sp, #-16]! 
// 16-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: .cfi_offset w19, -8 +; CHECK-SD-NEXT: .cfi_offset w30, -16 +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: cset w0, lo +; CHECK-SD-NEXT: sub w19, w2, w0 +; CHECK-SD-NEXT: bl use +; CHECK-SD-NEXT: mov w0, w19 +; CHECK-SD-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_multiple_carry_uses: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w20, -16 +; CHECK-GI-NEXT: .cfi_offset w30, -32 +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: mov w19, w2 +; CHECK-GI-NEXT: cset w20, lo +; CHECK-GI-NEXT: mov w0, w20 +; CHECK-GI-NEXT: bl use +; CHECK-GI-NEXT: sub w0, w19, w20 +; CHECK-GI-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %cc = icmp ult i32 %a, %b + %carry = zext i1 %cc to i32 + %res = sub i32 %x, %carry + tail call void @use(i32 %carry) + ret i32 %res +} + +define i32 @test_multiple_sub_uses(i32 %a, i32 %b, i32 %x, i32 %y) { +; CHECK-SD-LABEL: test_multiple_sub_uses: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: .cfi_offset w19, -8 +; CHECK-SD-NEXT: .cfi_offset w30, -16 +; CHECK-SD-NEXT: sub w8, w2, w3 +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: mov w0, w8 +; CHECK-SD-NEXT: sbc w19, w2, w3 +; CHECK-SD-NEXT: bl use +; CHECK-SD-NEXT: mov w0, w19 +; CHECK-SD-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_multiple_sub_uses: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-32]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w20, -16 +; CHECK-GI-NEXT: .cfi_offset w30, -32 +; CHECK-GI-NEXT: sub w19, w2, w3 +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: mov w0, w19 +; CHECK-GI-NEXT: cset w20, lo +; CHECK-GI-NEXT: bl use +; CHECK-GI-NEXT: sub w0, w19, w20 +; CHECK-GI-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %cc = icmp ult i32 %a, %b + %carry = zext i1 %cc to i32 + %sub = sub i32 %x, %y + %res = sub i32 %sub, %carry + tail call void @use(i32 %sub) + ret i32 %res +} + +define i8 @test_i8(i8 %a, i8 %b, i8 %x, i8 %y) { +; CHECK-SD-LABEL: test_i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: and w8, w0, #0xff +; CHECK-SD-NEXT: cmp w8, w1, uxtb +; CHECK-SD-NEXT: sbc w0, w2, w3 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: and w8, w0, #0xff +; CHECK-GI-NEXT: sub w9, w2, w3 +; CHECK-GI-NEXT: cmp w8, w1, uxtb +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: sub w0, w9, w8 +; CHECK-GI-NEXT: ret + %cc = icmp ult i8 %a, %b + %carry = zext i1 %cc to i8 + %sub = sub i8 %x, %y + %res = sub i8 %sub, %carry + ret i8 %res +} + +define i16 @test_i16(i16 %a, i16 %b, i16 %x, i16 %y) { +; CHECK-SD-LABEL: test_i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: and w8, w0, #0xffff +; CHECK-SD-NEXT: cmp w8, w1, uxth +; CHECK-SD-NEXT: sbc w0, w2, w3 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: and w8, w0, #0xffff +; CHECK-GI-NEXT: sub w9, w2, w3 +; CHECK-GI-NEXT: cmp w8, w1, uxth +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: sub w0, w9, w8 +; CHECK-GI-NEXT: ret + %cc = icmp ult i16 %a, %b + %carry = zext i1 %cc to i16 + %sub = sub i16 %x, %y + %res = sub i16 %sub, %carry + ret i16 %res +} + +define <4 x i32> @test_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %x, <4 x i32> %y) { +; CHECK-SD-LABEL: test_v4i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sub v2.4s, v2.4s, v3.4s +; CHECK-SD-NEXT: cmhi v0.4s, v1.4s, v0.4s +; CHECK-SD-NEXT: add v0.4s, v2.4s, v0.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_v4i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v4.4s, #1 +; CHECK-GI-NEXT: cmhi v0.4s, v1.4s, v0.4s +; CHECK-GI-NEXT: sub v1.4s, v2.4s, v3.4s +; CHECK-GI-NEXT: and v0.16b, v0.16b, v4.16b +; CHECK-GI-NEXT: sub v0.4s, v1.4s, v0.4s +; CHECK-GI-NEXT: ret + %cc = icmp ult <4 x i32> %a, %b + %carry = zext <4 x i1> %cc to <4 x i32> + %sub = sub <4 x i32> %x, %y + %res = sub <4 x i32> %sub, %carry + ret <4 x i32> %res +} + +declare void @use() +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK: {{.*}} From 898d6fecf6636521321af17fcd69a19f30fa8cf8 Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Mon, 10 Nov 2025 11:30:37 +0100 Subject: [PATCH 18/24] Remove unused inclusion (#166942) --- llvm/benchmarks/FormatVariadicBM.cpp | 1 + llvm/include/llvm/Frontend/OpenMP/ClauseT.h | 1 - llvm/include/llvm/IR/DiagnosticInfo.h | 1 - llvm/include/llvm/IR/Dominators.h | 1 - llvm/include/llvm/IR/ProfileSummary.h | 1 - llvm/include/llvm/MC/MCAssembler.h | 1 - llvm/include/llvm/Support/TypeSize.h | 1 - llvm/include/llvm/Support/UnicodeCharRanges.h | 1 - llvm/include/llvm/TableGen/DirectiveEmitter.h | 1 - .../llvm/Transforms/Vectorize/SandboxVectorizer/InstrMaps.h | 1 - llvm/include/llvm/XRay/FDRRecordConsumer.h | 1 - llvm/lib/CodeGen/RegAllocGreedy.h | 1 - llvm/lib/Support/Unix/Unix.h | 1 - llvm/lib/Support/Windows/Signals.inc | 1 - llvm/lib/Target/CSKY/CSKYConstantIslandPass.cpp | 1 - llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h | 1 - llvm/lib/Target/X86/X86FlagsCopyLowering.cpp | 1 - llvm/tools/llvm-stress/llvm-stress.cpp | 1 - llvm/unittests/ADT/CombinationGeneratorTest.cpp | 1 - llvm/unittests/ADT/DAGDeltaAlgorithmTest.cpp | 1 - llvm/unittests/ADT/DeltaAlgorithmTest.cpp | 1 - llvm/unittests/ADT/SequenceTest.cpp | 1 - llvm/unittests/Analysis/DomTreeUpdaterTest.cpp | 1 - llvm/unittests/TextAPI/TextStubHelpers.h | 1 - llvm/utils/KillTheDoctor/KillTheDoctor.cpp | 1 - 25 files changed, 1 insertion(+), 24 deletions(-) diff --git a/llvm/benchmarks/FormatVariadicBM.cpp b/llvm/benchmarks/FormatVariadicBM.cpp index 3e851f0975e8b..b451d1079d29b 100644 --- a/llvm/benchmarks/FormatVariadicBM.cpp +++ b/llvm/benchmarks/FormatVariadicBM.cpp @@ -10,6 +10,7 @@ #include "llvm/Support/FormatVariadic.h" #include #include +#include using namespace llvm; using namespace std; diff --git a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h index a86dca06f8ecd..12dfb6c607bb9 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h @@ -50,7 +50,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include #include #include #include diff --git a/llvm/include/llvm/IR/DiagnosticInfo.h b/llvm/include/llvm/IR/DiagnosticInfo.h index a426fb079ec04..1c86d181e4375 100644 --- a/llvm/include/llvm/IR/DiagnosticInfo.h +++ b/llvm/include/llvm/IR/DiagnosticInfo.h @@ -26,7 +26,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TypeSize.h" -#include #include #include #include diff --git a/llvm/include/llvm/IR/Dominators.h b/llvm/include/llvm/IR/Dominators.h index bf128a3936cbd..1209def5ac0bd 100644 --- a/llvm/include/llvm/IR/Dominators.h +++ b/llvm/include/llvm/IR/Dominators.h @@ -32,7 +32,6 @@ #include "llvm/Support/CFGUpdate.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/GenericDomTree.h" -#include #include namespace llvm { diff --git a/llvm/include/llvm/IR/ProfileSummary.h b/llvm/include/llvm/IR/ProfileSummary.h index 6c087ea02b3c3..34012151f729f 100644 --- a/llvm/include/llvm/IR/ProfileSummary.h +++ b/llvm/include/llvm/IR/ProfileSummary.h @@ -14,7 +14,6 @@ #define LLVM_IR_PROFILESUMMARY_H #include "llvm/Support/Compiler.h" -#include #include #include #include diff --git a/llvm/include/llvm/MC/MCAssembler.h b/llvm/include/llvm/MC/MCAssembler.h index 144f2118e715c..152b81e284c1a 100644 --- a/llvm/include/llvm/MC/MCAssembler.h +++ b/llvm/include/llvm/MC/MCAssembler.h @@ -19,7 +19,6 @@ 
#include "llvm/MC/MCSymbol.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/SMLoc.h" -#include #include #include #include diff --git a/llvm/include/llvm/Support/TypeSize.h b/llvm/include/llvm/Support/TypeSize.h index 0a7ae15edbb33..421d6613bfafc 100644 --- a/llvm/include/llvm/Support/TypeSize.h +++ b/llvm/include/llvm/Support/TypeSize.h @@ -20,7 +20,6 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include #include #include #include diff --git a/llvm/include/llvm/Support/UnicodeCharRanges.h b/llvm/include/llvm/Support/UnicodeCharRanges.h index 2b5fc83d34690..03515cd61515f 100644 --- a/llvm/include/llvm/Support/UnicodeCharRanges.h +++ b/llvm/include/llvm/Support/UnicodeCharRanges.h @@ -12,7 +12,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include #define DEBUG_TYPE "unicode" diff --git a/llvm/include/llvm/TableGen/DirectiveEmitter.h b/llvm/include/llvm/TableGen/DirectiveEmitter.h index ce3e87e470b9d..2080f75eb8cfc 100644 --- a/llvm/include/llvm/TableGen/DirectiveEmitter.h +++ b/llvm/include/llvm/TableGen/DirectiveEmitter.h @@ -20,7 +20,6 @@ #include "llvm/Frontend/Directive/Spelling.h" #include "llvm/Support/MathExtras.h" #include "llvm/TableGen/Record.h" -#include #include #include diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/InstrMaps.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/InstrMaps.h index 4385df518a111..050396674e159 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/InstrMaps.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/InstrMaps.h @@ -19,7 +19,6 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h" -#include namespace llvm::sandboxir { diff --git a/llvm/include/llvm/XRay/FDRRecordConsumer.h b/llvm/include/llvm/XRay/FDRRecordConsumer.h index 13bb711328fdc..4ff65f043fe17 100644 --- a/llvm/include/llvm/XRay/FDRRecordConsumer.h +++ b/llvm/include/llvm/XRay/FDRRecordConsumer.h @@ -11,7 +11,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Error.h" #include "llvm/XRay/FDRRecords.h" -#include #include #include diff --git a/llvm/lib/CodeGen/RegAllocGreedy.h b/llvm/lib/CodeGen/RegAllocGreedy.h index 7f013d1f1f726..4affa275cbf8b 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.h +++ b/llvm/lib/CodeGen/RegAllocGreedy.h @@ -33,7 +33,6 @@ #include "llvm/CodeGen/SpillPlacement.h" #include "llvm/CodeGen/Spiller.h" #include "llvm/CodeGen/TargetRegisterInfo.h" -#include #include #include #include diff --git a/llvm/lib/Support/Unix/Unix.h b/llvm/lib/Support/Unix/Unix.h index a1d44c69ab1ab..f24d524982b23 100644 --- a/llvm/lib/Support/Unix/Unix.h +++ b/llvm/lib/Support/Unix/Unix.h @@ -22,7 +22,6 @@ #include "llvm/Support/Chrono.h" #include "llvm/Support/Errno.h" #include "llvm/Support/ErrorHandling.h" -#include #include #include #include diff --git a/llvm/lib/Support/Windows/Signals.inc b/llvm/lib/Support/Windows/Signals.inc index da68994970ebb..bacbb76e09e6c 100644 --- a/llvm/lib/Support/Windows/Signals.inc +++ b/llvm/lib/Support/Windows/Signals.inc @@ -16,7 +16,6 @@ #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" #include "llvm/Support/WindowsError.h" -#include #include #include #include diff --git a/llvm/lib/Target/CSKY/CSKYConstantIslandPass.cpp b/llvm/lib/Target/CSKY/CSKYConstantIslandPass.cpp index 7885d93cbad98..a2cf0a57675c7 100644 --- a/llvm/lib/Target/CSKY/CSKYConstantIslandPass.cpp +++ 
b/llvm/lib/Target/CSKY/CSKYConstantIslandPass.cpp @@ -48,7 +48,6 @@ #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include #include #include #include diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h index 2573066cd5d63..4146c0ec6ab07 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h @@ -21,7 +21,6 @@ #include "WebAssemblyTargetMachine.h" #include "llvm/CodeGen/BasicTTIImpl.h" -#include namespace llvm { diff --git a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp index ab6e6d0687b71..b3bf37a9a462c 100644 --- a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp +++ b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp @@ -50,7 +50,6 @@ #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include #include #include #include diff --git a/llvm/tools/llvm-stress/llvm-stress.cpp b/llvm/tools/llvm-stress/llvm-stress.cpp index 133812e419d2b..2fe5d6b7e5254 100644 --- a/llvm/tools/llvm-stress/llvm-stress.cpp +++ b/llvm/tools/llvm-stress/llvm-stress.cpp @@ -40,7 +40,6 @@ #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" -#include #include #include #include diff --git a/llvm/unittests/ADT/CombinationGeneratorTest.cpp b/llvm/unittests/ADT/CombinationGeneratorTest.cpp index f3e174a83ba69..219e18bc5e12c 100644 --- a/llvm/unittests/ADT/CombinationGeneratorTest.cpp +++ b/llvm/unittests/ADT/CombinationGeneratorTest.cpp @@ -12,7 +12,6 @@ #include "llvm/Support/ErrorHandling.h" #include "gmock/gmock.h" #include "gtest/gtest.h" -#include #include #include #include diff --git a/llvm/unittests/ADT/DAGDeltaAlgorithmTest.cpp b/llvm/unittests/ADT/DAGDeltaAlgorithmTest.cpp index 918a2e63da935..9f5149099e309 100644 --- a/llvm/unittests/ADT/DAGDeltaAlgorithmTest.cpp +++ b/llvm/unittests/ADT/DAGDeltaAlgorithmTest.cpp @@ -9,7 +9,6 @@ #include "llvm/ADT/DAGDeltaAlgorithm.h" #include "llvm/ADT/STLExtras.h" #include "gtest/gtest.h" -#include #include using namespace llvm; diff --git a/llvm/unittests/ADT/DeltaAlgorithmTest.cpp b/llvm/unittests/ADT/DeltaAlgorithmTest.cpp index 24e18f42eb33c..530bd1cb51173 100644 --- a/llvm/unittests/ADT/DeltaAlgorithmTest.cpp +++ b/llvm/unittests/ADT/DeltaAlgorithmTest.cpp @@ -9,7 +9,6 @@ #include "llvm/ADT/DeltaAlgorithm.h" #include "llvm/ADT/STLExtras.h" #include "gtest/gtest.h" -#include #include using namespace llvm; diff --git a/llvm/unittests/ADT/SequenceTest.cpp b/llvm/unittests/ADT/SequenceTest.cpp index 7b7dc85cb79be..7aa39568888b2 100644 --- a/llvm/unittests/ADT/SequenceTest.cpp +++ b/llvm/unittests/ADT/SequenceTest.cpp @@ -11,7 +11,6 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" -#include #include using namespace llvm; diff --git a/llvm/unittests/Analysis/DomTreeUpdaterTest.cpp b/llvm/unittests/Analysis/DomTreeUpdaterTest.cpp index cabfc2aba57cf..9f5fe5742a44d 100644 --- a/llvm/unittests/Analysis/DomTreeUpdaterTest.cpp +++ b/llvm/unittests/Analysis/DomTreeUpdaterTest.cpp @@ -17,7 +17,6 @@ #include "llvm/Support/SourceMgr.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "gtest/gtest.h" -#include using namespace llvm; diff --git a/llvm/unittests/TextAPI/TextStubHelpers.h b/llvm/unittests/TextAPI/TextStubHelpers.h index 7c9c74a252760..87ca7e1c0b4d4 100644 --- 
a/llvm/unittests/TextAPI/TextStubHelpers.h +++ b/llvm/unittests/TextAPI/TextStubHelpers.h @@ -8,7 +8,6 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/TextAPI/InterfaceFile.h" -#include #include #ifndef TEXT_STUB_HELPERS_H diff --git a/llvm/utils/KillTheDoctor/KillTheDoctor.cpp b/llvm/utils/KillTheDoctor/KillTheDoctor.cpp index 4f642f8886df5..0495560b6c1dc 100644 --- a/llvm/utils/KillTheDoctor/KillTheDoctor.cpp +++ b/llvm/utils/KillTheDoctor/KillTheDoctor.cpp @@ -43,7 +43,6 @@ #include "llvm/Support/WindowsError.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/type_traits.h" -#include #include #include #include From 2d1d5fe78ed01810c89f3705acfe93a7e219c08f Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Mon, 10 Nov 2025 10:43:37 +0000 Subject: [PATCH 19/24] [VPlan] Simplify branch-cond with getVectorTripCount (#155604) Call getVectorTripCount first, and call getTripCount failing that, in simplifyBranchConditionForVFAndUF, to simplify missed cases. While at it, strip the dead check for a zero TC. --- .../Transforms/Vectorize/VPlanTransforms.cpp | 18 +++++----- .../AArch64/partial-reduce-dot-product.ll | 3 +- .../LoopVectorize/RISCV/low-trip-count.ll | 3 +- .../X86/limit-vf-by-tripcount.ll | 33 ++++++++----------- .../LoopVectorize/X86/load-deref-pred.ll | 3 +- .../vector-loop-backedge-elimination.ll | 32 +++++++----------- 6 files changed, 37 insertions(+), 55 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 634df51a12965..b319fbc7a78c0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1745,17 +1745,17 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF, if (match(Term, m_BranchOnCount()) || match(Term, m_BranchOnCond(m_Not(m_ActiveLaneMask( m_VPValue(), m_VPValue(), m_VPValue()))))) { - // Try to simplify the branch condition if TC <= VF * UF when the latch - // terminator is BranchOnCount or BranchOnCond where the input is - // Not(ActiveLaneMask). - const SCEV *TripCount = - vputils::getSCEVExprForVPValue(Plan.getTripCount(), SE); - assert(!isa(TripCount) && + // Try to simplify the branch condition if VectorTC <= VF * UF when the + // latch terminator is BranchOnCount or BranchOnCond(Not(ActiveLaneMask)). 
+ const SCEV *VectorTripCount = + vputils::getSCEVExprForVPValue(&Plan.getVectorTripCount(), SE); + if (isa<SCEVCouldNotCompute>(VectorTripCount)) + VectorTripCount = vputils::getSCEVExprForVPValue(Plan.getTripCount(), SE); + assert(!isa<SCEVCouldNotCompute>(VectorTripCount) && "Trip count SCEV must be computable"); ElementCount NumElements = BestVF.multiplyCoefficientBy(BestUF); - const SCEV *C = SE.getElementCount(TripCount->getType(), NumElements); - if (TripCount->isZero() || - !SE.isKnownPredicate(CmpInst::ICMP_ULE, TripCount, C)) + const SCEV *C = SE.getElementCount(VectorTripCount->getType(), NumElements); + if (!SE.isKnownPredicate(CmpInst::ICMP_ULE, VectorTripCount, C)) return false; } else if (match(Term, m_BranchOnCond(m_VPValue(Cond)))) { // For BranchOnCond, check if we can prove the condition to be true using VF diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll index d77ca9875bf01..37eac89acfd11 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll @@ -1589,8 +1589,7 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = mul nuw nsw <16 x i64> [[TMP8]], [[TMP9]] ; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE5]] = call <2 x i64> @llvm.vector.partial.reduce.add.v2i64.v16i64(<2 x i64> [[VEC_PHI1]], <16 x i64> [[TMP10]]) ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 -; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 -; CHECK-INTERLEAVED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[PARTIAL_REDUCE5]], [[PARTIAL_REDUCE]] ; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]]) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll index 8ef53cade01ac..345f6f632158a 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll @@ -295,8 +295,7 @@ define i8 @mul_non_pow_2_low_trip_count(ptr noalias %a) { ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1 ; CHECK-NEXT: [[TMP1]] = mul <8 x i8> [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 8 -; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> [[TMP1]]) ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll b/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll index c1272e56836f8..6e3b2a5390948 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll @@ -12,27 +12,22 @@ define void @test_tc_17_no_epilogue_vectorization(ptr noalias %src, ptr noalias ; CHECK: vector.ph: ;
CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 64 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[INDEX]] -; CHECK-NEXT: store <16 x i8> [[WIDE_LOAD]], ptr [[TMP3]], align 64 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1:%.*]], align 64 +; CHECK-NEXT: store <16 x i8> [[WIDE_LOAD]], ptr [[TMP3:%.*]], align 64 +; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ 16, [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[LDADDR:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[I]] +; CHECK-NEXT: [[LDADDR:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[I]] ; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[LDADDR]], align 64 -; CHECK-NEXT: [[STADDR:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I]] +; CHECK-NEXT: [[STADDR:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 [[I]] ; CHECK-NEXT: store i8 [[VAL]], ptr [[STADDR]], align 64 ; CHECK-NEXT: [[I_NEXT]] = add i64 [[I]], 1 ; CHECK-NEXT: [[IS_NEXT:%.*]] = icmp ult i64 [[I_NEXT]], 17 -; CHECK-NEXT: br i1 [[IS_NEXT]], label [[LOOP]], label [[EXIT:%.*]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[IS_NEXT]], label [[LOOP]], label [[EXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -69,11 +64,11 @@ define void @test_tc_18(ptr noalias %src, ptr noalias %dst) { ; CHECK-NEXT: store <16 x i8> [[WIDE_LOAD]], ptr [[TMP3]], align 64 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF4:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] @@ -140,7 +135,7 @@ define void @test_tc_19(ptr noalias %src, ptr noalias %dst) { ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF4]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: br label 
[[VEC_EPILOG_VECTOR_BODY:%.*]] @@ -219,7 +214,7 @@ define void @test_tc_20(ptr noalias %src, ptr noalias %dst) { ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF11:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] @@ -231,7 +226,7 @@ define void @test_tc_20(ptr noalias %src, ptr noalias %dst) { ; CHECK-NEXT: store <4 x i8> [[WIDE_LOAD5]], ptr [[TMP15]], align 64 ; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], 4 ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT6]], 20 -; CHECK-NEXT: br i1 [[TMP17]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP17]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: @@ -245,7 +240,7 @@ define void @test_tc_20(ptr noalias %src, ptr noalias %dst) { ; CHECK-NEXT: store i8 [[VAL]], ptr [[STADDR]], align 64 ; CHECK-NEXT: [[I_NEXT]] = add i64 [[I]], 1 ; CHECK-NEXT: [[IS_NEXT:%.*]] = icmp ult i64 [[I_NEXT]], 20 -; CHECK-NEXT: br i1 [[IS_NEXT]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[IS_NEXT]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -281,7 +276,7 @@ define void @limit_main_loop_vf_to_avoid_dead_main_vector_loop(ptr noalias %src, ; CHECK-NEXT: store <8 x i8> [[STRIDED_VEC]], ptr [[TMP3]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -294,7 +289,7 @@ define void @limit_main_loop_vf_to_avoid_dead_main_vector_loop(ptr noalias %src, ; CHECK-NEXT: store i8 [[L]], ptr [[GEP_DST]], align 1 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 32 -; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll index 8771dc9a20379..6605338771c47 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll @@ -2581,8 +2581,7 @@ define i32 @test_non_unit_stride_five(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP114]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI5]] ; CHECK-NEXT: [[TMP115]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI6]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP116:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; 
CHECK-NEXT: br i1 [[TMP116]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP113]], [[TMP112]] ; CHECK-NEXT: [[BIN_RDX7:%.*]] = add <4 x i32> [[TMP114]], [[BIN_RDX]] diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll index e160a15ece47d..bba459f776050 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll @@ -1140,18 +1140,14 @@ define void @test_vector_tc_eq_16(ptr %A) { ; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 16 ; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF8UF2: [[VECTOR_BODY]]: -; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; VF8UF2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 8 -; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[NEXT_GEP]], align 1 +; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i32 8 +; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[A]], align 1 ; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1 ; VF8UF2-NEXT: [[TMP2:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10) ; VF8UF2-NEXT: [[TMP3:%.*]] = add nsw <8 x i8> [[WIDE_LOAD1]], splat (i8 10) -; VF8UF2-NEXT: store <8 x i8> [[TMP2]], ptr [[NEXT_GEP]], align 1 +; VF8UF2-NEXT: store <8 x i8> [[TMP2]], ptr [[A]], align 1 ; VF8UF2-NEXT: store <8 x i8> [[TMP3]], ptr [[TMP1]], align 1 -; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; VF8UF2-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; VF8UF2-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; VF8UF2-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; VF8UF2: [[MIDDLE_BLOCK]]: ; VF8UF2-NEXT: br label %[[SCALAR_PH:.*]] ; VF8UF2: [[SCALAR_PH]]: @@ -1165,7 +1161,7 @@ define void @test_vector_tc_eq_16(ptr %A) { ; VF8UF2-NEXT: store i8 [[ADD]], ptr [[P_SRC]], align 1 ; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 ; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17 -; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] +; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; VF8UF2: [[EXIT]]: ; VF8UF2-NEXT: ret void ; @@ -1177,14 +1173,10 @@ define void @test_vector_tc_eq_16(ptr %A) { ; VF16UF1-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 16 ; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]] ; VF16UF1: [[VECTOR_BODY]]: -; VF16UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; VF16UF1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1 +; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[A]], align 1 ; VF16UF1-NEXT: [[TMP1:%.*]] = add nsw <16 x i8> [[WIDE_LOAD]], splat (i8 10) -; VF16UF1-NEXT: store <16 x i8> [[TMP1]], ptr [[NEXT_GEP]], align 1 -; VF16UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; VF16UF1-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; VF16UF1-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label 
%[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; VF16UF1-NEXT: store <16 x i8> [[TMP1]], ptr [[A]], align 1 +; VF16UF1-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; VF16UF1: [[MIDDLE_BLOCK]]: ; VF16UF1-NEXT: br label %[[SCALAR_PH:.*]] ; VF16UF1: [[SCALAR_PH]]: @@ -1198,7 +1190,7 @@ define void @test_vector_tc_eq_16(ptr %A) { ; VF16UF1-NEXT: store i8 [[ADD]], ptr [[P_SRC]], align 1 ; VF16UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 ; VF16UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17 -; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] +; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; VF16UF1: [[EXIT]]: ; VF16UF1-NEXT: ret void ; @@ -1232,12 +1224,10 @@ exit: ; VF8UF2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; VF8UF2: [[META1]] = !{!"llvm.loop.unroll.runtime.disable"} ; VF8UF2: [[META2]] = !{!"llvm.loop.isvectorized", i32 1} -; VF8UF2: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} -; VF8UF2: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; VF8UF2: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]} ;. ; VF16UF1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; VF16UF1: [[META1]] = !{!"llvm.loop.unroll.runtime.disable"} ; VF16UF1: [[META2]] = !{!"llvm.loop.isvectorized", i32 1} -; VF16UF1: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} -; VF16UF1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; VF16UF1: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]} ;. From 80fa6e1bcea054109a154916e37ccfe7b9b0a9fb Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 10 Nov 2025 10:50:15 +0000 Subject: [PATCH 20/24] [DropAssumes] Drop dereferenceable assumptions after vectorization. (#166947) This patch adds another run of DropUnnecessaryAssumes after vectorization, to clean up assumes that are no longer needed after this point. The main example of such assumes is currently dereferenceable assumptions. This complements https://github.com/llvm/llvm-project/pull/166945, which avoids sinking code if it would mean removing a dereferenceable assumption. There are a few additional cases where some unneeded assumes are left over after vectorization that also get cleaned up. The main motivation is to work together with https://github.com/llvm/llvm-project/pull/166945, but there may be a better solution.
Adding another instance of this pass to the pipeline is not great, but compile-time impact seems in the noise: https://llvm-compile-time-tracker.com/compare.php?from=55e71fe08b6406ec7ce2c81ce042e48717acf204&to=85da4ee3a74126f557cdc74c7b40e048dacb3fc4&stat=instructions:u PR: https://github.com/llvm/llvm-project/pull/166947 --- llvm/include/llvm/Passes/PassBuilder.h | 2 +- .../Scalar/DropUnnecessaryAssumes.h | 6 ++ llvm/lib/Passes/PassBuilder.cpp | 5 ++ llvm/lib/Passes/PassBuilderPipelines.cpp | 14 +++- llvm/lib/Passes/PassRegistry.def | 5 +- .../Scalar/DropUnnecessaryAssumes.cpp | 7 +- llvm/test/Other/new-pm-defaults.ll | 1 + llvm/test/Other/new-pm-lto-defaults.ll | 1 + .../Other/new-pm-thinlto-postlink-defaults.ll | 1 + .../new-pm-thinlto-postlink-pgo-defaults.ll | 1 + ...-pm-thinlto-postlink-samplepgo-defaults.ll | 1 + .../DropUnnecessaryAssumes/dereferenceable.ll | 54 +++++++++++++++ .../AArch64/matrix-extract-insert.ll | 68 ------------------- .../PhaseOrdering/AArch64/std-find.ll | 8 +-- 14 files changed, 95 insertions(+), 79 deletions(-) create mode 100644 llvm/test/Transforms/DropUnnecessaryAssumes/dereferenceable.ll diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index 8538a8b2afe14..8fa21f2cb2dd6 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -742,7 +742,7 @@ class PassBuilder { void addRequiredLTOPreLinkPasses(ModulePassManager &MPM); void addVectorPasses(OptimizationLevel Level, FunctionPassManager &FPM, - bool IsFullLTO); + ThinOrFullLTOPhase LTOPhase); static std::optional<std::vector<PipelineElement>> parsePipelineText(StringRef Text); diff --git a/llvm/include/llvm/Transforms/Scalar/DropUnnecessaryAssumes.h b/llvm/include/llvm/Transforms/Scalar/DropUnnecessaryAssumes.h index 4ff442ff80c76..54ddcc09f7204 100644 --- a/llvm/include/llvm/Transforms/Scalar/DropUnnecessaryAssumes.h +++ b/llvm/include/llvm/Transforms/Scalar/DropUnnecessaryAssumes.h @@ -19,7 +19,13 @@ namespace llvm { struct DropUnnecessaryAssumesPass : public PassInfoMixin<DropUnnecessaryAssumesPass> { + DropUnnecessaryAssumesPass(bool DropDereferenceable = false) + : DropDereferenceable(DropDereferenceable) {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + +private: + bool DropDereferenceable; }; } // end namespace llvm diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 40ceb6f6ae28f..e0babc4385aab 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -900,6 +900,11 @@ Expected<bool> parseEntryExitInstrumenterPassOptions(StringRef Params) { "EntryExitInstrumenter"); } +Expected<bool> parseDropUnnecessaryAssumesPassOptions(StringRef Params) { + return PassBuilder::parseSinglePassOption(Params, "drop-deref", + "DropUnnecessaryAssumes"); +} + Expected<bool> parseLoopExtractorPassOptions(StringRef Params) { return PassBuilder::parseSinglePassOption(Params, "single", "LoopExtractor"); } diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 3f41618b18fcf..2fe963b3b68d9 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -1298,10 +1298,18 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, /// TODO: Should LTO cause any differences to this set of passes?
void PassBuilder::addVectorPasses(OptimizationLevel Level, - FunctionPassManager &FPM, bool IsFullLTO) { + FunctionPassManager &FPM, + ThinOrFullLTOPhase LTOPhase) { + const bool IsFullLTO = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink; + FPM.addPass(LoopVectorizePass( LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization))); + // Drop dereferenceable assumes after vectorization, as they are no longer + // needed and can inhibit further optimization. + if (!isLTOPreLink(LTOPhase)) + FPM.addPass(DropUnnecessaryAssumesPass(/*DropDereferenceable=*/true)); + FPM.addPass(InferAlignmentPass()); if (IsFullLTO) { // The vectorizer may have significantly shortened a loop body; unroll @@ -1572,7 +1580,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, // from the TargetLibraryInfo. OptimizePM.addPass(InjectTLIMappings()); - addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false); + addVectorPasses(Level, OptimizePM, LTOPhase); invokeVectorizerEndEPCallbacks(OptimizePM, Level); @@ -2162,7 +2170,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, MainFPM.addPass(LoopDistributePass()); - addVectorPasses(Level, MainFPM, /* IsFullLTO */ true); + addVectorPasses(Level, MainFPM, ThinOrFullLTOPhase::FullLTOPostLink); invokeVectorizerEndEPCallbacks(MainFPM, Level); diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index d870f99aad552..d8305fe5c8e73 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -432,7 +432,6 @@ FUNCTION_PASS("dot-post-dom", PostDomPrinter()) FUNCTION_PASS("dot-post-dom-only", PostDomOnlyPrinter()) FUNCTION_PASS("dse", DSEPass()) FUNCTION_PASS("dwarf-eh-prepare", DwarfEHPreparePass(*TM)) -FUNCTION_PASS("drop-unnecessary-assumes", DropUnnecessaryAssumesPass()) FUNCTION_PASS("expand-large-div-rem", ExpandLargeDivRemPass(*TM)) FUNCTION_PASS("expand-memcmp", ExpandMemCmpPass(*TM)) FUNCTION_PASS("expand-reductions", ExpandReductionsPass()) @@ -584,6 +583,10 @@ FUNCTION_PASS_WITH_PARAMS( "early-cse", "EarlyCSEPass", [](bool UseMemorySSA) { return EarlyCSEPass(UseMemorySSA); }, parseEarlyCSEPassOptions, "memssa") +FUNCTION_PASS_WITH_PARAMS( + "drop-unnecessary-assumes", "DropUnnecessaryAssumesPass", + [](bool DropDereferenceable) { return DropUnnecessaryAssumesPass(DropDereferenceable); }, + parseDropUnnecessaryAssumesPassOptions, "drop-deref") FUNCTION_PASS_WITH_PARAMS( "ee-instrument", "EntryExitInstrumenterPass", [](bool PostInlining) { return EntryExitInstrumenterPass(PostInlining); }, diff --git a/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp b/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp index a577f517d1e89..4a7144fe6c77a 100644 --- a/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp +++ b/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp @@ -78,11 +78,16 @@ DropUnnecessaryAssumesPass::run(Function &F, FunctionAnalysisManager &FAM) { SmallVector KeptBundles; unsigned NumBundles = Assume->getNumOperandBundles(); for (unsigned I = 0; I != NumBundles; ++I) { - auto IsDead = [](OperandBundleUse Bundle) { + auto IsDead = [&](OperandBundleUse Bundle) { // "ignore" operand bundles are always dead. if (Bundle.getTagName() == "ignore") return true; + // "dereferenceable" operand bundles are only dropped if requested + // (e.g., after loop vectorization has run). + if (Bundle.getTagName() == "dereferenceable") + return DropDereferenceable; + // Bundles without arguments do not affect any specific values. // Always keep them for now. 
if (Bundle.Inputs.empty()) diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll index 62975a3cf8ac4..b59d4cf6af998 100644 --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -261,6 +261,7 @@ ; CHECK-O-NEXT: Running analysis: LoopAccessAnalysis on foo ; CHECK-O-NEXT: Running pass: InjectTLIMappings ; CHECK-O-NEXT: Running pass: LoopVectorizePass +; CHECK-DEFAULT-NEXT: Running pass: DropUnnecessaryAssumesPass ; CHECK-O-NEXT: Running pass: InferAlignmentPass ; CHECK-O-NEXT: Running pass: LoopLoadEliminationPass ; CHECK-O-NEXT: Running pass: InstCombinePass diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll index f595dfe1d6845..c865d77c86d77 100644 --- a/llvm/test/Other/new-pm-lto-defaults.ll +++ b/llvm/test/Other/new-pm-lto-defaults.ll @@ -129,6 +129,7 @@ ; CHECK-O23SZ-NEXT: Running analysis: LoopAccessAnalysis on foo ; CHECK-O23SZ-NEXT: Running pass: LoopVectorizePass on foo ; CHECK-O23SZ-NEXT: Running analysis: DemandedBitsAnalysis on foo +; CHECK-O23SZ-NEXT: Running pass: DropUnnecessaryAssumesPass on foo ; CHECK-O23SZ-NEXT: Running pass: InferAlignmentPass on foo ; CHECK-O23SZ-NEXT: Running pass: LoopUnrollPass on foo ; CHECK-O23SZ-NEXT: WarnMissedTransformationsPass on foo diff --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll index 012a1ab5802b5..c1d8b42505c84 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll @@ -180,6 +180,7 @@ ; CHECK-POSTLINK-O-NEXT: Running analysis: LoopAccessAnalysis on foo ; CHECK-POSTLINK-O-NEXT: Running pass: InjectTLIMappings ; CHECK-POSTLINK-O-NEXT: Running pass: LoopVectorizePass +; CHECK-POSTLINK-O-NEXT: Running pass: DropUnnecessaryAssumesPass ; CHECK-POSTLINK-O-NEXT: Running pass: InferAlignmentPass ; CHECK-POSTLINK-O-NEXT: Running pass: LoopLoadEliminationPass ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll index e021ff3124b60..45f090252eaf7 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -165,6 +165,7 @@ ; CHECK-O-NEXT: Running analysis: LoopAccessAnalysis on foo ; CHECK-O-NEXT: Running pass: InjectTLIMappings ; CHECK-O-NEXT: Running pass: LoopVectorizePass +; CHECK-O-NEXT: Running pass: DropUnnecessaryAssumesPass ; CHECK-O-NEXT: Running pass: InferAlignmentPass ; CHECK-O-NEXT: Running pass: LoopLoadEliminationPass ; CHECK-O-NEXT: Running pass: InstCombinePass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll index 20f94bc2e0f6c..4c330f44d30cc 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -174,6 +174,7 @@ ; CHECK-O-NEXT: Running analysis: LoopAccessAnalysis ; CHECK-O-NEXT: Running pass: InjectTLIMappings ; CHECK-O-NEXT: Running pass: LoopVectorizePass +; CHECK-O-NEXT: Running pass: DropUnnecessaryAssumesPass ; CHECK-O-NEXT: Running pass: InferAlignmentPass ; CHECK-O-NEXT: Running pass: LoopLoadEliminationPass ; CHECK-O-NEXT: Running pass: InstCombinePass diff --git a/llvm/test/Transforms/DropUnnecessaryAssumes/dereferenceable.ll b/llvm/test/Transforms/DropUnnecessaryAssumes/dereferenceable.ll new file mode 100644 
index 0000000000000..43fa08c070828 --- /dev/null +++ b/llvm/test/Transforms/DropUnnecessaryAssumes/dereferenceable.ll @@ -0,0 +1,54 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -passes='drop-unnecessary-assumes' -S %s | FileCheck %s +; RUN: opt -passes='drop-unnecessary-assumes' -S %s | FileCheck --check-prefix=DROP-DEREF %s + +declare void @use(ptr) + +define i8 @test_dereferenceable_assume_ptr_not_used(ptr %p, i64 %size) { +; CHECK-LABEL: define i8 @test_dereferenceable_assume_ptr_not_used( +; CHECK-SAME: ptr [[P:%.*]], i64 [[SIZE:%.*]]) { +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 [[SIZE]]) ] +; CHECK-NEXT: ret i8 0 +; +; DROP-DEREF-LABEL: define i8 @test_dereferenceable_assume_ptr_not_used( +; DROP-DEREF-SAME: ptr [[P:%.*]], i64 [[SIZE:%.*]]) { +; DROP-DEREF-NEXT: ret i8 0 +; + call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 %size) ] + ret i8 0 +} + +define i8 @test_dereferenceable_assume_ptr_used_variable_size(ptr %p, i64 %size) { +; CHECK-LABEL: define i8 @test_dereferenceable_assume_ptr_used_variable_size( +; CHECK-SAME: ptr [[P:%.*]], i64 [[SIZE:%.*]]) { +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 [[SIZE]]) ] +; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[P]], align 1 +; CHECK-NEXT: ret i8 [[VAL]] +; +; DROP-DEREF-LABEL: define i8 @test_dereferenceable_assume_ptr_used_variable_size( +; DROP-DEREF-SAME: ptr [[P:%.*]], i64 [[SIZE:%.*]]) { +; DROP-DEREF-NEXT: [[VAL:%.*]] = load i8, ptr [[P]], align 1 +; DROP-DEREF-NEXT: ret i8 [[VAL]] +; + call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 %size) ] + %val = load i8, ptr %p + ret i8 %val +} + +define i8 @test_dereferenceable_with_align_ptr_used(ptr %p, i64 %size) { +; CHECK-LABEL: define i8 @test_dereferenceable_with_align_ptr_used( +; CHECK-SAME: ptr [[P:%.*]], i64 [[SIZE:%.*]]) { +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 [[SIZE]]), "align"(ptr [[P]], i64 8) ] +; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[P]], align 1 +; CHECK-NEXT: ret i8 [[VAL]] +; +; DROP-DEREF-LABEL: define i8 @test_dereferenceable_with_align_ptr_used( +; DROP-DEREF-SAME: ptr [[P:%.*]], i64 [[SIZE:%.*]]) { +; DROP-DEREF-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P]], i64 8) ] +; DROP-DEREF-NEXT: [[VAL:%.*]] = load i8, ptr [[P]], align 1 +; DROP-DEREF-NEXT: ret i8 [[VAL]] +; + call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 %size), "align"(ptr %p, i64 8) ] + %val = load i8, ptr %p + ret i8 %val +} diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll index e3765ed541e7a..75276c0412647 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll @@ -106,23 +106,6 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = or disjoint i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP6:%.*]] = or disjoint i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> [[TMP7]], i64 [[TMP4]], i64 1 -; 
CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP6]], i64 1 -; CHECK-NEXT: [[TMP11:%.*]] = icmp ult <2 x i64> [[TMP8]], splat (i64 225) -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP11]], i64 0 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP11]], i64 1 -; CHECK-NEXT: [[TMP12:%.*]] = icmp ult <2 x i64> [[TMP10]], splat (i64 225) -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP12]], i64 0 -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP12]], i64 1 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP13]]) -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP14]]) -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP15]]) -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP16]]) ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw double, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP17]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP17]], align 8, !alias.scope [[META0:![0-9]+]] @@ -182,23 +165,6 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK: vector.body.1: ; CHECK-NEXT: [[INDEX_1:%.*]] = phi i64 [ 0, [[VECTOR_PH_1]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY_1]] ] ; CHECK-NEXT: [[TMP33:%.*]] = add nuw nsw i64 [[INDEX_1]], 15 -; CHECK-NEXT: [[TMP34:%.*]] = add nuw nsw i64 [[INDEX_1]], 16 -; CHECK-NEXT: [[TMP35:%.*]] = insertelement <2 x i64> poison, i64 [[TMP33]], i64 0 -; CHECK-NEXT: [[TMP36:%.*]] = insertelement <2 x i64> [[TMP35]], i64 [[TMP34]], i64 1 -; CHECK-NEXT: [[TMP37:%.*]] = add nuw nsw i64 [[INDEX_1]], 17 -; CHECK-NEXT: [[TMP38:%.*]] = add nuw nsw i64 [[INDEX_1]], 18 -; CHECK-NEXT: [[TMP39:%.*]] = insertelement <2 x i64> poison, i64 [[TMP37]], i64 0 -; CHECK-NEXT: [[TMP40:%.*]] = insertelement <2 x i64> [[TMP39]], i64 [[TMP38]], i64 1 -; CHECK-NEXT: [[TMP41:%.*]] = icmp ult <2 x i64> [[TMP36]], splat (i64 225) -; CHECK-NEXT: [[TMP43:%.*]] = extractelement <2 x i1> [[TMP41]], i64 0 -; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x i1> [[TMP41]], i64 1 -; CHECK-NEXT: [[TMP42:%.*]] = icmp ult <2 x i64> [[TMP40]], splat (i64 225) -; CHECK-NEXT: [[TMP45:%.*]] = extractelement <2 x i1> [[TMP42]], i64 0 -; CHECK-NEXT: [[TMP46:%.*]] = extractelement <2 x i1> [[TMP42]], i64 1 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP43]]) -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP44]]) -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP45]]) -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP46]]) ; CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds nuw double, ptr [[A]], i64 [[TMP33]] ; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP47]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD_1:%.*]] = load <2 x double>, ptr [[TMP47]], align 8, !alias.scope [[META0]] @@ -259,23 +225,6 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK: vector.body.2: ; CHECK-NEXT: [[INDEX_2:%.*]] = phi i64 [ 0, [[VECTOR_PH_2]] ], [ [[INDEX_NEXT_2:%.*]], [[VECTOR_BODY_2]] ] ; CHECK-NEXT: [[TMP64:%.*]] = add nuw nsw i64 [[INDEX_2]], 30 -; CHECK-NEXT: [[TMP65:%.*]] = add nuw nsw i64 [[INDEX_2]], 31 -; CHECK-NEXT: [[TMP66:%.*]] = insertelement <2 x i64> poison, i64 [[TMP64]], i64 0 -; CHECK-NEXT: [[TMP67:%.*]] = insertelement <2 x i64> [[TMP66]], i64 [[TMP65]], i64 1 -; CHECK-NEXT: [[TMP68:%.*]] = add nuw nsw i64 [[INDEX_2]], 32 -; CHECK-NEXT: [[TMP69:%.*]] = add nuw nsw i64 [[INDEX_2]], 33 -; 
CHECK-NEXT: [[TMP70:%.*]] = insertelement <2 x i64> poison, i64 [[TMP68]], i64 0 -; CHECK-NEXT: [[TMP71:%.*]] = insertelement <2 x i64> [[TMP70]], i64 [[TMP69]], i64 1 -; CHECK-NEXT: [[TMP72:%.*]] = icmp ult <2 x i64> [[TMP67]], splat (i64 225) -; CHECK-NEXT: [[TMP74:%.*]] = extractelement <2 x i1> [[TMP72]], i64 0 -; CHECK-NEXT: [[TMP75:%.*]] = extractelement <2 x i1> [[TMP72]], i64 1 -; CHECK-NEXT: [[TMP73:%.*]] = icmp ult <2 x i64> [[TMP71]], splat (i64 225) -; CHECK-NEXT: [[TMP76:%.*]] = extractelement <2 x i1> [[TMP73]], i64 0 -; CHECK-NEXT: [[TMP77:%.*]] = extractelement <2 x i1> [[TMP73]], i64 1 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP74]]) -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP75]]) -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP76]]) -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP77]]) ; CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds nuw double, ptr [[A]], i64 [[TMP64]] ; CHECK-NEXT: [[TMP79:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP78]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD_2:%.*]] = load <2 x double>, ptr [[TMP78]], align 8, !alias.scope [[META0]] @@ -336,23 +285,6 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK: vector.body.3: ; CHECK-NEXT: [[INDEX_3:%.*]] = phi i64 [ 0, [[VECTOR_PH_3]] ], [ [[INDEX_NEXT_3:%.*]], [[VECTOR_BODY_3]] ] ; CHECK-NEXT: [[TMP95:%.*]] = add nuw nsw i64 [[INDEX_3]], 45 -; CHECK-NEXT: [[TMP96:%.*]] = add nuw nsw i64 [[INDEX_3]], 46 -; CHECK-NEXT: [[TMP97:%.*]] = insertelement <2 x i64> poison, i64 [[TMP95]], i64 0 -; CHECK-NEXT: [[TMP98:%.*]] = insertelement <2 x i64> [[TMP97]], i64 [[TMP96]], i64 1 -; CHECK-NEXT: [[TMP99:%.*]] = add nuw nsw i64 [[INDEX_3]], 47 -; CHECK-NEXT: [[TMP100:%.*]] = add nuw nsw i64 [[INDEX_3]], 48 -; CHECK-NEXT: [[TMP101:%.*]] = insertelement <2 x i64> poison, i64 [[TMP99]], i64 0 -; CHECK-NEXT: [[TMP102:%.*]] = insertelement <2 x i64> [[TMP101]], i64 [[TMP100]], i64 1 -; CHECK-NEXT: [[TMP103:%.*]] = icmp ult <2 x i64> [[TMP98]], splat (i64 225) -; CHECK-NEXT: [[TMP105:%.*]] = extractelement <2 x i1> [[TMP103]], i64 0 -; CHECK-NEXT: [[TMP106:%.*]] = extractelement <2 x i1> [[TMP103]], i64 1 -; CHECK-NEXT: [[TMP104:%.*]] = icmp ult <2 x i64> [[TMP102]], splat (i64 225) -; CHECK-NEXT: [[TMP107:%.*]] = extractelement <2 x i1> [[TMP104]], i64 0 -; CHECK-NEXT: [[TMP108:%.*]] = extractelement <2 x i1> [[TMP104]], i64 1 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP105]]) -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP106]]) -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP107]]) -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP108]]) ; CHECK-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw double, ptr [[A]], i64 [[TMP95]] ; CHECK-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP109]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD_3:%.*]] = load <2 x double>, ptr [[TMP109]], align 8, !alias.scope [[META0]] diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll index e9149795954ec..fd7b75f22cb6d 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll @@ -8,7 +8,6 @@ define i64 @std_find_i16_constant_offset_with_assumptions(ptr %first.coerce, i16 ; CHECK-SAME: ptr [[FIRST_COERCE:%.*]], i16 noundef signext [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[FIRST_COERCE]], i64 2) ] -; CHECK-NEXT: call void 
@llvm.assume(i1 true) [ "dereferenceable"(ptr [[FIRST_COERCE]], i64 256) ] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[S]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -133,15 +132,14 @@ define ptr @std_find_caller(ptr noundef %first, ptr noundef %last) { ; CHECK-LABEL: define noundef ptr @std_find_caller( ; CHECK-SAME: ptr noundef [[FIRST:%.*]], ptr noundef [[LAST:%.*]]) local_unnamed_addr #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: [[FIRST3:%.*]] = ptrtoint ptr [[FIRST]] to i64 -; CHECK-NEXT: [[LAST_I64:%.*]] = ptrtoint ptr [[LAST]] to i64 -; CHECK-NEXT: [[PTR_SUB:%.*]] = sub i64 [[LAST_I64]], [[FIRST3]] ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[FIRST]], i64 2) ] ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[LAST]], i64 2) ] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[FIRST]], i64 [[PTR_SUB]]) ] ; CHECK-NEXT: [[PRE_I:%.*]] = icmp eq ptr [[FIRST]], [[LAST]] ; CHECK-NEXT: br i1 [[PRE_I]], label %[[STD_FIND_GENERIC_IMPL_EXIT:.*]], label %[[LOOP_HEADER_I_PREHEADER:.*]] ; CHECK: [[LOOP_HEADER_I_PREHEADER]]: +; CHECK-NEXT: [[LAST_I64:%.*]] = ptrtoint ptr [[LAST]] to i64 +; CHECK-NEXT: [[FIRST3:%.*]] = ptrtoint ptr [[FIRST]] to i64 +; CHECK-NEXT: [[PTR_SUB:%.*]] = sub i64 [[LAST_I64]], [[FIRST3]] ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[PTR_SUB]] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LAST_I64]], -2 ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[FIRST3]] From 0e6c8daabb76fe40e4e60e7e82a907648412e3cd Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 10 Nov 2025 10:57:29 +0000 Subject: [PATCH 21/24] [X86] ldexp-avx512.ll - add v8f16/v16f16/v32f16 test coverage for #165694 (#167294) --- llvm/test/CodeGen/X86/ldexp-avx512.ll | 1288 ++++++++++++++++++++++++- 1 file changed, 1285 insertions(+), 3 deletions(-) diff --git a/llvm/test/CodeGen/X86/ldexp-avx512.ll b/llvm/test/CodeGen/X86/ldexp-avx512.ll index ea93a911a1ad0..21491bc2cc8f5 100644 --- a/llvm/test/CodeGen/X86/ldexp-avx512.ll +++ b/llvm/test/CodeGen/X86/ldexp-avx512.ll @@ -47,6 +47,187 @@ entry: } declare fp128 @ldexpl(fp128, i32) memory(none) +define <8 x half> @test_ldexp_8xhalf(<8 x half> %x, <8 x i16> %exp) nounwind { +; AVX512-LABEL: test_ldexp_8xhalf: +; AVX512: # %bb.0: +; AVX512-NEXT: subq $88, %rsp +; AVX512-NEXT: vmovdqa %xmm1, (%rsp) # 16-byte Spill +; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vpsrldq {{.*#+}} xmm2 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX512-NEXT: vcvtph2ps %xmm2, %xmm0 +; AVX512-NEXT: vpextrw $7, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = mem[3,3,3,3] +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; AVX512-NEXT: vpextrw $6, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; 
AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; AVX512-NEXT: vpextrw $5, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = mem[1,0] +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; AVX512-NEXT: vpextrw $4, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512-NEXT: vpsrlq $48, %xmm0, %xmm0 +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; AVX512-NEXT: vpextrw $3, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = mem[1,1,3,3] +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; AVX512-NEXT: vpextrw $2, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512-NEXT: vpsrld $16, %xmm0, %xmm0 +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; AVX512-NEXT: vpextrw $1, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; AVX512-NEXT: vmovd %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX512-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX512-NEXT: addq $88, 
%rsp +; AVX512-NEXT: retq +; +; AVX512VL-LABEL: test_ldexp_8xhalf: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: subq $88, %rsp +; AVX512VL-NEXT: vmovdqa %xmm1, (%rsp) # 16-byte Spill +; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm2 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm0 +; AVX512VL-NEXT: vpextrw $7, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = mem[3,3,3,3] +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vpextrw $6, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vpextrw $5, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = mem[1,0] +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vpextrw $4, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512VL-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vpextrw $3, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = mem[1,1,3,3] +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vpextrw $2, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = 
xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vpextrw $1, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vmovd %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512VL-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX512VL-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX512VL-NEXT: addq $88, %rsp +; AVX512VL-NEXT: retq + %r = call <8 x half> @llvm.ldexp.v8f16.v8i16(<8 x half> %x, <8 x i16> %exp) + ret <8 x half> %r +} +declare <8 x half> @llvm.ldexp.v8f16.v8i16(<8 x half>, <8 x i16>) + define <4 x float> @test_ldexp_4xfloat(<4 x float> %x, <4 x i32> %exp) nounwind { ; CHECK-LABEL: test_ldexp_4xfloat: ; CHECK: # %bb.0: @@ -109,6 +290,381 @@ define <2 x double> @test_ldexp_2xdouble(<2 x double> %x, <2 x i32> %exp) nounwi } declare <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double>, <2 x i32>) +define <16 x half> @test_ldexp_16xhalf(<16 x half> %x, <16 x i16> %exp) nounwind { +; AVX512-LABEL: test_ldexp_16xhalf: +; AVX512: # %bb.0: +; AVX512-NEXT: subq $168, %rsp +; AVX512-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX512-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm1 +; AVX512-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vpextrw $7, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = mem[3,3,3,3] +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512-NEXT: vpextrw $6, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = 
xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512-NEXT: vpextrw $5, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX512-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = mem[1,0] +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512-NEXT: vpextrw $4, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload +; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload +; AVX512-NEXT: vpextrw $7, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX512-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = mem[3,3,3,3] +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload +; AVX512-NEXT: vpextrw $6, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload +; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload +; AVX512-NEXT: vpextrw $5, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = mem[1,0] +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload +; AVX512-NEXT: vpextrw $4, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = 
xmm0[0],mem[0],xmm0[1],mem[1] +; AVX512-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; AVX512-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512-NEXT: vpsrlq $48, %xmm0, %xmm0 +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512-NEXT: vpextrw $3, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX512-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = mem[1,1,3,3] +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512-NEXT: vpextrw $2, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512-NEXT: vpsrld $16, %xmm0, %xmm0 +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512-NEXT: vpextrw $1, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512-NEXT: vmovd %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload +; AVX512-NEXT: vpsrlq $48, %xmm0, %xmm0 +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload +; AVX512-NEXT: vpextrw $3, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = mem[1,1,3,3] +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload +; AVX512-NEXT: vpextrw $2, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 
32-byte Reload +; AVX512-NEXT: vpsrld $16, %xmm0, %xmm0 +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload +; AVX512-NEXT: vpextrw $1, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX512-NEXT: vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload +; AVX512-NEXT: vmovd %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX512-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; AVX512-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload +; AVX512-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] +; AVX512-NEXT: addq $168, %rsp +; AVX512-NEXT: retq +; +; AVX512VL-LABEL: test_ldexp_16xhalf: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: subq $168, %rsp +; AVX512VL-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX512VL-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vextracti128 $1, %ymm1, %xmm1 +; AVX512VL-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vpextrw $7, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = mem[3,3,3,3] +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vpextrw $6, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vpextrw $5, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX512VL-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = mem[1,0] +; AVX512VL-NEXT: 
vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vpextrw $4, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512VL-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload +; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload +; AVX512VL-NEXT: vpextrw $7, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX512VL-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = mem[3,3,3,3] +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload +; AVX512VL-NEXT: vpextrw $6, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512VL-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; AVX512VL-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload +; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload +; AVX512VL-NEXT: vpextrw $5, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = mem[1,0] +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload +; AVX512VL-NEXT: vpextrw $4, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512VL-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX512VL-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; AVX512VL-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX512VL-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vpextrw $3, 
%xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX512VL-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = mem[1,1,3,3] +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vpextrw $2, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512VL-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; AVX512VL-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vpextrw $1, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vmovd %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512VL-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload +; AVX512VL-NEXT: vpextrw $3, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = mem[1,1,3,3] +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload +; AVX512VL-NEXT: vpextrw $2, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload +; AVX512VL-NEXT: vpextrw $1, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; 
AVX512VL-NEXT: vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload +; AVX512VL-NEXT: vmovd %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512VL-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX512VL-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; AVX512VL-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload +; AVX512VL-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] +; AVX512VL-NEXT: addq $168, %rsp +; AVX512VL-NEXT: retq + %r = call <16 x half> @llvm.ldexp.v16f16.v16i16(<16 x half> %x, <16 x i16> %exp) + ret <16 x half> %r +} +declare <16 x half> @llvm.ldexp.v16f16.v16i16(<16 x half>, <16 x i16>) + define <8 x float> @test_ldexp_8xfloat(<8 x float> %x, <8 x i32> %exp) nounwind { ; CHECK-LABEL: test_ldexp_8xfloat: ; CHECK: # %bb.0: @@ -230,6 +786,735 @@ define <4 x double> @test_ldexp_4xdouble(<4 x double> %x, <4 x i32> %exp) nounwi } declare <4 x double> @llvm.ldexp.v4f64.v4i32(<4 x double>, <4 x i32>) +define <32 x half> @test_ldexp_32xhalf(<32 x half> %x, <32 x i16> %exp) nounwind { +; AVX512-LABEL: test_ldexp_32xhalf: +; AVX512: # %bb.0: +; AVX512-NEXT: subq $360, %rsp # imm = 0x168 +; AVX512-NEXT: vmovdqu64 %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill +; AVX512-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill +; AVX512-NEXT: vextracti32x4 $3, %zmm0, %xmm0 +; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vextracti32x4 $3, %zmm1, %xmm1 +; AVX512-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vpextrw $7, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX512-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = mem[3,3,3,3] +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512-NEXT: vpextrw $6, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512-NEXT: vpextrw $5, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vpermilpd $1, 
{{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = mem[1,0] +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512-NEXT: vpextrw $4, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload +; AVX512-NEXT: vextracti32x4 $2, %zmm0, %xmm0 +; AVX512-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload +; AVX512-NEXT: vextracti32x4 $2, %zmm1, %xmm1 +; AVX512-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vpextrw $7, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vpermilps $255, (%rsp), %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = mem[3,3,3,3] +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512-NEXT: vpextrw $6, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload +; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512-NEXT: vpextrw $5, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = mem[1,0] +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512-NEXT: vpextrw $4, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX512-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; AVX512-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload +; 
AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload +; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm1 +; AVX512-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vpextrw $7, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = mem[3,3,3,3] +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512-NEXT: vpextrw $6, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512-NEXT: vpextrw $5, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = mem[1,0] +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512-NEXT: vpextrw $4, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload +; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload +; AVX512-NEXT: vpextrw $7, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = mem[3,3,3,3] +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload +; AVX512-NEXT: vpextrw $6, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: vzeroupper +; 
AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload +; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload +; AVX512-NEXT: vpextrw $5, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = mem[1,0] +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload +; AVX512-NEXT: vpextrw $4, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX512-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; AVX512-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload +; AVX512-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512-NEXT: vpsrlq $48, %xmm0, %xmm0 +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512-NEXT: vpextrw $3, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = mem[1,1,3,3] +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512-NEXT: vpextrw $2, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512-NEXT: vpsrld $16, %xmm0, %xmm0 +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512-NEXT: vpextrw $1, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 
16-byte Reload +; AVX512-NEXT: vmovd %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload +; AVX512-NEXT: vpsrlq $48, %xmm0, %xmm0 +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512-NEXT: vpextrw $3, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = mem[1,1,3,3] +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512-NEXT: vpextrw $2, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload +; AVX512-NEXT: vpsrld $16, %xmm0, %xmm0 +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512-NEXT: vpextrw $1, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vcvtph2ps (%rsp), %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512-NEXT: vmovd %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX512-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; AVX512-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512-NEXT: vpsrlq $48, %xmm0, %xmm0 +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512-NEXT: vpextrw $3, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = mem[1,1,3,3] +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512-NEXT: vpextrw $2, %xmm1, %eax +; AVX512-NEXT: movswl 
%ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512-NEXT: vpsrld $16, %xmm0, %xmm0 +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512-NEXT: vpextrw $1, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX512-NEXT: vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512-NEXT: vmovd %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload +; AVX512-NEXT: vpsrlq $48, %xmm0, %xmm0 +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload +; AVX512-NEXT: vpextrw $3, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX512-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = mem[1,1,3,3] +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload +; AVX512-NEXT: vpextrw $2, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload +; AVX512-NEXT: vpsrld $16, %xmm0, %xmm0 +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload +; AVX512-NEXT: vpextrw $1, %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload +; AVX512-NEXT: vmovd %xmm1, %eax +; AVX512-NEXT: movswl %ax, %edi +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq ldexpf@PLT +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte 
Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX512-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; AVX512-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload +; AVX512-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 64-byte Folded Reload +; AVX512-NEXT: # zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] +; AVX512-NEXT: addq $360, %rsp # imm = 0x168 +; AVX512-NEXT: retq +; +; AVX512VL-LABEL: test_ldexp_32xhalf: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: subq $360, %rsp # imm = 0x168 +; AVX512VL-NEXT: vmovdqu64 %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill +; AVX512VL-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill +; AVX512VL-NEXT: vextracti32x4 $3, %zmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vextracti32x4 $3, %zmm1, %xmm1 +; AVX512VL-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vpextrw $7, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX512VL-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = mem[3,3,3,3] +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vpextrw $6, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512VL-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vpextrw $5, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = mem[1,0] +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vpextrw $4, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512VL-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload +; AVX512VL-NEXT: vextracti32x4 $2, %zmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; 
AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload +; AVX512VL-NEXT: vextracti32x4 $2, %zmm1, %xmm1 +; AVX512VL-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vpextrw $7, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vpermilps $255, (%rsp), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = mem[3,3,3,3] +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vpextrw $6, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload +; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vpextrw $5, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = mem[1,0] +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vpextrw $4, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512VL-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX512VL-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; AVX512VL-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX512VL-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload +; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload +; AVX512VL-NEXT: vextracti128 $1, %ymm1, %xmm1 +; AVX512VL-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vpextrw $7, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 
16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = mem[3,3,3,3] +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vpextrw $6, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vpextrw $5, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = mem[1,0] +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vpextrw $4, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512VL-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload +; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload +; AVX512VL-NEXT: vpextrw $7, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = mem[3,3,3,3] +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload +; AVX512VL-NEXT: vpextrw $6, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload +; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload +; AVX512VL-NEXT: vpextrw $5, %xmm1, %eax +; 
AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = mem[1,0] +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload +; AVX512VL-NEXT: vpextrw $4, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512VL-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX512VL-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; AVX512VL-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload +; AVX512VL-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill +; AVX512VL-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vpextrw $3, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = mem[1,1,3,3] +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vpextrw $2, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vpextrw $1, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vmovd %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512VL-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vpsrlq $48, (%rsp), %xmm0 # 16-byte 
Folded Reload +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vpextrw $3, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = mem[1,1,3,3] +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vpextrw $2, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vpsrld $16, (%rsp), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vpextrw $1, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vcvtph2ps (%rsp), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vmovd %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512VL-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX512VL-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; AVX512VL-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX512VL-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vpextrw $3, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = mem[1,1,3,3] +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vpextrw $2, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vpextrw $1, %xmm1, %eax 
+; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX512VL-NEXT: vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX512VL-NEXT: vmovd %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512VL-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX512VL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload +; AVX512VL-NEXT: vpextrw $3, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX512VL-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = mem[1,1,3,3] +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload +; AVX512VL-NEXT: vpextrw $2, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512VL-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; AVX512VL-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload +; AVX512VL-NEXT: vpextrw $1, %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512VL-NEXT: vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload +; AVX512VL-NEXT: vmovd %xmm1, %eax +; AVX512VL-NEXT: movswl %ax, %edi +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: callq ldexpf@PLT +; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; AVX512VL-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX512VL-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; AVX512VL-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload +; AVX512VL-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 64-byte Folded Reload +; AVX512VL-NEXT: # zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] +; AVX512VL-NEXT: addq $360, %rsp # imm = 0x168 +; AVX512VL-NEXT: retq + %r = call <32 x half> 
@llvm.ldexp.v32f16.v32i16(<32 x half> %x, <32 x i16> %exp) + ret <32 x half> %r +} +declare <32 x half> @llvm.ldexp.v32f16.v32i16(<32 x half>, <32 x i16>) + define <16 x float> @test_ldexp_16xfloat(<16 x float> %x, <16 x i32> %exp) nounwind { ; CHECK-LABEL: test_ldexp_16xfloat: ; CHECK: # %bb.0: @@ -462,6 +1747,3 @@ define <8 x double> @test_ldexp_8xdouble(<8 x double> %x, <8 x i32> %exp) nounwi } declare <8 x double> @llvm.ldexp.v8f64.v8i32(<8 x double>, <8 x i32>) -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; AVX512: {{.*}} -; AVX512VL: {{.*}} From 54d86df4a22c226a957e87375b91db478c641d60 Mon Sep 17 00:00:00 2001 From: Cullen Rhodes Date: Mon, 10 Nov 2025 11:11:51 +0000 Subject: [PATCH 22/24] [AArch64] Fallback to PRFUM for PRFM with negative or unaligned offset (#166756) Section C3.2.2 (quoted below) in the ARMARM makes this a requirement of assemblers for load/stores with unscaled offset. It makes no mention of PRFM so I don't consider this to be a bug, although I can see why we would want to extend this behaviour to the unscaled variants of these instructions as well, as GCC does. This patch adds an alias for this. C3.2.2 Load/store register (unscaled offset) The load/store register instructions with an unscaled offset support only one addressing mode: Base plus an unscaled 9-bit signed immediate offset. See Load/store addressing modes. The load/store register (unscaled offset) instructions are required to disambiguate this instruction class from the load/store register instruction forms that support an addressing mode of base plus a scaled, unsigned 12-bit immediate offset, because that can represent some offset values in the same range. The ambiguous immediate offsets are byte offsets that are both: In the range 0-255, inclusive. Naturally aligned to the access size. Other byte offsets in the range -256 to 255 inclusive are unambiguous. An assembler program translating a load/store instruction, for example LDR, is required to encode an unambiguous offset using the unscaled 9-bit offset form, and to encode an ambiguous offset using the scaled 12-bit offset form. A programmer might force the generation of the unscaled 9-bit form by using one of the mnemonics in Table C.3.21. Arm recommends that a disassembler outputs all unscaled 9-bit offset forms using one of these mnemonics, but unambiguous offsets can be output using a load/store single register mnemonic, for example, LDR. Fixes #83226. --- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 5 +++ llvm/test/MC/AArch64/prfum.s | 44 +++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 llvm/test/MC/AArch64/prfum.s diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 76f076a60765f..b30e3d06b2c9f 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -4444,6 +4444,11 @@ defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum", [(AArch64Prefetch timm:$Rt, (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; +// PRFM falls back to PRFUM for negative or unaligned offsets (not a multiple +// of 8). 
+def : InstAlias<"prfm $Rt, [$Rn, $offset]", + (PRFUMi prfop:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; + //--- // (unscaled immediate, unprivileged) defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">; diff --git a/llvm/test/MC/AArch64/prfum.s b/llvm/test/MC/AArch64/prfum.s new file mode 100644 index 0000000000000..81a864a694325 --- /dev/null +++ b/llvm/test/MC/AArch64/prfum.s @@ -0,0 +1,44 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding --print-imm-hex=false < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj < %s \ +// RUN: | llvm-objdump -d --print-imm-hex=false - | FileCheck %s --check-prefix=CHECK-INST +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -disassemble -show-encoding --print-imm-hex=false \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +// PRFM falls back to PRFUM for negative or unaligned offsets (not a multiple +// of 8). + +prfm pldl1keep, [x0, #-256] +// CHECK-INST: prfum pldl1keep, [x0, #-256] +// CHECK-ENCODING: [0x00,0x00,0x90,0xf8] + +prfm pldl1keep, [x0, #-8] +// CHECK-INST: prfum pldl1keep, [x0, #-8] +// CHECK-ENCODING: [0x00,0x80,0x9f,0xf8] + +prfm pldl1keep, [x0, #-1] +// CHECK-INST: prfum pldl1keep, [x0, #-1] +// CHECK-ENCODING: [0x00,0xf0,0x9f,0xf8] + +prfm pldl1keep, [x0, #0] +// CHECK-INST: prfm pldl1keep, [x0] +// CHECK-ENCODING: [0x00,0x00,0x80,0xf9] + +prfm pldl1keep, [x0, #1] +// CHECK-INST: prfum pldl1keep, [x0, #1] +// CHECK-ENCODING: [0x00,0x10,0x80,0xf8] + +prfm pldl1keep, [x0, #8] +// CHECK-INST: prfm pldl1keep, [x0, #8] +// CHECK-ENCODING: [0x00,0x04,0x80,0xf9] + +prfm pldl1keep, [x0, #255] +// CHECK-INST: prfum pldl1keep, [x0, #255] +// CHECK-ENCODING: [0x00,0xf0,0x8f,0xf8] + +prfm pldl1keep, [x0, #256] +// CHECK-INST: prfm pldl1keep, [x0, #256] +// CHECK-ENCODING: [0x00,0x80,0x80,0xf9] From b18d828eeae03cc9c42edf4d72911c75f117397c Mon Sep 17 00:00:00 2001 From: Tomer Shafir Date: Mon, 10 Nov 2025 13:51:07 +0200 Subject: [PATCH 23/24] [tools][llc] Make save-stats.ll test target independent (#167238) --- llvm/test/tools/llc/save-stats.ll | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/llvm/test/tools/llc/save-stats.ll b/llvm/test/tools/llc/save-stats.ll index acb0367195043..4950625c809cc 100644 --- a/llvm/test/tools/llc/save-stats.ll +++ b/llvm/test/tools/llc/save-stats.ll @@ -1,10 +1,9 @@ ; REQUIRES: asserts -; REQUIRES: aarch64-registered-target -; RUN: llc -mtriple=arm64-apple-macosx --save-stats=obj -o %t.s %s && cat %t.stats | FileCheck %s -; RUN: llc -mtriple=arm64-apple-macosx --save-stats=cwd -o %t.s %s && cat %{t:stem}.tmp.stats | FileCheck %s -; RUN: llc -mtriple=arm64-apple-macosx --save-stats -o %t.s %s && cat %{t:stem}.tmp.stats | FileCheck %s -; RUN: not llc -mtriple=arm64-apple-macosx --save-stats=invalid -o %t.s %s 2>&1 | FileCheck %s --check-prefix=INVALID_ARG +; RUN: llc --save-stats=obj -o %t.s %s && cat %t.stats | FileCheck %s +; RUN: llc --save-stats=cwd -o %t.s %s && cat %{t:stem}.tmp.stats | FileCheck %s +; RUN: llc --save-stats -o %t.s %s && cat %{t:stem}.tmp.stats | FileCheck %s +; RUN: not llc --save-stats=invalid -o %t.s %s 2>&1 | FileCheck %s --check-prefix=INVALID_ARG ; CHECK: { ; CHECK: "asm-printer.EmittedInsts": From 1ffe79d092909a2075705a10d932f0af0825577b Mon Sep 17 00:00:00 2001 From: "A. 
Jiang" Date: Mon, 10 Nov 2025 20:04:50 +0800 Subject: [PATCH 24/24] [libc++] Avoid overloaded `operator,` for (`T`, `Iter`) cases (#161049) Several components in libc++ aren't defending against overloaded `operator,(T, Iter)` currently. Existing deleted overloads in `test_iterators.h` are insufficient for such cases. This PR adds corresponding deleted overloads with reversed order and fixes these libc++ components. - `piecewise_linear_distribution`'s iterator pair constructor, - `piecewise_linear_distribution::param_type`'s iterator pair constructor, - `piecewise_constant_distribution`'s iterator pair constructor, - `piecewise_constant_distribution::param_type`'s iterator pair constructor, - `money_get::do_get`, - `money_put::do_put`, and - `num_put::do_put`. --- libcxx/include/__locale_dir/money.h | 4 +- libcxx/include/__locale_dir/num.h | 5 +- libcxx/include/__locale_dir/pad_and_output.h | 11 ++- .../piecewise_constant_distribution.h | 5 +- .../__random/piecewise_linear_distribution.h | 5 +- .../deque/deque.cons/iter_iter.pass.cpp | 5 +- .../deque/deque.cons/iter_iter_alloc.pass.cpp | 5 +- .../vector.bool/construct_iter_iter.pass.cpp | 4 +- .../construct_iter_iter_alloc.pass.cpp | 4 +- .../vector.cons/construct_iter_iter.pass.cpp | 4 +- .../construct_iter_iter_alloc.pass.cpp | 4 +- .../get_long_double_en_US.pass.cpp | 1 + .../get_long_double_fr_FR.pass.cpp | 1 + .../get_long_double_overlong.pass.cpp | 2 + .../get_long_double_ru_RU.pass.cpp | 1 + .../get_long_double_zh_CN.pass.cpp | 1 + .../get_string_en_US.pass.cpp | 1 + .../put_long_double_en_US.pass.cpp | 1 + .../put_long_double_fr_FR.pass.cpp | 2 + .../put_long_double_ru_RU.pass.cpp | 2 + .../put_long_double_zh_CN.pass.cpp | 1 + .../put_string_en_US.pass.cpp | 2 + .../facet.num.put.members/put_bool.pass.cpp | 2 + .../put_double.hex.pass.cpp | 1 + .../facet.num.put.members/put_double.pass.cpp | 1 + .../facet.num.put.members/put_long.pass.cpp | 2 + .../put_long_double.hex.pass.cpp | 1 + .../put_long_double.pass.cpp | 1 + .../put_long_long.pass.cpp | 2 + .../put_pointer.pass.cpp | 2 + .../put_unsigned_long.pass.cpp | 2 + .../put_unsigned_long_long.pass.cpp | 2 + .../ctor_iterator.pass.cpp | 6 +- .../param_ctor_iterator.pass.cpp | 6 +- .../ctor_iterator.pass.cpp | 6 +- .../param_ctor_iterator.pass.cpp | 6 +- libcxx/test/support/test_iterators.h | 68 +++++++++++++++---- 37 files changed, 133 insertions(+), 46 deletions(-) diff --git a/libcxx/include/__locale_dir/money.h b/libcxx/include/__locale_dir/money.h index c1296665505e1..12ba38467d805 100644 --- a/libcxx/include/__locale_dir/money.h +++ b/libcxx/include/__locale_dir/money.h @@ -433,7 +433,7 @@ bool money_get<_CharT, _InputIterator>::__do_get( __err |= ios_base::failbit; return false; } - for (++__b; __fd > 0; --__fd, ++__b) { + for (++__b; __fd > 0; --__fd, (void)++__b) { if (__b == __e || !__ct.is(ctype_base::digit, *__b)) { __err |= ios_base::failbit; return false; @@ -451,7 +451,7 @@ bool money_get<_CharT, _InputIterator>::__do_get( } } if (__trailing_sign) { - for (unsigned __i = 1; __i < __trailing_sign->size(); ++__i, ++__b) { + for (unsigned __i = 1; __i < __trailing_sign->size(); ++__i, (void)++__b) { if (__b == __e || *__b != (*__trailing_sign)[__i]) { __err |= ios_base::failbit; return false; diff --git a/libcxx/include/__locale_dir/num.h b/libcxx/include/__locale_dir/num.h index 7ca8ffe348959..ff357cd2d97db 100644 --- a/libcxx/include/__locale_dir/num.h +++ b/libcxx/include/__locale_dir/num.h @@ -9,6 +9,7 @@ #ifndef _LIBCPP___LOCALE_DIR_NUM_H #define 
_LIBCPP___LOCALE_DIR_NUM_H +#include <__algorithm/copy.h> #include <__algorithm/find.h> #include <__algorithm/reverse.h> #include <__charconv/to_chars_integral.h> @@ -885,9 +886,7 @@ num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, char_ty const numpunct& __np = std::use_facet >(__iob.getloc()); typedef typename numpunct::string_type string_type; string_type __nm = __v ? __np.truename() : __np.falsename(); - for (typename string_type::iterator __i = __nm.begin(); __i != __nm.end(); ++__i, ++__s) - *__s = *__i; - return __s; + return std::copy(__nm.begin(), __nm.end(), __s); } template diff --git a/libcxx/include/__locale_dir/pad_and_output.h b/libcxx/include/__locale_dir/pad_and_output.h index a1cb37d0786da..bdd4d2856dad6 100644 --- a/libcxx/include/__locale_dir/pad_and_output.h +++ b/libcxx/include/__locale_dir/pad_and_output.h @@ -13,6 +13,8 @@ #if _LIBCPP_HAS_LOCALIZATION +# include <__algorithm/copy.h> +# include <__algorithm/fill_n.h> # include # if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -30,12 +32,9 @@ _LIBCPP_HIDE_FROM_ABI _OutputIterator __pad_and_output( __ns -= __sz; else __ns = 0; - for (; __ob < __op; ++__ob, ++__s) - *__s = *__ob; - for (; __ns; --__ns, ++__s) - *__s = __fl; - for (; __ob < __oe; ++__ob, ++__s) - *__s = *__ob; + __s = std::copy(__ob, __op, __s); + __s = std::fill_n(__s, __ns, __fl); + __s = std::copy(__op, __oe, __s); __iob.width(0); return __s; } diff --git a/libcxx/include/__random/piecewise_constant_distribution.h b/libcxx/include/__random/piecewise_constant_distribution.h index c5bfa8dc3a4be..3faf339325f74 100644 --- a/libcxx/include/__random/piecewise_constant_distribution.h +++ b/libcxx/include/__random/piecewise_constant_distribution.h @@ -9,9 +9,11 @@ #ifndef _LIBCPP___RANDOM_PIECEWISE_CONSTANT_DISTRIBUTION_H #define _LIBCPP___RANDOM_PIECEWISE_CONSTANT_DISTRIBUTION_H +#include <__algorithm/copy_n.h> #include <__algorithm/upper_bound.h> #include <__config> #include <__cstddef/ptrdiff_t.h> +#include <__iterator/back_insert_iterator.h> #include <__random/is_valid.h> #include <__random/uniform_real_distribution.h> #include <__vector/vector.h> @@ -190,8 +192,7 @@ piecewise_constant_distribution<_RealType>::param_type::param_type( __areas_.assign(1, 0.0); } else { __densities_.reserve(__b_.size() - 1); - for (size_t __i = 0; __i < __b_.size() - 1; ++__i, ++__f_w) - __densities_.push_back(*__f_w); + std::copy_n(__f_w, __b_.size() - 1, std::back_inserter(__densities_)); __init(); } } diff --git a/libcxx/include/__random/piecewise_linear_distribution.h b/libcxx/include/__random/piecewise_linear_distribution.h index a9906430c005c..8aa3f19ca9004 100644 --- a/libcxx/include/__random/piecewise_linear_distribution.h +++ b/libcxx/include/__random/piecewise_linear_distribution.h @@ -9,9 +9,11 @@ #ifndef _LIBCPP___RANDOM_PIECEWISE_LINEAR_DISTRIBUTION_H #define _LIBCPP___RANDOM_PIECEWISE_LINEAR_DISTRIBUTION_H +#include <__algorithm/copy_n.h> #include <__algorithm/upper_bound.h> #include <__config> #include <__cstddef/ptrdiff_t.h> +#include <__iterator/back_insert_iterator.h> #include <__random/is_valid.h> #include <__random/uniform_real_distribution.h> #include <__vector/comparison.h> @@ -194,8 +196,7 @@ piecewise_linear_distribution<_RealType>::param_type::param_type( __areas_.assign(1, 0.0); } else { __densities_.reserve(__b_.size()); - for (size_t __i = 0; __i < __b_.size(); ++__i, ++__f_w) - __densities_.push_back(*__f_w); + std::copy_n(__f_w, __b_.size(), std::back_inserter(__densities_)); __init(); } } diff --git 
a/libcxx/test/std/containers/sequences/deque/deque.cons/iter_iter.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.cons/iter_iter.pass.cpp index 1f8a044d0b602..59d93ac7ea411 100644 --- a/libcxx/test/std/containers/sequences/deque/deque.cons/iter_iter.pass.cpp +++ b/libcxx/test/std/containers/sequences/deque/deque.cons/iter_iter.pass.cpp @@ -11,6 +11,7 @@ // template deque(InputIterator f, InputIterator l); #include "asan_testing.h" +#include #include #include #include @@ -28,13 +29,11 @@ void test(InputIterator f, InputIterator l) { typedef typename std::iterator_traits::value_type T; typedef std::allocator Allocator; typedef std::deque C; - typedef typename C::const_iterator const_iterator; C d(f, l); assert(d.size() == static_cast(std::distance(f, l))); assert(static_cast(std::distance(d.begin(), d.end())) == d.size()); LIBCPP_ASSERT(is_double_ended_contiguous_container_asan_correct(d)); - for (const_iterator i = d.begin(), e = d.end(); i != e; ++i, ++f) - assert(*i == *f); + assert(std::equal(d.begin(), d.end(), f)); } template diff --git a/libcxx/test/std/containers/sequences/deque/deque.cons/iter_iter_alloc.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.cons/iter_iter_alloc.pass.cpp index 61318c3d0f2d3..ef876bb272fc7 100644 --- a/libcxx/test/std/containers/sequences/deque/deque.cons/iter_iter_alloc.pass.cpp +++ b/libcxx/test/std/containers/sequences/deque/deque.cons/iter_iter_alloc.pass.cpp @@ -12,6 +12,7 @@ // deque(InputIterator f, InputIterator l, const allocator_type& a); #include "asan_testing.h" +#include #include #include #include @@ -28,14 +29,12 @@ template void test(InputIterator f, InputIterator l, const Allocator& a) { typedef typename std::iterator_traits::value_type T; typedef std::deque C; - typedef typename C::const_iterator const_iterator; C d(f, l, a); assert(d.get_allocator() == a); assert(d.size() == static_cast(std::distance(f, l))); assert(static_cast(std::distance(d.begin(), d.end())) == d.size()); LIBCPP_ASSERT(is_double_ended_contiguous_container_asan_correct(d)); - for (const_iterator i = d.begin(), e = d.end(); i != e; ++i, ++f) - assert(*i == *f); + assert(std::equal(d.begin(), d.end(), f)); } void basic_test() { diff --git a/libcxx/test/std/containers/sequences/vector.bool/construct_iter_iter.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/construct_iter_iter.pass.cpp index e9fb2e6ecfbac..b862583c495e1 100644 --- a/libcxx/test/std/containers/sequences/vector.bool/construct_iter_iter.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector.bool/construct_iter_iter.pass.cpp @@ -11,6 +11,7 @@ // template vector(InputIter first, InputIter last); +#include #include #include #include @@ -24,8 +25,7 @@ TEST_CONSTEXPR_CXX20 void test(Iterator first, Iterator last) { C c(first, last); LIBCPP_ASSERT(c.__invariants()); assert(c.size() == static_cast(std::distance(first, last))); - for (typename C::const_iterator i = c.cbegin(), e = c.cend(); i != e; ++i, ++first) - assert(*i == *first); + assert(std::equal(c.cbegin(), c.cend(), first)); } TEST_CONSTEXPR_CXX20 bool tests() { diff --git a/libcxx/test/std/containers/sequences/vector.bool/construct_iter_iter_alloc.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/construct_iter_iter_alloc.pass.cpp index 71a176a0a64ba..3fe462eef80ed 100644 --- a/libcxx/test/std/containers/sequences/vector.bool/construct_iter_iter_alloc.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector.bool/construct_iter_iter_alloc.pass.cpp @@ -12,6 +12,7 @@ // template vector(InputIter first, 
InputIter last, // const allocator_type& a); +#include #include #include #include @@ -25,8 +26,7 @@ TEST_CONSTEXPR_CXX20 void test(Iterator first, Iterator last, const typename C:: C c(first, last, a); LIBCPP_ASSERT(c.__invariants()); assert(c.size() == static_cast(std::distance(first, last))); - for (typename C::const_iterator i = c.cbegin(), e = c.cend(); i != e; ++i, ++first) - assert(*i == *first); + assert(std::equal(c.cbegin(), c.cend(), first)); } TEST_CONSTEXPR_CXX20 bool tests() { diff --git a/libcxx/test/std/containers/sequences/vector/vector.cons/construct_iter_iter.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.cons/construct_iter_iter.pass.cpp index 1a6364a8018bc..f2ac013987eb8 100644 --- a/libcxx/test/std/containers/sequences/vector/vector.cons/construct_iter_iter.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector/vector.cons/construct_iter_iter.pass.cpp @@ -10,6 +10,7 @@ // template vector(InputIter first, InputIter last); +#include #include #include #include @@ -31,8 +32,7 @@ TEST_CONSTEXPR_CXX20 void test(Iterator first, Iterator last) { LIBCPP_ASSERT(c.__invariants()); assert(c.size() == static_cast(std::distance(first, last))); LIBCPP_ASSERT(is_contiguous_container_asan_correct(c)); - for (typename C::const_iterator i = c.cbegin(), e = c.cend(); i != e; ++i, ++first) - assert(*i == *first); + assert(std::equal(c.cbegin(), c.cend(), first)); } // Test with an empty range { diff --git a/libcxx/test/std/containers/sequences/vector/vector.cons/construct_iter_iter_alloc.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.cons/construct_iter_iter_alloc.pass.cpp index d1eff51011c4f..56a3778ddf965 100644 --- a/libcxx/test/std/containers/sequences/vector/vector.cons/construct_iter_iter_alloc.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector/vector.cons/construct_iter_iter_alloc.pass.cpp @@ -11,6 +11,7 @@ // template vector(InputIter first, InputIter last, // const allocator_type& a); +#include #include #include #include @@ -31,8 +32,7 @@ TEST_CONSTEXPR_CXX20 void test(Iterator first, Iterator last, const A& a) { LIBCPP_ASSERT(c.__invariants()); assert(c.size() == static_cast(std::distance(first, last))); LIBCPP_ASSERT(is_contiguous_container_asan_correct(c)); - for (typename C::const_iterator i = c.cbegin(), e = c.cend(); i != e; ++i, ++first) - assert(*i == *first); + assert(std::equal(c.cbegin(), c.cend(), first)); } #if TEST_STD_VER >= 11 diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_en_US.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_en_US.pass.cpp index 9997b07134563..9861662bb59c7 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_en_US.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_en_US.pass.cpp @@ -15,6 +15,7 @@ // Bionic has minimal locale support, investigate this later. 
// XFAIL: LIBCXX-ANDROID-FIXME +// XFAIL: FROZEN-CXX03-HEADERS-FIXME // REQUIRES: locale.en_US.UTF-8 diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp index c9ed59f3cb9aa..002fc4b1ec7ef 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp @@ -8,6 +8,7 @@ // NetBSD does not support LC_MONETARY at the moment // XFAIL: netbsd +// XFAIL: FROZEN-CXX03-HEADERS-FIXME // REQUIRES: locale.fr_FR.UTF-8 diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_overlong.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_overlong.pass.cpp index 0b7a38e5104cd..8fe74cdaca5e4 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_overlong.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_overlong.pass.cpp @@ -16,6 +16,8 @@ // Ensure that money_get::do_get correct works when the input doesn't fit into the stack buffer // (100 characters currently). +// XFAIL: FROZEN-CXX03-HEADERS-FIXME + #include #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp index 371cf0e90c8d3..7ce267d0617b0 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp @@ -14,6 +14,7 @@ // ADDITIONAL_COMPILE_FLAGS: -DRU_MON_THOU_SEP=%{LOCALE_CONV_RU_RU_UTF_8_MON_THOUSANDS_SEP} // XFAIL: glibc-old-ru_RU-decimal-point +// XFAIL: FROZEN-CXX03-HEADERS-FIXME // diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp index c86df7e6b53bf..d83167d1ee458 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp @@ -10,6 +10,7 @@ // XFAIL: netbsd // XFAIL: LIBCXX-FREEBSD-FIXME +// XFAIL: FROZEN-CXX03-HEADERS-FIXME // REQUIRES: locale.zh_CN.UTF-8 diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_string_en_US.pass.cpp 
b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_string_en_US.pass.cpp index 478df7964f6d2..0531260487b9f 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_string_en_US.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_string_en_US.pass.cpp @@ -15,6 +15,7 @@ // Bionic has minimal locale support, investigate this later. // XFAIL: LIBCXX-ANDROID-FIXME +// XFAIL: FROZEN-CXX03-HEADERS-FIXME // REQUIRES: locale.en_US.UTF-8 diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_en_US.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_en_US.pass.cpp index 4b767fae871fa..0f2c81a805282 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_en_US.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_en_US.pass.cpp @@ -15,6 +15,7 @@ // Bionic has minimal locale support, investigate this later. // XFAIL: LIBCXX-ANDROID-FIXME +// XFAIL: FROZEN-CXX03-HEADERS-FIXME // REQUIRES: locale.en_US.UTF-8 diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp index f9d7998b07ff4..733eea94fd9bd 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp @@ -9,6 +9,8 @@ // NetBSD does not support LC_MONETARY at the moment // XFAIL: netbsd +// XFAIL: FROZEN-CXX03-HEADERS-FIXME + // REQUIRES: locale.fr_FR.UTF-8 // ADDITIONAL_COMPILE_FLAGS: -DFR_MON_THOU_SEP=%{LOCALE_CONV_FR_FR_UTF_8_MON_THOUSANDS_SEP} diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp index be1e397488468..24cc4fdb47f75 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp @@ -9,6 +9,8 @@ // NetBSD does not support LC_MONETARY at the moment // XFAIL: netbsd +// XFAIL: FROZEN-CXX03-HEADERS-FIXME + // REQUIRES: locale.ru_RU.UTF-8 // ADDITIONAL_COMPILE_FLAGS: -DRU_MON_THOU_SEP=%{LOCALE_CONV_RU_RU_UTF_8_MON_THOUSANDS_SEP} diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp index 25046a8417083..d970b55eb704b 100644 --- 
a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp @@ -10,6 +10,7 @@ // XFAIL: netbsd // XFAIL: LIBCXX-FREEBSD-FIXME +// XFAIL: FROZEN-CXX03-HEADERS-FIXME // REQUIRES: locale.zh_CN.UTF-8 diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_string_en_US.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_string_en_US.pass.cpp index 1c8710a008f27..9770912da9dcf 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_string_en_US.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_string_en_US.pass.cpp @@ -16,6 +16,8 @@ // Bionic has minimal locale support, investigate this later. // XFAIL: LIBCXX-ANDROID-FIXME +// XFAIL: FROZEN-CXX03-HEADERS-FIXME + // REQUIRES: locale.en_US.UTF-8 #include diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_bool.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_bool.pass.cpp index d62a27a0f6ae9..22997ebbbc82d 100644 --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_bool.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_bool.pass.cpp @@ -12,6 +12,8 @@ // iter_type put(iter_type s, ios_base& iob, char_type fill, bool v) const; +// XFAIL: FROZEN-CXX03-HEADERS-FIXME + #include #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_double.hex.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_double.hex.pass.cpp index dea2be771e0c6..a4ef158954f59 100644 --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_double.hex.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_double.hex.pass.cpp @@ -13,6 +13,7 @@ // iter_type put(iter_type s, ios_base& iob, char_type fill, double v) const; // XFAIL: win32-broken-printf-a-precision +// XFAIL: FROZEN-CXX03-HEADERS-FIXME #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_double.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_double.pass.cpp index b131a41ceac34..45ede5a395c63 100644 --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_double.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_double.pass.cpp @@ -13,6 +13,7 @@ // iter_type put(iter_type s, ios_base& iob, char_type fill, double v) const; // XFAIL: win32-broken-printf-g-precision +// XFAIL: FROZEN-CXX03-HEADERS-FIXME #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long.pass.cpp 
b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long.pass.cpp index 7f034d487e57e..c3565c5bab11d 100644 --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long.pass.cpp @@ -12,6 +12,8 @@ // iter_type put(iter_type s, ios_base& iob, char_type fill, long v) const; +// XFAIL: FROZEN-CXX03-HEADERS-FIXME + #include #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.hex.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.hex.pass.cpp index 8db40b9e0dcbc..9e84fa8a53afe 100644 --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.hex.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.hex.pass.cpp @@ -13,6 +13,7 @@ // iter_type put(iter_type s, ios_base& iob, char_type fill, long double v) const; // XFAIL: win32-broken-printf-a-precision +// XFAIL: FROZEN-CXX03-HEADERS-FIXME #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp index d044898a1f828..e2868cfb37140 100644 --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp @@ -13,6 +13,7 @@ // iter_type put(iter_type s, ios_base& iob, char_type fill, long double v) const; // XFAIL: win32-broken-printf-g-precision +// XFAIL: FROZEN-CXX03-HEADERS-FIXME #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_long.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_long.pass.cpp index 2f4dd42e1a20c..4f60835880422 100644 --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_long.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_long.pass.cpp @@ -12,6 +12,8 @@ // iter_type put(iter_type s, ios_base& iob, char_type fill, long long v) const; +// XFAIL: FROZEN-CXX03-HEADERS-FIXME + #include #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_pointer.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_pointer.pass.cpp index fed5b4a610fd4..57607e6d6a521 100644 --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_pointer.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_pointer.pass.cpp @@ -12,6 +12,8 @@ // iter_type put(iter_type s, ios_base& iob, char_type fill, void* v) const; +// XFAIL: FROZEN-CXX03-HEADERS-FIXME + #include 
#include #include diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_unsigned_long.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_unsigned_long.pass.cpp index 714c8dd8ccd9f..11216a3d111e3 100644 --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_unsigned_long.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_unsigned_long.pass.cpp @@ -12,6 +12,8 @@ // iter_type put(iter_type s, ios_base& iob, char_type fill, unsigned long v) const; +// XFAIL: FROZEN-CXX03-HEADERS-FIXME + #include #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_unsigned_long_long.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_unsigned_long_long.pass.cpp index 70ae4b3ae9de0..5dd555eda1e56 100644 --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_unsigned_long_long.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_unsigned_long_long.pass.cpp @@ -12,6 +12,8 @@ // iter_type put(iter_type s, ios_base& iob, char_type fill, unsigned long long v) const; +// XFAIL: FROZEN-CXX03-HEADERS-FIXME + #include #include #include diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/ctor_iterator.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/ctor_iterator.pass.cpp index ea6e807ca47b5..400cfd78d94a3 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/ctor_iterator.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/ctor_iterator.pass.cpp @@ -16,20 +16,24 @@ // InputIteratorB lastB, // InputIteratorW firstW); +// XFAIL: FROZEN-CXX03-HEADERS-FIXME + #include #include #include +#include "test_iterators.h" #include "test_macros.h" int main(int, char**) { { typedef std::piecewise_constant_distribution<> D; + typedef cpp17_input_iterator InIt; double b[] = {10}; double p[] = {12}; - D d(b, b, p); + D d((InIt(b)), (InIt(b)), (InIt(p))); std::vector iv = d.intervals(); assert(iv.size() == 2); assert(iv[0] == 0); diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/param_ctor_iterator.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/param_ctor_iterator.pass.cpp index baf6108b7e2e8..8b3e21fc0932e 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/param_ctor_iterator.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/param_ctor_iterator.pass.cpp @@ -15,11 +15,14 @@ // param_type(InputIteratorB firstB, InputIteratorB lastB, // InputIteratorW firstW); +// XFAIL: FROZEN-CXX03-HEADERS-FIXME + #include #include #include +#include "test_iterators.h" #include "test_macros.h" int main(int, char**) @@ -27,9 +30,10 @@ int main(int, char**) { typedef std::piecewise_constant_distribution<> D; typedef D::param_type P; + typedef cpp17_input_iterator InIt; double b[] = {10}; double p[] = {12}; - P pa(b, b, p); + P pa((InIt(b)), (InIt(b)), (InIt(p))); std::vector iv = pa.intervals(); assert(iv.size() == 2); assert(iv[0] == 0); 
diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/ctor_iterator.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/ctor_iterator.pass.cpp index 24f7d4e18c36a..8ed56ecdd31e9 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/ctor_iterator.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/ctor_iterator.pass.cpp @@ -16,20 +16,24 @@ // InputIteratorB lastB, // InputIteratorW firstW); +// XFAIL: FROZEN-CXX03-HEADERS-FIXME + #include #include #include +#include "test_iterators.h" #include "test_macros.h" int main(int, char**) { { typedef std::piecewise_linear_distribution<> D; + typedef cpp17_input_iterator InIt; double b[] = {10}; double p[] = {12}; - D d(b, b, p); + D d((InIt(b)), (InIt(b)), (InIt(p))); std::vector iv = d.intervals(); assert(iv.size() == 2); assert(iv[0] == 0); diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/param_ctor_iterator.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/param_ctor_iterator.pass.cpp index 04ded2a1c9706..272d0b4c87459 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/param_ctor_iterator.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/param_ctor_iterator.pass.cpp @@ -15,11 +15,14 @@ // param_type(InputIteratorB firstB, InputIteratorB lastB, // InputIteratorW firstW); +// XFAIL: FROZEN-CXX03-HEADERS-FIXME + #include #include #include +#include "test_iterators.h" #include "test_macros.h" int main(int, char**) @@ -27,9 +30,10 @@ int main(int, char**) { typedef std::piecewise_linear_distribution<> D; typedef D::param_type P; + typedef cpp17_input_iterator InIt; double b[] = {10}; double p[] = {12}; - P pa(b, b, p); + P pa((InIt(b)), (InIt(b)), (InIt(p))); std::vector iv = pa.intervals(); assert(iv.size() == 2); assert(iv[0] == 0); diff --git a/libcxx/test/support/test_iterators.h b/libcxx/test/support/test_iterators.h index 0335a4c561017..4fc8345c2dcef 100644 --- a/libcxx/test/support/test_iterators.h +++ b/libcxx/test/support/test_iterators.h @@ -59,6 +59,9 @@ class cpp17_output_iterator template void operator,(T const &) = delete; + + template + friend void operator,(const T&, const cpp17_output_iterator&) = delete; }; #if TEST_STD_VER > 14 template @@ -109,6 +112,9 @@ class cpp17_input_iterator template void operator,(T const &) = delete; + + template + friend void operator,(const T&, const cpp17_input_iterator&) = delete; }; #if TEST_STD_VER > 14 template @@ -157,6 +163,9 @@ class forward_iterator template void operator,(T const &) = delete; + + template + friend void operator,(const T&, const forward_iterator&) = delete; }; #if TEST_STD_VER > 14 template @@ -203,6 +212,9 @@ class bidirectional_iterator template void operator,(T const &) = delete; + + template + friend void operator,(const T&, const bidirectional_iterator&) = delete; }; #if TEST_STD_VER > 14 template @@ -261,6 +273,9 @@ class random_access_iterator template void operator,(T const &) = delete; + + template + friend void operator,(const T&, const random_access_iterator&) = delete; }; #if TEST_STD_VER > 14 template @@ -390,6 +405,9 @@ class three_way_random_access_iterator { template void operator,(T const&) = delete; + + template + friend void operator,(const T&, const three_way_random_access_iterator&) = delete; }; #if TEST_STD_VER > 14 template @@ -485,6 +503,9 @@ 
class cpp20_random_access_iterator { template void operator,(T const&) = delete; + + template + friend void operator,(const T&, const cpp20_random_access_iterator&) = delete; }; template cpp20_random_access_iterator(It) -> cpp20_random_access_iterator; @@ -578,6 +599,9 @@ class contiguous_iterator { template void operator,(T const&) = delete; + + template + friend void operator,(const T&, const contiguous_iterator&) = delete; }; template contiguous_iterator(It) -> contiguous_iterator; @@ -635,6 +659,9 @@ class three_way_contiguous_iterator template void operator,(T const &) = delete; + + template + friend void operator,(const T&, const three_way_contiguous_iterator&) = delete; }; template three_way_contiguous_iterator(It) -> three_way_contiguous_iterator; @@ -746,7 +773,10 @@ struct ThrowingIterator { template void operator,(T2 const &) = delete; -private: + template + friend void operator,(const T2&, const ThrowingIterator&) = delete; + + private: const T* begin_; const T* end_; const T* current_; @@ -817,7 +847,10 @@ struct NonThrowingIterator { template void operator,(T2 const &) = delete; -private: + template + friend void operator,(const T2&, const NonThrowingIterator&) = delete; + + private: const T *begin_; const T *end_; const T *current_; @@ -847,6 +880,9 @@ class cpp20_input_iterator template void operator,(T const &) = delete; + + template + friend void operator,(const T&, const cpp20_input_iterator&) = delete; }; template cpp20_input_iterator(It) -> cpp20_input_iterator; @@ -884,6 +920,9 @@ class cpp20_output_iterator { template void operator,(T const&) = delete; + + template + friend void operator,(const T&, const cpp20_output_iterator&) = delete; }; template cpp20_output_iterator(It) -> cpp20_output_iterator; @@ -1077,17 +1116,20 @@ class operation_counting_iterator { template void operator,(T const &) = delete; -private: - constexpr void moved_by(difference_type n) { - if (counts_ == nullptr) - return; - if (n > 0) - ++counts_->increments; - else if (n < 0) - ++counts_->decrements; - else - ++counts_->zero_moves; - } + template + friend void operator,(const T&, const operation_counting_iterator&) = delete; + + private: + constexpr void moved_by(difference_type n) { + if (counts_ == nullptr) + return; + if (n > 0) + ++counts_->increments; + else if (n < 0) + ++counts_->decrements; + else + ++counts_->zero_moves; + } decltype(base(std::declval())) base_; IteratorOpCounts* counts_ = nullptr;
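The hunks above show the two halves of the defense this patch relies on: loop headers in libc++ now write "(void)++__b" so the comma in "--__fd, (void)++__b" is always the built-in operator, and the test iterators gain deleted friend overloads of operator,() with the iterator on the right-hand side, so any remaining unguarded "x, ++it" expression fails to compile in the test suite. The sketch below is illustrative only and is not libc++ code or part of this patch; the names ToyIterator and count_between are invented for the example, but the deleted-overload pattern and the (void) cast mirror the changes in test_iterators.h and money.h.

    // Minimal sketch of the hazard and the defense (assumed names, not libc++ APIs).
    #include <cassert>

    template <class It>
    class ToyIterator {
      It it_;

    public:
      explicit ToyIterator(It it) : it_(it) {}
      decltype(auto) operator*() const { return *it_; }
      ToyIterator& operator++() { ++it_; return *this; }
      friend bool operator==(ToyIterator a, ToyIterator b) { return a.it_ == b.it_; }
      friend bool operator!=(ToyIterator a, ToyIterator b) { return !(a == b); }

      // Same defense as the test_iterators.h change: reject an overloaded comma
      // with the iterator on either side of the comma expression.
      template <class T>
      void operator,(const T&) = delete;
      template <class T>
      friend void operator,(const T&, const ToyIterator&) = delete;
    };

    // Loop written in the style of the fixed money_get::__do_get: the (void) cast
    // forces the built-in comma operator, so this compiles against ToyIterator.
    // Writing "++n, ++first" instead would select the deleted friend operator,()
    // above and fail to compile, which is exactly what the test iterators detect.
    template <class It>
    int count_between(It first, It last) {
      int n = 0;
      for (; first != last; ++n, (void)++first)
        ;
      return n;
    }

    int main() {
      int a[] = {1, 2, 3, 4};
      assert(count_between(ToyIterator<int*>(a), ToyIterator<int*>(a + 4)) == 4);
      return 0;
    }

The reversed-order friend overloads matter because the pre-existing deleted member operator,() only fires when the test iterator is the left operand ("it, x"); expressions of the form "x, ++it", as in the loop increments fixed by this patch, need a deleted overload that accepts the iterator on the right to be diagnosed.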