From e0d9c9c33329f5801c832661132a8bef2f4be836 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Thu, 21 Aug 2025 07:32:24 -0700 Subject: [PATCH 01/21] [MLIR] Apply clang-tidy fixes for llvm-qualified-auto in Vectorization.cpp (NFC) --- mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index 9d62491214018..0f317eac8fa41 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -3911,21 +3911,21 @@ struct Conv1DGenerator Value lhs = vector::TransferReadOp::create( rewriter, loc, lhsType, lhsShaped, ValueRange{zero, zero, zero}, /*padding=*/arith::getZeroConstant(rewriter, loc, lhsEltType)); - auto maybeMaskedLhs = maybeMaskXferOp( + auto *maybeMaskedLhs = maybeMaskXferOp( lhsType.getShape(), lhsType.getScalableDims(), lhs.getDefiningOp()); // Read rhs slice of size {kw, c} @ [0, 0]. Value rhs = vector::TransferReadOp::create( rewriter, loc, rhsType, rhsShaped, ValueRange{zero, zero}, /*padding=*/arith::getZeroConstant(rewriter, loc, rhsEltType)); - auto maybeMaskedRhs = maybeMaskXferOp( + auto *maybeMaskedRhs = maybeMaskXferOp( rhsType.getShape(), rhsType.getScalableDims(), rhs.getDefiningOp()); // Read res slice of size {n, w, c} @ [0, 0, 0]. 
Value res = vector::TransferReadOp::create( rewriter, loc, resType, resShaped, ValueRange{zero, zero, zero}, /*padding=*/arith::getZeroConstant(rewriter, loc, resEltType)); - auto maybeMaskedRes = maybeMaskXferOp( + auto *maybeMaskedRes = maybeMaskXferOp( resType.getShape(), resType.getScalableDims(), res.getDefiningOp()); //===------------------------------------------------------------------===// From 4d7093b80618e63af91a64c7a01a7c423b12841c Mon Sep 17 00:00:00 2001 From: Pankaj Dwivedi Date: Thu, 30 Oct 2025 12:32:32 +0530 Subject: [PATCH 02/21] [AMDGPU] Enable "amdgpu-uniform-intrinsic-combine" pass in pipeline. (#162819) This PR enables the AMDGPUUniformIntrinsicCombine pass in the llc pipeline. It also introduces the "amdgpu-uniform-intrinsic-combine" command-line flag to enable/disable the pass. See the PR: https://github.com/llvm/llvm-project/pull/116953 --- .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 5 + .../AMDGPU/AMDGPUUniformIntrinsicCombine.cpp | 2 +- .../GlobalISel/llvm.amdgcn.ballot.i32.ll | 63 +++--- .../GlobalISel/llvm.amdgcn.ballot.i64.ll | 67 +++--- .../amdgpu-miscellaneous-uniform-intrinsic.ll | 173 +++++++++++++++ .../amdgpu-simplify-uniform-waterfall.ll | 1 + .../amdgpu-uniform-intrinsic-combine.ll | 1 + .../amdgpu-uniform-temporal-divergence.ll | 1 + .../CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll | 18 +- llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll | 6 +- llvm/test/CodeGen/AMDGPU/llc-pipeline.ll | 23 ++ .../CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll | 58 ++--- .../CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll | 58 ++--- .../CodeGen/AMDGPU/llvm.amdgcn.permlane64.ll | 75 ++----- .../AMDGPU/llvm.amdgcn.permlane64.ptr.ll | 77 ++----- .../AMDGPU/llvm.amdgcn.readfirstlane.ll | 48 ++--- .../CodeGen/AMDGPU/llvm.amdgcn.readlane.ll | 49 ++--- .../spill-vgpr-to-agpr-update-regscavenger.ll | 23 +- .../AMDGPU/splitkit-getsubrangeformask.ll | 198 +++++++++--------- llvm/test/CodeGen/AMDGPU/wqm.ll | 18 +- 20 files changed, 524 insertions(+), 440 deletions(-) create mode 
100644 llvm/test/CodeGen/AMDGPU/amdgpu-miscellaneous-uniform-intrinsic.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 75a94ac891819..b28c50e3f5b6d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -1315,6 +1315,9 @@ void AMDGPUPassConfig::addIRPasses() { isPassEnabled(EnableImageIntrinsicOptimizer)) addPass(createAMDGPUImageIntrinsicOptimizerPass(&TM)); + if (EnableUniformIntrinsicCombine) + addPass(createAMDGPUUniformIntrinsicCombineLegacyPass()); + // This can be disabled by passing ::Disable here or on the command line // with --expand-variadics-override=disable. addPass(createExpandVariadicsPass(ExpandVariadicsMode::Lowering)); @@ -2066,6 +2069,8 @@ void AMDGPUCodeGenPassBuilder::addIRPasses(AddIRPass &addPass) const { if (isPassEnabled(EnableImageIntrinsicOptimizer)) addPass(AMDGPUImageIntrinsicOptimizerPass(TM)); + if (EnableUniformIntrinsicCombine) + addPass(AMDGPUUniformIntrinsicCombinePass()); // This can be disabled by passing ::Disable here or on the command line // with --expand-variadics-override=disable. 
addPass(ExpandVariadicsPass(ExpandVariadicsMode::Lowering)); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp index 65e6ed9d1d428..b5e2d76db662e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp @@ -188,4 +188,4 @@ INITIALIZE_PASS_END(AMDGPUUniformIntrinsicCombineLegacy, DEBUG_TYPE, FunctionPass *llvm::createAMDGPUUniformIntrinsicCombineLegacyPass() { return new AMDGPUUniformIntrinsicCombineLegacy(); -} +} \ No newline at end of file diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll index 7714c032d1737..d3e211855d7ed 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll @@ -113,9 +113,9 @@ false: define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare(i32 inreg %v) { ; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_and_b32 s0, 1, s0 -; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 -; CHECK-NEXT: s_cmp_eq_u32 s0, 0 +; CHECK-NEXT: s_xor_b32 s0, s0, 1 +; CHECK-NEXT: s_and_b32 s0, s0, 1 +; CHECK-NEXT: s_cmp_lg_u32 s0, 0 ; CHECK-NEXT: s_cbranch_scc1 .LBB8_2 ; CHECK-NEXT: ; %bb.1: ; %true ; CHECK-NEXT: s_mov_b32 s0, 42 @@ -161,16 +161,17 @@ false: define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare(i32 inreg %v) { ; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_and_b32 s0, 1, s0 -; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 +; CHECK-NEXT: s_xor_b32 s0, s0, 1 +; CHECK-NEXT: s_xor_b32 s0, s0, 1 +; CHECK-NEXT: s_and_b32 s0, s0, 1 ; CHECK-NEXT: s_cmp_lg_u32 s0, 0 -; CHECK-NEXT: s_cbranch_scc0 .LBB10_2 -; CHECK-NEXT: ; %bb.1: ; %false -; CHECK-NEXT: s_mov_b32 s0, 33 -; CHECK-NEXT: s_branch .LBB10_3 -; CHECK-NEXT: .LBB10_2: ; %true +; CHECK-NEXT: 
s_cbranch_scc1 .LBB10_2 +; CHECK-NEXT: ; %bb.1: ; %true ; CHECK-NEXT: s_mov_b32 s0, 42 ; CHECK-NEXT: s_branch .LBB10_3 +; CHECK-NEXT: .LBB10_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB10_3 ; CHECK-NEXT: .LBB10_3: %c = trunc i32 %v to i1 %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) @@ -208,11 +209,7 @@ false: define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare(i32 inreg %v) { ; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_cmp_lt_u32 s0, 12 -; CHECK-NEXT: s_cselect_b32 s0, 1, 0 -; CHECK-NEXT: s_and_b32 s0, 1, s0 -; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 -; CHECK-NEXT: s_cmp_eq_u32 s0, 0 +; CHECK-NEXT: s_cmp_ge_u32 s0, 12 ; CHECK-NEXT: s_cbranch_scc1 .LBB12_2 ; CHECK-NEXT: ; %bb.1: ; %true ; CHECK-NEXT: s_mov_b32 s0, 42 @@ -258,17 +255,13 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) { ; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_cmp_lt_u32 s0, 12 -; CHECK-NEXT: s_cselect_b32 s0, 1, 0 -; CHECK-NEXT: s_and_b32 s0, 1, s0 -; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 -; CHECK-NEXT: s_cmp_lg_u32 s0, 0 -; CHECK-NEXT: s_cbranch_scc0 .LBB14_2 -; CHECK-NEXT: ; %bb.1: ; %false -; CHECK-NEXT: s_mov_b32 s0, 33 -; CHECK-NEXT: s_branch .LBB14_3 -; CHECK-NEXT: .LBB14_2: ; %true +; CHECK-NEXT: s_cbranch_scc1 .LBB14_2 +; CHECK-NEXT: ; %bb.1: ; %true ; CHECK-NEXT: s_mov_b32 s0, 42 ; CHECK-NEXT: s_branch .LBB14_3 +; CHECK-NEXT: .LBB14_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB14_3 ; CHECK-NEXT: .LBB14_3: %c = icmp ult i32 %v, 12 %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) @@ -310,14 +303,12 @@ false: define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and(i32 inreg %v1, i32 inreg %v2) { ; CHECK-LABEL: branch_uniform_ballot_ne_zero_and: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_cmp_lt_u32 s0, 12 +; CHECK-NEXT: s_cmp_ge_u32 s0, 12 ; CHECK-NEXT: s_cselect_b32 s0, 1, 0 -; CHECK-NEXT: s_cmp_gt_u32 s1, 34 +; 
CHECK-NEXT: s_cmp_le_u32 s1, 34 ; CHECK-NEXT: s_cselect_b32 s1, 1, 0 -; CHECK-NEXT: s_and_b32 s0, s0, s1 -; CHECK-NEXT: s_and_b32 s0, 1, s0 -; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 -; CHECK-NEXT: s_cmp_eq_u32 s0, 0 +; CHECK-NEXT: s_or_b32 s0, s0, s1 +; CHECK-NEXT: s_cmp_lg_u32 s0, 0 ; CHECK-NEXT: s_cbranch_scc1 .LBB16_2 ; CHECK-NEXT: ; %bb.1: ; %true ; CHECK-NEXT: s_mov_b32 s0, 42 @@ -372,16 +363,14 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg ; CHECK-NEXT: s_cmp_gt_u32 s1, 34 ; CHECK-NEXT: s_cselect_b32 s1, 1, 0 ; CHECK-NEXT: s_and_b32 s0, s0, s1 -; CHECK-NEXT: s_and_b32 s0, 1, s0 -; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 ; CHECK-NEXT: s_cmp_lg_u32 s0, 0 -; CHECK-NEXT: s_cbranch_scc0 .LBB18_2 -; CHECK-NEXT: ; %bb.1: ; %false -; CHECK-NEXT: s_mov_b32 s0, 33 -; CHECK-NEXT: s_branch .LBB18_3 -; CHECK-NEXT: .LBB18_2: ; %true +; CHECK-NEXT: s_cbranch_scc1 .LBB18_2 +; CHECK-NEXT: ; %bb.1: ; %true ; CHECK-NEXT: s_mov_b32 s0, 42 ; CHECK-NEXT: s_branch .LBB18_3 +; CHECK-NEXT: .LBB18_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB18_3 ; CHECK-NEXT: .LBB18_3: %v1c = icmp ult i32 %v1, 12 %v2c = icmp ugt i32 %v2, 34 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll index 7b8166948610b..250fbc7c0f147 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll @@ -116,9 +116,9 @@ false: define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare(i32 inreg %v) { ; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_and_b32 s0, 1, s0 -; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 -; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0 +; CHECK-NEXT: s_xor_b32 s0, s0, 1 +; CHECK-NEXT: s_and_b32 s0, s0, 1 +; CHECK-NEXT: s_cmp_lg_u32 s0, 0 ; CHECK-NEXT: s_cbranch_scc1 .LBB8_2 ; CHECK-NEXT: ; %bb.1: ; %true ; CHECK-NEXT: 
s_mov_b32 s0, 42 @@ -164,16 +164,17 @@ false: define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare(i32 inreg %v) { ; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_and_b32 s0, 1, s0 -; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 -; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0 -; CHECK-NEXT: s_cbranch_scc0 .LBB10_2 -; CHECK-NEXT: ; %bb.1: ; %false -; CHECK-NEXT: s_mov_b32 s0, 33 -; CHECK-NEXT: s_branch .LBB10_3 -; CHECK-NEXT: .LBB10_2: ; %true +; CHECK-NEXT: s_xor_b32 s0, s0, 1 +; CHECK-NEXT: s_xor_b32 s0, s0, 1 +; CHECK-NEXT: s_and_b32 s0, s0, 1 +; CHECK-NEXT: s_cmp_lg_u32 s0, 0 +; CHECK-NEXT: s_cbranch_scc1 .LBB10_2 +; CHECK-NEXT: ; %bb.1: ; %true ; CHECK-NEXT: s_mov_b32 s0, 42 ; CHECK-NEXT: s_branch .LBB10_3 +; CHECK-NEXT: .LBB10_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB10_3 ; CHECK-NEXT: .LBB10_3: %c = trunc i32 %v to i1 %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) @@ -211,11 +212,7 @@ false: define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare(i32 inreg %v) { ; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_cmp_lt_u32 s0, 12 -; CHECK-NEXT: s_cselect_b32 s0, 1, 0 -; CHECK-NEXT: s_and_b32 s0, 1, s0 -; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 -; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0 +; CHECK-NEXT: s_cmp_ge_u32 s0, 12 ; CHECK-NEXT: s_cbranch_scc1 .LBB12_2 ; CHECK-NEXT: ; %bb.1: ; %true ; CHECK-NEXT: s_mov_b32 s0, 42 @@ -261,17 +258,13 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) { ; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_cmp_lt_u32 s0, 12 -; CHECK-NEXT: s_cselect_b32 s0, 1, 0 -; CHECK-NEXT: s_and_b32 s0, 1, s0 -; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 -; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0 -; CHECK-NEXT: s_cbranch_scc0 .LBB14_2 -; CHECK-NEXT: ; %bb.1: ; %false -; CHECK-NEXT: s_mov_b32 s0, 33 -; CHECK-NEXT: s_branch .LBB14_3 -; CHECK-NEXT: .LBB14_2: ; %true +; 
CHECK-NEXT: s_cbranch_scc1 .LBB14_2 +; CHECK-NEXT: ; %bb.1: ; %true ; CHECK-NEXT: s_mov_b32 s0, 42 ; CHECK-NEXT: s_branch .LBB14_3 +; CHECK-NEXT: .LBB14_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB14_3 ; CHECK-NEXT: .LBB14_3: %c = icmp ult i32 %v, 12 %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) @@ -313,14 +306,12 @@ false: define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and(i32 inreg %v1, i32 inreg %v2) { ; CHECK-LABEL: branch_uniform_ballot_ne_zero_and: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_cmp_lt_u32 s0, 12 +; CHECK-NEXT: s_cmp_ge_u32 s0, 12 ; CHECK-NEXT: s_cselect_b32 s0, 1, 0 -; CHECK-NEXT: s_cmp_gt_u32 s1, 34 +; CHECK-NEXT: s_cmp_le_u32 s1, 34 ; CHECK-NEXT: s_cselect_b32 s1, 1, 0 -; CHECK-NEXT: s_and_b32 s0, s0, s1 -; CHECK-NEXT: s_and_b32 s0, 1, s0 -; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 -; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0 +; CHECK-NEXT: s_or_b32 s0, s0, s1 +; CHECK-NEXT: s_cmp_lg_u32 s0, 0 ; CHECK-NEXT: s_cbranch_scc1 .LBB16_2 ; CHECK-NEXT: ; %bb.1: ; %true ; CHECK-NEXT: s_mov_b32 s0, 42 @@ -375,16 +366,14 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg ; CHECK-NEXT: s_cmp_gt_u32 s1, 34 ; CHECK-NEXT: s_cselect_b32 s1, 1, 0 ; CHECK-NEXT: s_and_b32 s0, s0, s1 -; CHECK-NEXT: s_and_b32 s0, 1, s0 -; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 -; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0 -; CHECK-NEXT: s_cbranch_scc0 .LBB18_2 -; CHECK-NEXT: ; %bb.1: ; %false -; CHECK-NEXT: s_mov_b32 s0, 33 -; CHECK-NEXT: s_branch .LBB18_3 -; CHECK-NEXT: .LBB18_2: ; %true +; CHECK-NEXT: s_cmp_lg_u32 s0, 0 +; CHECK-NEXT: s_cbranch_scc1 .LBB18_2 +; CHECK-NEXT: ; %bb.1: ; %true ; CHECK-NEXT: s_mov_b32 s0, 42 ; CHECK-NEXT: s_branch .LBB18_3 +; CHECK-NEXT: .LBB18_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB18_3 ; CHECK-NEXT: .LBB18_3: %v1c = icmp ult i32 %v1, 12 %v2c = icmp ugt i32 %v2, 34 diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-miscellaneous-uniform-intrinsic.ll 
b/llvm/test/CodeGen/AMDGPU/amdgpu-miscellaneous-uniform-intrinsic.ll new file mode 100644 index 0000000000000..34d4c519851d4 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-miscellaneous-uniform-intrinsic.ll @@ -0,0 +1,173 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -o - %s | FileCheck %s +define amdgpu_kernel void @readfirstlane_with_readfirstlane(ptr addrspace(1) %out) { +; CHECK-LABEL: readfirstlane_with_readfirstlane: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 5 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-NEXT: s_endpgm + %v1 = call i32 @llvm.amdgcn.readfirstlane(i32 5) + %v2 = call i32 @llvm.amdgcn.readfirstlane(i32 %v1) + store i32 %v2, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @readfirstlane_with_readlane(ptr addrspace(1) %out) { +; CHECK-LABEL: readfirstlane_with_readlane: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-NEXT: v_bfe_u32 v1, v0, 10, 10 +; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; CHECK-NEXT: v_readfirstlane_b32 s2, v1 +; CHECK-NEXT: v_readlane_b32 s2, v0, s2 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; CHECK-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-NEXT: s_endpgm + %tidx = call i32 @llvm.amdgcn.workitem.id.x() + %tidy = call i32 @llvm.amdgcn.workitem.id.y() + %v1 = call i32 @llvm.amdgcn.readlane(i32 %tidx, i32 %tidy) + %v2 = call i32 @llvm.amdgcn.readfirstlane(i32 %v1) + store i32 %v2, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @readlane_with_firstlane(ptr addrspace(1) %out) { +; CHECK-LABEL: readlane_with_firstlane: +; CHECK: ; %bb.0: +; 
CHECK-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; CHECK-NEXT: v_readfirstlane_b32 s2, v0 +; CHECK-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-NEXT: s_endpgm + %tidx = call i32 @llvm.amdgcn.workitem.id.x() + %v1 = call i32 @llvm.amdgcn.readfirstlane(i32 %tidx) + %v2 = call i32 @llvm.amdgcn.readlane(i32 %v1, i32 3) + store i32 %v2, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @readlane_readlane(ptr addrspace(1) %out) { +; CHECK-LABEL: readlane_readlane: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-NEXT: v_bfe_u32 v1, v0, 10, 10 +; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; CHECK-NEXT: v_readfirstlane_b32 s2, v1 +; CHECK-NEXT: v_readlane_b32 s2, v0, s2 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; CHECK-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-NEXT: s_endpgm + %tidx = call i32 @llvm.amdgcn.workitem.id.x() + %tidy = call i32 @llvm.amdgcn.workitem.id.y() + %v1 = call i32 @llvm.amdgcn.readlane(i32 %tidx, i32 %tidy) + %v2 = call i32 @llvm.amdgcn.readlane(i32 %v1, i32 2) + store i32 %v2, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @permlane64_uniform(ptr addrspace(1) %out, i32 %src) { +; CHECK-LABEL: permlane64_uniform: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_clause 0x1 +; CHECK-NEXT: s_load_b32 s2, s[4:5], 0x8 +; CHECK-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.permlane64(i32 %src) + store i32 %v, ptr 
addrspace(1) %out + ret void +} + +define amdgpu_kernel void @permlane64_nonuniform(i32 addrspace(1)* %out) { +; CHECK-LABEL: permlane64_nonuniform: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; CHECK-NEXT: v_permlane64_b32 v1, v0 +; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-NEXT: s_endpgm + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %v = call i32 @llvm.amdgcn.permlane64(i32 %tid) + %out_ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid + store i32 %v, i32 addrspace(1)* %out_ptr + ret void +} + +define amdgpu_kernel void @permlane64_nonuniform_expression(i32 addrspace(1)* %out) { +; CHECK-LABEL: permlane64_nonuniform_expression: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; CHECK-NEXT: v_add_nc_u32_e32 v1, 1, v0 +; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; CHECK-NEXT: v_permlane64_b32 v1, v1 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-NEXT: s_endpgm + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %tid2 = add i32 %tid, 1 + %v = call i32 @llvm.amdgcn.permlane64(i32 %tid2) + %out_ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid + store i32 %v, i32 addrspace(1)* %out_ptr + ret void +} + +define protected amdgpu_kernel void @trivial_waterfall_eq_zero(ptr addrspace(1) %out) { +; CHECK-LABEL: trivial_waterfall_eq_zero: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 5 +; CHECK-NEXT: s_mov_b32 s2, 0 +; CHECK-NEXT: s_branch .LBB7_2 +; CHECK-NEXT: .LBB7_1: ; %Flow +; CHECK-NEXT: ; in Loop: Header=BB7_2 Depth=1 +; CHECK-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s2 +; CHECK-NEXT: 
s_mov_b32 s2, -1 +; CHECK-NEXT: s_cbranch_vccz .LBB7_4 +; CHECK-NEXT: .LBB7_2: ; %while +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s2 +; CHECK-NEXT: s_mov_b32 s2, -1 +; CHECK-NEXT: s_cbranch_vccnz .LBB7_1 +; CHECK-NEXT: ; %bb.3: ; %if +; CHECK-NEXT: ; in Loop: Header=BB7_2 Depth=1 +; CHECK-NEXT: s_mov_b32 s2, 0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-NEXT: s_branch .LBB7_1 +; CHECK-NEXT: .LBB7_4: ; %exit +; CHECK-NEXT: s_endpgm +entry: + br label %while + +while: + %done = phi i1 [ 0, %entry ], [ 1, %if ] + %not_done = xor i1 %done, true + %ballot = tail call i64 @llvm.amdgcn.ballot.i64(i1 %not_done) + %is_done = icmp eq i64 %ballot, 0 ; in this case is_done = !not_done + br i1 %is_done, label %exit, label %if + +if: + store i32 5, ptr addrspace(1) %out + br label %while + +exit: + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-uniform-waterfall.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-uniform-waterfall.ll index 33ce278028bba..c962c05d24ad0 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-uniform-waterfall.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-uniform-waterfall.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -amdgpu-enable-uniform-intrinsic-combine=0 -O3 -S < %s | FileCheck %s -check-prefix=CURRENT-CHECK ; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-uniform-intrinsic-combine -S < %s | FileCheck %s -check-prefix=PASS-CHECK +; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -amdgpu-uniform-intrinsic-combine -S < %s | FileCheck %s -check-prefix=PASS-CHECK ; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -O3 -S < %s | FileCheck %s -check-prefix=O3-CHECK define protected amdgpu_kernel void @trivial_waterfall_eq_zero(ptr addrspace(1) %out) { diff --git 
a/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-combine.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-combine.ll index a3e42e564376c..a7e828c95d69f 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-combine.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-combine.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -amdgpu-enable-uniform-intrinsic-combine=0 -O3 -S < %s | FileCheck %s -check-prefix=CURRENT-CHECK +; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -amdgpu-uniform-intrinsic-combine -S < %s | FileCheck %s -check-prefix=PASS-CHECK ; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-uniform-intrinsic-combine -S < %s | FileCheck %s -check-prefix=PASS-CHECK ; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-uniform-intrinsic-combine,dce -S < %s | FileCheck %s -check-prefix=DCE-CHECK diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-temporal-divergence.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-temporal-divergence.ll index 2fde3e3759f47..792926154f7a8 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-temporal-divergence.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-temporal-divergence.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-uniform-intrinsic-combine -S < %s | FileCheck %s -check-prefix=PASS-CHECK +; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -amdgpu-uniform-intrinsic-combine -S < %s | FileCheck %s -check-prefix=PASS-CHECK ; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-uniform-intrinsic-combine,instcombine,early-cse,simplifycfg -S < %s | FileCheck %s -check-prefix=COMB-CHECK ; This should not be optimized diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll 
b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll index db32135939a5d..b8f084d5f82ad 100644 --- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll +++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll @@ -4,24 +4,14 @@ define amdgpu_gs i32 @main() { ; CHECK-LABEL: main: ; CHECK: ; %bb.0: ; %bb -; CHECK-NEXT: s_bitcmp1_b32 0, 0 ; CHECK-NEXT: s_mov_b32 s0, 0 -; CHECK-NEXT: s_cselect_b32 s1, -1, 0 -; CHECK-NEXT: s_or_saveexec_b32 s2, -1 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s1 -; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) -; CHECK-NEXT: v_readfirstlane_b32 s1, v0 -; CHECK-NEXT: s_mov_b32 exec_lo, s2 -; CHECK-NEXT: s_or_b32 s0, s0, s1 -; CHECK-NEXT: s_wait_alu 0xfffe +; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; CHECK-NEXT: s_bitcmp1_b32 s0, 0 ; CHECK-NEXT: s_cselect_b32 s0, -1, 0 -; CHECK-NEXT: s_wait_alu 0xfffe ; CHECK-NEXT: s_xor_b32 s0, s0, -1 -; CHECK-NEXT: s_wait_alu 0xfffe -; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 -; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) -; CHECK-NEXT: v_readfirstlane_b32 s0, v1 +; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 ; CHECK-NEXT: s_wait_alu 0xf1ff ; CHECK-NEXT: ; return to shader part epilog bb: diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll index 3aa36635a0ab6..704ea37117f32 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll @@ -9,11 +9,11 @@ ; RUN: | FileCheck -check-prefix=GCN-O3 %s -; GCN-O0: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,unreachableblockelim,ee-instrument,scalarize-masked-mem-intrin,expand-reductions,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc))),require,cgscc(function(machine-function(reg-usage-propagation,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,si-post-ra-bundler,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function)) +; GCN-O0: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,unreachableblockelim,ee-instrument,scalarize-masked-mem-intrin,expand-reductions,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc))),require,cgscc(function(machine-function(reg-usage-propagation,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,si-post-ra-bundler,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function)) -; GCN-O2: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,loop-mssa(licm),verify,loop-mssa(canon-freeze,loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,unreachableblockelim,consthoist,replace-with-veclib,partially-inline-libcalls,ee-instrument,scalarize-masked-mem-intrin,expand-reductions,early-cse<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments,codegenprepare,load-store-vectorizer),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require,objc-arc-contract,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions))),require,cgscc(function(machine-function(reg-usage-propagation,amdgpu-prepare-agpr-alloc,detect-dead-lanes,dead-mi-elimination,init-undef,process-imp-defs,unreachable-mbb-elimination,require,si-opt-vgpr-liverange,require,phi-node-elimination,s
i-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy,virt-reg-rewriter,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy,si-lower-wwm-copies,virt-reg-rewriter,amdgpu-reserve-wwm-regs,greedy,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,si-shrink-instructions,si-post-ra-bundler,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,amdgpu-insert-delay-alu,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function)) +; GCN-O2: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt,amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,loop-mssa(licm),verify,loop-mssa(canon-freeze,loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,unreachableblockelim,consthoist,replace-with-veclib,partially-inline-libcalls,ee-instrument,scalarize-masked-mem-intrin,expand-reductions,early-cse<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments,codegenprepare,load-store-vectorizer),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require,objc-arc-contract,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions))),require,cgscc(function(machine-function(reg-usage-propagation,amdgpu-prepare-agpr-alloc,detect-dead-lanes,dead-mi-elimination,init-undef,process-imp-defs,unreachable-mbb-elimination,require,si-opt-vgpr-liveran
ge,require,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy,virt-reg-rewriter,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy,si-lower-wwm-copies,virt-reg-rewriter,amdgpu-reserve-wwm-regs,greedy,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,si-shrink-instructions,si-post-ra-bundler,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,amdgpu-insert-delay-alu,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function)) -; GCN-O3: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,gvn<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,loop-mssa(licm),verify,loop-mssa(canon-freeze,loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,unreachableblockelim,consthoist,replace-with-veclib,partially-inline-libcalls,ee-instrument,scalarize-masked-mem-intrin,expand-reductions,gvn<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments,codegenprepare,load-store-vectorizer),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require,objc-arc-contract,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions))),require,cgscc(function(machine-function(reg-usage-propagation,amdgpu-prepare-agpr-alloc,detect-dead-lanes,dead-mi-elimination,init-undef,process-imp-defs,unreachable-mbb-elimination,require,si-opt-vgpr-liverange,require,phi-node-elimination,si-lower-cont
rol-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy,virt-reg-rewriter,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy,si-lower-wwm-copies,virt-reg-rewriter,amdgpu-reserve-wwm-regs,greedy,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,si-shrink-instructions,si-post-ra-bundler,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,amdgpu-insert-delay-alu,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function)) +; GCN-O3: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt,amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,gvn<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,loop-mssa(licm),verify,loop-mssa(canon-freeze,loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,unreachableblockelim,consthoist,replace-with-veclib,partially-inline-libcalls,ee-instrument,scalarize-masked-mem-intrin,expand-reductions,gvn<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments,codegenprepare,load-store-vectorizer),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require,objc-arc-contract,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions))),require,cgscc(function(machine-function(reg-usage-propagation,amdgpu-prepare-agpr-alloc,detect-dead-lanes,dead-mi-elimination,init-undef,process-imp-defs,unreachable-mbb-elimination,require,si-opt-vgpr-liverange,require,p
hi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy,virt-reg-rewriter,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy,si-lower-wwm-copies,virt-reg-rewriter,amdgpu-reserve-wwm-regs,greedy,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,si-shrink-instructions,si-post-ra-bundler,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,amdgpu-insert-delay-alu,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function)) define void @empty() { ret void diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index 6e5212580ba2e..ee6caab6f25cd 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -31,6 +31,11 @@ ; GCN-O0-NEXT: AMDGPU Remove Incompatible Functions ; GCN-O0-NEXT: AMDGPU Printf lowering ; GCN-O0-NEXT: Lower ctors and dtors for AMDGPU +; GCN-O0-NEXT: FunctionPass Manager +; GCN-O0-NEXT: Dominator Tree Construction +; GCN-O0-NEXT: Cycle Info Analysis +; GCN-O0-NEXT: Uniformity Analysis +; GCN-O0-NEXT: AMDGPU Uniform Intrinsic Combine ; GCN-O0-NEXT: Expand variadic functions ; GCN-O0-NEXT: AMDGPU Inline All Functions ; GCN-O0-NEXT: Inliner for always_inline functions @@ 
-179,6 +184,11 @@ ; GCN-O1-NEXT: AMDGPU Remove Incompatible Functions ; GCN-O1-NEXT: AMDGPU Printf lowering ; GCN-O1-NEXT: Lower ctors and dtors for AMDGPU +; GCN-O1-NEXT: FunctionPass Manager +; GCN-O1-NEXT: Dominator Tree Construction +; GCN-O1-NEXT: Cycle Info Analysis +; GCN-O1-NEXT: Uniformity Analysis +; GCN-O1-NEXT: AMDGPU Uniform Intrinsic Combine ; GCN-O1-NEXT: Expand variadic functions ; GCN-O1-NEXT: AMDGPU Inline All Functions ; GCN-O1-NEXT: Inliner for always_inline functions @@ -466,6 +476,11 @@ ; GCN-O1-OPTS-NEXT: AMDGPU Remove Incompatible Functions ; GCN-O1-OPTS-NEXT: AMDGPU Printf lowering ; GCN-O1-OPTS-NEXT: Lower ctors and dtors for AMDGPU +; GCN-O1-OPTS-NEXT: FunctionPass Manager +; GCN-O1-OPTS-NEXT: Dominator Tree Construction +; GCN-O1-OPTS-NEXT: Cycle Info Analysis +; GCN-O1-OPTS-NEXT: Uniformity Analysis +; GCN-O1-OPTS-NEXT: AMDGPU Uniform Intrinsic Combine ; GCN-O1-OPTS-NEXT: Expand variadic functions ; GCN-O1-OPTS-NEXT: AMDGPU Inline All Functions ; GCN-O1-OPTS-NEXT: Inliner for always_inline functions @@ -783,6 +798,10 @@ ; GCN-O2-NEXT: Lower ctors and dtors for AMDGPU ; GCN-O2-NEXT: FunctionPass Manager ; GCN-O2-NEXT: AMDGPU Image Intrinsic Optimizer +; GCN-O2-NEXT: Dominator Tree Construction +; GCN-O2-NEXT: Cycle Info Analysis +; GCN-O2-NEXT: Uniformity Analysis +; GCN-O2-NEXT: AMDGPU Uniform Intrinsic Combine ; GCN-O2-NEXT: Expand variadic functions ; GCN-O2-NEXT: AMDGPU Inline All Functions ; GCN-O2-NEXT: Inliner for always_inline functions @@ -1104,6 +1123,10 @@ ; GCN-O3-NEXT: Lower ctors and dtors for AMDGPU ; GCN-O3-NEXT: FunctionPass Manager ; GCN-O3-NEXT: AMDGPU Image Intrinsic Optimizer +; GCN-O3-NEXT: Dominator Tree Construction +; GCN-O3-NEXT: Cycle Info Analysis +; GCN-O3-NEXT: Uniformity Analysis +; GCN-O3-NEXT: AMDGPU Uniform Intrinsic Combine ; GCN-O3-NEXT: Expand variadic functions ; GCN-O3-NEXT: AMDGPU Inline All Functions ; GCN-O3-NEXT: Inliner for always_inline functions diff --git 
a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll index e00e1f13b2b77..aa591d28eb346 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll @@ -110,9 +110,8 @@ false: define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare(i32 inreg %v) { ; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_and_b32 s0, s0, 1 -; CHECK-NEXT: v_cmp_ne_u32_e64 vcc_lo, s0, 0 -; CHECK-NEXT: s_cbranch_vccz .LBB8_2 +; CHECK-NEXT: s_bitcmp0_b32 s0, 0 +; CHECK-NEXT: s_cbranch_scc1 .LBB8_2 ; CHECK-NEXT: ; %bb.1: ; %true ; CHECK-NEXT: s_mov_b32 s0, 42 ; CHECK-NEXT: s_branch .LBB8_3 @@ -156,15 +155,16 @@ false: define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare(i32 inreg %v) { ; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_and_b32 s0, s0, 1 -; CHECK-NEXT: v_cmp_ne_u32_e64 vcc_lo, s0, 0 -; CHECK-NEXT: s_cbranch_vccz .LBB10_2 -; CHECK-NEXT: ; %bb.1: ; %false -; CHECK-NEXT: s_mov_b32 s0, 33 -; CHECK-NEXT: s_branch .LBB10_3 -; CHECK-NEXT: .LBB10_2: ; %true +; CHECK-NEXT: s_bitcmp1_b32 s0, 0 +; CHECK-NEXT: s_cselect_b32 s0, -1, 0 +; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s0 +; CHECK-NEXT: s_cbranch_vccnz .LBB10_2 +; CHECK-NEXT: ; %bb.1: ; %true ; CHECK-NEXT: s_mov_b32 s0, 42 ; CHECK-NEXT: s_branch .LBB10_3 +; CHECK-NEXT: .LBB10_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB10_3 ; CHECK-NEXT: .LBB10_3: %c = trunc i32 %v to i1 %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) @@ -201,8 +201,8 @@ false: define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare(i32 inreg %v) { ; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare: ; CHECK: ; %bb.0: -; CHECK-NEXT: v_cmp_lt_u32_e64 vcc_lo, s0, 12 -; CHECK-NEXT: s_cbranch_vccz .LBB12_2 +; CHECK-NEXT: s_cmp_gt_u32 s0, 11 +; CHECK-NEXT: s_cbranch_scc1 .LBB12_2 ; CHECK-NEXT: ; %bb.1: ; %true ; CHECK-NEXT: 
s_mov_b32 s0, 42 ; CHECK-NEXT: s_branch .LBB12_3 @@ -245,14 +245,14 @@ false: define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) { ; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare: ; CHECK: ; %bb.0: -; CHECK-NEXT: v_cmp_lt_u32_e64 vcc_lo, s0, 12 -; CHECK-NEXT: s_cbranch_vccz .LBB14_2 -; CHECK-NEXT: ; %bb.1: ; %false -; CHECK-NEXT: s_mov_b32 s0, 33 -; CHECK-NEXT: s_branch .LBB14_3 -; CHECK-NEXT: .LBB14_2: ; %true +; CHECK-NEXT: s_cmp_lt_u32 s0, 12 +; CHECK-NEXT: s_cbranch_scc1 .LBB14_2 +; CHECK-NEXT: ; %bb.1: ; %true ; CHECK-NEXT: s_mov_b32 s0, 42 ; CHECK-NEXT: s_branch .LBB14_3 +; CHECK-NEXT: .LBB14_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB14_3 ; CHECK-NEXT: .LBB14_3: %c = icmp ult i32 %v, 12 %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) @@ -293,13 +293,13 @@ false: define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and(i32 inreg %v1, i32 inreg %v2) { ; CHECK-LABEL: branch_uniform_ballot_ne_zero_and: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_cmp_lt_u32 s0, 12 +; CHECK-NEXT: s_cmp_gt_u32 s0, 11 ; CHECK-NEXT: s_cselect_b32 s0, -1, 0 -; CHECK-NEXT: s_cmp_gt_u32 s1, 34 +; CHECK-NEXT: s_cmp_lt_u32 s1, 35 ; CHECK-NEXT: s_cselect_b32 s1, -1, 0 -; CHECK-NEXT: s_and_b32 s0, s0, s1 -; CHECK-NEXT: s_and_b32 s0, s0, exec_lo -; CHECK-NEXT: s_cbranch_scc0 .LBB16_2 +; CHECK-NEXT: s_or_b32 s0, s0, s1 +; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s0 +; CHECK-NEXT: s_cbranch_vccnz .LBB16_2 ; CHECK-NEXT: ; %bb.1: ; %true ; CHECK-NEXT: s_mov_b32 s0, 42 ; CHECK-NEXT: s_branch .LBB16_3 @@ -353,14 +353,14 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg ; CHECK-NEXT: s_cmp_gt_u32 s1, 34 ; CHECK-NEXT: s_cselect_b32 s1, -1, 0 ; CHECK-NEXT: s_and_b32 s0, s0, s1 -; CHECK-NEXT: s_and_b32 s0, s0, exec_lo -; CHECK-NEXT: s_cbranch_scc0 .LBB18_2 -; CHECK-NEXT: ; %bb.1: ; %false -; CHECK-NEXT: s_mov_b32 s0, 33 -; CHECK-NEXT: s_branch .LBB18_3 -; CHECK-NEXT: .LBB18_2: ; %true +; CHECK-NEXT: s_and_b32 vcc_lo, 
exec_lo, s0 +; CHECK-NEXT: s_cbranch_vccnz .LBB18_2 +; CHECK-NEXT: ; %bb.1: ; %true ; CHECK-NEXT: s_mov_b32 s0, 42 ; CHECK-NEXT: s_branch .LBB18_3 +; CHECK-NEXT: .LBB18_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB18_3 ; CHECK-NEXT: .LBB18_3: %v1c = icmp ult i32 %v1, 12 %v2c = icmp ugt i32 %v2, 34 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll index b4adf7f641550..30c2c260a3274 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll @@ -113,9 +113,8 @@ false: define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare(i32 inreg %v) { ; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_and_b32 s0, s0, 1 -; CHECK-NEXT: v_cmp_ne_u32_e64 vcc, s0, 0 -; CHECK-NEXT: s_cbranch_vccz .LBB8_2 +; CHECK-NEXT: s_bitcmp0_b32 s0, 0 +; CHECK-NEXT: s_cbranch_scc1 .LBB8_2 ; CHECK-NEXT: ; %bb.1: ; %true ; CHECK-NEXT: s_mov_b32 s0, 42 ; CHECK-NEXT: s_branch .LBB8_3 @@ -159,15 +158,16 @@ false: define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare(i32 inreg %v) { ; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_and_b32 s0, s0, 1 -; CHECK-NEXT: v_cmp_ne_u32_e64 vcc, s0, 0 -; CHECK-NEXT: s_cbranch_vccz .LBB10_2 -; CHECK-NEXT: ; %bb.1: ; %false -; CHECK-NEXT: s_mov_b32 s0, 33 -; CHECK-NEXT: s_branch .LBB10_3 -; CHECK-NEXT: .LBB10_2: ; %true +; CHECK-NEXT: s_bitcmp1_b32 s0, 0 +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: s_and_b64 vcc, exec, s[0:1] +; CHECK-NEXT: s_cbranch_vccnz .LBB10_2 +; CHECK-NEXT: ; %bb.1: ; %true ; CHECK-NEXT: s_mov_b32 s0, 42 ; CHECK-NEXT: s_branch .LBB10_3 +; CHECK-NEXT: .LBB10_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB10_3 ; CHECK-NEXT: .LBB10_3: %c = trunc i32 %v to i1 %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) @@ -204,8 +204,8 @@ false: define amdgpu_cs i32 
@branch_uniform_ballot_ne_zero_compare(i32 inreg %v) { ; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare: ; CHECK: ; %bb.0: -; CHECK-NEXT: v_cmp_lt_u32_e64 vcc, s0, 12 -; CHECK-NEXT: s_cbranch_vccz .LBB12_2 +; CHECK-NEXT: s_cmp_gt_u32 s0, 11 +; CHECK-NEXT: s_cbranch_scc1 .LBB12_2 ; CHECK-NEXT: ; %bb.1: ; %true ; CHECK-NEXT: s_mov_b32 s0, 42 ; CHECK-NEXT: s_branch .LBB12_3 @@ -248,14 +248,14 @@ false: define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) { ; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare: ; CHECK: ; %bb.0: -; CHECK-NEXT: v_cmp_lt_u32_e64 vcc, s0, 12 -; CHECK-NEXT: s_cbranch_vccz .LBB14_2 -; CHECK-NEXT: ; %bb.1: ; %false -; CHECK-NEXT: s_mov_b32 s0, 33 -; CHECK-NEXT: s_branch .LBB14_3 -; CHECK-NEXT: .LBB14_2: ; %true +; CHECK-NEXT: s_cmp_lt_u32 s0, 12 +; CHECK-NEXT: s_cbranch_scc1 .LBB14_2 +; CHECK-NEXT: ; %bb.1: ; %true ; CHECK-NEXT: s_mov_b32 s0, 42 ; CHECK-NEXT: s_branch .LBB14_3 +; CHECK-NEXT: .LBB14_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB14_3 ; CHECK-NEXT: .LBB14_3: %c = icmp ult i32 %v, 12 %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) @@ -296,13 +296,13 @@ false: define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and(i32 inreg %v1, i32 inreg %v2) { ; CHECK-LABEL: branch_uniform_ballot_ne_zero_and: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_cmp_lt_u32 s0, 12 +; CHECK-NEXT: s_cmp_gt_u32 s0, 11 ; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0 -; CHECK-NEXT: s_cmp_gt_u32 s1, 34 +; CHECK-NEXT: s_cmp_lt_u32 s1, 35 ; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 -; CHECK-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1] -; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], exec -; CHECK-NEXT: s_cbranch_scc0 .LBB16_2 +; CHECK-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] +; CHECK-NEXT: s_and_b64 vcc, exec, s[0:1] +; CHECK-NEXT: s_cbranch_vccnz .LBB16_2 ; CHECK-NEXT: ; %bb.1: ; %true ; CHECK-NEXT: s_mov_b32 s0, 42 ; CHECK-NEXT: s_branch .LBB16_3 @@ -356,14 +356,14 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 
inreg ; CHECK-NEXT: s_cmp_gt_u32 s1, 34 ; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 ; CHECK-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1] -; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], exec -; CHECK-NEXT: s_cbranch_scc0 .LBB18_2 -; CHECK-NEXT: ; %bb.1: ; %false -; CHECK-NEXT: s_mov_b32 s0, 33 -; CHECK-NEXT: s_branch .LBB18_3 -; CHECK-NEXT: .LBB18_2: ; %true +; CHECK-NEXT: s_and_b64 vcc, exec, s[0:1] +; CHECK-NEXT: s_cbranch_vccnz .LBB18_2 +; CHECK-NEXT: ; %bb.1: ; %true ; CHECK-NEXT: s_mov_b32 s0, 42 ; CHECK-NEXT: s_branch .LBB18_3 +; CHECK-NEXT: .LBB18_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB18_3 ; CHECK-NEXT: .LBB18_3: %v1c = icmp ult i32 %v1, 12 %v2c = icmp ugt i32 %v2, 34 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane64.ll index 6dd2258420998..39191d242574f 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane64.ll @@ -23,10 +23,8 @@ define amdgpu_kernel void @test_s_i32(ptr addrspace(1) %out, i32 %src0) { ; GFX11-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-NEXT: v_permlane64_b32 v0, v0 -; GFX11-SDAG-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX11-GISEL-LABEL: test_s_i32: @@ -36,8 +34,6 @@ define amdgpu_kernel void @test_s_i32(ptr addrspace(1) %out, i32 %src0) { ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_permlane64_b32 v0, v0 ; GFX11-GISEL-NEXT: global_store_b32 v1, v0, 
s[0:1] ; GFX11-GISEL-NEXT: s_endpgm %v = call i32 @llvm.amdgcn.permlane64.i32(i32 %src0) @@ -50,12 +46,9 @@ define amdgpu_kernel void @test_s_i64(ptr addrspace(1) %out, i64 %src0) { ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v0, s3 -; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-NEXT: v_permlane64_b32 v1, v0 -; GFX11-SDAG-NEXT: v_permlane64_b32 v0, v2 -; GFX11-SDAG-NEXT: global_store_b64 v3, v[0:1], s[0:1] +; GFX11-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, s2 +; GFX11-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX11-GISEL-LABEL: test_s_i64: @@ -64,9 +57,6 @@ define amdgpu_kernel void @test_s_i64(ptr addrspace(1) %out, i64 %src0) { ; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-GISEL-NEXT: v_permlane64_b32 v0, v0 -; GFX11-GISEL-NEXT: v_permlane64_b32 v1, v1 ; GFX11-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-GISEL-NEXT: s_endpgm %v = call i64 @llvm.amdgcn.permlane64.i64(i64 %src0) @@ -79,12 +69,9 @@ define amdgpu_kernel void @test_s_f64(ptr addrspace(1) %out, double %src0) { ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v0, s3 -; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-NEXT: v_permlane64_b32 v1, v0 -; GFX11-SDAG-NEXT: v_permlane64_b32 v0, v2 -; GFX11-SDAG-NEXT: global_store_b64 v3, v[0:1], s[0:1] +; GFX11-SDAG-NEXT: 
v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, s2 +; GFX11-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX11-GISEL-LABEL: test_s_f64: @@ -93,9 +80,6 @@ define amdgpu_kernel void @test_s_f64(ptr addrspace(1) %out, double %src0) { ; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-GISEL-NEXT: v_permlane64_b32 v0, v0 -; GFX11-GISEL-NEXT: v_permlane64_b32 v1, v1 ; GFX11-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-GISEL-NEXT: s_endpgm %v = call double @llvm.amdgcn.permlane64.f64(double %src0) @@ -116,19 +100,15 @@ define amdgpu_kernel void @test_i_i32(ptr addrspace(1) %out) { ; GFX11-SDAG-LABEL: test_i_i32: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, 0x63 :: v_dual_mov_b32 v1, 0 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-NEXT: v_permlane64_b32 v0, v0 +; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x63 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX11-GISEL-LABEL: test_i_i32: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, 0x63 :: v_dual_mov_b32 v1, 0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_permlane64_b32 v0, v0 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-GISEL-NEXT: s_endpgm @@ -141,19 +121,15 @@ define amdgpu_kernel void @test_i_f32(ptr addrspace(1) %out) { ; GFX11-SDAG-LABEL: test_i_f32: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, 
0x449a5000 :: v_dual_mov_b32 v1, 0 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-NEXT: v_permlane64_b32 v0, v0 +; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x449a5000 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX11-GISEL-LABEL: test_i_f32: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, 0x449a5000 :: v_dual_mov_b32 v1, 0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_permlane64_b32 v0, v0 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-GISEL-NEXT: s_endpgm @@ -166,23 +142,16 @@ define amdgpu_kernel void @test_i_i64(ptr addrspace(1) %out) { ; GFX11-SDAG-LABEL: test_i_i64: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 0 -; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 0x63 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-NEXT: v_permlane64_b32 v1, v2 -; GFX11-SDAG-NEXT: v_permlane64_b32 v0, v0 +; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, 0x63 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX11-SDAG-NEXT: global_store_b64 v1, v[0:1], s[0:1] ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX11-GISEL-LABEL: test_i_i64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x63 -; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-GISEL-NEXT: v_permlane64_b32 v0, v0 -; GFX11-GISEL-NEXT: v_permlane64_b32 v1, v2 +; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-NEXT: 
global_store_b64 v2, v[0:1], s[0:1] ; GFX11-GISEL-NEXT: s_endpgm @@ -195,22 +164,16 @@ define amdgpu_kernel void @test_i_f64(ptr addrspace(1) %out) { ; GFX11-SDAG-LABEL: test_i_f64: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 0x40934a00 -; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 0 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-NEXT: v_permlane64_b32 v1, v0 -; GFX11-SDAG-NEXT: v_permlane64_b32 v0, v2 +; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x40934a00 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX11-SDAG-NEXT: global_store_b64 v0, v[0:1], s[0:1] ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX11-GISEL-LABEL: test_i_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, 0x40934a00 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-GISEL-NEXT: v_permlane64_b32 v0, v2 -; GFX11-GISEL-NEXT: v_permlane64_b32 v1, v1 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 0x40934a00 :: v_dual_mov_b32 v2, 0 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-GISEL-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane64.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane64.ptr.ll index b0149f7de5e85..672b658659824 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane64.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane64.ptr.ll @@ -6,12 +6,9 @@ define amdgpu_kernel void @test_p0(ptr addrspace(1) %out, ptr %src0) { ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v0, s3 -; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, s2 -; 
GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-NEXT: v_permlane64_b32 v1, v0 -; GFX11-SDAG-NEXT: v_permlane64_b32 v0, v2 -; GFX11-SDAG-NEXT: global_store_b64 v3, v[0:1], s[0:1] +; GFX11-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, s2 +; GFX11-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-SDAG-NEXT: s_endpgm %v = call ptr @llvm.amdgcn.permlane64.p0(ptr %src0) store ptr %v, ptr addrspace(1) %out @@ -22,21 +19,14 @@ define amdgpu_kernel void @test_v3p0(ptr addrspace(1) %out, <3 x ptr> %src0) { ; GFX11-SDAG-LABEL: test_v3p0: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_clause 0x2 -; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x44 ; GFX11-SDAG-NEXT: s_load_b64 s[6:7], s[4:5], 0x54 +; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x44 ; GFX11-SDAG-NEXT: s_load_b64 s[4:5], s[4:5], 0x24 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-NEXT: v_dual_mov_b32 v6, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-SDAG-NEXT: v_dual_mov_b32 v4, s1 :: v_dual_mov_b32 v5, s7 -; GFX11-SDAG-NEXT: v_mov_b32_e32 v8, s6 -; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, s3 :: v_dual_mov_b32 v7, s0 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-SDAG-NEXT: v_permlane64_b32 v2, v1 -; GFX11-SDAG-NEXT: v_permlane64_b32 v1, v4 -; GFX11-SDAG-NEXT: v_permlane64_b32 v5, v5 -; GFX11-SDAG-NEXT: v_permlane64_b32 v4, v8 -; GFX11-SDAG-NEXT: v_permlane64_b32 v3, v0 -; GFX11-SDAG-NEXT: v_permlane64_b32 v0, v7 +; GFX11-SDAG-NEXT: v_dual_mov_b32 v6, 0 :: v_dual_mov_b32 v5, s7 +; GFX11-SDAG-NEXT: v_dual_mov_b32 v4, s6 :: v_dual_mov_b32 v1, s1 +; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-SDAG-NEXT: s_clause 0x1 ; GFX11-SDAG-NEXT: global_store_b64 v6, v[4:5], s[4:5] offset:16 ; GFX11-SDAG-NEXT: global_store_b128 v6, v[0:3], s[4:5] @@ -53,10 +43,8 @@ define amdgpu_kernel void @test_p3(ptr 
addrspace(1) %out, ptr addrspace(3) %src0 ; GFX11-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-NEXT: v_permlane64_b32 v0, v0 -; GFX11-SDAG-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-SDAG-NEXT: s_endpgm %v = call ptr addrspace(3) @llvm.amdgcn.permlane64.v3p0(ptr addrspace(3) %src0) store ptr addrspace(3) %v, ptr addrspace(1) %out @@ -70,14 +58,9 @@ define amdgpu_kernel void @test_v3p3(ptr addrspace(1) %out, <3 x ptr addrspace(3 ; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 ; GFX11-SDAG-NEXT: s_load_b64 s[4:5], s[4:5], 0x24 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v3, s0 -; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s1 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-NEXT: v_permlane64_b32 v2, v0 -; GFX11-SDAG-NEXT: v_permlane64_b32 v1, v1 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-SDAG-NEXT: v_permlane64_b32 v0, v3 -; GFX11-SDAG-NEXT: global_store_b96 v4, v[0:2], s[4:5] +; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v0, s0 +; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2 +; GFX11-SDAG-NEXT: global_store_b96 v3, v[0:2], s[4:5] ; GFX11-SDAG-NEXT: s_endpgm %v = call <3 x ptr addrspace(3)> @llvm.amdgcn.permlane64.v3p3(<3 x ptr addrspace(3)> %src0) store <3 x ptr addrspace(3)> %v, ptr addrspace(1) %out @@ -91,10 +74,8 @@ define amdgpu_kernel void @test_p5(ptr addrspace(1) %out, ptr addrspace(5) %src0 ; GFX11-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-NEXT: 
v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-NEXT: v_permlane64_b32 v0, v0 -; GFX11-SDAG-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-SDAG-NEXT: s_endpgm %v = call ptr addrspace(5) @llvm.amdgcn.permlane64.p5(ptr addrspace(5) %src0) store ptr addrspace(5) %v, ptr addrspace(1) %out @@ -108,14 +89,9 @@ define amdgpu_kernel void @test_v3p5(ptr addrspace(1) %out, <3 x ptr addrspace(5 ; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 ; GFX11-SDAG-NEXT: s_load_b64 s[4:5], s[4:5], 0x24 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v3, s0 -; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s1 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-NEXT: v_permlane64_b32 v2, v0 -; GFX11-SDAG-NEXT: v_permlane64_b32 v1, v1 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-SDAG-NEXT: v_permlane64_b32 v0, v3 -; GFX11-SDAG-NEXT: global_store_b96 v4, v[0:2], s[4:5] +; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v0, s0 +; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2 +; GFX11-SDAG-NEXT: global_store_b96 v3, v[0:2], s[4:5] ; GFX11-SDAG-NEXT: s_endpgm %v = call <3 x ptr addrspace(5)> @llvm.amdgcn.permlane64.v3p5(<3 x ptr addrspace(5)> %src0) store <3 x ptr addrspace(5)> %v, ptr addrspace(1) %out @@ -129,10 +105,8 @@ define amdgpu_kernel void @test_p6(ptr addrspace(1) %out, ptr addrspace(6) %src0 ; GFX11-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-NEXT: v_permlane64_b32 v0, v0 -; GFX11-SDAG-NEXT: global_store_b32 v1, v0, s[0:1] +; 
GFX11-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-SDAG-NEXT: s_endpgm %v = call ptr addrspace(6) @llvm.amdgcn.permlane64.p6(ptr addrspace(6) %src0) store ptr addrspace(6) %v, ptr addrspace(1) %out @@ -146,14 +120,9 @@ define amdgpu_kernel void @test_v3p6(ptr addrspace(1) %out, <3 x ptr addrspace(6 ; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 ; GFX11-SDAG-NEXT: s_load_b64 s[4:5], s[4:5], 0x24 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v3, s0 -; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s1 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-NEXT: v_permlane64_b32 v2, v0 -; GFX11-SDAG-NEXT: v_permlane64_b32 v1, v1 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-SDAG-NEXT: v_permlane64_b32 v0, v3 -; GFX11-SDAG-NEXT: global_store_b96 v4, v[0:2], s[4:5] +; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v0, s0 +; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2 +; GFX11-SDAG-NEXT: global_store_b96 v3, v[0:2], s[4:5] ; GFX11-SDAG-NEXT: s_endpgm %v = call <3 x ptr addrspace(6)> @llvm.amdgcn.permlane64.v3p6(<3 x ptr addrspace(6)> %src0) store <3 x ptr addrspace(6)> %v, ptr addrspace(1) %out diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll index d1ba892d7f7e1..02d29909c661c 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll @@ -396,8 +396,7 @@ define amdgpu_kernel void @test_readfirstlane_imm_f64(ptr addrspace(1) %out) { ; ; CHECK-GISEL-LABEL: test_readfirstlane_imm_f64: ; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: s_mov_b32 s0, 0 -; CHECK-GISEL-NEXT: s_mov_b32 s1, 0x40400000 +; CHECK-GISEL-NEXT: s_mov_b64 s[0:1], 0x4040000000000000 ; CHECK-GISEL-NEXT: ;;#ASMSTART ; CHECK-GISEL-NEXT: ; 
use s[0:1] ; CHECK-GISEL-NEXT: ;;#ASMEND @@ -456,14 +455,13 @@ define amdgpu_kernel void @test_readfirstlane_imm_fold_i64(ptr addrspace(1) %out ; CHECK-GISEL-LABEL: test_readfirstlane_imm_fold_i64: ; CHECK-GISEL: ; %bb.0: ; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 -; CHECK-GISEL-NEXT: s_mov_b64 s[2:3], 32 ; CHECK-GISEL-NEXT: s_add_i32 s12, s12, s17 -; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, 32 ; CHECK-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13 +; CHECK-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 ; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1 -; CHECK-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 -; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s3 +; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s0 ; CHECK-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; CHECK-GISEL-NEXT: s_endpgm @@ -490,15 +488,13 @@ define amdgpu_kernel void @test_readfirstlane_imm_fold_f64(ptr addrspace(1) %out ; CHECK-GISEL-LABEL: test_readfirstlane_imm_fold_f64: ; CHECK-GISEL: ; %bb.0: ; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 -; CHECK-GISEL-NEXT: s_mov_b32 s2, 0 ; CHECK-GISEL-NEXT: s_add_i32 s12, s12, s17 -; CHECK-GISEL-NEXT: s_mov_b32 s3, 0x40400000 -; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s2 -; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1 +; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13 ; CHECK-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 -; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s3 +; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1 +; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, 0x40400000 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s0 ; CHECK-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; CHECK-GISEL-NEXT: s_endpgm @@ -588,17 +584,17 @@ define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i64(ptr addrspace(1 ; CHECK-SDAG: ; %bb.0: ; CHECK-SDAG-NEXT: s_load_dwordx2 
s[0:1], s[8:9], 0x0 ; CHECK-SDAG-NEXT: s_add_i32 s12, s12, s17 -; CHECK-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13 -; CHECK-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 ; CHECK-SDAG-NEXT: ;;#ASMSTART ; CHECK-SDAG-NEXT: s_mov_b64 s[2:3], 0 ; CHECK-SDAG-NEXT: ;;#ASMEND +; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s2 +; CHECK-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13 ; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s1 -; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s2 -; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s3 -; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s0 -; CHECK-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s0 +; CHECK-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s1 +; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s3 +; CHECK-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; CHECK-SDAG-NEXT: s_endpgm ; ; CHECK-GISEL-LABEL: test_readfirstlane_copy_from_sgpr_i64: @@ -628,17 +624,17 @@ define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_f64(ptr addrspace(1 ; CHECK-SDAG: ; %bb.0: ; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; CHECK-SDAG-NEXT: s_add_i32 s12, s12, s17 -; CHECK-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13 -; CHECK-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 ; CHECK-SDAG-NEXT: ;;#ASMSTART ; CHECK-SDAG-NEXT: s_mov_b64 s[2:3], 0 ; CHECK-SDAG-NEXT: ;;#ASMEND +; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s2 +; CHECK-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13 ; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s1 -; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s2 -; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s3 -; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s0 -; CHECK-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s0 +; CHECK-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s1 +; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s3 +; CHECK-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; CHECK-SDAG-NEXT: s_endpgm ; ; 
CHECK-GISEL-LABEL: test_readfirstlane_copy_from_sgpr_f64: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll index 7ff5eb46def38..0795f4050b622 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll @@ -9,7 +9,7 @@ declare double @llvm.amdgcn.readlane.f64(double, i32) #0 define amdgpu_kernel void @test_readlane_sreg_sreg_i32(i32 %src0, i32 %src1) #1 { ; CHECK-SDAG-LABEL: test_readlane_sreg_sreg_i32: ; CHECK-SDAG: ; %bb.0: -; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 +; CHECK-SDAG-NEXT: s_load_dword s0, s[8:9], 0x0 ; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-SDAG-NEXT: ;;#ASMSTART ; CHECK-SDAG-NEXT: ; use s0 @@ -18,7 +18,7 @@ define amdgpu_kernel void @test_readlane_sreg_sreg_i32(i32 %src0, i32 %src1) #1 ; ; CHECK-GISEL-LABEL: test_readlane_sreg_sreg_i32: ; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 +; CHECK-GISEL-NEXT: s_load_dword s0, s[8:9], 0x0 ; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-GISEL-NEXT: ;;#ASMSTART ; CHECK-GISEL-NEXT: ; use s0 @@ -224,14 +224,13 @@ define amdgpu_kernel void @test_readlane_imm_sreg_i64(ptr addrspace(1) %out, i32 ; CHECK-GISEL-LABEL: test_readlane_imm_sreg_i64: ; CHECK-GISEL: ; %bb.0: ; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 -; CHECK-GISEL-NEXT: s_mov_b64 s[2:3], 32 ; CHECK-GISEL-NEXT: s_add_i32 s12, s12, s17 -; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, 32 ; CHECK-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13 +; CHECK-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 ; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1 -; CHECK-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 -; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s3 +; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s0 ; CHECK-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; CHECK-GISEL-NEXT: s_endpgm @@ -258,15 
+257,13 @@ define amdgpu_kernel void @test_readlane_imm_sreg_f64(ptr addrspace(1) %out, i32 ; CHECK-GISEL-LABEL: test_readlane_imm_sreg_f64: ; CHECK-GISEL: ; %bb.0: ; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 -; CHECK-GISEL-NEXT: s_mov_b32 s2, 0 ; CHECK-GISEL-NEXT: s_add_i32 s12, s12, s17 -; CHECK-GISEL-NEXT: s_mov_b32 s3, 0x40400000 -; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s2 -; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1 +; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13 ; CHECK-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 -; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s3 +; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1 +; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, 0x40400000 ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s0 ; CHECK-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; CHECK-GISEL-NEXT: s_endpgm @@ -660,17 +657,17 @@ define amdgpu_kernel void @test_readlane_copy_from_sgpr_i64(ptr addrspace(1) %ou ; CHECK-SDAG: ; %bb.0: ; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; CHECK-SDAG-NEXT: s_add_i32 s12, s12, s17 -; CHECK-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13 -; CHECK-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 ; CHECK-SDAG-NEXT: ;;#ASMSTART ; CHECK-SDAG-NEXT: s_mov_b64 s[2:3], 0 ; CHECK-SDAG-NEXT: ;;#ASMEND +; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s2 +; CHECK-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13 ; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s1 -; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s2 -; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s3 -; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s0 -; CHECK-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s0 +; CHECK-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s1 +; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s3 +; CHECK-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; CHECK-SDAG-NEXT: s_endpgm ; ; CHECK-GISEL-LABEL: 
test_readlane_copy_from_sgpr_i64: @@ -700,17 +697,17 @@ define amdgpu_kernel void @test_readlane_copy_from_sgpr_f64(ptr addrspace(1) %ou ; CHECK-SDAG: ; %bb.0: ; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; CHECK-SDAG-NEXT: s_add_i32 s12, s12, s17 -; CHECK-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13 -; CHECK-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 ; CHECK-SDAG-NEXT: ;;#ASMSTART ; CHECK-SDAG-NEXT: s_mov_b64 s[2:3], 0 ; CHECK-SDAG-NEXT: ;;#ASMEND +; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s2 +; CHECK-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13 ; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s1 -; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s2 -; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s3 -; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s0 -; CHECK-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s0 +; CHECK-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s1 +; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s3 +; CHECK-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; CHECK-SDAG-NEXT: s_endpgm ; ; CHECK-GISEL-LABEL: test_readlane_copy_from_sgpr_f64: diff --git a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll index 586579fcaeb93..ef96944abef0e 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll @@ -20,38 +20,33 @@ define void @test() { ; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: .LBB0_3: ; %bb.3 ; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: ; implicit-def: $sgpr4 -; CHECK-NEXT: v_mov_b32_e32 v0, s4 -; CHECK-NEXT: v_readfirstlane_b32 s6, v0 ; CHECK-NEXT: s_mov_b64 s[4:5], -1 -; CHECK-NEXT: s_mov_b32 s7, 0 -; CHECK-NEXT: s_cmp_eq_u32 s6, s7 ; CHECK-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane ; CHECK-NEXT: v_writelane_b32 v1, s4, 0 ; CHECK-NEXT: v_writelane_b32 v1, s5, 1 -; 
CHECK-NEXT: s_mov_b64 s[10:11], exec -; CHECK-NEXT: s_mov_b64 exec, -1 +; CHECK-NEXT: s_or_saveexec_b64 s[8:9], -1 +; CHECK-NEXT: s_nop 0 ; CHECK-NEXT: v_accvgpr_write_b32 a0, v1 ; Reload Reuse -; CHECK-NEXT: s_mov_b64 exec, s[10:11] +; CHECK-NEXT: s_mov_b64 exec, s[8:9] ; CHECK-NEXT: s_cbranch_scc1 .LBB0_5 ; CHECK-NEXT: ; %bb.4: ; %bb.4 ; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: s_or_saveexec_b64 s[10:11], -1 +; CHECK-NEXT: s_or_saveexec_b64 s[8:9], -1 ; CHECK-NEXT: v_accvgpr_read_b32 v1, a0 ; Reload Reuse -; CHECK-NEXT: s_mov_b64 exec, s[10:11] +; CHECK-NEXT: s_mov_b64 exec, s[8:9] ; CHECK-NEXT: s_mov_b64 s[4:5], 0 ; CHECK-NEXT: v_writelane_b32 v1, s4, 0 ; CHECK-NEXT: v_writelane_b32 v1, s5, 1 -; CHECK-NEXT: s_or_saveexec_b64 s[10:11], -1 +; CHECK-NEXT: s_or_saveexec_b64 s[8:9], -1 ; CHECK-NEXT: s_nop 0 ; CHECK-NEXT: v_accvgpr_write_b32 a0, v1 ; Reload Reuse -; CHECK-NEXT: s_mov_b64 exec, s[10:11] +; CHECK-NEXT: s_mov_b64 exec, s[8:9] ; CHECK-NEXT: .LBB0_5: ; %Flow ; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: s_or_saveexec_b64 s[10:11], -1 +; CHECK-NEXT: s_or_saveexec_b64 s[8:9], -1 ; CHECK-NEXT: s_nop 0 ; CHECK-NEXT: v_accvgpr_read_b32 v1, a0 ; Reload Reuse -; CHECK-NEXT: s_mov_b64 exec, s[10:11] +; CHECK-NEXT: s_mov_b64 exec, s[8:9] ; CHECK-NEXT: v_readlane_b32 s4, v1, 0 ; CHECK-NEXT: v_readlane_b32 s5, v1, 1 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll index 5aafb0f576fb4..364598f7cf6c0 100644 --- a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll +++ b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll @@ -31,8 +31,8 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sgpr_32 = COPY $sgpr10 ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sgpr_32 = COPY $sgpr8 ; CHECK-NEXT: undef [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub0_sub1:sgpr_128 = 
S_LOAD_DWORDX2_IMM [[COPY]], 232, 0 :: (invariant load (s64) from %ir.39, addrspace 4) - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %125:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: KILL undef %125:sgpr_128 + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %117:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: KILL undef %117:sgpr_128 ; CHECK-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY5]], 4, implicit-def dead $scc ; CHECK-NEXT: [[S_LSHL_B32_1:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 4, implicit-def dead $scc ; CHECK-NEXT: [[S_LSHL_B32_2:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 4, implicit-def dead $scc @@ -44,87 +44,85 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[S_SUB_I32_1:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 30, implicit-def dead $scc ; CHECK-NEXT: undef [[S_ADD_U32_:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_2]], implicit-def $scc ; CHECK-NEXT: [[S_ADD_U32_:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_]], 16, 0 :: (invariant load (s128) from %ir.81, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_]], 16, 0 :: (invariant load (s128) from %ir.71, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM1:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM undef %74:sreg_64, 0, 0 :: (invariant load (s128) from `ptr addrspace(4) poison`, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM2:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_]], 64, 0 :: (invariant load (s128) from %ir.88, addrspace 4) ; CHECK-NEXT: KILL undef %74:sreg_64 ; CHECK-NEXT: KILL [[S_ADD_U32_]].sub0, [[S_ADD_U32_]].sub1 ; CHECK-NEXT: 
[[S_BUFFER_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[S_LOAD_DWORDX4_IMM]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; CHECK-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = S_MOV_B32 0 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET undef %118:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], undef %89:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET undef %112:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], undef %87:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: KILL undef %89:sgpr_128 - ; CHECK-NEXT: KILL undef %118:sgpr_128 + ; CHECK-NEXT: KILL undef %112:sgpr_128 + ; CHECK-NEXT: KILL undef %87:sgpr_128 ; CHECK-NEXT: [[S_SUB_I32_2:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM1]], 31, implicit-def dead $scc ; CHECK-NEXT: undef [[S_ADD_U32_1:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_]], implicit-def $scc ; CHECK-NEXT: [[S_ADD_U32_1:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: undef [[S_ADD_U32_2:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_1]], implicit-def $scc ; CHECK-NEXT: 
[[S_ADD_U32_2:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_3:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_2]], implicit-def $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM2:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_1]], 64, 0 :: (invariant load (s128) from %ir.87, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM3:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_2]], 64, 0 :: (invariant load (s128) from %ir.93, addrspace 4) - ; CHECK-NEXT: KILL [[S_ADD_U32_1]].sub0, [[S_ADD_U32_1]].sub1 + ; CHECK-NEXT: [[S_ASHR_I32_3:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 undef %148:sreg_32, 31, implicit-def dead $scc + ; CHECK-NEXT: undef [[S_ADD_U32_3:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], undef %148:sreg_32, implicit-def $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM3:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_1]], 64, 0 :: (invariant load (s128) from %ir.77, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM4:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_2]], 64, 0 :: (invariant load (s128) from %ir.83, addrspace 4) ; CHECK-NEXT: KILL [[S_ADD_U32_2]].sub0, [[S_ADD_U32_2]].sub1 - ; CHECK-NEXT: [[S_ADD_U32_3:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_ASHR_I32_3:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 undef %169:sreg_32, 31, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_4:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], undef %169:sreg_32, implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_4:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_5:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_5:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; 
CHECK-NEXT: undef [[S_ADD_U32_6:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_1]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_6:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_7:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, undef %169:sreg_32, implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_7:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_8:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_2]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_8:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_9:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY8]], [[S_LSHL_B32_]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_9:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %48:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_10:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_1]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_10:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_11:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_2]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_11:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: KILL [[S_ADD_U32_1]].sub0, [[S_ADD_U32_1]].sub1 + ; CHECK-NEXT: [[S_ADD_U32_3:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_4:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_4:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_]], 
implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_5:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_1]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_5:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_6:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, undef %148:sreg_32, implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_6:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_7:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_2]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_7:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_8:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY8]], [[S_LSHL_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_8:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %48:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_9:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_1]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_9:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_10:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_2]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_10:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_]], 16, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_2]], 16, implicit-def dead $scc ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32)) - ; 
CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], undef %302:sreg_32, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], undef %279:sreg_32, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[S_MOV_B32_]], 16, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %357:sgpr_128, undef %358:sreg_32, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %368:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM4:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_3]], 64, 0 :: (invariant load (s128) from %ir.99, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM5:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_4]], 64, 0 :: (invariant load (s128) from %ir.107, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM6:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 0, 0 :: (invariant load (s128) from %ir.112, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM7:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_6]], 0, 0 :: (invariant load (s128) from %ir.117, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM8:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_7]], 0, 0 :: (invariant load (s128) from %ir.124, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM2]], 0, 0, 0, 0, 
implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %352:sgpr_128, [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %363:sgpr_128, [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM3]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %334:sgpr_128, undef %335:sreg_32, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %345:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM5:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_3]], 64, 0 :: (invariant load (s128) from %ir.95, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM6:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_4]], 0, 0 :: (invariant load (s128) from %ir.100, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM7:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 0, 0 :: (invariant load (s128) from %ir.105, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM8:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_6]], 0, 0 :: (invariant load (s128) from %ir.112, addrspace 4) + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %329:sgpr_128, [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %340:sgpr_128, [[S_ADD_I32_1]], 0, 0 :: 
(dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM3]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_ADD_I32_2:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM]], -98, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_3:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM1]], -114, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_4:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM2]], -130, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_5:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM2]], -178, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_12:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY10]], [[S_LSHL_B32_]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_12:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %42:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_13:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_13:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_14:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_1]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_14:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_15:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_2]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_15:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit 
$scc + ; CHECK-NEXT: undef [[S_ADD_U32_11:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY10]], [[S_LSHL_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_11:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %42:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_12:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_12:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_13:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_1]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_13:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_14:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_2]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_14:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_LSHL_B32_3:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY12]], 4, implicit-def dead $scc - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN4:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN4:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_ADD_I32_6:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_3]], 16, implicit-def dead $scc - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %384:sgpr_128, [[S_ADD_I32_6]], 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef 
%361:sgpr_128, [[S_ADD_I32_6]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN5:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM9:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 224, 0 :: (invariant load (s128) from %ir.129, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM10:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY7]], 224, 0 :: (invariant load (s128) from %ir.145, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM11:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 576, 0 :: (invariant load (s128) from %ir.150, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM9:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_4]], 224, 0 :: (invariant load (s128) from %ir.117, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM10:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY7]], 224, 0 :: (invariant load (s128) from %ir.133, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM11:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_4]], 576, 0 :: (invariant load (s128) from %ir.138, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN6:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM12:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_6]], 224, 0 :: (invariant load (s128) from %ir.134, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM13:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_7]], 576, 0 :: (invariant load (s128) from %ir.162, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM14:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_8]], 224, 0 :: (invariant load (s128) from %ir.140, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM12:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 
224, 0 :: (invariant load (s128) from %ir.122, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM13:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_6]], 576, 0 :: (invariant load (s128) from %ir.150, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM14:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_7]], 224, 0 :: (invariant load (s128) from %ir.128, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN7:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN8:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_ADD_I32_7:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM4]], -217, implicit-def dead $scc @@ -135,49 +133,49 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[S_ADD_I32_12:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -329, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_13:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -345, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_14:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM6]], -441, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_16:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY2]], [[S_LSHL_B32_2]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_16:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %36:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_15:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY2]], [[S_LSHL_B32_2]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_15:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %36:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_LSHL_B32_4:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY13]], 4, 
implicit-def dead $scc ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN9:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_ASHR_I32_4:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_4]], 31, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_17:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY2]], [[S_LSHL_B32_4]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_17:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %36:sreg_32, [[S_ASHR_I32_4]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_16:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY2]], [[S_LSHL_B32_4]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_16:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %36:sreg_32, [[S_ASHR_I32_4]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_LSHL_B32_5:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY5]], 3, implicit-def dead $scc ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN10:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_ASHR_I32_5:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_5]], 31, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_18:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_5]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_18:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_5]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_18]], 168, 0 :: (invariant load (s32) from %ir.273, align 8, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM15:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_8]], 576, 0 :: (invariant load (s128) from %ir.157, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_17:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_5]], 
implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_17:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_5]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_17]], 168, 0 :: (invariant load (s32) from %ir.260, align 8, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM15:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_7]], 576, 0 :: (invariant load (s128) from %ir.145, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN11:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN12:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN13:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM11]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub3:sgpr_128 = S_MOV_B32 553734060 ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub2:sgpr_128 = S_MOV_B32 -1 ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]] - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM16:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_9]], 0, 0 :: (invariant load (s128) from %ir.170, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM16:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_8]], 0, 0 :: (invariant load (s128) from %ir.158, addrspace 4) ; CHECK-NEXT: [[COPY15:%[0-9]+]].sub1:sgpr_128 = COPY [[S_MOV_B32_]].sub1 ; CHECK-NEXT: [[COPY15:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORD_IMM]] ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY15]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: 
[[BUFFER_LOAD_FORMAT_X_IDXEN14:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM15]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN15:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM13]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM17:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_10]], 0, 0 :: (invariant load (s128) from %ir.178, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM18:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_11]], 0, 0 :: (invariant load (s128) from %ir.183, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM17:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_9]], 0, 0 :: (invariant load (s128) from %ir.166, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM18:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_10]], 0, 0 :: (invariant load (s128) from %ir.171, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN16:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_LSHL_B32_6:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 3, implicit-def dead $scc ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_ASHR_I32_6:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_6]], 31, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_15:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM4]], -467, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_19:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_6]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_19:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, 
[[S_ASHR_I32_6]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_19]], 168, 0 :: (invariant load (s64) from %ir.282, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_18:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_6]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_18:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_6]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_18]], 168, 0 :: (invariant load (s64) from %ir.269, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM17]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM18]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_12]], 0, 0 :: (invariant load (s128) from %ir.205, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_13]], 0, 0 :: (invariant load (s128) from %ir.211, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_11]], 0, 0 :: (invariant load (s128) from %ir.193, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_12]], 0, 0 :: (invariant load (s128) from %ir.199, addrspace 4) ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]] - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM21:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_14]], 0, 0 :: (invariant load (s128) from %ir.216, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM22:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_15]], 0, 0 :: (invariant load 
(s128) from %ir.221, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM21:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_13]], 0, 0 :: (invariant load (s128) from %ir.204, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM22:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_14]], 0, 0 :: (invariant load (s128) from %ir.209, addrspace 4) ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORDX2_IMM1]].sub1, 65535, implicit-def dead $scc ; CHECK-NEXT: [[COPY16:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM1]].sub0 ; CHECK-NEXT: [[COPY16:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_]] @@ -189,30 +187,30 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN20:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_ASHR_I32_7:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_7]], 31, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_16:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM5]], -468, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_20:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_7]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_20:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_7]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_20]], 168, 0 :: (invariant load (s64) from %ir.293, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_19:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_7]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_19:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_7]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_19]], 168, 0 :: (invariant load (s64) from %ir.280, 
addrspace 4) ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]] ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORDX2_IMM2]].sub1, 65535, implicit-def dead $scc ; CHECK-NEXT: [[COPY17:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM2]].sub0 ; CHECK-NEXT: [[COPY17:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_1]] ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY17]], 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_16]], 160, 0 :: (invariant load (s128) from %ir.256, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %470:sreg_64, 0, 0 :: (invariant load (s32) from `ptr addrspace(4) poison`, addrspace 4) - ; CHECK-NEXT: KILL [[S_ADD_U32_16]].sub0, [[S_ADD_U32_16]].sub1 - ; CHECK-NEXT: KILL undef %470:sreg_64 + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_15]], 160, 0 :: (invariant load (s128) from %ir.244, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %443:sreg_64, 0, 0 :: (invariant load (s32) from `ptr addrspace(4) poison`, addrspace 4) + ; CHECK-NEXT: KILL [[S_ADD_U32_15]].sub0, [[S_ADD_U32_15]].sub1 ; CHECK-NEXT: KILL [[COPY17]].sub0_sub1_sub2, [[COPY17]].sub3 + ; CHECK-NEXT: KILL undef %443:sreg_64 ; CHECK-NEXT: [[S_LSHL_B32_8:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY14]], 3, implicit-def dead $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_17]], 160, 0 :: (invariant load (s128) from %ir.265, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_16]], 160, 0 :: (invariant load (s128) from %ir.252, addrspace 4) ; CHECK-NEXT: [[S_ASHR_I32_8:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_8]], 31, implicit-def dead $scc ; CHECK-NEXT: 
[[S_ADD_I32_17:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM6]], -469, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_21:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_8]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_21:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_8]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_21]], 168, 0 :: (invariant load (s32) from %ir.305, align 8, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_20:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_8]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_20:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_8]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_20]], 168, 0 :: (invariant load (s32) from %ir.291, align 8, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN21:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM23]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN22:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM24]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM24]] ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM23]] + ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM24]] ; CHECK-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORD_IMM1]], 65535, implicit-def dead $scc ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]] ; CHECK-NEXT: [[COPY18:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_2]] @@ -224,22 +222,22 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[S_ADD_I32_21:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], 
-507, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_22:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -539, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_23:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM7]], -473, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_22:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_22:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_22]], 96, 0 :: (invariant load (s128) from %ir.323, addrspace 4) - ; CHECK-NEXT: undef [[S_ADD_U32_23:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_1]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_23:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_23]], 96, 0 :: (invariant load (s128) from %ir.329, addrspace 4) - ; CHECK-NEXT: undef [[S_ADD_U32_24:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_2]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_24:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_24]], 96, 0 :: (invariant load (s128) from %ir.335, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_21:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_21:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_21]], 96, 0 :: (invariant load (s128) from %ir.309, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_22:%[0-9]+]].sub0:sreg_64 = 
S_ADD_U32 [[COPY1]], [[S_LSHL_B32_1]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_22:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_22]], 96, 0 :: (invariant load (s128) from %ir.315, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_23:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_2]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_23:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_23]], 96, 0 :: (invariant load (s128) from %ir.321, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN23:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM25]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN24:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM26]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN25:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM27]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM27]] ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM25]] - ; CHECK-NEXT: KILL [[V_MOV_B32_e32_]] ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM26]] + ; CHECK-NEXT: KILL [[V_MOV_B32_e32_]] + ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM27]] ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -2, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec ; CHECK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -1, [[BUFFER_LOAD_FORMAT_X_IDXEN1]], 0, implicit $exec ; CHECK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -3, 
[[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec @@ -351,13 +349,13 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[V_OR_B32_e64_64:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_63]], [[V_ADD_U32_e64_28]], implicit $exec ; CHECK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -593, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec ; CHECK-NEXT: [[V_OR_B32_e64_65:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_64]], [[V_ADD_U32_e64_29]], implicit $exec - ; CHECK-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %543:sreg_64, 0, 0 :: (invariant load (s256) from `ptr addrspace(4) poison`, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %516:sreg_64, 0, 0 :: (invariant load (s256) from `ptr addrspace(4) poison`, addrspace 4) ; CHECK-NEXT: [[V_OR_B32_e64_66:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_65]], [[V_ADD_U32_e64_30]], implicit $exec ; CHECK-NEXT: [[S_ADD_I32_24:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM8]], -594, implicit-def dead $scc ; CHECK-NEXT: [[V_OR_B32_e64_67:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_24]], [[V_OR_B32_e64_66]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 0, [[V_OR_B32_e64_67]], implicit $exec ; CHECK-NEXT: undef [[V_CNDMASK_B32_e64_:%[0-9]+]].sub3:vreg_128 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_EQ_U32_e64_]], implicit $exec - ; CHECK-NEXT: IMAGE_STORE_V4_V2_nsa_gfx10 [[V_CNDMASK_B32_e64_]], undef %557:vgpr_32, undef %559:vgpr_32, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8) + ; CHECK-NEXT: IMAGE_STORE_V4_V2_nsa_gfx10 [[V_CNDMASK_B32_e64_]], undef %530:vgpr_32, undef %532:vgpr_32, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 .expVert: %0 = extractelement <31 x i32> 
%userData, i64 2 diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll index ad8dcd3888e9f..21f0c008366a9 100644 --- a/llvm/test/CodeGen/AMDGPU/wqm.ll +++ b/llvm/test/CodeGen/AMDGPU/wqm.ll @@ -3477,13 +3477,10 @@ define amdgpu_gs void @wqm_init_exec_wwm() { ; GFX9-W64-NEXT: s_mov_b64 exec, 0 ; GFX9-W64-NEXT: s_mov_b32 s1, 0 ; GFX9-W64-NEXT: s_mov_b32 s0, s1 -; GFX9-W64-NEXT: s_cmp_lg_u64 exec, 0 -; GFX9-W64-NEXT: s_cselect_b64 s[2:3], -1, 0 -; GFX9-W64-NEXT: s_cmp_lg_u64 s[0:1], 0 +; GFX9-W64-NEXT: s_cmp_eq_u64 s[0:1], 0 ; GFX9-W64-NEXT: s_cselect_b64 s[0:1], -1, 0 -; GFX9-W64-NEXT: s_xor_b64 s[0:1], s[2:3], s[0:1] -; GFX9-W64-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s[0:1] -; GFX9-W64-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-W64-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-W64-NEXT: v_cndmask_b32_e64 v1, 0, 1.0, s[0:1] ; GFX9-W64-NEXT: exp mrt0 off, off, off, off ; GFX9-W64-NEXT: s_endpgm ; @@ -3491,14 +3488,11 @@ define amdgpu_gs void @wqm_init_exec_wwm() { ; GFX10-W32: ; %bb.0: ; GFX10-W32-NEXT: s_mov_b32 exec_lo, 0 ; GFX10-W32-NEXT: s_mov_b32 s1, 0 -; GFX10-W32-NEXT: s_cmp_lg_u64 exec, 0 +; GFX10-W32-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-W32-NEXT: s_mov_b32 s0, s1 -; GFX10-W32-NEXT: s_cselect_b32 s2, -1, 0 -; GFX10-W32-NEXT: s_cmp_lg_u64 s[0:1], 0 -; GFX10-W32-NEXT: v_mov_b32_e32 v1, 0 +; GFX10-W32-NEXT: s_cmp_eq_u64 s[0:1], 0 ; GFX10-W32-NEXT: s_cselect_b32 s0, -1, 0 -; GFX10-W32-NEXT: s_xor_b32 s0, s2, s0 -; GFX10-W32-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s0 +; GFX10-W32-NEXT: v_cndmask_b32_e64 v1, 0, 1.0, s0 ; GFX10-W32-NEXT: exp mrt0 off, off, off, off ; GFX10-W32-NEXT: s_endpgm call void @llvm.amdgcn.init.exec(i64 0) From 98ceb458f42ed05e2c3e9fb5bc75cd6b1df7a438 Mon Sep 17 00:00:00 2001 From: Slava Gurevich Date: Thu, 30 Oct 2025 00:25:10 -0700 Subject: [PATCH 03/21] [mlir] Fix use-after-move issues (#165660) This patch addresses two use-after-move issues: 1. `Timing.cpp` A variable was std::moved and then immediately passed to an `assert()` check. 
Since the moved-from state made the assertion condition trivially true, the check was effectively useless. The `assert()` is removed. 2. `Query.cpp` The `matcher` object was moved-from and then subsequently used as if it still retained valid state. The fix ensures no subsequent use for the moved-from variable. Testing: `ninja check-mlir` --- mlir/lib/Query/Query.cpp | 5 +++-- mlir/lib/Support/Timing.cpp | 1 - 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Query/Query.cpp b/mlir/lib/Query/Query.cpp index 375e82050a481..cf8a4d293299c 100644 --- a/mlir/lib/Query/Query.cpp +++ b/mlir/lib/Query/Query.cpp @@ -121,12 +121,13 @@ LogicalResult MatchQuery::run(llvm::raw_ostream &os, QuerySession &qs) const { Operation *rootOp = qs.getRootOp(); int matchCount = 0; matcher::MatchFinder finder; + + StringRef functionName = matcher.getFunctionName(); auto matches = finder.collectMatches(rootOp, std::move(matcher)); // An extract call is recognized by considering if the matcher has a name. // TODO: Consider making the extract more explicit. 
- if (matcher.hasFunctionName()) { - auto functionName = matcher.getFunctionName(); + if (!functionName.empty()) { std::vector flattenedMatches = finder.flattenMatchedOps(matches); Operation *function = diff --git a/mlir/lib/Support/Timing.cpp b/mlir/lib/Support/Timing.cpp index fb6f82c283df5..16306d72815f7 100644 --- a/mlir/lib/Support/Timing.cpp +++ b/mlir/lib/Support/Timing.cpp @@ -319,7 +319,6 @@ class TimerImpl { void mergeChildren(AsyncChildrenMap &&other) { for (auto &thread : other) { mergeChildren(std::move(thread.second)); - assert(thread.second.empty()); } other.clear(); } From 67db5fd739780ebcc592d5addacee88574ac319d Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Thu, 30 Oct 2025 09:36:25 +0100 Subject: [PATCH 04/21] [clang] Add Bytes/Columns types to TextDiagnostic (#165541) In `TextDiagnostic.cpp`, we're using column- and byte indices everywhere, but we were using integers for them which made it hard to know what to pass where, and what was produced. To make matters worse, what `SourceManager` considers a "column" is a byte in `TextDiagnostic`. Add `Bytes` and `Columns` structs, which are not related, so APIs using them can differentiate between values interpreted as columns or bytes.
--- clang/lib/Frontend/TextDiagnostic.cpp | 389 ++++++++++++++------------ 1 file changed, 217 insertions(+), 172 deletions(-) diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp index f5add2a941f72..c33d8f8ca9ebd 100644 --- a/clang/lib/Frontend/TextDiagnostic.cpp +++ b/clang/lib/Frontend/TextDiagnostic.cpp @@ -47,6 +47,43 @@ static constexpr raw_ostream::Colors CommentColor = raw_ostream::YELLOW; static constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN; static constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE; +namespace { +template class ColumnsOrBytes { +public: + int V = 0; + ColumnsOrBytes(int V) : V(V) {} + bool isValid() const { return V != -1; } + Sub next() const { return Sub(V + 1); } + Sub prev() const { return Sub(V - 1); } + + bool operator>(Sub O) const { return V > O.V; } + bool operator<(Sub O) const { return V < O.V; } + bool operator<=(Sub B) const { return V <= B.V; } + bool operator!=(Sub C) const { return C.V != V; } + + Sub operator+(Sub B) const { return Sub(V + B.V); } + Sub &operator+=(Sub B) { + V += B.V; + return *static_cast(this); + } + Sub operator-(Sub B) const { return Sub(V - B.V); } + Sub &operator-=(Sub B) { + V -= B.V; + return *static_cast(this); + } +}; + +class Bytes final : public ColumnsOrBytes { +public: + Bytes(int V) : ColumnsOrBytes(V) {} +}; + +class Columns final : public ColumnsOrBytes { +public: + Columns(int V) : ColumnsOrBytes(V) {} +}; +} // namespace + /// Add highlights to differences in template strings. 
static void applyTemplateHighlighting(raw_ostream &OS, StringRef Str, bool &Normal, bool Bold) { @@ -109,8 +146,8 @@ printableTextForNextCharacter(StringRef SourceLine, size_t *I, if (SourceLine[*I] == '\t') { assert(0 < TabStop && TabStop <= DiagnosticOptions::MaxTabStop && "Invalid -ftabstop value"); - unsigned Col = bytesSincePreviousTabOrLineBegin(SourceLine, *I); - unsigned NumSpaces = TabStop - (Col % TabStop); + unsigned LineBytes = bytesSincePreviousTabOrLineBegin(SourceLine, *I); + unsigned NumSpaces = TabStop - (LineBytes % TabStop); assert(0 < NumSpaces && NumSpaces <= TabStop && "Invalid computation of space amt"); ++(*I); @@ -220,33 +257,33 @@ static void expandTabs(std::string &SourceLine, unsigned TabStop) { /// (\\u3042 is represented in UTF-8 by three bytes and takes two columns to /// display) static void genColumnByteMapping(StringRef SourceLine, unsigned TabStop, - SmallVectorImpl &BytesOut, - SmallVectorImpl &ColumnsOut) { + SmallVectorImpl &BytesOut, + SmallVectorImpl &ColumnsOut) { assert(BytesOut.empty()); assert(ColumnsOut.empty()); if (SourceLine.empty()) { - BytesOut.resize(1u, 0); - ColumnsOut.resize(1u, 0); + BytesOut.resize(1u, Bytes(0)); + ColumnsOut.resize(1u, Columns(0)); return; } ColumnsOut.resize(SourceLine.size() + 1, -1); - int Columns = 0; + Columns NumColumns = 0; size_t I = 0; while (I < SourceLine.size()) { - ColumnsOut[I] = Columns; - BytesOut.resize(Columns + 1, -1); - BytesOut.back() = I; + ColumnsOut[I] = NumColumns; + BytesOut.resize(NumColumns.V + 1, -1); + BytesOut.back() = Bytes(I); auto [Str, Printable] = printableTextForNextCharacter(SourceLine, &I, TabStop); - Columns += llvm::sys::locale::columnWidth(Str); + NumColumns += Columns(llvm::sys::locale::columnWidth(Str)); } - ColumnsOut.back() = Columns; - BytesOut.resize(Columns + 1, -1); - BytesOut.back() = I; + ColumnsOut.back() = NumColumns; + BytesOut.resize(NumColumns.V + 1, -1); + BytesOut.back() = Bytes(I); } namespace { @@ -258,48 +295,52 @@ struct 
SourceColumnMap { assert(m_byteToColumn.size()==SourceLine.size()+1); assert(0 < m_byteToColumn.size() && 0 < m_columnToByte.size()); - assert(m_byteToColumn.size() - == static_cast(m_columnToByte.back()+1)); - assert(static_cast(m_byteToColumn.back()+1) - == m_columnToByte.size()); + assert(m_byteToColumn.size() == + static_cast(m_columnToByte.back().V + 1)); + assert(static_cast(m_byteToColumn.back().V + 1) == + m_columnToByte.size()); } - int columns() const { return m_byteToColumn.back(); } - int bytes() const { return m_columnToByte.back(); } + Columns columns() const { return m_byteToColumn.back(); } + Bytes bytes() const { return m_columnToByte.back(); } /// Map a byte to the column which it is at the start of, or return -1 /// if it is not at the start of a column (for a UTF-8 trailing byte). - int byteToColumn(int n) const { - assert(0<=n && n(m_byteToColumn.size())); - return m_byteToColumn[n]; + Columns byteToColumn(Bytes N) const { + assert(0 <= N.V && N.V < static_cast(m_byteToColumn.size())); + return m_byteToColumn[N.V]; } /// Map a byte to the first column which contains it. - int byteToContainingColumn(int N) const { - assert(0 <= N && N < static_cast(m_byteToColumn.size())); - while (m_byteToColumn[N] == -1) - --N; - return m_byteToColumn[N]; + Columns byteToContainingColumn(Bytes N) const { + assert(0 <= N.V && N.V < static_cast(m_byteToColumn.size())); + while (!m_byteToColumn[N.V].isValid()) + --N.V; + return m_byteToColumn[N.V]; } /// Map a column to the byte which starts the column, or return -1 if /// the column the second or subsequent column of an expanded tab or similar /// multi-column entity. - int columnToByte(int n) const { - assert(0<=n && n(m_columnToByte.size())); - return m_columnToByte[n]; + Bytes columnToByte(Columns N) const { + assert(0 <= N.V && N.V < static_cast(m_columnToByte.size())); + return m_columnToByte[N.V]; } /// Map from a byte index to the next byte which starts a column. 
- int startOfNextColumn(int N) const { - assert(0 <= N && N < static_cast(m_byteToColumn.size() - 1)); - while (byteToColumn(++N) == -1) {} + Bytes startOfNextColumn(Bytes N) const { + assert(0 <= N.V && N.V < static_cast(m_byteToColumn.size() - 1)); + N = N.next(); + while (!byteToColumn(N).isValid()) + N = N.next(); return N; } /// Map from a byte index to the previous byte which starts a column. - int startOfPreviousColumn(int N) const { - assert(0 < N && N < static_cast(m_byteToColumn.size())); - while (byteToColumn(--N) == -1) {} + Bytes startOfPreviousColumn(Bytes N) const { + assert(0 < N.V && N.V < static_cast(m_byteToColumn.size())); + N = N.prev(); + while (!byteToColumn(N).isValid()) + N = N.prev(); return N; } @@ -308,9 +349,9 @@ struct SourceColumnMap { } private: - const std::string m_SourceLine; - SmallVector m_byteToColumn; - SmallVector m_columnToByte; + StringRef m_SourceLine; + SmallVector m_byteToColumn; + SmallVector m_columnToByte; }; } // end anonymous namespace @@ -319,14 +360,15 @@ struct SourceColumnMap { static void selectInterestingSourceRegion(std::string &SourceLine, std::string &CaretLine, std::string &FixItInsertionLine, - unsigned Columns, + Columns NonGutterColumns, const SourceColumnMap &map) { - unsigned CaretColumns = CaretLine.size(); - unsigned FixItColumns = llvm::sys::locale::columnWidth(FixItInsertionLine); - unsigned MaxColumns = std::max(static_cast(map.columns()), - std::max(CaretColumns, FixItColumns)); + Columns CaretColumns = Columns(CaretLine.size()); + Columns FixItColumns = + Columns(llvm::sys::locale::columnWidth(FixItInsertionLine)); + Columns MaxColumns = + std::max({map.columns().V, CaretColumns.V, FixItColumns.V}); // if the number of columns is less than the desired number we're done - if (MaxColumns <= Columns) + if (MaxColumns <= NonGutterColumns) return; // No special characters are allowed in CaretLine. 
@@ -334,13 +376,13 @@ static void selectInterestingSourceRegion(std::string &SourceLine, // Find the slice that we need to display the full caret line // correctly. - unsigned CaretStart = 0, CaretEnd = CaretLine.size(); - for (; CaretStart != CaretEnd; ++CaretStart) - if (!isWhitespace(CaretLine[CaretStart])) + Columns CaretStart = 0, CaretEnd = CaretLine.size(); + for (; CaretStart != CaretEnd; CaretStart = CaretStart.next()) + if (!isWhitespace(CaretLine[CaretStart.V])) break; - for (; CaretEnd != CaretStart; --CaretEnd) - if (!isWhitespace(CaretLine[CaretEnd - 1])) + for (; CaretEnd != CaretStart; CaretEnd = CaretEnd.prev()) + if (!isWhitespace(CaretLine[CaretEnd.V - 1])) break; // caret has already been inserted into CaretLine so the above whitespace @@ -349,39 +391,38 @@ static void selectInterestingSourceRegion(std::string &SourceLine, // If we have a fix-it line, make sure the slice includes all of the // fix-it information. if (!FixItInsertionLine.empty()) { - unsigned FixItStart = 0, FixItEnd = FixItInsertionLine.size(); - for (; FixItStart != FixItEnd; ++FixItStart) - if (!isWhitespace(FixItInsertionLine[FixItStart])) - break; - - for (; FixItEnd != FixItStart; --FixItEnd) - if (!isWhitespace(FixItInsertionLine[FixItEnd - 1])) - break; - // We can safely use the byte offset FixItStart as the column offset // because the characters up until FixItStart are all ASCII whitespace // characters. 
- unsigned FixItStartCol = FixItStart; - unsigned FixItEndCol - = llvm::sys::locale::columnWidth(FixItInsertionLine.substr(0, FixItEnd)); - - CaretStart = std::min(FixItStartCol, CaretStart); - CaretEnd = std::max(FixItEndCol, CaretEnd); + Bytes FixItStart = 0; + Bytes FixItEnd = Bytes(FixItInsertionLine.size()); + while (FixItStart != FixItEnd && + isWhitespace(FixItInsertionLine[FixItStart.V])) + FixItStart = FixItStart.next(); + + while (FixItEnd != FixItStart && + isWhitespace(FixItInsertionLine[FixItEnd.V - 1])) + FixItEnd = FixItEnd.prev(); + + Columns FixItStartCol = Columns(FixItStart.V); + Columns FixItEndCol = Columns(llvm::sys::locale::columnWidth( + FixItInsertionLine.substr(0, FixItEnd.V))); + + CaretStart = std::min(FixItStartCol.V, CaretStart.V); + CaretEnd = std::max(FixItEndCol.V, CaretEnd.V); } // CaretEnd may have been set at the middle of a character // If it's not at a character's first column then advance it past the current // character. - while (static_cast(CaretEnd) < map.columns() && - -1 == map.columnToByte(CaretEnd)) - ++CaretEnd; - - assert((static_cast(CaretStart) > map.columns() || - -1!=map.columnToByte(CaretStart)) && - "CaretStart must not point to a column in the middle of a source" - " line character"); - assert((static_cast(CaretEnd) > map.columns() || - -1!=map.columnToByte(CaretEnd)) && + while (CaretEnd < map.columns() && !map.columnToByte(CaretEnd).isValid()) + CaretEnd = CaretEnd.next(); + + assert( + (CaretStart > map.columns() || map.columnToByte(CaretStart).isValid()) && + "CaretStart must not point to a column in the middle of a source" + " line character"); + assert((CaretEnd > map.columns() || map.columnToByte(CaretEnd).isValid()) && "CaretEnd must not point to a column in the middle of a source line" " character"); @@ -390,70 +431,70 @@ static void selectInterestingSourceRegion(std::string &SourceLine, // number of columns we have, try to grow the slice to encompass // more context. 
- unsigned SourceStart = map.columnToByte(std::min(CaretStart, - map.columns())); - unsigned SourceEnd = map.columnToByte(std::min(CaretEnd, - map.columns())); + Bytes SourceStart = map.columnToByte(std::min(CaretStart.V, map.columns().V)); + Bytes SourceEnd = map.columnToByte(std::min(CaretEnd.V, map.columns().V)); - unsigned CaretColumnsOutsideSource = CaretEnd-CaretStart - - (map.byteToColumn(SourceEnd)-map.byteToColumn(SourceStart)); + Columns CaretColumnsOutsideSource = + CaretEnd - CaretStart - + (map.byteToColumn(SourceEnd) - map.byteToColumn(SourceStart)); char const *front_ellipse = " ..."; char const *front_space = " "; char const *back_ellipse = "..."; - unsigned ellipses_space = strlen(front_ellipse) + strlen(back_ellipse); + Columns EllipsesColumns = + Columns(strlen(front_ellipse) + strlen(back_ellipse)); - unsigned TargetColumns = Columns; + Columns TargetColumns = Columns(NonGutterColumns); // Give us extra room for the ellipses // and any of the caret line that extends past the source - if (TargetColumns > ellipses_space+CaretColumnsOutsideSource) - TargetColumns -= ellipses_space+CaretColumnsOutsideSource; + if (TargetColumns > EllipsesColumns + CaretColumnsOutsideSource) + TargetColumns -= EllipsesColumns + CaretColumnsOutsideSource; - while (SourceStart>0 || SourceEnd 0 || SourceEnd < SourceLine.size()) { bool ExpandedRegion = false; - if (SourceStart>0) { - unsigned NewStart = map.startOfPreviousColumn(SourceStart); + if (SourceStart > 0) { + Bytes NewStart = map.startOfPreviousColumn(SourceStart); // Skip over any whitespace we see here; we're looking for // another bit of interesting text. // FIXME: Detect non-ASCII whitespace characters too. - while (NewStart && isWhitespace(SourceLine[NewStart])) + while (NewStart > 0 && isWhitespace(SourceLine[NewStart.V])) NewStart = map.startOfPreviousColumn(NewStart); // Skip over this bit of "interesting" text. 
- while (NewStart) { - unsigned Prev = map.startOfPreviousColumn(NewStart); - if (isWhitespace(SourceLine[Prev])) + while (NewStart > 0) { + Bytes Prev = map.startOfPreviousColumn(NewStart); + if (isWhitespace(SourceLine[Prev.V])) break; NewStart = Prev; } - assert(map.byteToColumn(NewStart) != -1); - unsigned NewColumns = map.byteToColumn(SourceEnd) - - map.byteToColumn(NewStart); + assert(map.byteToColumn(NewStart).isValid()); + Columns NewColumns = + map.byteToColumn(SourceEnd) - map.byteToColumn(NewStart); if (NewColumns <= TargetColumns) { SourceStart = NewStart; ExpandedRegion = true; } } - if (SourceEnd(SourceLine.size())}) - + map.byteToColumn(SourceEnd); + Columns FrontColumnsRemoved = CaretStart; + Columns ColumnsKept = CaretEnd - CaretStart; // We checked up front that the line needed truncation - assert(FrontColumnsRemoved+ColumnsKept+BackColumnsRemoved > Columns); + assert(FrontColumnsRemoved + ColumnsKept + BackColumnsRemoved > + NonGutterColumns); // The line needs some truncation, and we'd prefer to keep the front // if possible, so remove the back - if (BackColumnsRemoved > strlen(back_ellipse)) - SourceLine.replace(SourceEnd, std::string::npos, back_ellipse); + if (BackColumnsRemoved > Columns(strlen(back_ellipse))) + SourceLine.replace(SourceEnd.V, std::string::npos, back_ellipse); // If that's enough then we're done - if (FrontColumnsRemoved+ColumnsKept <= Columns) + if (FrontColumnsRemoved + ColumnsKept <= Columns(NonGutterColumns)) return; // Otherwise remove the front as well - if (FrontColumnsRemoved > strlen(front_ellipse)) { - SourceLine.replace(0, SourceStart, front_ellipse); - CaretLine.replace(0, CaretStart, front_space); + if (FrontColumnsRemoved > Columns(strlen(front_ellipse))) { + SourceLine.replace(0, SourceStart.V, front_ellipse); + CaretLine.replace(0, CaretStart.V, front_space); if (!FixItInsertionLine.empty()) - FixItInsertionLine.replace(0, CaretStart, front_space); + FixItInsertionLine.replace(0, CaretStart.V, front_space); } 
} @@ -961,41 +1004,40 @@ maybeAddRange(std::pair A, std::pair B, struct LineRange { unsigned LineNo; - unsigned StartCol; - unsigned EndCol; + Bytes StartByte; + Bytes EndByte; }; /// Highlight \p R (with ~'s) on the current source line. static void highlightRange(const LineRange &R, const SourceColumnMap &Map, std::string &CaretLine) { // Pick the first non-whitespace column. - unsigned StartColNo = R.StartCol; - while (StartColNo < Map.getSourceLine().size() && - (Map.getSourceLine()[StartColNo] == ' ' || - Map.getSourceLine()[StartColNo] == '\t')) - StartColNo = Map.startOfNextColumn(StartColNo); + Bytes StartByte = R.StartByte; + while (StartByte < Map.bytes() && (Map.getSourceLine()[StartByte.V] == ' ' || + Map.getSourceLine()[StartByte.V] == '\t')) + StartByte = Map.startOfNextColumn(StartByte); // Pick the last non-whitespace column. - unsigned EndColNo = - std::min(static_cast(R.EndCol), Map.getSourceLine().size()); - while (EndColNo && (Map.getSourceLine()[EndColNo - 1] == ' ' || - Map.getSourceLine()[EndColNo - 1] == '\t')) - EndColNo = Map.startOfPreviousColumn(EndColNo); + Bytes EndByte = std::min(R.EndByte.V, Map.bytes().V); + while (EndByte.V != 0 && (Map.getSourceLine()[EndByte.V - 1] == ' ' || + Map.getSourceLine()[EndByte.V - 1] == '\t')) + EndByte = Map.startOfPreviousColumn(EndByte); // If the start/end passed each other, then we are trying to highlight a // range that just exists in whitespace. That most likely means we have // a multi-line highlighting range that covers a blank line. - if (StartColNo > EndColNo) + if (StartByte > EndByte) return; + assert(StartByte <= EndByte && "Invalid range!"); // Fill the range with ~'s. 
- StartColNo = Map.byteToContainingColumn(StartColNo); - EndColNo = Map.byteToContainingColumn(EndColNo); + Columns StartCol = Map.byteToContainingColumn(StartByte); + Columns EndCol = Map.byteToContainingColumn(EndByte); + + if (CaretLine.size() < static_cast(EndCol.V)) + CaretLine.resize(EndCol.V, ' '); - assert(StartColNo <= EndColNo && "Invalid range!"); - if (CaretLine.size() < EndColNo) - CaretLine.resize(EndColNo, ' '); - std::fill(CaretLine.begin() + StartColNo, CaretLine.begin() + EndColNo, '~'); + std::fill(CaretLine.begin() + StartCol.V, CaretLine.begin() + EndCol.V, '~'); } static std::string buildFixItInsertionLine(FileID FID, unsigned LineNo, @@ -1006,7 +1048,7 @@ static std::string buildFixItInsertionLine(FileID FID, unsigned LineNo, std::string FixItInsertionLine; if (Hints.empty() || !DiagOpts.ShowFixits) return FixItInsertionLine; - unsigned PrevHintEndCol = 0; + Columns PrevHintEndCol = 0; for (const auto &H : Hints) { if (H.CodeToInsert.empty()) @@ -1024,12 +1066,13 @@ static std::string buildFixItInsertionLine(FileID FID, unsigned LineNo, // Note: When modifying this function, be very careful about what is a // "column" (printed width, platform-dependent) and what is a // "byte offset" (SourceManager "column"). 
- unsigned HintByteOffset = - SM.getColumnNumber(HintLocInfo.first, HintLocInfo.second) - 1; + Bytes HintByteOffset = + Bytes(SM.getColumnNumber(HintLocInfo.first, HintLocInfo.second)) + .prev(); // The hint must start inside the source or right at the end - assert(HintByteOffset < static_cast(map.bytes()) + 1); - unsigned HintCol = map.byteToContainingColumn(HintByteOffset); + assert(HintByteOffset < map.bytes().next()); + Columns HintCol = map.byteToContainingColumn(HintByteOffset); // If we inserted a long previous hint, push this one forwards, and add // an extra space to show that this is not part of the previous @@ -1043,11 +1086,11 @@ static std::string buildFixItInsertionLine(FileID FID, unsigned LineNo, // This should NOT use HintByteOffset, because the source might have // Unicode characters in earlier columns. - unsigned NewFixItLineSize = FixItInsertionLine.size() + - (HintCol - PrevHintEndCol) + - H.CodeToInsert.size(); + Columns NewFixItLineSize = Columns(FixItInsertionLine.size()) + + (HintCol - PrevHintEndCol) + + Columns(H.CodeToInsert.size()); if (NewFixItLineSize > FixItInsertionLine.size()) - FixItInsertionLine.resize(NewFixItLineSize, ' '); + FixItInsertionLine.resize(NewFixItLineSize.V, ' '); std::copy(H.CodeToInsert.begin(), H.CodeToInsert.end(), FixItInsertionLine.end() - H.CodeToInsert.size()); @@ -1095,28 +1138,29 @@ prepareAndFilterRanges(const SmallVectorImpl &Ranges, if (EndLineNo < Lines.first || SM.getFileID(End) != FID) continue; - unsigned StartColumn = SM.getExpansionColumnNumber(Begin); - unsigned EndColumn = SM.getExpansionColumnNumber(End); - assert(StartColumn && "StartColumn must be valid, 0 is invalid"); - assert(EndColumn && "EndColumn must be valid, 0 is invalid"); + Bytes StartByte = SM.getExpansionColumnNumber(Begin); + Bytes EndByte = SM.getExpansionColumnNumber(End); + assert(StartByte.V != 0 && "StartByte must be valid, 0 is invalid"); + assert(EndByte.V != 0 && "EndByte must be valid, 0 is invalid"); if 
(R.isTokenRange()) - EndColumn += Lexer::MeasureTokenLength(End, SM, LangOpts); + EndByte += Bytes(Lexer::MeasureTokenLength(End, SM, LangOpts)); // Only a single line. if (StartLineNo == EndLineNo) { - LineRanges.push_back({StartLineNo, StartColumn - 1, EndColumn - 1}); + LineRanges.push_back({StartLineNo, StartByte.prev(), EndByte.prev()}); continue; } // Start line. - LineRanges.push_back({StartLineNo, StartColumn - 1, ~0u}); + LineRanges.push_back( + {StartLineNo, StartByte.prev(), std::numeric_limits::max()}); // Middle lines. for (unsigned S = StartLineNo + 1; S != EndLineNo; ++S) - LineRanges.push_back({S, 0, ~0u}); + LineRanges.push_back({S, 0, std::numeric_limits::max()}); // End line. - LineRanges.push_back({EndLineNo, 0, EndColumn - 1}); + LineRanges.push_back({EndLineNo, 0, EndByte.prev()}); } return LineRanges; @@ -1226,8 +1270,7 @@ highlightLines(StringRef FileData, unsigned StartLineNumber, if (TokenStartLine > EndLineNumber) break; - unsigned StartCol = - SM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1; + Bytes StartCol = SM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1; if (Invalid) continue; @@ -1235,14 +1278,14 @@ highlightLines(StringRef FileData, unsigned StartLineNumber, if (TokenStartLine == TokenEndLine) { SmallVector &LineRanges = SnippetRanges[TokenStartLine - StartLineNumber]; - appendStyle(LineRanges, T, StartCol, T.getLength()); + appendStyle(LineRanges, T, StartCol.V, T.getLength()); continue; } assert((TokenEndLine - TokenStartLine) >= 1); // For tokens that span multiple lines (think multiline comments), we // divide them into multiple StyleRanges. 
- unsigned EndCol = SM.getSpellingColumnNumber(T.getEndLoc(), &Invalid) - 1; + Bytes EndCol = SM.getSpellingColumnNumber(T.getEndLoc(), &Invalid) - 1; if (Invalid) continue; @@ -1258,9 +1301,9 @@ highlightLines(StringRef FileData, unsigned StartLineNumber, SnippetRanges[L - StartLineNumber]; if (L == TokenStartLine) // First line - appendStyle(LineRanges, T, StartCol, LineLength); + appendStyle(LineRanges, T, StartCol.V, LineLength); else if (L == TokenEndLine) // Last line - appendStyle(LineRanges, T, 0, EndCol); + appendStyle(LineRanges, T, 0, EndCol.V); else appendStyle(LineRanges, T, 0, LineLength); } @@ -1315,11 +1358,11 @@ void TextDiagnostic::emitSnippetAndCaret( const char *BufEnd = BufStart + BufData.size(); unsigned CaretLineNo = Loc.getLineNumber(); - unsigned CaretColNo = Loc.getColumnNumber(); + Bytes CaretByte = Loc.getColumnNumber(); // Arbitrarily stop showing snippets when the line is too long. static const size_t MaxLineLengthToPrint = 4096; - if (CaretColNo > MaxLineLengthToPrint) + if (CaretByte > MaxLineLengthToPrint) return; // Find the set of lines to include. @@ -1379,35 +1422,37 @@ void TextDiagnostic::emitSnippetAndCaret( std::string SourceLine(LineStart, LineEnd); // Remove trailing null bytes. while (!SourceLine.empty() && SourceLine.back() == '\0' && - (LineNo != CaretLineNo || SourceLine.size() > CaretColNo)) + (LineNo != CaretLineNo || + SourceLine.size() > static_cast(CaretByte.V))) SourceLine.pop_back(); // Build the byte to column map. - const SourceColumnMap sourceColMap(SourceLine, DiagOpts.TabStop); + const SourceColumnMap SourceColMap(SourceLine, DiagOpts.TabStop); std::string CaretLine; // Highlight all of the characters covered by Ranges with ~ characters. for (const auto &LR : LineRanges) { if (LR.LineNo == LineNo) - highlightRange(LR, sourceColMap, CaretLine); + highlightRange(LR, SourceColMap, CaretLine); } // Next, insert the caret itself. 
if (CaretLineNo == LineNo) { - size_t Col = sourceColMap.byteToContainingColumn(CaretColNo - 1); - CaretLine.resize(std::max(Col + 1, CaretLine.size()), ' '); - CaretLine[Col] = '^'; + Columns Col = SourceColMap.byteToContainingColumn(CaretByte.prev()); + CaretLine.resize( + std::max(static_cast(Col.V) + 1, CaretLine.size()), ' '); + CaretLine[Col.V] = '^'; } std::string FixItInsertionLine = - buildFixItInsertionLine(FID, LineNo, sourceColMap, Hints, SM, DiagOpts); + buildFixItInsertionLine(FID, LineNo, SourceColMap, Hints, SM, DiagOpts); // If the source line is too long for our terminal, select only the // "interesting" source region within that line. - unsigned Columns = DiagOpts.MessageLength; - if (Columns) + Columns MessageLength = DiagOpts.MessageLength; + if (MessageLength.V != 0) selectInterestingSourceRegion(SourceLine, CaretLine, FixItInsertionLine, - Columns, sourceColMap); + MessageLength, SourceColMap); // If we are in -fdiagnostics-print-source-range-info mode, we are trying // to produce easily machine parsable output. Add a space before the From 44f5ae3eeca65661794f82cd5caa291ff8d6baf3 Mon Sep 17 00:00:00 2001 From: Valery Pykhtin Date: Thu, 30 Oct 2025 09:41:33 +0100 Subject: [PATCH 05/21] [utils][UpdateTestChecks] Extract MIR functionality into separate mir.py module (#165535) This commit extracts some MIR-related code from `common.py` and `update_mir_test_checks.py` into a dedicated `mir.py` module to improve code organization. This is a preparation step for https://github.com/llvm/llvm-project/pull/164965 and also moves some pieces already moved by https://github.com/llvm/llvm-project/pull/140296 All code intentionally moved verbatim with minimal necessary adaptations: * `log()` calls converted to `print(..., file=sys.stderr)` at `mir.py` lines 62, 64 due to a `log` locality. 
--- llvm/utils/UpdateTestChecks/common.py | 238 ------------ llvm/utils/UpdateTestChecks/mir.py | 362 ++++++++++++++++++ .../update_givaluetracking_test_checks.py | 3 +- llvm/utils/update_mir_test_checks.py | 121 +----- 4 files changed, 367 insertions(+), 357 deletions(-) create mode 100644 llvm/utils/UpdateTestChecks/mir.py diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py index 8cd200c93a482..b6b80ea117672 100644 --- a/llvm/utils/UpdateTestChecks/common.py +++ b/llvm/utils/UpdateTestChecks/common.py @@ -2396,244 +2396,6 @@ def add_analyze_checks( ) -IR_FUNC_NAME_RE = re.compile( - r"^\s*define\s+(?:internal\s+)?[^@]*@(?P[A-Za-z0-9_.]+)\s*\(" -) -IR_PREFIX_DATA_RE = re.compile(r"^ *(;|$)") -MIR_FUNC_NAME_RE = re.compile(r" *name: *(?P[A-Za-z0-9_.-]+)") -MIR_BODY_BEGIN_RE = re.compile(r" *body: *\|") -MIR_BASIC_BLOCK_RE = re.compile(r" *bb\.[0-9]+.*:$") -MIR_PREFIX_DATA_RE = re.compile(r"^ *(;|bb.[0-9].*: *$|[a-z]+:( |$)|$)") - - -def find_mir_functions_with_one_bb(lines, verbose=False): - result = [] - cur_func = None - bbs = 0 - for line in lines: - m = MIR_FUNC_NAME_RE.match(line) - if m: - if bbs == 1: - result.append(cur_func) - cur_func = m.group("func") - bbs = 0 - m = MIR_BASIC_BLOCK_RE.match(line) - if m: - bbs += 1 - if bbs == 1: - result.append(cur_func) - return result - - -def add_mir_checks_for_function( - test, - output_lines, - run_list, - func_dict, - func_name, - single_bb, - print_fixed_stack, - first_check_is_next, - at_the_function_name, -): - printed_prefixes = set() - for run in run_list: - for prefix in run[0]: - if prefix in printed_prefixes: - break - if not func_dict[prefix][func_name]: - continue - if printed_prefixes: - # Add some space between different check prefixes. 
- indent = len(output_lines[-1]) - len(output_lines[-1].lstrip(" ")) - output_lines.append(" " * indent + ";") - printed_prefixes.add(prefix) - add_mir_check_lines( - test, - output_lines, - prefix, - ("@" if at_the_function_name else "") + func_name, - single_bb, - func_dict[prefix][func_name], - print_fixed_stack, - first_check_is_next, - ) - break - else: - warn( - "Found conflicting asm for function: {}".format(func_name), - test_file=test, - ) - return output_lines - - -def add_mir_check_lines( - test, - output_lines, - prefix, - func_name, - single_bb, - func_info, - print_fixed_stack, - first_check_is_next, -): - func_body = str(func_info).splitlines() - if single_bb: - # Don't bother checking the basic block label for a single BB - func_body.pop(0) - - if not func_body: - warn( - "Function has no instructions to check: {}".format(func_name), - test_file=test, - ) - return - - first_line = func_body[0] - indent = len(first_line) - len(first_line.lstrip(" ")) - # A check comment, indented the appropriate amount - check = "{:>{}}; {}".format("", indent, prefix) - - output_lines.append("{}-LABEL: name: {}".format(check, func_name)) - - if print_fixed_stack: - output_lines.append("{}: fixedStack:".format(check)) - for stack_line in func_info.extrascrub.splitlines(): - filecheck_directive = check + "-NEXT" - output_lines.append("{}: {}".format(filecheck_directive, stack_line)) - - first_check = not first_check_is_next - for func_line in func_body: - if not func_line.strip(): - # The mir printer prints leading whitespace so we can't use CHECK-EMPTY: - output_lines.append(check + "-NEXT: {{" + func_line + "$}}") - continue - filecheck_directive = check if first_check else check + "-NEXT" - first_check = False - check_line = "{}: {}".format(filecheck_directive, func_line[indent:]).rstrip() - output_lines.append(check_line) - - -def should_add_mir_line_to_output(input_line, prefix_set): - # Skip any check lines that we're handling as well as comments - m = 
CHECK_RE.match(input_line) - if (m and m.group(1) in prefix_set) or input_line.strip() == ";": - return False - return True - - -def add_mir_checks( - input_lines, - prefix_set, - autogenerated_note, - test, - run_list, - func_dict, - print_fixed_stack, - first_check_is_next, - at_the_function_name, -): - simple_functions = find_mir_functions_with_one_bb(input_lines) - - output_lines = [] - output_lines.append(autogenerated_note) - - func_name = None - state = "toplevel" - for input_line in input_lines: - if input_line == autogenerated_note: - continue - - if state == "toplevel": - m = IR_FUNC_NAME_RE.match(input_line) - if m: - state = "ir function prefix" - func_name = m.group("func") - if input_line.rstrip("| \r\n") == "---": - state = "document" - output_lines.append(input_line) - elif state == "document": - m = MIR_FUNC_NAME_RE.match(input_line) - if m: - state = "mir function metadata" - func_name = m.group("func") - if input_line.strip() == "...": - state = "toplevel" - func_name = None - if should_add_mir_line_to_output(input_line, prefix_set): - output_lines.append(input_line) - elif state == "mir function metadata": - if should_add_mir_line_to_output(input_line, prefix_set): - output_lines.append(input_line) - m = MIR_BODY_BEGIN_RE.match(input_line) - if m: - if func_name in simple_functions: - # If there's only one block, put the checks inside it - state = "mir function prefix" - continue - state = "mir function body" - add_mir_checks_for_function( - test, - output_lines, - run_list, - func_dict, - func_name, - single_bb=False, - print_fixed_stack=print_fixed_stack, - first_check_is_next=first_check_is_next, - at_the_function_name=at_the_function_name, - ) - elif state == "mir function prefix": - m = MIR_PREFIX_DATA_RE.match(input_line) - if not m: - state = "mir function body" - add_mir_checks_for_function( - test, - output_lines, - run_list, - func_dict, - func_name, - single_bb=True, - print_fixed_stack=print_fixed_stack, - 
first_check_is_next=first_check_is_next, - at_the_function_name=at_the_function_name, - ) - - if should_add_mir_line_to_output(input_line, prefix_set): - output_lines.append(input_line) - elif state == "mir function body": - if input_line.strip() == "...": - state = "toplevel" - func_name = None - if should_add_mir_line_to_output(input_line, prefix_set): - output_lines.append(input_line) - elif state == "ir function prefix": - m = IR_PREFIX_DATA_RE.match(input_line) - if not m: - state = "ir function body" - add_mir_checks_for_function( - test, - output_lines, - run_list, - func_dict, - func_name, - single_bb=False, - print_fixed_stack=print_fixed_stack, - first_check_is_next=first_check_is_next, - at_the_function_name=at_the_function_name, - ) - - if should_add_mir_line_to_output(input_line, prefix_set): - output_lines.append(input_line) - elif state == "ir function body": - if input_line.strip() == "}": - state = "toplevel" - func_name = None - if should_add_mir_line_to_output(input_line, prefix_set): - output_lines.append(input_line) - return output_lines - - def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes, ginfo): for nameless_value in ginfo.get_nameless_values(): if nameless_value.global_ir_rhs_regexp is None: diff --git a/llvm/utils/UpdateTestChecks/mir.py b/llvm/utils/UpdateTestChecks/mir.py new file mode 100644 index 0000000000000..24bb8b341d335 --- /dev/null +++ b/llvm/utils/UpdateTestChecks/mir.py @@ -0,0 +1,362 @@ +"""MIR test utility functions for UpdateTestChecks scripts.""" + +import re +import sys +from UpdateTestChecks import common +from UpdateTestChecks.common import ( + CHECK_RE, + warn, +) + +IR_FUNC_NAME_RE = re.compile( + r"^\s*define\s+(?:internal\s+)?[^@]*@(?P[A-Za-z0-9_.]+)\s*\(" +) +IR_PREFIX_DATA_RE = re.compile(r"^ *(;|$)") +MIR_FUNC_NAME_RE = re.compile(r" *name: *(?P[A-Za-z0-9_.-]+)") +MIR_BODY_BEGIN_RE = re.compile(r" *body: *\|") +MIR_BASIC_BLOCK_RE = re.compile(r" *bb\.[0-9]+.*:$") +MIR_PREFIX_DATA_RE = 
re.compile(r"^ *(;|bb.[0-9].*: *$|[a-z]+:( |$)|$)") + +VREG_RE = re.compile(r"(%[0-9]+)(?:\.[a-z0-9_]+)?(?::[a-z0-9_]+)?(?:\([<>a-z0-9 ]+\))?") +MI_FLAGS_STR = ( + r"(frame-setup |frame-destroy |nnan |ninf |nsz |arcp |contract |afn " + r"|reassoc |nuw |nsw |exact |nofpexcept |nomerge |unpredictable " + r"|noconvergent |nneg |disjoint |nusw |samesign |inbounds )*" +) +VREG_DEF_FLAGS_STR = r"(?:dead |undef )*" + +# Pattern to match the defined vregs and the opcode of an instruction that +# defines vregs. Opcodes starting with a lower-case 't' are allowed to match +# ARM's thumb instructions, like tADDi8 and t2ADDri. +VREG_DEF_RE = re.compile( + r"^ *(?P{2}{0}(?:, {2}{0})*) = " + r"{1}(?P[A-Zt][A-Za-z0-9_]+)".format( + VREG_RE.pattern, MI_FLAGS_STR, VREG_DEF_FLAGS_STR + ) +) + +MIR_FUNC_RE = re.compile( + r"^---$" + r"\n" + r"^ *name: *(?P[A-Za-z0-9_.-]+)$" + r".*?" + r"(?:^ *fixedStack: *(\[\])? *\n" + r"(?P.*?)\n?" + r"^ *stack:" + r".*?)?" + r"^ *body: *\|\n" + r"(?P.*?)\n" + r"^\.\.\.$", + flags=(re.M | re.S), +) + + +def build_function_info_dictionary( + test, raw_tool_output, triple, prefixes, func_dict, verbose +): + for m in MIR_FUNC_RE.finditer(raw_tool_output): + func = m.group("func") + fixedStack = m.group("fixedStack") + body = m.group("body") + if verbose: + print("Processing function: {}".format(func), file=sys.stderr) + for l in body.splitlines(): + print(" {}".format(l), file=sys.stderr) + + # Vreg mangling + mangled = [] + vreg_map = {} + for func_line in body.splitlines(keepends=True): + m = VREG_DEF_RE.match(func_line) + if m: + for vreg in VREG_RE.finditer(m.group("vregs")): + if vreg.group(1) in vreg_map: + name = vreg_map[vreg.group(1)] + else: + name = mangle_vreg(m.group("opcode"), vreg_map.values()) + vreg_map[vreg.group(1)] = name + func_line = func_line.replace( + vreg.group(1), "[[{}:%[0-9]+]]".format(name), 1 + ) + for number, name in vreg_map.items(): + func_line = re.sub( + r"{}\b".format(number), "[[{}]]".format(name), func_line + ) + 
mangled.append(func_line) + body = "".join(mangled) + + for prefix in prefixes: + info = common.function_body( + body, fixedStack, None, None, None, None, ginfo=None + ) + if func in func_dict[prefix]: + if ( + not func_dict[prefix][func] + or func_dict[prefix][func].scrub != info.scrub + or func_dict[prefix][func].extrascrub != info.extrascrub + ): + func_dict[prefix][func] = None + else: + func_dict[prefix][func] = info + + +def mangle_vreg(opcode, current_names): + base = opcode + # Simplify some common prefixes and suffixes + if opcode.startswith("G_"): + base = base[len("G_") :] + if opcode.endswith("_PSEUDO"): + base = base[: len("_PSEUDO")] + # Shorten some common opcodes with long-ish names + base = dict( + IMPLICIT_DEF="DEF", + GLOBAL_VALUE="GV", + CONSTANT="C", + FCONSTANT="C", + MERGE_VALUES="MV", + UNMERGE_VALUES="UV", + INTRINSIC="INT", + INTRINSIC_W_SIDE_EFFECTS="INT", + INSERT_VECTOR_ELT="IVEC", + EXTRACT_VECTOR_ELT="EVEC", + SHUFFLE_VECTOR="SHUF", + ).get(base, base) + # Avoid ambiguity when opcodes end in numbers + if len(base.rstrip("0123456789")) < len(base): + base += "_" + + i = 0 + for name in current_names: + if name.rstrip("0123456789") == base: + i += 1 + if i: + return "{}{}".format(base, i) + return base + + +def find_mir_functions_with_one_bb(lines, verbose=False): + result = [] + cur_func = None + bbs = 0 + for line in lines: + m = MIR_FUNC_NAME_RE.match(line) + if m: + if bbs == 1: + result.append(cur_func) + cur_func = m.group("func") + bbs = 0 + m = MIR_BASIC_BLOCK_RE.match(line) + if m: + bbs += 1 + if bbs == 1: + result.append(cur_func) + return result + + +def add_mir_checks_for_function( + test, + output_lines, + run_list, + func_dict, + func_name, + single_bb, + print_fixed_stack, + first_check_is_next, + at_the_function_name, +): + printed_prefixes = set() + for run in run_list: + for prefix in run[0]: + if prefix in printed_prefixes: + break + if not func_dict[prefix][func_name]: + continue + if printed_prefixes: + # Add some 
space between different check prefixes. + indent = len(output_lines[-1]) - len(output_lines[-1].lstrip(" ")) + output_lines.append(" " * indent + ";") + printed_prefixes.add(prefix) + add_mir_check_lines( + test, + output_lines, + prefix, + ("@" if at_the_function_name else "") + func_name, + single_bb, + func_dict[prefix][func_name], + print_fixed_stack, + first_check_is_next, + ) + break + else: + warn( + "Found conflicting asm for function: {}".format(func_name), + test_file=test, + ) + return output_lines + + +def add_mir_check_lines( + test, + output_lines, + prefix, + func_name, + single_bb, + func_info, + print_fixed_stack, + first_check_is_next, +): + func_body = str(func_info).splitlines() + if single_bb: + # Don't bother checking the basic block label for a single BB + func_body.pop(0) + + if not func_body: + warn( + "Function has no instructions to check: {}".format(func_name), + test_file=test, + ) + return + + first_line = func_body[0] + indent = len(first_line) - len(first_line.lstrip(" ")) + # A check comment, indented the appropriate amount + check = "{:>{}}; {}".format("", indent, prefix) + + output_lines.append("{}-LABEL: name: {}".format(check, func_name)) + + if print_fixed_stack: + output_lines.append("{}: fixedStack:".format(check)) + for stack_line in func_info.extrascrub.splitlines(): + filecheck_directive = check + "-NEXT" + output_lines.append("{}: {}".format(filecheck_directive, stack_line)) + + first_check = not first_check_is_next + for func_line in func_body: + if not func_line.strip(): + # The mir printer prints leading whitespace so we can't use CHECK-EMPTY: + output_lines.append(check + "-NEXT: {{" + func_line + "$}}") + continue + filecheck_directive = check if first_check else check + "-NEXT" + first_check = False + check_line = "{}: {}".format(filecheck_directive, func_line[indent:]).rstrip() + output_lines.append(check_line) + + +def should_add_mir_line_to_output(input_line, prefix_set): + # Skip any check lines that we're 
handling as well as comments + m = CHECK_RE.match(input_line) + if (m and m.group(1) in prefix_set) or input_line.strip() == ";": + return False + return True + + +def add_mir_checks( + input_lines, + prefix_set, + autogenerated_note, + test, + run_list, + func_dict, + print_fixed_stack, + first_check_is_next, + at_the_function_name, +): + simple_functions = find_mir_functions_with_one_bb(input_lines) + + output_lines = [] + output_lines.append(autogenerated_note) + + func_name = None + state = "toplevel" + for input_line in input_lines: + if input_line == autogenerated_note: + continue + + if state == "toplevel": + m = IR_FUNC_NAME_RE.match(input_line) + if m: + state = "ir function prefix" + func_name = m.group("func") + if input_line.rstrip("| \r\n") == "---": + state = "document" + output_lines.append(input_line) + elif state == "document": + m = MIR_FUNC_NAME_RE.match(input_line) + if m: + state = "mir function metadata" + func_name = m.group("func") + if input_line.strip() == "...": + state = "toplevel" + func_name = None + if should_add_mir_line_to_output(input_line, prefix_set): + output_lines.append(input_line) + elif state == "mir function metadata": + if should_add_mir_line_to_output(input_line, prefix_set): + output_lines.append(input_line) + m = MIR_BODY_BEGIN_RE.match(input_line) + if m: + if func_name in simple_functions: + # If there's only one block, put the checks inside it + state = "mir function prefix" + continue + state = "mir function body" + add_mir_checks_for_function( + test, + output_lines, + run_list, + func_dict, + func_name, + single_bb=False, + print_fixed_stack=print_fixed_stack, + first_check_is_next=first_check_is_next, + at_the_function_name=at_the_function_name, + ) + elif state == "mir function prefix": + m = MIR_PREFIX_DATA_RE.match(input_line) + if not m: + state = "mir function body" + add_mir_checks_for_function( + test, + output_lines, + run_list, + func_dict, + func_name, + single_bb=True, + 
print_fixed_stack=print_fixed_stack, + first_check_is_next=first_check_is_next, + at_the_function_name=at_the_function_name, + ) + + if should_add_mir_line_to_output(input_line, prefix_set): + output_lines.append(input_line) + elif state == "mir function body": + if input_line.strip() == "...": + state = "toplevel" + func_name = None + if should_add_mir_line_to_output(input_line, prefix_set): + output_lines.append(input_line) + elif state == "ir function prefix": + m = IR_PREFIX_DATA_RE.match(input_line) + if not m: + state = "ir function body" + add_mir_checks_for_function( + test, + output_lines, + run_list, + func_dict, + func_name, + single_bb=False, + print_fixed_stack=print_fixed_stack, + first_check_is_next=first_check_is_next, + at_the_function_name=at_the_function_name, + ) + + if should_add_mir_line_to_output(input_line, prefix_set): + output_lines.append(input_line) + elif state == "ir function body": + if input_line.strip() == "}": + state = "toplevel" + func_name = None + if should_add_mir_line_to_output(input_line, prefix_set): + output_lines.append(input_line) + return output_lines diff --git a/llvm/utils/update_givaluetracking_test_checks.py b/llvm/utils/update_givaluetracking_test_checks.py index 49b068ac7bef0..9ad0f3ec9ad1c 100755 --- a/llvm/utils/update_givaluetracking_test_checks.py +++ b/llvm/utils/update_givaluetracking_test_checks.py @@ -19,6 +19,7 @@ import sys from UpdateTestChecks import common +from UpdateTestChecks import mir VT_FUNCTION_RE = re.compile( r"\s*name:\s*@(?P[A-Za-z0-9_-]+)" @@ -92,7 +93,7 @@ def update_test(ti: common.TestInfo): func_dict = builder.finish_and_get_func_dict() prefix_set = set([prefix for p in run_list for prefix in p[0]]) common.debug("Rewriting FileCheck prefixes:", str(prefix_set)) - output_lines = common.add_mir_checks( + output_lines = mir.add_mir_checks( ti.input_lines, prefix_set, ti.test_autogenerated_note, diff --git a/llvm/utils/update_mir_test_checks.py b/llvm/utils/update_mir_test_checks.py index 
c4ee0523a6469..ba70249db28e6 100755 --- a/llvm/utils/update_mir_test_checks.py +++ b/llvm/utils/update_mir_test_checks.py @@ -31,39 +31,7 @@ import sys from UpdateTestChecks import common - -VREG_RE = re.compile(r"(%[0-9]+)(?:\.[a-z0-9_]+)?(?::[a-z0-9_]+)?(?:\([<>a-z0-9 ]+\))?") -MI_FLAGS_STR = ( - r"(frame-setup |frame-destroy |nnan |ninf |nsz |arcp |contract |afn " - r"|reassoc |nuw |nsw |exact |nofpexcept |nomerge |unpredictable " - r"|noconvergent |nneg |disjoint |nusw |samesign |inbounds )*" -) -VREG_DEF_FLAGS_STR = r"(?:dead |undef )*" - -# Pattern to match the defined vregs and the opcode of an instruction that -# defines vregs. Opcodes starting with a lower-case 't' are allowed to match -# ARM's thumb instructions, like tADDi8 and t2ADDri. -VREG_DEF_RE = re.compile( - r"^ *(?P{2}{0}(?:, {2}{0})*) = " - r"{1}(?P[A-Zt][A-Za-z0-9_]+)".format( - VREG_RE.pattern, MI_FLAGS_STR, VREG_DEF_FLAGS_STR - ) -) - -MIR_FUNC_RE = re.compile( - r"^---$" - r"\n" - r"^ *name: *(?P[A-Za-z0-9_.-]+)$" - r".*?" - r"(?:^ *fixedStack: *(\[\])? *\n" - r"(?P.*?)\n?" - r"^ *stack:" - r".*?)?" 
- r"^ *body: *\|\n" - r"(?P.*?)\n" - r"^\.\.\.$", - flags=(re.M | re.S), -) +from UpdateTestChecks import mir class LLC: @@ -143,89 +111,6 @@ def build_run_list(test, run_lines, verbose=False): return run_list -def build_function_info_dictionary( - test, raw_tool_output, triple, prefixes, func_dict, verbose -): - for m in MIR_FUNC_RE.finditer(raw_tool_output): - func = m.group("func") - fixedStack = m.group("fixedStack") - body = m.group("body") - if verbose: - log("Processing function: {}".format(func)) - for l in body.splitlines(): - log(" {}".format(l)) - - # Vreg mangling - mangled = [] - vreg_map = {} - for func_line in body.splitlines(keepends=True): - m = VREG_DEF_RE.match(func_line) - if m: - for vreg in VREG_RE.finditer(m.group("vregs")): - if vreg.group(1) in vreg_map: - name = vreg_map[vreg.group(1)] - else: - name = mangle_vreg(m.group("opcode"), vreg_map.values()) - vreg_map[vreg.group(1)] = name - func_line = func_line.replace( - vreg.group(1), "[[{}:%[0-9]+]]".format(name), 1 - ) - for number, name in vreg_map.items(): - func_line = re.sub( - r"{}\b".format(number), "[[{}]]".format(name), func_line - ) - mangled.append(func_line) - body = "".join(mangled) - - for prefix in prefixes: - info = common.function_body( - body, fixedStack, None, None, None, None, ginfo=None - ) - if func in func_dict[prefix]: - if ( - not func_dict[prefix][func] - or func_dict[prefix][func].scrub != info.scrub - or func_dict[prefix][func].extrascrub != info.extrascrub - ): - func_dict[prefix][func] = None - else: - func_dict[prefix][func] = info - - -def mangle_vreg(opcode, current_names): - base = opcode - # Simplify some common prefixes and suffixes - if opcode.startswith("G_"): - base = base[len("G_") :] - if opcode.endswith("_PSEUDO"): - base = base[: len("_PSEUDO")] - # Shorten some common opcodes with long-ish names - base = dict( - IMPLICIT_DEF="DEF", - GLOBAL_VALUE="GV", - CONSTANT="C", - FCONSTANT="C", - MERGE_VALUES="MV", - UNMERGE_VALUES="UV", - INTRINSIC="INT", 
- INTRINSIC_W_SIDE_EFFECTS="INT", - INSERT_VECTOR_ELT="IVEC", - EXTRACT_VECTOR_ELT="EVEC", - SHUFFLE_VECTOR="SHUF", - ).get(base, base) - # Avoid ambiguity when opcodes end in numbers - if len(base.rstrip("0123456789")) < len(base): - base += "_" - - i = 0 - for name in current_names: - if name.rstrip("0123456789") == base: - i += 1 - if i: - return "{}{}".format(base, i) - return base - - def update_test_file(args, test, autogenerated_note): with open(test) as fd: input_lines = [l.rstrip() for l in fd] @@ -247,7 +132,7 @@ def update_test_file(args, test, autogenerated_note): common.warn("No triple found: skipping file", test_file=test) return - build_function_info_dictionary( + mir.build_function_info_dictionary( test, raw_tool_output, triple_in_cmd or triple_in_ir, @@ -259,7 +144,7 @@ def update_test_file(args, test, autogenerated_note): prefix_set = set([prefix for run in run_list for prefix in run[0]]) log("Rewriting FileCheck prefixes: {}".format(prefix_set), args.verbose) - output_lines = common.add_mir_checks( + output_lines = mir.add_mir_checks( input_lines, prefix_set, autogenerated_note, From e7605426e4001e6c19984c4ae4b6691fd06ce139 Mon Sep 17 00:00:00 2001 From: Vlad Serebrennikov Date: Thu, 30 Oct 2025 11:51:25 +0300 Subject: [PATCH 06/21] [clang] Update C++ DR status page --- clang/www/cxx_dr_status.html | 260 +++++++++++++++++++++++++++++++---- 1 file changed, 232 insertions(+), 28 deletions(-) diff --git a/clang/www/cxx_dr_status.html b/clang/www/cxx_dr_status.html index b7da22cf9fb22..ae9b28ee625cd 100755 --- a/clang/www/cxx_dr_status.html +++ b/clang/www/cxx_dr_status.html @@ -3113,11 +3113,11 @@

C++ defect report implementation status

Default initialization of POD classes? N/A - + 511 - open + NAD POD-structs with template assignment operators - Not resolved + Unknown 512 @@ -10895,7 +10895,7 @@

C++ defect report implementation status

1845 - drafting + review Point of instantiation of a variable template specialization Not resolved @@ -12081,7 +12081,7 @@

C++ defect report implementation status

2042 - drafting + review Exceptions and deallocation functions Not resolved @@ -12335,7 +12335,7 @@

C++ defect report implementation status

2084 CD4 NSDMIs and deleted union default constructors - Unknown + Clang 3.1 2085 @@ -12837,7 +12837,7 @@

C++ defect report implementation status

2168 - open + review Narrowing conversions and +/- infinity Not resolved @@ -14237,11 +14237,11 @@

C++ defect report implementation status

Constexpr virtual functions and temporary objects Unknown - + 2401 - drafting + C++20 Array decay vs prohibition of subobject non-type arguments - Not resolved + Unknown 2402 @@ -15171,7 +15171,7 @@

C++ defect report implementation status

2555 - drafting + tentatively ready Ineffective redeclaration prevention for using-declarators Not resolved @@ -15311,23 +15311,23 @@

C++ defect report implementation status

Undefined behavior for preprocessing directives in macro arguments Not resolved - + 2578 - open + CD7 Undefined behavior when creating an invalid string literal via stringizing - Not resolved + Unknown - + 2579 - open + CD7 Undefined behavior when token pasting does not create a preprocessing token - Not resolved + Unknown - + 2580 - open + CD7 Undefined behavior with #line - Not resolved + Unknown 2581 @@ -17104,7 +17104,7 @@

C++ defect report implementation status

2875 - review + tentatively ready Missing support for round-tripping null pointer values through indirection/address operators Not resolved @@ -17400,7 +17400,7 @@

C++ defect report implementation status

2923 - review + tentatively ready Note about infinite loops and execution steps Not resolved @@ -17760,7 +17760,7 @@

C++ defect report implementation status

2983 - open + review Non-type template parameters are not variables Not resolved @@ -17868,7 +17868,7 @@

C++ defect report implementation status

3001 - review + tentatively ready Inconsistent restrictions for static_cast on pointers to out-of-lifetime objects Not resolved @@ -17932,7 +17932,7 @@

C++ defect report implementation status

3011 - open + tentatively ready Parenthesized aggregate initialization for new-expressions Not resolved @@ -17992,7 +17992,7 @@

C++ defect report implementation status

3021 - open + drafting Subsumption rules for fold expanded constraints Not resolved @@ -18058,7 +18058,7 @@

C++ defect report implementation status

3032 - open + tentatively ready Template argument disambiguation Not resolved @@ -18184,7 +18184,7 @@

C++ defect report implementation status

3053 - open + tentatively ready Allowing #undef likely Not resolved @@ -18265,6 +18265,210 @@

C++ defect report implementation status

tentatively ready Declarative nested-name-specifier in explicit instantiation Not resolved + + + 3067 + open + Array-to-pointer conversion with object type mismatch + Not resolved + + + 3068 + open + Access checking in friends involving qualified-ids + Not resolved + + + 3069 + open + Reference to wrong placeholder + Not resolved + + + 3070 + open + Trivial assignment can skip member subobjects + Not resolved + + + 3071 + open + Negative tuple_size in structured bindings + Not resolved + + + 3072 + open + Incorrect examples for lambda SFINAE + Not resolved + + + 3073 + open + Dependence of R on T2 is unclear + Not resolved + + + 3074 + tentatively ready + Redundant ill-formedness for module macros + Not resolved + + + 3075 + tentatively ready + Unclear matching of import directive + Not resolved + + + 3076 + tentatively ready + Remove unnecessary IFNDR for malformed header-name-tokens + Not resolved + + + 3077 + tentatively ready + Undesirable formation of import directive with string-literal + Not resolved + + + 3078 + review + Different treatment of #include pp-tokens and header-name-tokens + Not resolved + + + 3079 + open + Allow empty-declarations in anonymous unions + Not resolved + + + 3080 + tentatively ready + Clarify kinds of permitted template template arguments + Not resolved + + + 3081 + review + Require glvalue when splicing direct base class relationship + Not resolved + + + 3082 + tentatively ready + Allow for call-compatible function types in reinterpret_cast + Not resolved + + + 3083 + tentatively ready + Remove redundant restrictions on class and enum definitions + Not resolved + + + 3084 + tentatively ready + compound-statements inside iteration-statements + Not resolved + + + 3085 + tentatively ready + Apply restriction inside for-range-declaration + Not resolved + + + 3086 + tentatively ready + Destringizing should consider all sorts of encoding-prefixes + Not resolved + + + 3087 + open + Destringizing for raw string literals + Not resolved + + 
+ 3088 + open + Clarify macro treatment of identifiers with special meaning + Not resolved + + + 3089 + tentatively ready + const-default-constructible improperly handles std::meta::info + Not resolved + + + 3090 + tentatively ready + Internal linkage from header units + Not resolved + + + 3091 + review + Linking of translation units as sequences of tokens + Not resolved + + + 3092 + tentatively ready + base-specifiers are not "declared" + Not resolved + + + 3093 + open + Missing integration of direct base class relationships + Not resolved + + + 3094 + review + Rework phases for string literal concatenation and token formation + Not resolved + + + 3095 + open + Type-dependent packs that are not structured binding packs + Not resolved + + + 3096 + open + Value-dependence of size of structured binding pack with non-dependent initializer + Not resolved + + + 3097 + tentatively ready + Lambda expression introduces a scope + Not resolved + + + 3098 + tentatively ready + Remove redundancy "names or designates" + Not resolved + + + 3099 + open + Instantiation of type aliases from alias templates is unspecified + Not resolved + + + 3100 + open + Destruction order for objects with static storage duration + Not resolved From 31890c5370040beb5e6dfdeef14206e6fa733c8c Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 30 Oct 2025 09:00:36 +0000 Subject: [PATCH 07/21] [AArch64][GlobalISel] Add some GISel test coverage for icmp-and tests. 
NFC --- llvm/test/CodeGen/AArch64/arm64-srl-and.ll | 42 +- ...st-and-by-const-from-lshr-in-eqcmp-zero.ll | 414 ++++++++++++----- ...ist-and-by-const-from-shl-in-eqcmp-zero.ll | 424 ++++++++++++----- llvm/test/CodeGen/AArch64/signbit-test.ll | 22 +- .../AArch64/signed-truncation-check.ll | 434 ++++++++++++------ 5 files changed, 942 insertions(+), 394 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/arm64-srl-and.ll b/llvm/test/CodeGen/AArch64/arm64-srl-and.ll index b58f6ba96a5b8..330f27bd6c0cd 100644 --- a/llvm/test/CodeGen/AArch64/arm64-srl-and.ll +++ b/llvm/test/CodeGen/AArch64/arm64-srl-and.ll @@ -1,22 +1,38 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -O3 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -O3 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64-linux-gnu -O3 -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI ; This used to miscompile: ; The 16-bit -1 should not become 32-bit -1 (sub w8, w8, #1). 
@g = global i16 0, align 4 define i32 @srl_and() { -; CHECK-LABEL: srl_and: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x8, :got:g -; CHECK-NEXT: mov w9, #50 -; CHECK-NEXT: ldr x8, [x8, :got_lo12:g] -; CHECK-NEXT: ldrh w8, [x8] -; CHECK-NEXT: eor w8, w8, w9 -; CHECK-NEXT: mov w9, #65535 -; CHECK-NEXT: add w8, w8, w9 -; CHECK-NEXT: and w0, w8, w8, lsr #16 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: srl_and: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: adrp x8, :got:g +; CHECK-SD-NEXT: mov w9, #50 // =0x32 +; CHECK-SD-NEXT: ldr x8, [x8, :got_lo12:g] +; CHECK-SD-NEXT: ldrh w8, [x8] +; CHECK-SD-NEXT: eor w8, w8, w9 +; CHECK-SD-NEXT: mov w9, #65535 // =0xffff +; CHECK-SD-NEXT: add w8, w8, w9 +; CHECK-SD-NEXT: and w0, w8, w8, lsr #16 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: srl_and: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: adrp x8, :got:g +; CHECK-GI-NEXT: mov w9, #50 // =0x32 +; CHECK-GI-NEXT: ldr x8, [x8, :got_lo12:g] +; CHECK-GI-NEXT: ldrh w8, [x8] +; CHECK-GI-NEXT: eor w8, w8, w9 +; CHECK-GI-NEXT: mov w9, #65535 // =0xffff +; CHECK-GI-NEXT: add w8, w9, w8, uxth +; CHECK-GI-NEXT: and w9, w8, #0xffff +; CHECK-GI-NEXT: cmp w8, w9 +; CHECK-GI-NEXT: cset w8, ne +; CHECK-GI-NEXT: and w0, w9, w8 +; CHECK-GI-NEXT: ret entry: %0 = load i16, ptr @g, align 4 %1 = xor i16 %0, 50 @@ -29,3 +45,5 @@ entry: ret i32 %and } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll index c3fdc7db2abbe..8438f0b03179c 100644 --- a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll +++ b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-unknown-unknown < %s | FileCheck %s --check-prefix=CHECK +; RUN: llc -mtriple=aarch64-unknown-unknown < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64-unknown-unknown -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI ; We are looking for the following pattern here: ; (X & (C l>> Y)) ==/!= 0 @@ -13,12 +14,21 @@ ; i8 scalar define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind { -; CHECK-LABEL: scalar_i8_signbit_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, w1 -; CHECK-NEXT: tst w8, #0x80 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scalar_i8_signbit_eq: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: lsl w8, w0, w1 +; CHECK-SD-NEXT: tst w8, #0x80 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scalar_i8_signbit_eq: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #128 // =0x80 +; CHECK-GI-NEXT: and w9, w1, #0xff +; CHECK-GI-NEXT: lsr w8, w8, w9 +; CHECK-GI-NEXT: tst w8, w0 +; CHECK-GI-NEXT: cset w0, eq +; CHECK-GI-NEXT: ret %t0 = lshr i8 128, %y %t1 = and i8 %t0, %x %res = icmp eq i8 %t1, 0 @@ -26,12 +36,21 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind { } define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind { -; CHECK-LABEL: scalar_i8_lowestbit_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, w1 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scalar_i8_lowestbit_eq: +; CHECK-SD: // %bb.0: +; 
CHECK-SD-NEXT: lsl w8, w0, w1 +; CHECK-SD-NEXT: tst w8, #0x1 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scalar_i8_lowestbit_eq: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #1 // =0x1 +; CHECK-GI-NEXT: and w9, w1, #0xff +; CHECK-GI-NEXT: lsr w8, w8, w9 +; CHECK-GI-NEXT: tst w8, w0 +; CHECK-GI-NEXT: cset w0, eq +; CHECK-GI-NEXT: ret %t0 = lshr i8 1, %y %t1 = and i8 %t0, %x %res = icmp eq i8 %t1, 0 @@ -39,12 +58,21 @@ define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind { } define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind { -; CHECK-LABEL: scalar_i8_bitsinmiddle_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, w1 -; CHECK-NEXT: tst w8, #0x18 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scalar_i8_bitsinmiddle_eq: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: lsl w8, w0, w1 +; CHECK-SD-NEXT: tst w8, #0x18 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scalar_i8_bitsinmiddle_eq: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #24 // =0x18 +; CHECK-GI-NEXT: and w9, w1, #0xff +; CHECK-GI-NEXT: lsr w8, w8, w9 +; CHECK-GI-NEXT: tst w8, w0 +; CHECK-GI-NEXT: cset w0, eq +; CHECK-GI-NEXT: ret %t0 = lshr i8 24, %y %t1 = and i8 %t0, %x %res = icmp eq i8 %t1, 0 @@ -54,12 +82,21 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind { ; i16 scalar define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind { -; CHECK-LABEL: scalar_i16_signbit_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, w1 -; CHECK-NEXT: tst w8, #0x8000 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scalar_i16_signbit_eq: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: lsl w8, w0, w1 +; CHECK-SD-NEXT: tst w8, #0x8000 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scalar_i16_signbit_eq: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #32768 // =0x8000 +; CHECK-GI-NEXT: and w9, w1, #0xffff +; CHECK-GI-NEXT: lsr w8, w8, w9 +; CHECK-GI-NEXT: tst w8, w0 +; CHECK-GI-NEXT: cset 
w0, eq +; CHECK-GI-NEXT: ret %t0 = lshr i16 32768, %y %t1 = and i16 %t0, %x %res = icmp eq i16 %t1, 0 @@ -67,12 +104,21 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind { } define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind { -; CHECK-LABEL: scalar_i16_lowestbit_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, w1 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scalar_i16_lowestbit_eq: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: lsl w8, w0, w1 +; CHECK-SD-NEXT: tst w8, #0x1 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scalar_i16_lowestbit_eq: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #1 // =0x1 +; CHECK-GI-NEXT: and w9, w1, #0xffff +; CHECK-GI-NEXT: lsr w8, w8, w9 +; CHECK-GI-NEXT: tst w8, w0 +; CHECK-GI-NEXT: cset w0, eq +; CHECK-GI-NEXT: ret %t0 = lshr i16 1, %y %t1 = and i16 %t0, %x %res = icmp eq i16 %t1, 0 @@ -80,12 +126,21 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind { } define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind { -; CHECK-LABEL: scalar_i16_bitsinmiddle_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, w1 -; CHECK-NEXT: tst w8, #0xff0 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scalar_i16_bitsinmiddle_eq: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: lsl w8, w0, w1 +; CHECK-SD-NEXT: tst w8, #0xff0 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scalar_i16_bitsinmiddle_eq: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #4080 // =0xff0 +; CHECK-GI-NEXT: and w9, w1, #0xffff +; CHECK-GI-NEXT: lsr w8, w8, w9 +; CHECK-GI-NEXT: tst w8, w0 +; CHECK-GI-NEXT: cset w0, eq +; CHECK-GI-NEXT: ret %t0 = lshr i16 4080, %y %t1 = and i16 %t0, %x %res = icmp eq i16 %t1, 0 @@ -95,12 +150,20 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind { ; i32 scalar define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind { -; CHECK-LABEL: scalar_i32_signbit_eq: -; CHECK: // %bb.0: -; 
CHECK-NEXT: lsl w8, w0, w1 -; CHECK-NEXT: tst w8, #0x80000000 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scalar_i32_signbit_eq: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: lsl w8, w0, w1 +; CHECK-SD-NEXT: tst w8, #0x80000000 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scalar_i32_signbit_eq: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #-2147483648 // =0x80000000 +; CHECK-GI-NEXT: lsr w8, w8, w1 +; CHECK-GI-NEXT: tst w8, w0 +; CHECK-GI-NEXT: cset w0, eq +; CHECK-GI-NEXT: ret %t0 = lshr i32 2147483648, %y %t1 = and i32 %t0, %x %res = icmp eq i32 %t1, 0 @@ -108,12 +171,20 @@ define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind { } define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind { -; CHECK-LABEL: scalar_i32_lowestbit_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, w1 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scalar_i32_lowestbit_eq: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: lsl w8, w0, w1 +; CHECK-SD-NEXT: tst w8, #0x1 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scalar_i32_lowestbit_eq: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #1 // =0x1 +; CHECK-GI-NEXT: lsr w8, w8, w1 +; CHECK-GI-NEXT: tst w8, w0 +; CHECK-GI-NEXT: cset w0, eq +; CHECK-GI-NEXT: ret %t0 = lshr i32 1, %y %t1 = and i32 %t0, %x %res = icmp eq i32 %t1, 0 @@ -121,12 +192,20 @@ define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind { } define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind { -; CHECK-LABEL: scalar_i32_bitsinmiddle_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, w1 -; CHECK-NEXT: tst w8, #0xffff00 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scalar_i32_bitsinmiddle_eq: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: lsl w8, w0, w1 +; CHECK-SD-NEXT: tst w8, #0xffff00 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scalar_i32_bitsinmiddle_eq: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: 
mov w8, #16776960 // =0xffff00 +; CHECK-GI-NEXT: lsr w8, w8, w1 +; CHECK-GI-NEXT: tst w8, w0 +; CHECK-GI-NEXT: cset w0, eq +; CHECK-GI-NEXT: ret %t0 = lshr i32 16776960, %y %t1 = and i32 %t0, %x %res = icmp eq i32 %t1, 0 @@ -136,12 +215,20 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind { ; i64 scalar define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind { -; CHECK-LABEL: scalar_i64_signbit_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: lsl x8, x0, x1 -; CHECK-NEXT: tst x8, #0x8000000000000000 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scalar_i64_signbit_eq: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: lsl x8, x0, x1 +; CHECK-SD-NEXT: tst x8, #0x8000000000000000 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scalar_i64_signbit_eq: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 +; CHECK-GI-NEXT: lsr x8, x8, x1 +; CHECK-GI-NEXT: tst x8, x0 +; CHECK-GI-NEXT: cset w0, eq +; CHECK-GI-NEXT: ret %t0 = lshr i64 9223372036854775808, %y %t1 = and i64 %t0, %x %res = icmp eq i64 %t1, 0 @@ -149,12 +236,20 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind { } define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind { -; CHECK-LABEL: scalar_i64_lowestbit_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: lsl x8, x0, x1 -; CHECK-NEXT: tst x8, #0x1 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scalar_i64_lowestbit_eq: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: lsl x8, x0, x1 +; CHECK-SD-NEXT: tst x8, #0x1 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scalar_i64_lowestbit_eq: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #1 // =0x1 +; CHECK-GI-NEXT: lsr x8, x8, x1 +; CHECK-GI-NEXT: tst x8, x0 +; CHECK-GI-NEXT: cset w0, eq +; CHECK-GI-NEXT: ret %t0 = lshr i64 1, %y %t1 = and i64 %t0, %x %res = icmp eq i64 %t1, 0 @@ -162,12 +257,20 @@ define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind { } define i1 
@scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind { -; CHECK-LABEL: scalar_i64_bitsinmiddle_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: lsl x8, x0, x1 -; CHECK-NEXT: tst x8, #0xffffffff0000 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scalar_i64_bitsinmiddle_eq: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: lsl x8, x0, x1 +; CHECK-SD-NEXT: tst x8, #0xffffffff0000 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scalar_i64_bitsinmiddle_eq: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #281474976645120 // =0xffffffff0000 +; CHECK-GI-NEXT: lsr x8, x8, x1 +; CHECK-GI-NEXT: tst x8, x0 +; CHECK-GI-NEXT: cset w0, eq +; CHECK-GI-NEXT: ret %t0 = lshr i64 281474976645120, %y %t1 = and i64 %t0, %x %res = icmp eq i64 %t1, 0 @@ -179,14 +282,24 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind { ;------------------------------------------------------------------------------; define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind { -; CHECK-LABEL: vec_4xi32_splat_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #1 -; CHECK-NEXT: ushl v0.4s, v0.4s, v1.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 -; CHECK-NEXT: xtn v0.4h, v0.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: vec_4xi32_splat_eq: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: movi v2.4s, #1 +; CHECK-SD-NEXT: ushl v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-SD-NEXT: cmeq v0.4s, v0.4s, #0 +; CHECK-SD-NEXT: xtn v0.4h, v0.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: vec_4xi32_splat_eq: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v2.4s, #1 +; CHECK-GI-NEXT: neg v1.4s, v1.4s +; CHECK-GI-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-GI-NEXT: cmeq v0.4s, v0.4s, #0 +; CHECK-GI-NEXT: xtn v0.4h, v0.4s +; CHECK-GI-NEXT: ret %t0 = lshr <4 x i32> , %y %t1 = and <4 x i32> %t0, %x %res = icmp eq <4 x i32> %t1, @@ -211,44 +324,86 @@ define <4 x i1> 
@vec_4xi32_nonsplat_eq(<4 x i32> %x, <4 x i32> %y) nounwind { } define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwind { -; CHECK-LABEL: vec_4xi32_nonsplat_undef0_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #1 -; CHECK-NEXT: ushl v0.4s, v0.4s, v1.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 -; CHECK-NEXT: xtn v0.4h, v0.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: vec_4xi32_nonsplat_undef0_eq: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: movi v2.4s, #1 +; CHECK-SD-NEXT: ushl v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-SD-NEXT: cmeq v0.4s, v0.4s, #0 +; CHECK-SD-NEXT: xtn v0.4h, v0.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: vec_4xi32_nonsplat_undef0_eq: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #1 // =0x1 +; CHECK-GI-NEXT: neg v1.4s, v1.4s +; CHECK-GI-NEXT: fmov s2, w8 +; CHECK-GI-NEXT: mov v2.s[1], w8 +; CHECK-GI-NEXT: mov v2.s[3], w8 +; CHECK-GI-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-GI-NEXT: cmeq v0.4s, v0.4s, #0 +; CHECK-GI-NEXT: xtn v0.4h, v0.4s +; CHECK-GI-NEXT: ret %t0 = lshr <4 x i32> , %y %t1 = and <4 x i32> %t0, %x %res = icmp eq <4 x i32> %t1, ret <4 x i1> %res } define <4 x i1> @vec_4xi32_nonsplat_undef1_eq(<4 x i32> %x, <4 x i32> %y) nounwind { -; CHECK-LABEL: vec_4xi32_nonsplat_undef1_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #1 -; CHECK-NEXT: neg v1.4s, v1.4s -; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s -; CHECK-NEXT: and v0.16b, v1.16b, v0.16b -; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 -; CHECK-NEXT: xtn v0.4h, v0.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: vec_4xi32_nonsplat_undef1_eq: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: movi v2.4s, #1 +; CHECK-SD-NEXT: neg v1.4s, v1.4s +; CHECK-SD-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-SD-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-SD-NEXT: cmeq v0.4s, v0.4s, #0 +; CHECK-SD-NEXT: xtn v0.4h, v0.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: vec_4xi32_nonsplat_undef1_eq: 
+; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d2, #0000000000000000 +; CHECK-GI-NEXT: movi v3.4s, #1 +; CHECK-GI-NEXT: neg v1.4s, v1.4s +; CHECK-GI-NEXT: mov v2.s[1], wzr +; CHECK-GI-NEXT: ushl v1.4s, v3.4s, v1.4s +; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-GI-NEXT: mov v2.s[3], wzr +; CHECK-GI-NEXT: cmeq v0.4s, v0.4s, v2.4s +; CHECK-GI-NEXT: xtn v0.4h, v0.4s +; CHECK-GI-NEXT: ret %t0 = lshr <4 x i32> , %y %t1 = and <4 x i32> %t0, %x %res = icmp eq <4 x i32> %t1, ret <4 x i1> %res } define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwind { -; CHECK-LABEL: vec_4xi32_nonsplat_undef2_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #1 -; CHECK-NEXT: neg v1.4s, v1.4s -; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s -; CHECK-NEXT: and v0.16b, v1.16b, v0.16b -; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 -; CHECK-NEXT: xtn v0.4h, v0.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: vec_4xi32_nonsplat_undef2_eq: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: movi v2.4s, #1 +; CHECK-SD-NEXT: neg v1.4s, v1.4s +; CHECK-SD-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-SD-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-SD-NEXT: cmeq v0.4s, v0.4s, #0 +; CHECK-SD-NEXT: xtn v0.4h, v0.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: vec_4xi32_nonsplat_undef2_eq: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #1 // =0x1 +; CHECK-GI-NEXT: movi d2, #0000000000000000 +; CHECK-GI-NEXT: neg v1.4s, v1.4s +; CHECK-GI-NEXT: fmov s3, w8 +; CHECK-GI-NEXT: mov v3.s[1], w8 +; CHECK-GI-NEXT: mov v2.s[1], wzr +; CHECK-GI-NEXT: mov v3.s[3], w8 +; CHECK-GI-NEXT: mov v2.s[3], wzr +; CHECK-GI-NEXT: ushl v1.4s, v3.4s, v1.4s +; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-GI-NEXT: cmeq v0.4s, v0.4s, v2.4s +; CHECK-GI-NEXT: xtn v0.4h, v0.4s +; CHECK-GI-NEXT: ret %t0 = lshr <4 x i32> , %y %t1 = and <4 x i32> %t0, %x %res = icmp eq <4 x i32> %t1, @@ -260,11 +415,20 @@ define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwi 
;------------------------------------------------------------------------------; define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind { -; CHECK-LABEL: scalar_i8_signbit_ne: -; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, w1 -; CHECK-NEXT: ubfx w0, w8, #7, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scalar_i8_signbit_ne: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: lsl w8, w0, w1 +; CHECK-SD-NEXT: ubfx w0, w8, #7, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scalar_i8_signbit_ne: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #128 // =0x80 +; CHECK-GI-NEXT: and w9, w1, #0xff +; CHECK-GI-NEXT: lsr w8, w8, w9 +; CHECK-GI-NEXT: tst w8, w0 +; CHECK-GI-NEXT: cset w0, ne +; CHECK-GI-NEXT: ret %t0 = lshr i8 128, %y %t1 = and i8 %t0, %x %res = icmp ne i8 %t1, 0 ; we are perfectly happy with 'ne' predicate @@ -315,14 +479,24 @@ define i1 @scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind { } define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind { -; CHECK-LABEL: scalar_i8_signbit_eq_with_nonzero: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #128 // =0x80 -; CHECK-NEXT: lsr w8, w8, w1 -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: cmp w8, #1 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scalar_i8_signbit_eq_with_nonzero: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, #128 // =0x80 +; CHECK-SD-NEXT: lsr w8, w8, w1 +; CHECK-SD-NEXT: and w8, w8, w0 +; CHECK-SD-NEXT: cmp w8, #1 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scalar_i8_signbit_eq_with_nonzero: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #128 // =0x80 +; CHECK-GI-NEXT: and w9, w1, #0xff +; CHECK-GI-NEXT: lsr w8, w8, w9 +; CHECK-GI-NEXT: and w8, w8, w0 +; CHECK-GI-NEXT: cmp w8, #1 +; CHECK-GI-NEXT: cset w0, eq +; CHECK-GI-NEXT: ret %t0 = lshr i8 128, %y %t1 = and i8 %t0, %x %res = icmp eq i8 %t1, 1 ; should be comparing with 0 diff --git a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll 
b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll index 4a73b10811d29..cc1bf27b8d4b7 100644 --- a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll +++ b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-unknown-unknown < %s | FileCheck %s --check-prefix=CHECK +; RUN: llc -mtriple=aarch64-unknown-unknown < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64-unknown-unknown -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI ; We are looking for the following pattern here: ; (X & (C << Y)) ==/!= 0 @@ -13,13 +14,23 @@ ; i8 scalar define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind { -; CHECK-LABEL: scalar_i8_signbit_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: lsr w8, w8, w1 -; CHECK-NEXT: tst w8, #0x80 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scalar_i8_signbit_eq: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: and w8, w0, #0xff +; CHECK-SD-NEXT: lsr w8, w8, w1 +; CHECK-SD-NEXT: tst w8, #0x80 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scalar_i8_signbit_eq: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #-128 // =0xffffff80 +; CHECK-GI-NEXT: and w9, w1, #0xff +; CHECK-GI-NEXT: lsl w8, w8, w9 +; CHECK-GI-NEXT: and w8, w8, w0 +; CHECK-GI-NEXT: tst w8, #0xff +; CHECK-GI-NEXT: cset w0, eq +; CHECK-GI-NEXT: ret %t0 = shl i8 128, %y %t1 = and i8 %t0, %x %res = icmp eq i8 %t1, 0 @@ -27,13 +38,23 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind { } define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind { -; CHECK-LABEL: scalar_i8_lowestbit_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: lsr w8, w8, w1 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scalar_i8_lowestbit_eq: +; CHECK-SD: // %bb.0: 
+; CHECK-SD-NEXT: and w8, w0, #0xff +; CHECK-SD-NEXT: lsr w8, w8, w1 +; CHECK-SD-NEXT: tst w8, #0x1 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scalar_i8_lowestbit_eq: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #1 // =0x1 +; CHECK-GI-NEXT: and w9, w1, #0xff +; CHECK-GI-NEXT: lsl w8, w8, w9 +; CHECK-GI-NEXT: and w8, w8, w0 +; CHECK-GI-NEXT: tst w8, #0xff +; CHECK-GI-NEXT: cset w0, eq +; CHECK-GI-NEXT: ret %t0 = shl i8 1, %y %t1 = and i8 %t0, %x %res = icmp eq i8 %t1, 0 @@ -41,13 +62,23 @@ define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind { } define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind { -; CHECK-LABEL: scalar_i8_bitsinmiddle_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: lsr w8, w8, w1 -; CHECK-NEXT: tst w8, #0x18 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scalar_i8_bitsinmiddle_eq: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: and w8, w0, #0xff +; CHECK-SD-NEXT: lsr w8, w8, w1 +; CHECK-SD-NEXT: tst w8, #0x18 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scalar_i8_bitsinmiddle_eq: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #24 // =0x18 +; CHECK-GI-NEXT: and w9, w1, #0xff +; CHECK-GI-NEXT: lsl w8, w8, w9 +; CHECK-GI-NEXT: and w8, w8, w0 +; CHECK-GI-NEXT: tst w8, #0xff +; CHECK-GI-NEXT: cset w0, eq +; CHECK-GI-NEXT: ret %t0 = shl i8 24, %y %t1 = and i8 %t0, %x %res = icmp eq i8 %t1, 0 @@ -57,13 +88,23 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind { ; i16 scalar define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind { -; CHECK-LABEL: scalar_i16_signbit_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: lsr w8, w8, w1 -; CHECK-NEXT: tst w8, #0x8000 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scalar_i16_signbit_eq: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: and w8, w0, #0xffff +; CHECK-SD-NEXT: lsr w8, w8, w1 +; CHECK-SD-NEXT: tst w8, #0x8000 +; CHECK-SD-NEXT: cset w0, eq +; 
CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scalar_i16_signbit_eq: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #-32768 // =0xffff8000 +; CHECK-GI-NEXT: and w9, w1, #0xffff +; CHECK-GI-NEXT: lsl w8, w8, w9 +; CHECK-GI-NEXT: and w8, w8, w0 +; CHECK-GI-NEXT: tst w8, #0xffff +; CHECK-GI-NEXT: cset w0, eq +; CHECK-GI-NEXT: ret %t0 = shl i16 32768, %y %t1 = and i16 %t0, %x %res = icmp eq i16 %t1, 0 @@ -71,13 +112,23 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind { } define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind { -; CHECK-LABEL: scalar_i16_lowestbit_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: lsr w8, w8, w1 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scalar_i16_lowestbit_eq: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: and w8, w0, #0xffff +; CHECK-SD-NEXT: lsr w8, w8, w1 +; CHECK-SD-NEXT: tst w8, #0x1 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scalar_i16_lowestbit_eq: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #1 // =0x1 +; CHECK-GI-NEXT: and w9, w1, #0xffff +; CHECK-GI-NEXT: lsl w8, w8, w9 +; CHECK-GI-NEXT: and w8, w8, w0 +; CHECK-GI-NEXT: tst w8, #0xffff +; CHECK-GI-NEXT: cset w0, eq +; CHECK-GI-NEXT: ret %t0 = shl i16 1, %y %t1 = and i16 %t0, %x %res = icmp eq i16 %t1, 0 @@ -85,13 +136,23 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind { } define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind { -; CHECK-LABEL: scalar_i16_bitsinmiddle_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: lsr w8, w8, w1 -; CHECK-NEXT: tst w8, #0xff0 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scalar_i16_bitsinmiddle_eq: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: and w8, w0, #0xffff +; CHECK-SD-NEXT: lsr w8, w8, w1 +; CHECK-SD-NEXT: tst w8, #0xff0 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scalar_i16_bitsinmiddle_eq: +; CHECK-GI: // %bb.0: +; 
CHECK-GI-NEXT: mov w8, #4080 // =0xff0 +; CHECK-GI-NEXT: and w9, w1, #0xffff +; CHECK-GI-NEXT: lsl w8, w8, w9 +; CHECK-GI-NEXT: and w8, w8, w0 +; CHECK-GI-NEXT: tst w8, #0xffff +; CHECK-GI-NEXT: cset w0, eq +; CHECK-GI-NEXT: ret %t0 = shl i16 4080, %y %t1 = and i16 %t0, %x %res = icmp eq i16 %t1, 0 @@ -101,12 +162,20 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind { ; i32 scalar define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind { -; CHECK-LABEL: scalar_i32_signbit_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: lsr w8, w0, w1 -; CHECK-NEXT: tst w8, #0x80000000 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scalar_i32_signbit_eq: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: lsr w8, w0, w1 +; CHECK-SD-NEXT: tst w8, #0x80000000 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scalar_i32_signbit_eq: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #-2147483648 // =0x80000000 +; CHECK-GI-NEXT: lsl w8, w8, w1 +; CHECK-GI-NEXT: tst w8, w0 +; CHECK-GI-NEXT: cset w0, eq +; CHECK-GI-NEXT: ret %t0 = shl i32 2147483648, %y %t1 = and i32 %t0, %x %res = icmp eq i32 %t1, 0 @@ -114,12 +183,20 @@ define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind { } define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind { -; CHECK-LABEL: scalar_i32_lowestbit_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: lsr w8, w0, w1 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scalar_i32_lowestbit_eq: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: lsr w8, w0, w1 +; CHECK-SD-NEXT: tst w8, #0x1 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scalar_i32_lowestbit_eq: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #1 // =0x1 +; CHECK-GI-NEXT: lsl w8, w8, w1 +; CHECK-GI-NEXT: tst w8, w0 +; CHECK-GI-NEXT: cset w0, eq +; CHECK-GI-NEXT: ret %t0 = shl i32 1, %y %t1 = and i32 %t0, %x %res = icmp eq i32 %t1, 0 @@ -127,12 +204,20 @@ define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind { } 
define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind { -; CHECK-LABEL: scalar_i32_bitsinmiddle_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: lsr w8, w0, w1 -; CHECK-NEXT: tst w8, #0xffff00 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scalar_i32_bitsinmiddle_eq: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: lsr w8, w0, w1 +; CHECK-SD-NEXT: tst w8, #0xffff00 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scalar_i32_bitsinmiddle_eq: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #16776960 // =0xffff00 +; CHECK-GI-NEXT: lsl w8, w8, w1 +; CHECK-GI-NEXT: tst w8, w0 +; CHECK-GI-NEXT: cset w0, eq +; CHECK-GI-NEXT: ret %t0 = shl i32 16776960, %y %t1 = and i32 %t0, %x %res = icmp eq i32 %t1, 0 @@ -142,12 +227,20 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind { ; i64 scalar define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind { -; CHECK-LABEL: scalar_i64_signbit_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: lsr x8, x0, x1 -; CHECK-NEXT: tst x8, #0x8000000000000000 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scalar_i64_signbit_eq: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: lsr x8, x0, x1 +; CHECK-SD-NEXT: tst x8, #0x8000000000000000 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scalar_i64_signbit_eq: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 +; CHECK-GI-NEXT: lsl x8, x8, x1 +; CHECK-GI-NEXT: tst x8, x0 +; CHECK-GI-NEXT: cset w0, eq +; CHECK-GI-NEXT: ret %t0 = shl i64 9223372036854775808, %y %t1 = and i64 %t0, %x %res = icmp eq i64 %t1, 0 @@ -155,12 +248,20 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind { } define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind { -; CHECK-LABEL: scalar_i64_lowestbit_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: lsr x8, x0, x1 -; CHECK-NEXT: tst x8, #0x1 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scalar_i64_lowestbit_eq: +; CHECK-SD: // %bb.0: +; 
CHECK-SD-NEXT: lsr x8, x0, x1 +; CHECK-SD-NEXT: tst x8, #0x1 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scalar_i64_lowestbit_eq: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #1 // =0x1 +; CHECK-GI-NEXT: lsl x8, x8, x1 +; CHECK-GI-NEXT: tst x8, x0 +; CHECK-GI-NEXT: cset w0, eq +; CHECK-GI-NEXT: ret %t0 = shl i64 1, %y %t1 = and i64 %t0, %x %res = icmp eq i64 %t1, 0 @@ -168,12 +269,20 @@ define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind { } define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind { -; CHECK-LABEL: scalar_i64_bitsinmiddle_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: lsr x8, x0, x1 -; CHECK-NEXT: tst x8, #0xffffffff0000 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scalar_i64_bitsinmiddle_eq: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: lsr x8, x0, x1 +; CHECK-SD-NEXT: tst x8, #0xffffffff0000 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scalar_i64_bitsinmiddle_eq: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #281474976645120 // =0xffffffff0000 +; CHECK-GI-NEXT: lsl x8, x8, x1 +; CHECK-GI-NEXT: tst x8, x0 +; CHECK-GI-NEXT: cset w0, eq +; CHECK-GI-NEXT: ret %t0 = shl i64 281474976645120, %y %t1 = and i64 %t0, %x %res = icmp eq i64 %t1, 0 @@ -216,42 +325,81 @@ define <4 x i1> @vec_4xi32_nonsplat_eq(<4 x i32> %x, <4 x i32> %y) nounwind { } define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwind { -; CHECK-LABEL: vec_4xi32_nonsplat_undef0_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #1 -; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s -; CHECK-NEXT: and v0.16b, v1.16b, v0.16b -; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 -; CHECK-NEXT: xtn v0.4h, v0.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: vec_4xi32_nonsplat_undef0_eq: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: movi v2.4s, #1 +; CHECK-SD-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-SD-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-SD-NEXT: cmeq v0.4s, v0.4s, #0 +; CHECK-SD-NEXT: xtn v0.4h, v0.4s +; CHECK-SD-NEXT: 
ret +; +; CHECK-GI-LABEL: vec_4xi32_nonsplat_undef0_eq: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #1 // =0x1 +; CHECK-GI-NEXT: fmov s2, w8 +; CHECK-GI-NEXT: mov v2.s[1], w8 +; CHECK-GI-NEXT: mov v2.s[3], w8 +; CHECK-GI-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-GI-NEXT: cmeq v0.4s, v0.4s, #0 +; CHECK-GI-NEXT: xtn v0.4h, v0.4s +; CHECK-GI-NEXT: ret %t0 = shl <4 x i32> , %y %t1 = and <4 x i32> %t0, %x %res = icmp eq <4 x i32> %t1, ret <4 x i1> %res } define <4 x i1> @vec_4xi32_nonsplat_undef1_eq(<4 x i32> %x, <4 x i32> %y) nounwind { -; CHECK-LABEL: vec_4xi32_nonsplat_undef1_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #1 -; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s -; CHECK-NEXT: and v0.16b, v1.16b, v0.16b -; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 -; CHECK-NEXT: xtn v0.4h, v0.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: vec_4xi32_nonsplat_undef1_eq: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: movi v2.4s, #1 +; CHECK-SD-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-SD-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-SD-NEXT: cmeq v0.4s, v0.4s, #0 +; CHECK-SD-NEXT: xtn v0.4h, v0.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: vec_4xi32_nonsplat_undef1_eq: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d3, #0000000000000000 +; CHECK-GI-NEXT: movi v2.4s, #1 +; CHECK-GI-NEXT: mov v3.s[1], wzr +; CHECK-GI-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-GI-NEXT: mov v3.s[3], wzr +; CHECK-GI-NEXT: cmeq v0.4s, v0.4s, v3.4s +; CHECK-GI-NEXT: xtn v0.4h, v0.4s +; CHECK-GI-NEXT: ret %t0 = shl <4 x i32> , %y %t1 = and <4 x i32> %t0, %x %res = icmp eq <4 x i32> %t1, ret <4 x i1> %res } define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwind { -; CHECK-LABEL: vec_4xi32_nonsplat_undef2_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #1 -; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s -; CHECK-NEXT: and v0.16b, v1.16b, v0.16b -; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 -; CHECK-NEXT: xtn v0.4h, v0.4s -; 
CHECK-NEXT: ret +; CHECK-SD-LABEL: vec_4xi32_nonsplat_undef2_eq: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: movi v2.4s, #1 +; CHECK-SD-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-SD-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-SD-NEXT: cmeq v0.4s, v0.4s, #0 +; CHECK-SD-NEXT: xtn v0.4h, v0.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: vec_4xi32_nonsplat_undef2_eq: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #1 // =0x1 +; CHECK-GI-NEXT: movi d3, #0000000000000000 +; CHECK-GI-NEXT: fmov s2, w8 +; CHECK-GI-NEXT: mov v2.s[1], w8 +; CHECK-GI-NEXT: mov v3.s[1], wzr +; CHECK-GI-NEXT: mov v2.s[3], w8 +; CHECK-GI-NEXT: mov v3.s[3], wzr +; CHECK-GI-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-GI-NEXT: cmeq v0.4s, v0.4s, v3.4s +; CHECK-GI-NEXT: xtn v0.4h, v0.4s +; CHECK-GI-NEXT: ret %t0 = shl <4 x i32> , %y %t1 = and <4 x i32> %t0, %x %res = icmp eq <4 x i32> %t1, @@ -263,12 +411,22 @@ define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwi ;------------------------------------------------------------------------------; define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind { -; CHECK-LABEL: scalar_i8_signbit_ne: -; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: lsr w8, w8, w1 -; CHECK-NEXT: lsr w0, w8, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scalar_i8_signbit_ne: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: and w8, w0, #0xff +; CHECK-SD-NEXT: lsr w8, w8, w1 +; CHECK-SD-NEXT: lsr w0, w8, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scalar_i8_signbit_ne: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #-128 // =0xffffff80 +; CHECK-GI-NEXT: and w9, w1, #0xff +; CHECK-GI-NEXT: lsl w8, w8, w9 +; CHECK-GI-NEXT: and w8, w8, w0 +; CHECK-GI-NEXT: tst w8, #0xff +; CHECK-GI-NEXT: cset w0, ne +; CHECK-GI-NEXT: ret %t0 = shl i8 128, %y %t1 = and i8 %t0, %x %res = icmp ne i8 %t1, 0 ; we are perfectly happy with 'ne' predicate @@ -310,13 +468,24 @@ define i1 @scalar_i32_x_is_const2_eq(i32 %y) nounwind { } define 
i1 @scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind { -; CHECK-LABEL: scalar_i8_bitsinmiddle_slt: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #24 // =0x18 -; CHECK-NEXT: lsl w8, w8, w1 -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: ubfx w0, w8, #7, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scalar_i8_bitsinmiddle_slt: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, #24 // =0x18 +; CHECK-SD-NEXT: lsl w8, w8, w1 +; CHECK-SD-NEXT: and w8, w8, w0 +; CHECK-SD-NEXT: ubfx w0, w8, #7, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scalar_i8_bitsinmiddle_slt: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #24 // =0x18 +; CHECK-GI-NEXT: and w9, w1, #0xff +; CHECK-GI-NEXT: lsl w8, w8, w9 +; CHECK-GI-NEXT: and w8, w8, w0 +; CHECK-GI-NEXT: sxtb w8, w8 +; CHECK-GI-NEXT: cmp w8, #0 +; CHECK-GI-NEXT: cset w0, mi +; CHECK-GI-NEXT: ret %t0 = shl i8 24, %y %t1 = and i8 %t0, %x %res = icmp slt i8 %t1, 0 @@ -324,15 +493,20 @@ define i1 @scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind { } define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind { -; CHECK-LABEL: scalar_i8_signbit_eq_with_nonzero: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-128 // =0xffffff80 -; CHECK-NEXT: lsl w8, w8, w1 -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: and w8, w8, #0x80 -; CHECK-NEXT: cmp w8, #1 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scalar_i8_signbit_eq_with_nonzero: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, #-128 // =0xffffff80 +; CHECK-SD-NEXT: lsl w8, w8, w1 +; CHECK-SD-NEXT: and w8, w8, w0 +; CHECK-SD-NEXT: and w8, w8, #0x80 +; CHECK-SD-NEXT: cmp w8, #1 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scalar_i8_signbit_eq_with_nonzero: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w0, wzr +; CHECK-GI-NEXT: ret %t0 = shl i8 128, %y %t1 = and i8 %t0, %x %res = icmp eq i8 %t1, 1 ; should be comparing with 0 diff --git a/llvm/test/CodeGen/AArch64/signbit-test.ll b/llvm/test/CodeGen/AArch64/signbit-test.ll index 
c74a934ee09d8..298495bcf5a01 100644 --- a/llvm/test/CodeGen/AArch64/signbit-test.ll +++ b/llvm/test/CodeGen/AArch64/signbit-test.ll @@ -1,13 +1,21 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s +; RUN: llc -mtriple=aarch64-- < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64-- -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI define i64 @test_clear_mask_i64_i32(i64 %x) nounwind { -; CHECK-LABEL: test_clear_mask_i64_i32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #42 // =0x2a -; CHECK-NEXT: cmn w0, #1 -; CHECK-NEXT: csel x0, x8, x0, gt -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_clear_mask_i64_i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: mov w8, #42 // =0x2a +; CHECK-SD-NEXT: cmn w0, #1 +; CHECK-SD-NEXT: csel x0, x8, x0, gt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_clear_mask_i64_i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w8, #42 // =0x2a +; CHECK-GI-NEXT: tst x0, #0x80000000 +; CHECK-GI-NEXT: csel x0, x8, x0, eq +; CHECK-GI-NEXT: ret entry: %a = and i64 %x, 2147483648 %r = icmp eq i64 %a, 0 diff --git a/llvm/test/CodeGen/AArch64/signed-truncation-check.ll b/llvm/test/CodeGen/AArch64/signed-truncation-check.ll index 7c80f9320faec..fc01c6b2c5471 100644 --- a/llvm/test/CodeGen/AArch64/signed-truncation-check.ll +++ b/llvm/test/CodeGen/AArch64/signed-truncation-check.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI ; https://bugs.llvm.org/show_bug.cgi?id=38149 @@ -19,13 +20,22 @@ ; ---------------------------------------------------------------------------- ; define i1 
@shifts_eqcmp_i16_i8(i16 %x) nounwind { -; CHECK-LABEL: shifts_eqcmp_i16_i8: -; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: and w8, w8, #0xffff -; CHECK-NEXT: cmp w8, w0, uxth -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shifts_eqcmp_i16_i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sxtb w8, w0 +; CHECK-SD-NEXT: and w8, w8, #0xffff +; CHECK-SD-NEXT: cmp w8, w0, uxth +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shifts_eqcmp_i16_i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: lsl w8, w0, #8 +; CHECK-GI-NEXT: sbfx w8, w8, #8, #8 +; CHECK-GI-NEXT: and w8, w8, #0xffff +; CHECK-GI-NEXT: cmp w8, w0, uxth +; CHECK-GI-NEXT: cset w0, eq +; CHECK-GI-NEXT: ret %tmp0 = shl i16 %x, 8 ; 16-8 %tmp1 = ashr exact i16 %tmp0, 8 ; 16-8 %tmp2 = icmp eq i16 %tmp1, %x @@ -97,26 +107,43 @@ define i1 @shifts_eqcmp_i64_i8(i64 %x) nounwind { ; ---------------------------------------------------------------------------- ; define i1 @add_ugecmp_i16_i8(i16 %x) nounwind { -; CHECK-LABEL: add_ugecmp_i16_i8: -; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: sub w8, w8, #128 -; CHECK-NEXT: lsr w8, w8, #8 -; CHECK-NEXT: cmp w8, #254 -; CHECK-NEXT: cset w0, hi -; CHECK-NEXT: ret +; CHECK-SD-LABEL: add_ugecmp_i16_i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: and w8, w0, #0xffff +; CHECK-SD-NEXT: sub w8, w8, #128 +; CHECK-SD-NEXT: lsr w8, w8, #8 +; CHECK-SD-NEXT: cmp w8, #254 +; CHECK-SD-NEXT: cset w0, hi +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: add_ugecmp_i16_i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #-128 // =0xffffff80 +; CHECK-GI-NEXT: mov w9, #65280 // =0xff00 +; CHECK-GI-NEXT: add w8, w8, w0, uxth +; CHECK-GI-NEXT: cmp w8, w9 +; CHECK-GI-NEXT: cset w0, hs +; CHECK-GI-NEXT: ret %tmp0 = add i16 %x, -128 ; ~0U << (8-1) %tmp1 = icmp uge i16 %tmp0, -256 ; ~0U << 8 ret i1 %tmp1 } define i1 @add_ugecmp_i32_i16_i8(i16 %xx) nounwind { -; CHECK-LABEL: add_ugecmp_i32_i16_i8: -; CHECK: // %bb.0: -; CHECK-NEXT: and w8, 
w0, #0xffff -; CHECK-NEXT: cmp w8, w8, sxtb -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: add_ugecmp_i32_i16_i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: and w8, w0, #0xffff +; CHECK-SD-NEXT: cmp w8, w8, sxtb +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: add_ugecmp_i32_i16_i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #-128 // =0xffffff80 +; CHECK-GI-NEXT: add w8, w8, w0, uxth +; CHECK-GI-NEXT: cmn w8, #256 +; CHECK-GI-NEXT: cset w0, hs +; CHECK-GI-NEXT: ret %x = zext i16 %xx to i32 %tmp0 = add i32 %x, -128 ; ~0U << (8-1) %tmp1 = icmp uge i32 %tmp0, -256 ; ~0U << 8 @@ -124,55 +151,92 @@ define i1 @add_ugecmp_i32_i16_i8(i16 %xx) nounwind { } define i1 @add_ugecmp_i32_i16(i32 %x) nounwind { -; CHECK-LABEL: add_ugecmp_i32_i16: -; CHECK: // %bb.0: -; CHECK-NEXT: cmp w0, w0, sxth -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: add_ugecmp_i32_i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp w0, w0, sxth +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: add_ugecmp_i32_i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sub w8, w0, #8, lsl #12 // =32768 +; CHECK-GI-NEXT: cmn w8, #16, lsl #12 // =65536 +; CHECK-GI-NEXT: cset w0, hs +; CHECK-GI-NEXT: ret %tmp0 = add i32 %x, -32768 ; ~0U << (16-1) %tmp1 = icmp uge i32 %tmp0, -65536 ; ~0U << 16 ret i1 %tmp1 } define i1 @add_ugecmp_i32_i8(i32 %x) nounwind { -; CHECK-LABEL: add_ugecmp_i32_i8: -; CHECK: // %bb.0: -; CHECK-NEXT: cmp w0, w0, sxtb -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: add_ugecmp_i32_i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp w0, w0, sxtb +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: add_ugecmp_i32_i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sub w8, w0, #128 +; CHECK-GI-NEXT: cmn w8, #256 +; CHECK-GI-NEXT: cset w0, hs +; CHECK-GI-NEXT: ret %tmp0 = add i32 %x, -128 ; ~0U << (8-1) %tmp1 = icmp uge i32 %tmp0, -256 ; ~0U << 8 ret i1 %tmp1 } define i1 
@add_ugecmp_i64_i32(i64 %x) nounwind { -; CHECK-LABEL: add_ugecmp_i64_i32: -; CHECK: // %bb.0: -; CHECK-NEXT: cmp x0, w0, sxtw -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: add_ugecmp_i64_i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp x0, w0, sxtw +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: add_ugecmp_i64_i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #-2147483648 // =0xffffffff80000000 +; CHECK-GI-NEXT: mov x9, #-4294967296 // =0xffffffff00000000 +; CHECK-GI-NEXT: add x8, x0, x8 +; CHECK-GI-NEXT: cmp x8, x9 +; CHECK-GI-NEXT: cset w0, hs +; CHECK-GI-NEXT: ret %tmp0 = add i64 %x, -2147483648 ; ~0U << (32-1) %tmp1 = icmp uge i64 %tmp0, -4294967296 ; ~0U << 32 ret i1 %tmp1 } define i1 @add_ugecmp_i64_i16(i64 %x) nounwind { -; CHECK-LABEL: add_ugecmp_i64_i16: -; CHECK: // %bb.0: -; CHECK-NEXT: cmp x0, w0, sxth -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: add_ugecmp_i64_i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp x0, w0, sxth +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: add_ugecmp_i64_i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sub x8, x0, #8, lsl #12 // =32768 +; CHECK-GI-NEXT: cmn x8, #16, lsl #12 // =65536 +; CHECK-GI-NEXT: cset w0, hs +; CHECK-GI-NEXT: ret %tmp0 = add i64 %x, -32768 ; ~0U << (16-1) %tmp1 = icmp uge i64 %tmp0, -65536 ; ~0U << 16 ret i1 %tmp1 } define i1 @add_ugecmp_i64_i8(i64 %x) nounwind { -; CHECK-LABEL: add_ugecmp_i64_i8: -; CHECK: // %bb.0: -; CHECK-NEXT: cmp x0, w0, sxtb -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: add_ugecmp_i64_i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp x0, w0, sxtb +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: add_ugecmp_i64_i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sub x8, x0, #128 +; CHECK-GI-NEXT: cmn x8, #256 +; CHECK-GI-NEXT: cset w0, hs +; CHECK-GI-NEXT: ret %tmp0 = add i64 %x, -128 ; ~0U << (8-1) %tmp1 = icmp uge i64 %tmp0, -256 ; ~0U << 8 ret i1 
%tmp1 @@ -180,14 +244,23 @@ define i1 @add_ugecmp_i64_i8(i64 %x) nounwind { ; Slightly more canonical variant define i1 @add_ugtcmp_i16_i8(i16 %x) nounwind { -; CHECK-LABEL: add_ugtcmp_i16_i8: -; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: sub w8, w8, #128 -; CHECK-NEXT: lsr w8, w8, #8 -; CHECK-NEXT: cmp w8, #254 -; CHECK-NEXT: cset w0, hi -; CHECK-NEXT: ret +; CHECK-SD-LABEL: add_ugtcmp_i16_i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: and w8, w0, #0xffff +; CHECK-SD-NEXT: sub w8, w8, #128 +; CHECK-SD-NEXT: lsr w8, w8, #8 +; CHECK-SD-NEXT: cmp w8, #254 +; CHECK-SD-NEXT: cset w0, hi +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: add_ugtcmp_i16_i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #-128 // =0xffffff80 +; CHECK-GI-NEXT: mov w9, #65279 // =0xfeff +; CHECK-GI-NEXT: add w8, w8, w0, uxth +; CHECK-GI-NEXT: cmp w8, w9 +; CHECK-GI-NEXT: cset w0, hi +; CHECK-GI-NEXT: ret %tmp0 = add i16 %x, -128 ; ~0U << (8-1) %tmp1 = icmp ugt i16 %tmp0, -257 ; ~0U << 8 - 1 ret i1 %tmp1 @@ -198,68 +271,113 @@ define i1 @add_ugtcmp_i16_i8(i16 %x) nounwind { ; ---------------------------------------------------------------------------- ; define i1 @add_ultcmp_i16_i8(i16 %x) nounwind { -; CHECK-LABEL: add_ultcmp_i16_i8: -; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: and w8, w8, #0xffff -; CHECK-NEXT: cmp w8, w0, uxth -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: add_ultcmp_i16_i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sxtb w8, w0 +; CHECK-SD-NEXT: and w8, w8, #0xffff +; CHECK-SD-NEXT: cmp w8, w0, uxth +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: add_ultcmp_i16_i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: add w8, w0, #128 +; CHECK-GI-NEXT: and w8, w8, #0xffff +; CHECK-GI-NEXT: cmp w8, #256 +; CHECK-GI-NEXT: cset w0, lo +; CHECK-GI-NEXT: ret %tmp0 = add i16 %x, 128 ; 1U << (8-1) %tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8 ret i1 %tmp1 } define i1 @add_ultcmp_i32_i16(i32 %x) nounwind { -; 
CHECK-LABEL: add_ultcmp_i32_i16: -; CHECK: // %bb.0: -; CHECK-NEXT: cmp w0, w0, sxth -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: add_ultcmp_i32_i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp w0, w0, sxth +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: add_ultcmp_i32_i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: add w8, w0, #8, lsl #12 // =32768 +; CHECK-GI-NEXT: cmp w8, #16, lsl #12 // =65536 +; CHECK-GI-NEXT: cset w0, lo +; CHECK-GI-NEXT: ret %tmp0 = add i32 %x, 32768 ; 1U << (16-1) %tmp1 = icmp ult i32 %tmp0, 65536 ; 1U << 16 ret i1 %tmp1 } define i1 @add_ultcmp_i32_i8(i32 %x) nounwind { -; CHECK-LABEL: add_ultcmp_i32_i8: -; CHECK: // %bb.0: -; CHECK-NEXT: cmp w0, w0, sxtb -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: add_ultcmp_i32_i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp w0, w0, sxtb +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: add_ultcmp_i32_i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: add w8, w0, #128 +; CHECK-GI-NEXT: cmp w8, #256 +; CHECK-GI-NEXT: cset w0, lo +; CHECK-GI-NEXT: ret %tmp0 = add i32 %x, 128 ; 1U << (8-1) %tmp1 = icmp ult i32 %tmp0, 256 ; 1U << 8 ret i1 %tmp1 } define i1 @add_ultcmp_i64_i32(i64 %x) nounwind { -; CHECK-LABEL: add_ultcmp_i64_i32: -; CHECK: // %bb.0: -; CHECK-NEXT: cmp x0, w0, sxtw -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: add_ultcmp_i64_i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp x0, w0, sxtw +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: add_ultcmp_i64_i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #-2147483648 // =0x80000000 +; CHECK-GI-NEXT: mov x9, #4294967296 // =0x100000000 +; CHECK-GI-NEXT: add x8, x0, x8 +; CHECK-GI-NEXT: cmp x8, x9 +; CHECK-GI-NEXT: cset w0, lo +; CHECK-GI-NEXT: ret %tmp0 = add i64 %x, 2147483648 ; 1U << (32-1) %tmp1 = icmp ult i64 %tmp0, 4294967296 ; 1U << 32 ret i1 %tmp1 } define i1 @add_ultcmp_i64_i16(i64 %x) nounwind { -; CHECK-LABEL: 
add_ultcmp_i64_i16: -; CHECK: // %bb.0: -; CHECK-NEXT: cmp x0, w0, sxth -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: add_ultcmp_i64_i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp x0, w0, sxth +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: add_ultcmp_i64_i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: add x8, x0, #8, lsl #12 // =32768 +; CHECK-GI-NEXT: cmp x8, #16, lsl #12 // =65536 +; CHECK-GI-NEXT: cset w0, lo +; CHECK-GI-NEXT: ret %tmp0 = add i64 %x, 32768 ; 1U << (16-1) %tmp1 = icmp ult i64 %tmp0, 65536 ; 1U << 16 ret i1 %tmp1 } define i1 @add_ultcmp_i64_i8(i64 %x) nounwind { -; CHECK-LABEL: add_ultcmp_i64_i8: -; CHECK: // %bb.0: -; CHECK-NEXT: cmp x0, w0, sxtb -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: add_ultcmp_i64_i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp x0, w0, sxtb +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: add_ultcmp_i64_i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: add x8, x0, #128 +; CHECK-GI-NEXT: cmp x8, #256 +; CHECK-GI-NEXT: cset w0, lo +; CHECK-GI-NEXT: ret %tmp0 = add i64 %x, 128 ; 1U << (8-1) %tmp1 = icmp ult i64 %tmp0, 256 ; 1U << 8 ret i1 %tmp1 @@ -267,13 +385,21 @@ define i1 @add_ultcmp_i64_i8(i64 %x) nounwind { ; Slightly more canonical variant define i1 @add_ulecmp_i16_i8(i16 %x) nounwind { -; CHECK-LABEL: add_ulecmp_i16_i8: -; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: and w8, w8, #0xffff -; CHECK-NEXT: cmp w8, w0, uxth -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: add_ulecmp_i16_i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sxtb w8, w0 +; CHECK-SD-NEXT: and w8, w8, #0xffff +; CHECK-SD-NEXT: cmp w8, w0, uxth +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: add_ulecmp_i16_i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: add w8, w0, #128 +; CHECK-GI-NEXT: and w8, w8, #0xffff +; CHECK-GI-NEXT: cmp w8, #255 +; CHECK-GI-NEXT: cset w0, ls +; CHECK-GI-NEXT: ret %tmp0 = add i16 %x, 128 ; 
1U << (8-1) %tmp1 = icmp ule i16 %tmp0, 255 ; (1U << 8) - 1 ret i1 %tmp1 @@ -284,12 +410,20 @@ define i1 @add_ulecmp_i16_i8(i16 %x) nounwind { ; Adding not a constant define i1 @add_ultcmp_bad_i16_i8_add(i16 %x, i16 %y) nounwind { -; CHECK-LABEL: add_ultcmp_bad_i16_i8_add: -; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, w1 -; CHECK-NEXT: tst w8, #0xff00 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: add_ultcmp_bad_i16_i8_add: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: add w8, w0, w1 +; CHECK-SD-NEXT: tst w8, #0xff00 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: add_ultcmp_bad_i16_i8_add: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: add w8, w0, w1 +; CHECK-GI-NEXT: and w8, w8, #0xffff +; CHECK-GI-NEXT: cmp w8, #256 +; CHECK-GI-NEXT: cset w0, lo +; CHECK-GI-NEXT: ret %tmp0 = add i16 %x, %y %tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8 ret i1 %tmp1 @@ -311,12 +445,20 @@ define i1 @add_ultcmp_bad_i16_i8_cmp(i16 %x, i16 %y) nounwind { ; Second constant is not larger than the first one define i1 @add_ultcmp_bad_i8_i16(i16 %x) nounwind { -; CHECK-LABEL: add_ultcmp_bad_i8_i16: -; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: add w8, w8, #128 -; CHECK-NEXT: lsr w0, w8, #16 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: add_ultcmp_bad_i8_i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: and w8, w0, #0xffff +; CHECK-SD-NEXT: add w8, w8, #128 +; CHECK-SD-NEXT: lsr w0, w8, #16 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: add_ultcmp_bad_i8_i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: and w8, w0, #0xffff +; CHECK-GI-NEXT: add w8, w8, #128 +; CHECK-GI-NEXT: cmp w8, w8, uxth +; CHECK-GI-NEXT: cset w0, ne +; CHECK-GI-NEXT: ret %tmp0 = add i16 %x, 128 ; 1U << (8-1) %tmp1 = icmp ult i16 %tmp0, 128 ; 1U << (8-1) ret i1 %tmp1 @@ -324,12 +466,20 @@ define i1 @add_ultcmp_bad_i8_i16(i16 %x) nounwind { ; First constant is not power of two define i1 @add_ultcmp_bad_i16_i8_c0notpoweroftwo(i16 %x) nounwind { -; CHECK-LABEL: 
add_ultcmp_bad_i16_i8_c0notpoweroftwo: -; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, #192 -; CHECK-NEXT: tst w8, #0xff00 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: add_ultcmp_bad_i16_i8_c0notpoweroftwo: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: add w8, w0, #192 +; CHECK-SD-NEXT: tst w8, #0xff00 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: add_ultcmp_bad_i16_i8_c0notpoweroftwo: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: add w8, w0, #192 +; CHECK-GI-NEXT: and w8, w8, #0xffff +; CHECK-GI-NEXT: cmp w8, #256 +; CHECK-GI-NEXT: cset w0, lo +; CHECK-GI-NEXT: ret %tmp0 = add i16 %x, 192 ; (1U << (8-1)) + (1U << (8-1-1)) %tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8 ret i1 %tmp1 @@ -351,12 +501,20 @@ define i1 @add_ultcmp_bad_i16_i8_c1notpoweroftwo(i16 %x) nounwind { ; Magic check fails, 64 << 1 != 256 define i1 @add_ultcmp_bad_i16_i8_magic(i16 %x) nounwind { -; CHECK-LABEL: add_ultcmp_bad_i16_i8_magic: -; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, #64 -; CHECK-NEXT: tst w8, #0xff00 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: add_ultcmp_bad_i16_i8_magic: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: add w8, w0, #64 +; CHECK-SD-NEXT: tst w8, #0xff00 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: add_ultcmp_bad_i16_i8_magic: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: add w8, w0, #64 +; CHECK-GI-NEXT: and w8, w8, #0xffff +; CHECK-GI-NEXT: cmp w8, #256 +; CHECK-GI-NEXT: cset w0, lo +; CHECK-GI-NEXT: ret %tmp0 = add i16 %x, 64 ; 1U << (8-1-1) %tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8 ret i1 %tmp1 @@ -364,12 +522,20 @@ define i1 @add_ultcmp_bad_i16_i8_magic(i16 %x) nounwind { ; Bad 'destination type' define i1 @add_ultcmp_bad_i16_i4(i16 %x) nounwind { -; CHECK-LABEL: add_ultcmp_bad_i16_i4: -; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, #8 -; CHECK-NEXT: tst w8, #0xfff0 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: add_ultcmp_bad_i16_i4: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: 
add w8, w0, #8 +; CHECK-SD-NEXT: tst w8, #0xfff0 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: add_ultcmp_bad_i16_i4: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: add w8, w0, #8 +; CHECK-GI-NEXT: and w8, w8, #0xffff +; CHECK-GI-NEXT: cmp w8, #16 +; CHECK-GI-NEXT: cset w0, lo +; CHECK-GI-NEXT: ret %tmp0 = add i16 %x, 8 ; 1U << (4-1) %tmp1 = icmp ult i16 %tmp0, 16 ; 1U << 4 ret i1 %tmp1 @@ -377,12 +543,20 @@ define i1 @add_ultcmp_bad_i16_i4(i16 %x) nounwind { ; Bad storage type define i1 @add_ultcmp_bad_i24_i8(i24 %x) nounwind { -; CHECK-LABEL: add_ultcmp_bad_i24_i8: -; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, #128 -; CHECK-NEXT: tst w8, #0xffff00 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: add_ultcmp_bad_i24_i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: add w8, w0, #128 +; CHECK-SD-NEXT: tst w8, #0xffff00 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: add_ultcmp_bad_i24_i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: add w8, w0, #128 +; CHECK-GI-NEXT: and w8, w8, #0xffffff +; CHECK-GI-NEXT: cmp w8, #256 +; CHECK-GI-NEXT: cset w0, lo +; CHECK-GI-NEXT: ret %tmp0 = add i24 %x, 128 ; 1U << (8-1) %tmp1 = icmp ult i24 %tmp0, 256 ; 1U << 8 ret i1 %tmp1 From 30579c0708660cd25de7b82b624ddff5601f03b0 Mon Sep 17 00:00:00 2001 From: Orlando Cazalet-Hyams Date: Thu, 30 Oct 2025 09:08:43 +0000 Subject: [PATCH 08/21] [DebugInfo] Add bit size to _BitInt name in debug info (#165583) Follow on from #164372 This changes the DW_AT_name for `_BitInt(N)` from `_BitInt` to `_BitInt(N)` --- clang/lib/CodeGen/CGDebugInfo.cpp | 5 ++++- clang/test/DebugInfo/Generic/bit-int.c | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 07a2cfb21bef2..fd2f6dcf182b5 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -1174,7 +1174,10 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { } 
llvm::DIType *CGDebugInfo::CreateType(const BitIntType *Ty) { - StringRef Name = Ty->isUnsigned() ? "unsigned _BitInt" : "_BitInt"; + SmallString<32> Name; + llvm::raw_svector_ostream OS(Name); + OS << (Ty->isUnsigned() ? "unsigned _BitInt(" : "_BitInt(") + << Ty->getNumBits() << ")"; llvm::dwarf::TypeKind Encoding = Ty->isUnsigned() ? llvm::dwarf::DW_ATE_unsigned : llvm::dwarf::DW_ATE_signed; diff --git a/clang/test/DebugInfo/Generic/bit-int.c b/clang/test/DebugInfo/Generic/bit-int.c index 94b93013e3b46..88ecc139eee9f 100644 --- a/clang/test/DebugInfo/Generic/bit-int.c +++ b/clang/test/DebugInfo/Generic/bit-int.c @@ -4,5 +4,5 @@ unsigned _BitInt(17) a; _BitInt(2) b; -// CHECK: !DIBasicType(name: "_BitInt", size: 8, dataSize: 2, encoding: DW_ATE_signed) -// CHECK: !DIBasicType(name: "unsigned _BitInt", size: 32, dataSize: 17, encoding: DW_ATE_unsigned) +// CHECK: !DIBasicType(name: "_BitInt(2)", size: 8, dataSize: 2, encoding: DW_ATE_signed) +// CHECK: !DIBasicType(name: "unsigned _BitInt(17)", size: 32, dataSize: 17, encoding: DW_ATE_unsigned) From 8f624815bf7a85768aed48dab8047a3465c8f2ed Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 30 Oct 2025 10:23:40 +0100 Subject: [PATCH 09/21] [MemCpyOpt] Allow stack move optimization if one address captured (#165527) Allow the stack move optimization (which merges two allocas) when the address of only one alloca is captured (and the provenance is not captured). Both addresses need to be captured to observe that the allocas were merged. Fixes https://github.com/llvm/llvm-project/issues/165484. 
--- .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 20 +++++-- llvm/test/Transforms/MemCpyOpt/stack-move.ll | 58 +++++++++++++++++++ 2 files changed, 73 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index e043d072a7638..08be5df9872b7 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -1534,8 +1534,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store, bool SrcNotDom = false; auto CaptureTrackingWithModRef = - [&](Instruction *AI, - function_ref ModRefCallback) -> bool { + [&](Instruction *AI, function_ref ModRefCallback, + bool &AddressCaptured) -> bool { SmallVector Worklist; Worklist.push_back(AI); unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking(); @@ -1559,8 +1559,9 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store, if (!Visited.insert(&U).second) continue; UseCaptureInfo CI = DetermineUseCaptureKind(U, AI); - if (capturesAnything(CI.UseCC)) + if (capturesAnyProvenance(CI.UseCC)) return false; + AddressCaptured |= capturesAddress(CI.UseCC); if (UI->mayReadOrWriteMemory()) { if (UI->isLifetimeStartOrEnd()) { @@ -1627,7 +1628,9 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store, return true; }; - if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback)) + bool DestAddressCaptured = false; + if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback, + DestAddressCaptured)) return false; // Bailout if Dest may have any ModRef before Store. 
if (!ReachabilityWorklist.empty() && @@ -1653,7 +1656,14 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store, return true; }; - if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback)) + bool SrcAddressCaptured = false; + if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback, + SrcAddressCaptured)) + return false; + + // If both the source and destination address are captured, the fact that they + // are no longer two separate allocations may be observed. + if (DestAddressCaptured && SrcAddressCaptured) return false; // We can do the transformation. First, move the SrcAlloca to the start of the diff --git a/llvm/test/Transforms/MemCpyOpt/stack-move.ll b/llvm/test/Transforms/MemCpyOpt/stack-move.ll index 940e30ec46881..0c2e05fa8fed6 100644 --- a/llvm/test/Transforms/MemCpyOpt/stack-move.ll +++ b/llvm/test/Transforms/MemCpyOpt/stack-move.ll @@ -1729,3 +1729,61 @@ define i32 @test_ret_only_capture() { %v = load i32, ptr %a ret i32 %v } + +declare ptr @captures_address_only(ptr captures(address)) + +; Can transform: Only one address captured. +define void @test_captures_address_captures_none() { +; CHECK-LABEL: define void @test_captures_address_captures_none() { +; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 +; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 +; CHECK-NEXT: call void @captures_address_only(ptr [[SRC]]) +; CHECK-NEXT: call void @use_nocapture(ptr [[SRC]]) +; CHECK-NEXT: ret void +; + %src = alloca %struct.Foo, align 4 + %dst = alloca %struct.Foo, align 4 + store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src + call void @captures_address_only(ptr %src) + call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dst, ptr align 4 %src, i64 12, i1 false) + call void @use_nocapture(ptr %dst) + ret void +} + +; Can transform: Only one address captured. 
+define void @test_captures_none_and_captures_address() { +; CHECK-LABEL: define void @test_captures_none_and_captures_address() { +; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 +; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 +; CHECK-NEXT: call void @use_nocapture(ptr [[SRC]]) +; CHECK-NEXT: call void @captures_address_only(ptr [[SRC]]) +; CHECK-NEXT: ret void +; + %src = alloca %struct.Foo, align 4 + %dst = alloca %struct.Foo, align 4 + store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src + call void @use_nocapture(ptr %src) + call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dst, ptr align 4 %src, i64 12, i1 false) + call void @captures_address_only(ptr %dst) + ret void +} + +; Cannot transform: Both addresses captured. +define void @test_captures_address_and_captures_address() { +; CHECK-LABEL: define void @test_captures_address_and_captures_address() { +; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 +; CHECK-NEXT: [[DST:%.*]] = alloca [[STRUCT_FOO]], align 4 +; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 +; CHECK-NEXT: call void @captures_address_only(ptr [[SRC]]) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DST]], ptr align 4 [[SRC]], i64 12, i1 false) +; CHECK-NEXT: call void @captures_address_only(ptr [[DST]]) +; CHECK-NEXT: ret void +; + %src = alloca %struct.Foo, align 4 + %dst = alloca %struct.Foo, align 4 + store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src + call void @captures_address_only(ptr %src) + call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dst, ptr align 4 %src, i64 12, i1 false) + call void @captures_address_only(ptr %dst) + ret void +} From 43ea75dd89e71c5773a11aba9d581e6b5292eab7 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 30 Oct 2025 10:24:19 +0100 Subject: [PATCH 10/21] [DeveloperPolicy] Add guidelines for adding/enabling passes (#158591) This documents two things: * The recommended way to go about 
adding a new pass. * The criteria for enabling a pass. RFC: https://discourse.llvm.org/t/rfc-guidelines-for-adding-enabling-new-passes/88290 --- llvm/docs/DeveloperPolicy.rst | 49 +++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/llvm/docs/DeveloperPolicy.rst b/llvm/docs/DeveloperPolicy.rst index 45f2df20984e6..9135406c2e2a1 100644 --- a/llvm/docs/DeveloperPolicy.rst +++ b/llvm/docs/DeveloperPolicy.rst @@ -1189,6 +1189,55 @@ Suggested disclaimer for the project README and the main project web page: necessarily a reflection of the completeness or stability of the code, it does indicate that the project is not yet endorsed as a component of LLVM. +Adding or enabling a new LLVM pass +---------------------------------- + +The guidelines here are primarily targeted at the enablement of new major +passes in the target-independent optimization pipeline. Small additions, or +backend-specific passes, require a lesser degree of care. Before creating a new +pass, consider whether the functionality can be integrated into an existing +pass first. This is often both faster and more powerful. + +When adding a new pass, the goal should be to enable it as part of the default +optimization pipeline as early as possible and then continue development +incrementally. (This does not apply to passes that are only relevant for +specific uses of LLVM, such as GC support passes.) + +The recommended workflow is: + +1. Implement a basic version of the pass and add it to the pass pipeline behind + a flag that is disabled by default. The initial version should focus on + handling simple cases correctly and efficiently. +2. Enable the pass by default. Separating this step allows easily disabling the + pass if issues are encountered, without having to revert the entire + implementation. +3. Incrementally extend the pass with new functionality. 
As the pass is already + enabled, it becomes easier to identify the specific change that has caused a + regression in correctness, optimization quality or compile-time. + +When enabling a pass, certain requirements must be met (in no particular order): + + * **Maintenance:** The pass (and any analyses it depends on) must have at + least one maintainer. + * **Usefulness:** There should be evidence that the pass improves performance + (or whatever metric it optimizes for) on real-world workloads. Improvements + seen only on synthetic benchmarks may be insufficient. + * **Compile-Time:** The pass should not have a large impact on compile-time, + where the evaluation of what "large" means is up to reviewer discretion, and + may differ based on the value the pass provides. In any case, it is expected + that a concerted effort has been made to mitigate the compile-time impact, + both for the average case, and for pathological cases. + * **Correctness:** The pass should have no known correctness issues (except + global correctness issues that affect all of LLVM). If an old pass is being + enabled (rather than implementing a new one incrementally), additional due + diligence is required. The pass should be fully reviewed to ensure that it + still complies with current quality standards. Fuzzing with disabled + profitability checks may help gain additional confidence in the + implementation. + +If non-trivial issues are found in a newly enabled pass, it may be temporarily +disabled again, until the issues have been resolved. + .. _copyright-license-patents: Copyright, License, and Patents From bb1158f14a72f6baca18773748b55776c16a7830 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Thu, 30 Oct 2025 10:34:41 +0100 Subject: [PATCH 11/21] [libc++] Fix LLVM 22 TODOs (#153367) We've upgraded to LLVM 22 now, so we can remove a bunch of TODOs. 
--- libcxx/include/__config | 11 +------- libcxx/include/__configuration/abi.h | 8 ------ libcxx/include/__format/format_arg.h | 17 +++++------- libcxx/include/__format/format_context.h | 4 +-- libcxx/include/__hash_table | 12 --------- libcxx/include/__iterator/concepts.h | 15 +++++------ libcxx/include/__math/traits.h | 15 +++-------- libcxx/include/__ranges/transform_view.h | 3 +-- libcxx/include/__tree | 12 --------- .../reference_constructs_from_temporary.h | 6 ----- libcxx/include/forward_list | 11 -------- libcxx/include/list | 11 -------- libcxx/include/tuple | 4 +-- libcxx/include/variant | 2 +- .../meta/is_referenceable.compile.pass.cpp | 2 +- .../c.math/constexpr-cxx23-clang.pass.cpp | 7 ----- .../transform_error.mandates.verify.cpp | 27 ++++++------------- .../transform_error.mandates.verify.cpp | 27 ++++--------------- .../format.arg/visit.pass.cpp | 2 -- .../format.arg/visit.return_type.pass.cpp | 2 -- .../visit_format_arg.deprecated.verify.cpp | 1 - .../format.arg/visit_format_arg.pass.cpp | 6 ++--- .../format.arguments/format.args/get.pass.cpp | 6 ++--- ...855_tuple_ref_binding_diagnostics.pass.cpp | 22 --------------- .../robust_against_adl.pass.cpp | 1 - .../variant.visit.member/visit.pass.cpp | 2 -- .../visit_return_type.pass.cpp | 2 -- libcxx/test/support/test_basic_format_arg.h | 2 +- libcxx/test/support/test_macros.h | 7 ----- 29 files changed, 44 insertions(+), 203 deletions(-) diff --git a/libcxx/include/__config b/libcxx/include/__config index b4c081dcdff1b..357f77b7d27d6 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -1050,8 +1050,7 @@ typedef __char32_t char32_t; # define _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(_ClassName) static_assert(true, "") # endif -// TODO(LLVM 22): Remove the workaround -# if defined(__OBJC__) && (!defined(_LIBCPP_CLANG_VER) || _LIBCPP_CLANG_VER < 2001) +# if defined(__OBJC__) && defined(_LIBCPP_APPLE_CLANG_VER) # define _LIBCPP_WORKAROUND_OBJCXX_COMPILER_INTRINSICS # endif @@ -1255,14 +1254,6 @@ 
typedef __char32_t char32_t; # define _LIBCPP_DIAGNOSE_NULLPTR # endif -// TODO(LLVM 22): Remove this macro once LLVM19 support ends. __cpp_explicit_this_parameter has been set in LLVM20. -// Clang-18 has support for deducing this, but it does not set the FTM. -# if defined(__cpp_explicit_this_parameter) || (defined(_LIBCPP_CLANG_VER) && _LIBCPP_CLANG_VER >= 1800) -# define _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER 1 -# else -# define _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER 0 -# endif - #endif // __cplusplus #endif // _LIBCPP___CONFIG diff --git a/libcxx/include/__configuration/abi.h b/libcxx/include/__configuration/abi.h index c9936df30ff7f..38b85c6ac70de 100644 --- a/libcxx/include/__configuration/abi.h +++ b/libcxx/include/__configuration/abi.h @@ -61,14 +61,6 @@ // According to the Standard, `bitset::operator[] const` returns bool # define _LIBCPP_ABI_BITSET_VECTOR_BOOL_CONST_SUBSCRIPT_RETURN_BOOL -// In LLVM 20, we've changed to take these ABI breaks unconditionally. These flags only exist in case someone is running -// into the static_asserts we added to catch the ABI break and don't care that it is one. 
-// TODO(LLVM 22): Remove these flags -# define _LIBCPP_ABI_LIST_REMOVE_NODE_POINTER_UB -# define _LIBCPP_ABI_TREE_REMOVE_NODE_POINTER_UB -# define _LIBCPP_ABI_FIX_UNORDERED_NODE_POINTER_UB -# define _LIBCPP_ABI_FORWARD_LIST_REMOVE_NODE_POINTER_UB - // These flags are documented in ABIGuarantees.rst # define _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT # define _LIBCPP_ABI_DO_NOT_EXPORT_BASIC_STRING_COMMON diff --git a/libcxx/include/__format/format_arg.h b/libcxx/include/__format/format_arg.h index ed5e76275ea87..19794f0f084ce 100644 --- a/libcxx/include/__format/format_arg.h +++ b/libcxx/include/__format/format_arg.h @@ -149,7 +149,7 @@ _LIBCPP_HIDE_FROM_ABI decltype(auto) __visit_format_arg(_Visitor&& __vis, basic_ __libcpp_unreachable(); } -# if _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER +# if _LIBCPP_STD_VER >= 26 template _LIBCPP_HIDE_FROM_ABI _Rp __visit_format_arg(_Visitor&& __vis, basic_format_arg<_Context> __arg) { @@ -200,7 +200,7 @@ _LIBCPP_HIDE_FROM_ABI _Rp __visit_format_arg(_Visitor&& __vis, basic_format_arg< __libcpp_unreachable(); } -# endif // _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER +# endif // _LIBCPP_STD_VER >= 26 /// Contains the values used in basic_format_arg. /// @@ -285,7 +285,7 @@ class _LIBCPP_NO_SPECIALIZATIONS basic_format_arg { _LIBCPP_HIDE_FROM_ABI explicit operator bool() const noexcept { return __type_ != __format::__arg_t::__none; } -# if _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER +# if _LIBCPP_STD_VER >= 26 // This function is user facing, so it must wrap the non-standard types of // the "variant" in a handle to stay conforming. See __arg_t for more details. 
@@ -329,7 +329,7 @@ class _LIBCPP_NO_SPECIALIZATIONS basic_format_arg { } } -# endif // _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER +# endif // _LIBCPP_STD_VER >= 26 private: using char_type = typename _Context::char_type; @@ -371,11 +371,8 @@ class basic_format_arg<_Context>::handle { // This function is user facing, so it must wrap the non-standard types of // the "variant" in a handle to stay conforming. See __arg_t for more details. template -# if _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER -_LIBCPP_DEPRECATED_IN_CXX26 -# endif - _LIBCPP_HIDE_FROM_ABI decltype(auto) - visit_format_arg(_Visitor&& __vis, basic_format_arg<_Context> __arg) { +_LIBCPP_DEPRECATED_IN_CXX26 _LIBCPP_HIDE_FROM_ABI decltype(auto) +visit_format_arg(_Visitor&& __vis, basic_format_arg<_Context> __arg) { switch (__arg.__type_) { # if _LIBCPP_HAS_INT128 case __format::__arg_t::__i128: { @@ -387,7 +384,7 @@ _LIBCPP_DEPRECATED_IN_CXX26 typename __basic_format_arg_value<_Context>::__handle __h{__arg.__value_.__u128_}; return std::invoke(std::forward<_Visitor>(__vis), typename basic_format_arg<_Context>::handle{__h}); } -# endif // _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER +# endif // _LIBCPP_HAS_INT128 default: return std::__visit_format_arg(std::forward<_Visitor>(__vis), __arg); } diff --git a/libcxx/include/__format/format_context.h b/libcxx/include/__format/format_context.h index e672ee7ad0581..1771dd34b82fb 100644 --- a/libcxx/include/__format/format_context.h +++ b/libcxx/include/__format/format_context.h @@ -175,13 +175,13 @@ class basic_format_context::__itera __format::__determine_arg_t(), __basic_format_arg_value(__arg)}; }; -# if _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER +# if _LIBCPP_STD_VER >= 26 return static_cast<_Context*>(__c)->arg(__id).visit(std::move(__visitor)); # else _LIBCPP_SUPPRESS_DEPRECATED_PUSH return std::visit_format_arg(std::move(__visitor), static_cast<_Context*>(__c)->arg(__id)); 
_LIBCPP_SUPPRESS_DEPRECATED_POP -# endif // _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER +# endif // _LIBCPP_STD_VER >= 26 }) { } diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table index 5432abb4ab39d..e1897949a47e6 100644 --- a/libcxx/include/__hash_table +++ b/libcxx/include/__hash_table @@ -83,18 +83,6 @@ struct __hash_node_base { typedef _NodePtr __node_pointer; typedef __node_base_pointer __next_pointer; -// TODO(LLVM 22): Remove this check -#ifndef _LIBCPP_ABI_FIX_UNORDERED_NODE_POINTER_UB - static_assert(sizeof(__node_base_pointer) == sizeof(__node_pointer) && _LIBCPP_ALIGNOF(__node_base_pointer) == - _LIBCPP_ALIGNOF(__node_pointer), - "It looks like you are using std::__hash_table (an implementation detail for the unordered containers) " - "with a fancy pointer type that thas a different representation depending on whether it points to a " - "__hash_table base pointer or a __hash_table node pointer (both of which are implementation details of " - "the standard library). This means that your ABI is being broken between LLVM 19 and LLVM 20. If you " - "don't care about your ABI being broken, define the _LIBCPP_ABI_TREE_REMOVE_NODE_POINTER_UB macro to " - "silence this diagnostic."); -#endif - __next_pointer __next_; _LIBCPP_HIDE_FROM_ABI __next_pointer __ptr() _NOEXCEPT { diff --git a/libcxx/include/__iterator/concepts.h b/libcxx/include/__iterator/concepts.h index f38688734b38a..3b43920443636 100644 --- a/libcxx/include/__iterator/concepts.h +++ b/libcxx/include/__iterator/concepts.h @@ -117,15 +117,12 @@ template concept __signed_integer_like = signed_integral<_Tp>; template -concept weakly_incrementable = - // TODO: remove this once the clang bug is fixed (https://llvm.org/PR48173). - !same_as<_Ip, bool> && // Currently, clang does not handle bool correctly. 
- movable<_Ip> && requires(_Ip __i) { - typename iter_difference_t<_Ip>; - requires __signed_integer_like>; - { ++__i } -> same_as<_Ip&>; // not required to be equality-preserving - __i++; // not required to be equality-preserving - }; +concept weakly_incrementable = movable<_Ip> && requires(_Ip __i) { + typename iter_difference_t<_Ip>; + requires __signed_integer_like>; + { ++__i } -> same_as<_Ip&>; // not required to be equality-preserving + __i++; // not required to be equality-preserving +}; // [iterator.concept.inc] template diff --git a/libcxx/include/__math/traits.h b/libcxx/include/__math/traits.h index 00db2a8289fb3..ff22cee7305d7 100644 --- a/libcxx/include/__math/traits.h +++ b/libcxx/include/__math/traits.h @@ -25,33 +25,26 @@ namespace __math { // signbit -// TODO(LLVM 22): Remove conditional once support for Clang 19 is dropped. -#if defined(_LIBCPP_COMPILER_GCC) || __has_constexpr_builtin(__builtin_signbit) -# define _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_CONSTEXPR_SINCE_CXX23 -#else -# define _LIBCPP_SIGNBIT_CONSTEXPR -#endif - // The universal C runtime (UCRT) in the WinSDK provides floating point overloads // for std::signbit(). By defining our overloads as templates, we can work around // this issue as templates are less preferred than non-template functions. 
template -[[__nodiscard__]] inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(float __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool signbit(float __x) _NOEXCEPT { return __builtin_signbit(__x); } template -[[__nodiscard__]] inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool signbit(double __x) _NOEXCEPT { return __builtin_signbit(__x); } template -[[__nodiscard__]] inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(long double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool signbit(long double __x) _NOEXCEPT { return __builtin_signbit(__x); } template ::value, int> = 0> -[[__nodiscard__]] inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(_A1 __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool signbit(_A1 __x) _NOEXCEPT { return __x < 0; } diff --git a/libcxx/include/__ranges/transform_view.h b/libcxx/include/__ranges/transform_view.h index ae85dfa452d72..ab1adf9cdbe68 100644 --- a/libcxx/include/__ranges/transform_view.h +++ b/libcxx/include/__ranges/transform_view.h @@ -13,7 +13,6 @@ #include <__compare/three_way_comparable.h> #include <__concepts/constructible.h> #include <__concepts/convertible_to.h> -#include <__concepts/copyable.h> #include <__concepts/derived_from.h> #include <__concepts/equality_comparable.h> #include <__concepts/invocable.h> @@ -64,7 +63,7 @@ concept __regular_invocable_with_range_ref = regular_invocable<_Fn, range_refere template concept __transform_view_constraints = view<_View> && is_object_v<_Fn> && regular_invocable<_Fn&, range_reference_t<_View>> && - __is_referenceable_v>>; + __referenceable>>; # if _LIBCPP_STD_VER >= 23 template diff --git a/libcxx/include/__tree b/libcxx/include/__tree index 
0738c8c6a5e2b..694796922c914 100644 --- a/libcxx/include/__tree +++ b/libcxx/include/__tree @@ -823,18 +823,6 @@ public: using __node_allocator _LIBCPP_NODEBUG = __rebind_alloc<__alloc_traits, __node>; using __node_traits _LIBCPP_NODEBUG = allocator_traits<__node_allocator>; -// TODO(LLVM 22): Remove this check -#ifndef _LIBCPP_ABI_TREE_REMOVE_NODE_POINTER_UB - static_assert(sizeof(__node_base_pointer) == sizeof(__end_node_pointer) && _LIBCPP_ALIGNOF(__node_base_pointer) == - _LIBCPP_ALIGNOF(__end_node_pointer), - "It looks like you are using std::__tree (an implementation detail for (multi)map/set) with a fancy " - "pointer type that thas a different representation depending on whether it points to a __tree base " - "pointer or a __tree node pointer (both of which are implementation details of the standard library). " - "This means that your ABI is being broken between LLVM 19 and LLVM 20. If you don't care about your " - "ABI being broken, define the _LIBCPP_ABI_TREE_REMOVE_NODE_POINTER_UB macro to silence this " - "diagnostic."); -#endif - private: // check for sane allocator pointer rebinding semantics. Rebinding the // allocator for a new pointer type should be exactly the same as rebinding diff --git a/libcxx/include/__type_traits/reference_constructs_from_temporary.h b/libcxx/include/__type_traits/reference_constructs_from_temporary.h index 2ff549b4e15ce..3d097ce90cb09 100644 --- a/libcxx/include/__type_traits/reference_constructs_from_temporary.h +++ b/libcxx/include/__type_traits/reference_constructs_from_temporary.h @@ -30,14 +30,8 @@ _LIBCPP_NO_SPECIALIZATIONS inline constexpr bool reference_constructs_from_tempo #endif -#if __has_builtin(__reference_constructs_from_temporary) template inline const bool __reference_constructs_from_temporary_v = __reference_constructs_from_temporary(_Tp, _Up); -#else -// TODO(LLVM 22): Remove this as all supported compilers should have __reference_constructs_from_temporary implemented. 
-template -inline const bool __reference_constructs_from_temporary_v = __reference_binds_to_temporary(_Tp, _Up); -#endif _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/forward_list b/libcxx/include/forward_list index df7da20cfb611..88d863f494e86 100644 --- a/libcxx/include/forward_list +++ b/libcxx/include/forward_list @@ -284,17 +284,6 @@ struct __forward_node_traits { typedef _NodePtr __node_pointer; typedef __forward_begin_node<_NodePtr> __begin_node; typedef __rebind_pointer_t<_NodePtr, __begin_node> __begin_node_pointer; - -// TODO(LLVM 22): Remove this check -# ifndef _LIBCPP_ABI_FORWARD_LIST_REMOVE_NODE_POINTER_UB - static_assert(sizeof(__begin_node_pointer) == sizeof(__node_pointer) && _LIBCPP_ALIGNOF(__begin_node_pointer) == - _LIBCPP_ALIGNOF(__node_pointer), - "It looks like you are using std::forward_list with a fancy pointer type that thas a different " - "representation depending on whether it points to a forward_list base pointer or a forward_list node " - "pointer (both of which are implementation details of the standard library). This means that your ABI " - "is being broken between LLVM 19 and LLVM 20. 
If you don't care about your ABI being broken, define " - "the _LIBCPP_ABI_FORWARD_LIST_REMOVE_NODE_POINTER_UB macro to silence this diagnostic."); -# endif }; template diff --git a/libcxx/include/list b/libcxx/include/list index c5c2a8508999c..0ff85d2ebcb86 100644 --- a/libcxx/include/list +++ b/libcxx/include/list @@ -276,17 +276,6 @@ template struct __list_node_pointer_traits { typedef __rebind_pointer_t<_VoidPtr, __list_node<_Tp, _VoidPtr> > __node_pointer; typedef __rebind_pointer_t<_VoidPtr, __list_node_base<_Tp, _VoidPtr> > __base_pointer; - -// TODO(LLVM 22): Remove this check -# ifndef _LIBCPP_ABI_LIST_REMOVE_NODE_POINTER_UB - static_assert(sizeof(__node_pointer) == sizeof(__node_pointer) && _LIBCPP_ALIGNOF(__base_pointer) == - _LIBCPP_ALIGNOF(__node_pointer), - "It looks like you are using std::list with a fancy pointer type that thas a different representation " - "depending on whether it points to a list base pointer or a list node pointer (both of which are " - "implementation details of the standard library). This means that your ABI is being broken between " - "LLVM 19 and LLVM 20. If you don't care about your ABI being broken, define the " - "_LIBCPP_ABI_LIST_REMOVE_NODE_POINTER_UB macro to silence this diagnostic."); -# endif }; template diff --git a/libcxx/include/tuple b/libcxx/include/tuple index 5f3bb72e0678b..466f501b5f4f8 100644 --- a/libcxx/include/tuple +++ b/libcxx/include/tuple @@ -301,7 +301,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool __tuple_compare_equal(c template >> inline constexpr bool __can_tuple_compare_equal = false; -// TODO(LLVM 22): Remove `tuple_size_v<_Tp> == tuple_size_v<_Up>` here once once LLVM-20 support ends +// TODO(LLVM 23): Remove `tuple_size_v<_Tp> == tuple_size_v<_Up>` here once once LLVM-20 support ends // because the resolution of CWG2369 landed in LLVM-21. 
template requires(tuple_size_v<_Tp> == tuple_size_v<_Up>) @@ -328,7 +328,7 @@ concept __tuple_like_no_tuple = __tuple_like<_Tp> && !__is_tuple_v<_Tp>; template struct __tuple_common_comparison_category_impl {}; -// TODO(LLVM 22): Remove `tuple_size_v<_Tp> == tuple_size_v<_Up>` here once once LLVM-20 support ends +// TODO(LLVM 23): Remove `tuple_size_v<_Tp> == tuple_size_v<_Up>` here once once LLVM-20 support ends // because the resolution of CWG2369 landed in LLVM-21. template requires(tuple_size_v<_Tp> == tuple_size_v<_Up>) && requires { diff --git a/libcxx/include/variant b/libcxx/include/variant index 9beef146f203c..8e958581a6b07 100644 --- a/libcxx/include/variant +++ b/libcxx/include/variant @@ -1299,7 +1299,7 @@ public: __impl_.__swap(__that.__impl_); } -# if _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER +# if _LIBCPP_STD_VER >= 26 // Helper class to implement [variant.visit]/10 // Constraints: The call to visit does not use an explicit template-argument-list // that begins with a type template-argument. 
diff --git a/libcxx/test/libcxx-03/utilities/meta/is_referenceable.compile.pass.cpp b/libcxx/test/libcxx-03/utilities/meta/is_referenceable.compile.pass.cpp index 093bbae289723..f39d1a5da41af 100644 --- a/libcxx/test/libcxx-03/utilities/meta/is_referenceable.compile.pass.cpp +++ b/libcxx/test/libcxx-03/utilities/meta/is_referenceable.compile.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // -// __is_referenceable_v +// __libcpp_is_referenceable // // [defns.referenceable] defines "a referenceable type" as: // An object type, a function type that does not have cv-qualifiers diff --git a/libcxx/test/libcxx/numerics/c.math/constexpr-cxx23-clang.pass.cpp b/libcxx/test/libcxx/numerics/c.math/constexpr-cxx23-clang.pass.cpp index 3f17f21e8c108..20887b8cf2678 100644 --- a/libcxx/test/libcxx/numerics/c.math/constexpr-cxx23-clang.pass.cpp +++ b/libcxx/test/libcxx/numerics/c.math/constexpr-cxx23-clang.pass.cpp @@ -220,16 +220,9 @@ int main(int, char**) { ASSERT_CONSTEXPR_CXX23(std::isnormal(-1.0) == 1); ASSERT_CONSTEXPR_CXX23(std::isnormal(-1.0L) == 1); -// TODO(LLVM 22): Remove `__has_constexpr_builtin` conditional once support for Clang 19 is dropped. 
-#if !__has_constexpr_builtin(__builtin_signbit) - ASSERT_NOT_CONSTEXPR_CXX23(std::signbit(-1.0f) == 1); - ASSERT_NOT_CONSTEXPR_CXX23(std::signbit(-1.0) == 1); - ASSERT_NOT_CONSTEXPR_CXX23(std::signbit(-1.0L) == 1); -#else ASSERT_CONSTEXPR_CXX23(std::signbit(-1.0f) == 1); ASSERT_CONSTEXPR_CXX23(std::signbit(-1.0) == 1); ASSERT_CONSTEXPR_CXX23(std::signbit(-1.0L) == 1); -#endif ASSERT_NOT_CONSTEXPR_CXX23(std::isgreater(-1.0f, 0.0f) == 0); ASSERT_NOT_CONSTEXPR_CXX23(std::isgreater(-1.0, 0.0) == 0); diff --git a/libcxx/test/libcxx/utilities/expected/expected.expected/transform_error.mandates.verify.cpp b/libcxx/test/libcxx/utilities/expected/expected.expected/transform_error.mandates.verify.cpp index 09ebd0069b3a9..3e9bdd98cd394 100644 --- a/libcxx/test/libcxx/utilities/expected/expected.expected/transform_error.mandates.verify.cpp +++ b/libcxx/test/libcxx/utilities/expected/expected.expected/transform_error.mandates.verify.cpp @@ -8,15 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 -// With clang-cl, some warnings have a 'which is a Microsoft extension' suffix -// which break the tests. But #102851 will turn it into an error, making the test pass. -// However, upstream libcxx buildbots do not build clang from source while testing, so -// this tests still expected to fail on these bots. -// -// TODO(LLVM 22): Remove '0-1' from 'expected-error-re@*:* 0-1 {{union member {{.*}} has reference type {{.*}}}}' -// and remove 'expected-warning-re@*:* 0-1 {{union member {{.*}} has reference type {{.*}}, which is a Microsoft extension}}' -// once LLVM 22 releases. See https://llvm.org/PR104885. 
- // Test the mandates // template constexpr auto transform_error(F&& f) &; @@ -55,41 +46,39 @@ void test() { { std::expected e; e.transform_error(return_unexpected); // expected-error-re@*:* {{static assertion failed {{.*}}The result of {{.*}} must be a valid template argument for unexpected}} - // expected-error-re@*:* 0-1 {{no matching constructor for initialization of{{.*}}}} // expected-error-re@*:* {{static assertion failed {{.*}}[expected.object.general] A program that instantiates the definition of template expected for {{.*}} is ill-formed.}} - // expected-error-re@*:* 0-1 {{union member {{.*}} has reference type {{.*}}}} + // expected-error-re@*:* {{union member {{.*}} has reference type {{.*}}}} e.transform_error(return_no_object); // expected-error-re@*:* {{static assertion failed {{.*}}The result of {{.*}} must be a valid template argument for unexpected}} - // expected-error-re@*:* 0-1 {{no matching constructor for initialization of{{.*}}}} + // expected-error-re@*:* {{no matching constructor for initialization of{{.*}}}} // expected-error-re@*:* {{static assertion failed {{.*}}[expected.object.general] A program that instantiates the definition of template expected for {{.*}} is ill-formed.}} - // expected-warning-re@*:* 0-1 {{union member {{.*}} has reference type {{.*}}, which is a Microsoft extension}} } // Test const& overload { const std::expected e; e.transform_error(return_unexpected); // expected-error-re@*:* {{static assertion failed {{.*}}The result of {{.*}} must be a valid template argument for unexpected}} - // expected-error-re@*:* 0-2 {{no matching constructor for initialization of{{.*}}}} + // expected-error-re@*:* {{no matching constructor for initialization of{{.*}}}} e.transform_error(return_no_object); // expected-error-re@*:* {{static assertion failed {{.*}}The result of {{.*}} must be a valid template argument for unexpected}} - // expected-error-re@*:* 0-2 {{no matching constructor for initialization of{{.*}}}} + // 
expected-error-re@*:* {{no matching constructor for initialization of{{.*}}}} } // Test && overload { std::expected e; std::move(e).transform_error(return_unexpected); // expected-error-re@*:* {{static assertion failed {{.*}}The result of {{.*}} must be a valid template argument for unexpected}} - // expected-error-re@*:* 0-2 {{no matching constructor for initialization of{{.*}}}} + // expected-error-re@*:* {{no matching constructor for initialization of{{.*}}}} std::move(e).transform_error(return_no_object); // expected-error-re@*:* {{static assertion failed {{.*}}The result of {{.*}} must be a valid template argument for unexpected}} - // expected-error-re@*:* 0-2 {{no matching constructor for initialization of{{.*}}}} + // expected-error-re@*:* {{no matching constructor for initialization of{{.*}}}} } // Test const&& overload { const std::expected e; std::move(e).transform_error(return_unexpected); // expected-error-re@*:* {{static assertion failed {{.*}}The result of {{.*}} must be a valid template argument for unexpected}} - // expected-error-re@*:* 0-2 {{no matching constructor for initialization of{{.*}}}} + // expected-error-re@*:* {{no matching constructor for initialization of{{.*}}}} std::move(e).transform_error(return_no_object); // expected-error-re@*:* {{static assertion failed {{.*}}The result of {{.*}} must be a valid template argument for unexpected}} - // expected-error-re@*:* 0-2 {{no matching constructor for initialization of{{.*}}}} + // expected-error-re@*:* {{no matching constructor for initialization of{{.*}}}} } } // clang-format on diff --git a/libcxx/test/libcxx/utilities/expected/expected.void/transform_error.mandates.verify.cpp b/libcxx/test/libcxx/utilities/expected/expected.void/transform_error.mandates.verify.cpp index 9fd7452af64fb..c5acc27af03ea 100644 --- a/libcxx/test/libcxx/utilities/expected/expected.void/transform_error.mandates.verify.cpp +++ 
b/libcxx/test/libcxx/utilities/expected/expected.void/transform_error.mandates.verify.cpp @@ -8,16 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 -// With clang-cl, some warnings have a 'which is a Microsoft extension' suffix -// which break the tests. But #102851 will turn it into an error, making the test pass. -// However, upstream libcxx buildbots do not build clang from source while testing, so -// this tests still expected to fail on these bots. -// -// TODO(LLVM 22): Remove '0-1' from 'expected-error-re@*:* 0-1 {{union member {{.*}} has reference type {{.*}}}}' -// and remove 'expected-warning-re@*:* 0-1 {{union member {{.*}} has reference type {{.*}}, which is a Microsoft extension}}' -// and remove 'expected-error-re@*:* 0-1 {{call to deleted constructor of {{.*}}}}' -// once LLVM 22 releases. See See https://llvm.org/PR104885. - // Test the mandates // template constexpr auto transform_error(F&& f) &; @@ -56,43 +46,36 @@ void test() { { std::expected e; e.transform_error(return_unexpected); // expected-error-re@*:* {{static assertion failed {{.*}}The result of {{.*}} must be a valid template argument for unexpected}} - // expected-error-re@*:* 0-1 {{no matching constructor for initialization of{{.*}}}} + // expected-error-re@*:* {{no matching constructor for initialization of{{.*}}}} // expected-error-re@*:* {{static assertion failed {{.*}}A program that instantiates expected with a E that is not a valid argument for unexpected is ill-formed}} - // expected-error-re@*:* 0-1 {{call to deleted constructor of {{.*}}}} - // expected-error-re@*:* 0-1 {{union member {{.*}} has reference type {{.*}}}} + // expected-error-re@*:* {{union member {{.*}} has reference type {{.*}}}} e.transform_error(return_no_object); // expected-error-re@*:* {{static assertion failed {{.*}}The result of {{.*}} must be a valid template argument for unexpected}} - // expected-error-re@*:* 0-1 {{no matching constructor for initialization of{{.*}}}} + // expected-error-re@*:* 
{{no matching constructor for initialization of{{.*}}}} // expected-error-re@*:* {{static assertion failed {{.*}}A program that instantiates expected with a E that is not a valid argument for unexpected is ill-formed}} - // expected-warning-re@*:* 0-1 {{union member {{.*}} has reference type {{.*}}, which is a Microsoft extension}} } // Test const& overload { const std::expected e; e.transform_error(return_unexpected); // expected-error-re@*:* {{static assertion failed {{.*}}The result of {{.*}} must be a valid template argument for unexpected}} - // expected-error-re@*:* 0-1 {{no matching constructor for initialization of{{.*}}}} + // expected-error-re@*:* {{no matching constructor for initialization of{{.*}}}} e.transform_error(return_no_object); // expected-error-re@*:* {{static assertion failed {{.*}}The result of {{.*}} must be a valid template argument for unexpected}} - // expected-error-re@*:* 0-1 {{no matching constructor for initialization of{{.*}}}} - // expected-error-re@*:* 0-1 {{call to deleted constructor of {{.*}}}} + // expected-error-re@*:* {{no matching constructor for initialization of{{.*}}}} } // Test && overload { std::expected e; std::move(e).transform_error(return_unexpected); // expected-error-re@*:* {{static assertion failed {{.*}}The result of {{.*}} must be a valid template argument for unexpected}} - // expected-error-re@*:* 0-1 {{no matching constructor for initialization of{{.*}}}} std::move(e).transform_error(return_no_object); // expected-error-re@*:* {{static assertion failed {{.*}}The result of {{.*}} must be a valid template argument for unexpected}} - // expected-error-re@*:* 0-1 {{no matching constructor for initialization of{{.*}}}} } // Test const&& overload { const std::expected e; std::move(e).transform_error(return_unexpected); // expected-error-re@*:* {{static assertion failed {{.*}}The result of {{.*}} must be a valid template argument for unexpected}} - // expected-error-re@*:* 0-1 {{no matching constructor for 
initialization of{{.*}}}} std::move(e).transform_error(return_no_object); // expected-error-re@*:* {{static assertion failed {{.*}}The result of {{.*}} must be a valid template argument for unexpected}} - // expected-error-re@*:* 0-1 {{no matching constructor for initialization of{{.*}}}} } } // clang-format on diff --git a/libcxx/test/std/utilities/format/format.arguments/format.arg/visit.pass.cpp b/libcxx/test/std/utilities/format/format.arguments/format.arg/visit.pass.cpp index 20e0a5ed66bd0..68fe8b6de41d6 100644 --- a/libcxx/test/std/utilities/format/format.arguments/format.arg/visit.pass.cpp +++ b/libcxx/test/std/utilities/format/format.arguments/format.arg/visit.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20, c++23 // UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME -// The tested functionality needs deducing this. -// XFAIL: apple-clang // diff --git a/libcxx/test/std/utilities/format/format.arguments/format.arg/visit.return_type.pass.cpp b/libcxx/test/std/utilities/format/format.arguments/format.arg/visit.return_type.pass.cpp index 8a79dd4d50f20..4ae63e896caed 100644 --- a/libcxx/test/std/utilities/format/format.arguments/format.arg/visit.return_type.pass.cpp +++ b/libcxx/test/std/utilities/format/format.arguments/format.arg/visit.return_type.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20, c++23 // UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME -// The tested functionality needs deducing this. 
-// XFAIL: apple-clang // diff --git a/libcxx/test/std/utilities/format/format.arguments/format.arg/visit_format_arg.deprecated.verify.cpp b/libcxx/test/std/utilities/format/format.arguments/format.arg/visit_format_arg.deprecated.verify.cpp index 146ceba58872e..77df72d3c4c6c 100644 --- a/libcxx/test/std/utilities/format/format.arguments/format.arg/visit_format_arg.deprecated.verify.cpp +++ b/libcxx/test/std/utilities/format/format.arguments/format.arg/visit_format_arg.deprecated.verify.cpp @@ -8,7 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20, c++23 // UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME -// XFAIL: apple-clang // diff --git a/libcxx/test/std/utilities/format/format.arguments/format.arg/visit_format_arg.pass.cpp b/libcxx/test/std/utilities/format/format.arguments/format.arg/visit_format_arg.pass.cpp index d99675a71f321..9b7c8a7f4f8b4 100644 --- a/libcxx/test/std/utilities/format/format.arguments/format.arg/visit_format_arg.pass.cpp +++ b/libcxx/test/std/utilities/format/format.arguments/format.arg/visit_format_arg.pass.cpp @@ -9,6 +9,8 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + // // template @@ -25,10 +27,6 @@ #include "make_string.h" #include "min_allocator.h" -#if TEST_STD_VER >= 26 && defined(TEST_HAS_EXPLICIT_THIS_PARAMETER) -TEST_CLANG_DIAGNOSTIC_IGNORED("-Wdeprecated-declarations") -#endif - template void test(From value) { auto store = std::make_format_args(value); diff --git a/libcxx/test/std/utilities/format/format.arguments/format.args/get.pass.cpp b/libcxx/test/std/utilities/format/format.arguments/format.args/get.pass.cpp index c7dd82d726b3a..cbddc4f437a53 100644 --- a/libcxx/test/std/utilities/format/format.arguments/format.args/get.pass.cpp +++ b/libcxx/test/std/utilities/format/format.arguments/format.args/get.pass.cpp @@ -32,7 +32,7 @@ void test(From value) { else assert(false); }; -#if TEST_STD_VER >= 26 && 
defined(TEST_HAS_EXPLICIT_THIS_PARAMETER) +#if TEST_STD_VER >= 26 format_args.get(0).visit(visitor); #else std::visit_format_arg(visitor, format_args.get(0)); @@ -47,7 +47,7 @@ void test_handle(T value) { std::basic_format_args format_args{store}; auto visitor = [](auto a) { assert((std::is_same_v::handle>)); }; -#if TEST_STD_VER >= 26 && defined(TEST_HAS_EXPLICIT_THIS_PARAMETER) +#if TEST_STD_VER >= 26 format_args.get(0).visit(visitor); #else std::visit_format_arg(visitor, format_args.get(0)); @@ -73,7 +73,7 @@ void test_string_view(From value) { else assert(false); }; -#if TEST_STD_VER >= 26 && defined(TEST_HAS_EXPLICIT_THIS_PARAMETER) +#if TEST_STD_VER >= 26 format_args.get(0).visit(visitor); #else std::visit_format_arg(visitor, format_args.get(0)); diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/PR20855_tuple_ref_binding_diagnostics.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/PR20855_tuple_ref_binding_diagnostics.pass.cpp index d78de0eec8e53..0f6a6734264c3 100644 --- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/PR20855_tuple_ref_binding_diagnostics.pass.cpp +++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/PR20855_tuple_ref_binding_diagnostics.pass.cpp @@ -16,17 +16,6 @@ #include #include #include -#include "test_macros.h" - -#if TEST_HAS_BUILTIN(__reference_constructs_from_temporary) -# define ASSERT_REFERENCE_BINDS_TEMPORARY(...) static_assert(__reference_constructs_from_temporary(__VA_ARGS__), "") -# define ASSERT_NOT_REFERENCE_BINDS_TEMPORARY(...) \ - static_assert(!__reference_constructs_from_temporary(__VA_ARGS__), "") -#else -// TODO(LLVM 22): Remove this as all support compilers should have __reference_constructs_from_temporary implemented. -# define ASSERT_REFERENCE_BINDS_TEMPORARY(...) static_assert(__reference_binds_to_temporary(__VA_ARGS__), "") -# define ASSERT_NOT_REFERENCE_BINDS_TEMPORARY(...) 
static_assert(!__reference_binds_to_temporary(__VA_ARGS__), "") -#endif template struct ConvertsTo { @@ -42,17 +31,6 @@ struct ConvertsTo { struct Base {}; struct Derived : Base {}; - -static_assert(std::is_same::value, ""); -ASSERT_REFERENCE_BINDS_TEMPORARY(std::string const&, decltype("abc")); -ASSERT_REFERENCE_BINDS_TEMPORARY(std::string const&, decltype(("abc"))); -ASSERT_REFERENCE_BINDS_TEMPORARY(std::string const&, const char*&&); - -ASSERT_NOT_REFERENCE_BINDS_TEMPORARY(int&, const ConvertsTo&); -ASSERT_NOT_REFERENCE_BINDS_TEMPORARY(const int&, ConvertsTo&); -ASSERT_NOT_REFERENCE_BINDS_TEMPORARY(Base&, Derived&); - - static_assert(std::is_constructible>::value, ""); static_assert(std::is_constructible>::value, ""); diff --git a/libcxx/test/std/utilities/variant/variant.visit.member/robust_against_adl.pass.cpp b/libcxx/test/std/utilities/variant/variant.visit.member/robust_against_adl.pass.cpp index 7be7c7ff9122b..38cf34a9c699c 100644 --- a/libcxx/test/std/utilities/variant/variant.visit.member/robust_against_adl.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.visit.member/robust_against_adl.pass.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20, c++23 -// XFAIL: apple-clang // diff --git a/libcxx/test/std/utilities/variant/variant.visit.member/visit.pass.cpp b/libcxx/test/std/utilities/variant/variant.visit.member/visit.pass.cpp index f68112d30fc35..aeb1297c136ae 100644 --- a/libcxx/test/std/utilities/variant/variant.visit.member/visit.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.visit.member/visit.pass.cpp @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20, c++23 -// The tested functionality needs deducing this. 
-// XFAIL: apple-clang // diff --git a/libcxx/test/std/utilities/variant/variant.visit.member/visit_return_type.pass.cpp b/libcxx/test/std/utilities/variant/variant.visit.member/visit_return_type.pass.cpp index 90320ae518c34..7ca05908ab340 100644 --- a/libcxx/test/std/utilities/variant/variant.visit.member/visit_return_type.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.visit.member/visit_return_type.pass.cpp @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20, c++23 -// The tested functionality needs deducing this. -// XFAIL: apple-clang // diff --git a/libcxx/test/support/test_basic_format_arg.h b/libcxx/test/support/test_basic_format_arg.h index f51f6e97cbed0..99cd558c3c5bf 100644 --- a/libcxx/test/support/test_basic_format_arg.h +++ b/libcxx/test/support/test_basic_format_arg.h @@ -21,7 +21,7 @@ bool test_basic_format_arg(std::basic_format_arg arg, T expected) { else return false; }; -#if TEST_STD_VER >= 26 && defined(TEST_HAS_EXPLICIT_THIS_PARAMETER) +#if TEST_STD_VER >= 26 return arg.visit(std::move(visitor)); #else return std::visit_format_arg(std::move(visitor), arg); diff --git a/libcxx/test/support/test_macros.h b/libcxx/test/support/test_macros.h index c4e1600572456..8d88d6fad7d0b 100644 --- a/libcxx/test/support/test_macros.h +++ b/libcxx/test/support/test_macros.h @@ -531,13 +531,6 @@ inline Tp const& DoNotOptimize(Tp const& value) { # define TEST_IF_AIX(arg_true, arg_false) arg_false #endif -// Clang-18 has support for deducing this, but it does not set the FTM. -#ifdef _LIBCPP_USE_FROZEN_CXX03_HEADERS -// This is a C++20 featue, so we don't care whether the compiler could support it -#elif defined(_LIBCPP_VERSION) && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER -# define TEST_HAS_EXPLICIT_THIS_PARAMETER -#endif - // Placement `operator new`/`operator new[]` are not yet constexpr in C++26 // when using MS ABI, because they are from . 
#if defined(__cpp_lib_constexpr_new) && __cpp_lib_constexpr_new >= 202406L From 689e95c2f1f45310a471765cc7a3ede99622e30f Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 30 Oct 2025 10:33:44 +0100 Subject: [PATCH 12/21] [GVN] Add tests for pointer replacement with different addr size (NFC) --- llvm/test/Transforms/GVN/assume-equal.ll | 44 ++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/llvm/test/Transforms/GVN/assume-equal.ll b/llvm/test/Transforms/GVN/assume-equal.ll index bbbc5c58584a6..a38980169fc52 100644 --- a/llvm/test/Transforms/GVN/assume-equal.ll +++ b/llvm/test/Transforms/GVN/assume-equal.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt < %s -passes=gvn -S | FileCheck %s +target datalayout = "p1:64:64:64:32" + %struct.A = type { ptr } @_ZTV1A = available_externally unnamed_addr constant [4 x ptr] [ptr null, ptr @_ZTI1A, ptr @_ZN1A3fooEv, ptr @_ZN1A3barEv], align 8 @_ZTI1A = external constant ptr @@ -372,6 +374,20 @@ define i1 @assume_ptr_eq_different_prov_does_not_matter_icmp(ptr %p, ptr %p2) { ret i1 %c } +define i1 @assume_ptr_eq_different_prov_does_not_matter_icmp_addrsize(ptr addrspace(1) %p, ptr addrspace(1) %p2) { +; CHECK-LABEL: define i1 @assume_ptr_eq_different_prov_does_not_matter_icmp_addrsize( +; CHECK-SAME: ptr addrspace(1) [[P:%.*]], ptr addrspace(1) [[P2:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr addrspace(1) [[P]], [[P2]] +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[C:%.*]] = icmp eq ptr addrspace(1) [[P]], null +; CHECK-NEXT: ret i1 [[C]] +; + %cmp = icmp eq ptr addrspace(1) %p, %p2 + call void @llvm.assume(i1 %cmp) + %c = icmp eq ptr addrspace(1) %p2, null + ret i1 %c +} + ; This is not correct, as it may change the provenance exposed by ptrtoint. ; We still allow it for now. 
define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoint(ptr %p, ptr %p2) { @@ -388,6 +404,20 @@ define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoint(ptr %p, ptr %p ret i64 %int } +define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoint_addrsize(ptr addrspace(1) %p, ptr addrspace(1) %p2) { +; CHECK-LABEL: define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoint_addrsize( +; CHECK-SAME: ptr addrspace(1) [[P:%.*]], ptr addrspace(1) [[P2:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr addrspace(1) [[P]], [[P2]] +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[INT:%.*]] = ptrtoint ptr addrspace(1) [[P]] to i64 +; CHECK-NEXT: ret i64 [[INT]] +; + %cmp = icmp eq ptr addrspace(1) %p, %p2 + call void @llvm.assume(i1 %cmp) + %int = ptrtoint ptr addrspace(1) %p2 to i64 + ret i64 %int +} + define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoaddr(ptr %p, ptr %p2) { ; CHECK-LABEL: define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoaddr( ; CHECK-SAME: ptr [[P:%.*]], ptr [[P2:%.*]]) { @@ -402,6 +432,20 @@ define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoaddr(ptr %p, ptr % ret i64 %int } +define i32 @assume_ptr_eq_different_prov_does_not_matter_ptrtoaddr_addrsize(ptr addrspace(1) %p, ptr addrspace(1) %p2) { +; CHECK-LABEL: define i32 @assume_ptr_eq_different_prov_does_not_matter_ptrtoaddr_addrsize( +; CHECK-SAME: ptr addrspace(1) [[P:%.*]], ptr addrspace(1) [[P2:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr addrspace(1) [[P]], [[P2]] +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[INT:%.*]] = ptrtoaddr ptr addrspace(1) [[P]] to i32 +; CHECK-NEXT: ret i32 [[INT]] +; + %cmp = icmp eq ptr addrspace(1) %p, %p2 + call void @llvm.assume(i1 %cmp) + %int = ptrtoaddr ptr addrspace(1) %p2 to i32 + ret i32 %int +} + define i8 @assume_ptr_eq_same_prov(ptr %p, i64 %x) { ; CHECK-LABEL: define i8 @assume_ptr_eq_same_prov( ; CHECK-SAME: ptr [[P:%.*]], i64 [[X:%.*]]) { From 
eccbfde028b2322156245cbd733b316aa5b3c56b Mon Sep 17 00:00:00 2001 From: Pankaj Dwivedi Date: Thu, 30 Oct 2025 15:05:25 +0530 Subject: [PATCH 13/21] [AMDGPU] insert eof white space (#165673) --- llvm/lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp index b5e2d76db662e..65e6ed9d1d428 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp @@ -188,4 +188,4 @@ INITIALIZE_PASS_END(AMDGPUUniformIntrinsicCombineLegacy, DEBUG_TYPE, FunctionPass *llvm::createAMDGPUUniformIntrinsicCombineLegacyPass() { return new AMDGPUUniformIntrinsicCombineLegacy(); -} \ No newline at end of file +} From 932fa0e0871acce4f68fab504527f5b4e46f16f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20Gr=C3=A4nitz?= Date: Thu, 30 Oct 2025 10:46:37 +0100 Subject: [PATCH 14/21] [ORC] Fix missing include for MemoryAccess interface (NFC) (#165576) MemoryAccess base class was included from Core.h when it was a subclass of ExecutorProcessControl, but this changed in 0faa181434cf959110651fe974bef31e7390eba8 --- llvm/include/llvm/ExecutionEngine/Orc/EPCGenericMemoryAccess.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericMemoryAccess.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericMemoryAccess.h index c69b6f736651e..86207265021c5 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericMemoryAccess.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericMemoryAccess.h @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// Implements ExecutorProcessControl::MemoryAccess by making calls to +// Implements the MemoryAccess interface by making calls to // ExecutorProcessControl::callWrapperAsync. 
// // This simplifies the implementaton of new ExecutorProcessControl instances, @@ -19,6 +19,7 @@ #define LLVM_EXECUTIONENGINE_ORC_EPCGENERICMEMORYACCESS_H #include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/MemoryAccess.h" namespace llvm { namespace orc { From 96feee44741ef30ac4054d65c2ead4d21819ccca Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Thu, 30 Oct 2025 11:16:45 +0100 Subject: [PATCH 15/21] [clang][NFC] Make ellipse strings constexpr (#165680) Also rename map to Map, remove the m_ prefix from member variables and fix the naming of the existing color variables. --- clang/lib/Frontend/TextDiagnostic.cpp | 171 +++++++++++++------------- 1 file changed, 86 insertions(+), 85 deletions(-) diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp index c33d8f8ca9ebd..aea3e72d92a84 100644 --- a/clang/lib/Frontend/TextDiagnostic.cpp +++ b/clang/lib/Frontend/TextDiagnostic.cpp @@ -22,22 +22,16 @@ using namespace clang; -static const enum raw_ostream::Colors noteColor = raw_ostream::CYAN; -static const enum raw_ostream::Colors remarkColor = - raw_ostream::BLUE; -static const enum raw_ostream::Colors fixitColor = - raw_ostream::GREEN; -static const enum raw_ostream::Colors caretColor = - raw_ostream::GREEN; -static const enum raw_ostream::Colors warningColor = - raw_ostream::MAGENTA; -static const enum raw_ostream::Colors templateColor = - raw_ostream::CYAN; -static const enum raw_ostream::Colors errorColor = raw_ostream::RED; -static const enum raw_ostream::Colors fatalColor = raw_ostream::RED; +static constexpr raw_ostream::Colors NoteColor = raw_ostream::CYAN; +static constexpr raw_ostream::Colors RemarkColor = raw_ostream::BLUE; +static constexpr raw_ostream::Colors FixitColor = raw_ostream::GREEN; +static constexpr raw_ostream::Colors CaretColor = raw_ostream::GREEN; +static constexpr raw_ostream::Colors WarningColor = raw_ostream::MAGENTA; +static constexpr raw_ostream::Colors TemplateColor = 
raw_ostream::CYAN; +static constexpr raw_ostream::Colors ErrorColor = raw_ostream::RED; +static constexpr raw_ostream::Colors FatalColor = raw_ostream::RED; // Used for changing only the bold attribute. -static const enum raw_ostream::Colors savedColor = - raw_ostream::SAVEDCOLOR; +static constexpr raw_ostream::Colors SavedColor = raw_ostream::SAVEDCOLOR; // Magenta is taken for 'warning'. Red is already 'error' and 'cyan' // is already taken for 'note'. Green is already used to underline @@ -95,11 +89,11 @@ static void applyTemplateHighlighting(raw_ostream &OS, StringRef Str, Str = Str.substr(Pos + 1); if (Normal) - OS.changeColor(templateColor, true); + OS.changeColor(TemplateColor, true); else { OS.resetColor(); if (Bold) - OS.changeColor(savedColor, true); + OS.changeColor(SavedColor, true); } Normal = !Normal; } @@ -289,46 +283,46 @@ static void genColumnByteMapping(StringRef SourceLine, unsigned TabStop, namespace { struct SourceColumnMap { SourceColumnMap(StringRef SourceLine, unsigned TabStop) - : m_SourceLine(SourceLine) { + : SourceLine(SourceLine) { - genColumnByteMapping(SourceLine, TabStop, m_columnToByte, m_byteToColumn); + genColumnByteMapping(SourceLine, TabStop, ColumnToByte, ByteToColumn); - assert(m_byteToColumn.size()==SourceLine.size()+1); - assert(0 < m_byteToColumn.size() && 0 < m_columnToByte.size()); - assert(m_byteToColumn.size() == - static_cast(m_columnToByte.back().V + 1)); - assert(static_cast(m_byteToColumn.back().V + 1) == - m_columnToByte.size()); + assert(ByteToColumn.size() == SourceLine.size() + 1); + assert(0 < ByteToColumn.size() && 0 < ColumnToByte.size()); + assert(ByteToColumn.size() == + static_cast(ColumnToByte.back().V + 1)); + assert(static_cast(ByteToColumn.back().V + 1) == + ColumnToByte.size()); } - Columns columns() const { return m_byteToColumn.back(); } - Bytes bytes() const { return m_columnToByte.back(); } + Columns columns() const { return ByteToColumn.back(); } + Bytes bytes() const { return 
ColumnToByte.back(); } /// Map a byte to the column which it is at the start of, or return -1 /// if it is not at the start of a column (for a UTF-8 trailing byte). Columns byteToColumn(Bytes N) const { - assert(0 <= N.V && N.V < static_cast(m_byteToColumn.size())); - return m_byteToColumn[N.V]; + assert(0 <= N.V && N.V < static_cast(ByteToColumn.size())); + return ByteToColumn[N.V]; } /// Map a byte to the first column which contains it. Columns byteToContainingColumn(Bytes N) const { - assert(0 <= N.V && N.V < static_cast(m_byteToColumn.size())); - while (!m_byteToColumn[N.V].isValid()) + assert(0 <= N.V && N.V < static_cast(ByteToColumn.size())); + while (!ByteToColumn[N.V].isValid()) --N.V; - return m_byteToColumn[N.V]; + return ByteToColumn[N.V]; } /// Map a column to the byte which starts the column, or return -1 if /// the column the second or subsequent column of an expanded tab or similar /// multi-column entity. Bytes columnToByte(Columns N) const { - assert(0 <= N.V && N.V < static_cast(m_columnToByte.size())); - return m_columnToByte[N.V]; + assert(0 <= N.V && N.V < static_cast(ColumnToByte.size())); + return ColumnToByte[N.V]; } /// Map from a byte index to the next byte which starts a column. Bytes startOfNextColumn(Bytes N) const { - assert(0 <= N.V && N.V < static_cast(m_byteToColumn.size() - 1)); + assert(0 <= N.V && N.V < static_cast(ByteToColumn.size() - 1)); N = N.next(); while (!byteToColumn(N).isValid()) N = N.next(); @@ -337,21 +331,19 @@ struct SourceColumnMap { /// Map from a byte index to the previous byte which starts a column. 
Bytes startOfPreviousColumn(Bytes N) const { - assert(0 < N.V && N.V < static_cast(m_byteToColumn.size())); + assert(0 < N.V && N.V < static_cast(ByteToColumn.size())); N = N.prev(); while (!byteToColumn(N).isValid()) N = N.prev(); return N; } - StringRef getSourceLine() const { - return m_SourceLine; - } + StringRef getSourceLine() const { return SourceLine; } private: - StringRef m_SourceLine; - SmallVector m_byteToColumn; - SmallVector m_columnToByte; + StringRef SourceLine; + SmallVector ByteToColumn; + SmallVector ColumnToByte; }; } // end anonymous namespace @@ -361,12 +353,12 @@ static void selectInterestingSourceRegion(std::string &SourceLine, std::string &CaretLine, std::string &FixItInsertionLine, Columns NonGutterColumns, - const SourceColumnMap &map) { + const SourceColumnMap &Map) { Columns CaretColumns = Columns(CaretLine.size()); Columns FixItColumns = Columns(llvm::sys::locale::columnWidth(FixItInsertionLine)); Columns MaxColumns = - std::max({map.columns().V, CaretColumns.V, FixItColumns.V}); + std::max({Map.columns().V, CaretColumns.V, FixItColumns.V}); // if the number of columns is less than the desired number we're done if (MaxColumns <= NonGutterColumns) return; @@ -415,14 +407,14 @@ static void selectInterestingSourceRegion(std::string &SourceLine, // CaretEnd may have been set at the middle of a character // If it's not at a character's first column then advance it past the current // character. 
- while (CaretEnd < map.columns() && !map.columnToByte(CaretEnd).isValid()) + while (CaretEnd < Map.columns() && !Map.columnToByte(CaretEnd).isValid()) CaretEnd = CaretEnd.next(); assert( - (CaretStart > map.columns() || map.columnToByte(CaretStart).isValid()) && + (CaretStart > Map.columns() || Map.columnToByte(CaretStart).isValid()) && "CaretStart must not point to a column in the middle of a source" " line character"); - assert((CaretEnd > map.columns() || map.columnToByte(CaretEnd).isValid()) && + assert((CaretEnd > Map.columns() || Map.columnToByte(CaretEnd).isValid()) && "CaretEnd must not point to a column in the middle of a source line" " character"); @@ -431,20 +423,19 @@ static void selectInterestingSourceRegion(std::string &SourceLine, // number of columns we have, try to grow the slice to encompass // more context. - Bytes SourceStart = map.columnToByte(std::min(CaretStart.V, map.columns().V)); - Bytes SourceEnd = map.columnToByte(std::min(CaretEnd.V, map.columns().V)); + Bytes SourceStart = Map.columnToByte(std::min(CaretStart.V, Map.columns().V)); + Bytes SourceEnd = Map.columnToByte(std::min(CaretEnd.V, Map.columns().V)); Columns CaretColumnsOutsideSource = CaretEnd - CaretStart - - (map.byteToColumn(SourceEnd) - map.byteToColumn(SourceStart)); + (Map.byteToColumn(SourceEnd) - Map.byteToColumn(SourceStart)); - char const *front_ellipse = " ..."; - char const *front_space = " "; - char const *back_ellipse = "..."; - Columns EllipsesColumns = - Columns(strlen(front_ellipse) + strlen(back_ellipse)); + constexpr StringRef FrontEllipse = " ..."; + constexpr StringRef FrontSpace = " "; + constexpr StringRef BackEllipse = "..."; + Columns EllipsesColumns = Columns(FrontEllipse.size() + BackEllipse.size()); - Columns TargetColumns = Columns(NonGutterColumns); + Columns TargetColumns = NonGutterColumns; // Give us extra room for the ellipses // and any of the caret line that extends past the source if (TargetColumns > EllipsesColumns + 
CaretColumnsOutsideSource) @@ -454,25 +445,25 @@ static void selectInterestingSourceRegion(std::string &SourceLine, bool ExpandedRegion = false; if (SourceStart > 0) { - Bytes NewStart = map.startOfPreviousColumn(SourceStart); + Bytes NewStart = Map.startOfPreviousColumn(SourceStart); // Skip over any whitespace we see here; we're looking for // another bit of interesting text. // FIXME: Detect non-ASCII whitespace characters too. while (NewStart > 0 && isWhitespace(SourceLine[NewStart.V])) - NewStart = map.startOfPreviousColumn(NewStart); + NewStart = Map.startOfPreviousColumn(NewStart); // Skip over this bit of "interesting" text. while (NewStart > 0) { - Bytes Prev = map.startOfPreviousColumn(NewStart); + Bytes Prev = Map.startOfPreviousColumn(NewStart); if (isWhitespace(SourceLine[Prev.V])) break; NewStart = Prev; } - assert(map.byteToColumn(NewStart).isValid()); + assert(Map.byteToColumn(NewStart).isValid()); Columns NewColumns = - map.byteToColumn(SourceEnd) - map.byteToColumn(NewStart); + Map.byteToColumn(SourceEnd) - Map.byteToColumn(NewStart); if (NewColumns <= TargetColumns) { SourceStart = NewStart; ExpandedRegion = true; @@ -480,21 +471,21 @@ static void selectInterestingSourceRegion(std::string &SourceLine, } if (SourceEnd < SourceLine.size()) { - Bytes NewEnd = map.startOfNextColumn(SourceEnd); + Bytes NewEnd = Map.startOfNextColumn(SourceEnd); // Skip over any whitespace we see here; we're looking for // another bit of interesting text. // FIXME: Detect non-ASCII whitespace characters too. while (NewEnd < SourceLine.size() && isWhitespace(SourceLine[NewEnd.V])) - NewEnd = map.startOfNextColumn(NewEnd); + NewEnd = Map.startOfNextColumn(NewEnd); // Skip over this bit of "interesting" text. 
while (NewEnd < SourceLine.size() && !isWhitespace(SourceLine[NewEnd.V])) - NewEnd = map.startOfNextColumn(NewEnd); + NewEnd = Map.startOfNextColumn(NewEnd); - assert(map.byteToColumn(NewEnd).isValid()); + assert(Map.byteToColumn(NewEnd).isValid()); Columns NewColumns = - map.byteToColumn(NewEnd) - map.byteToColumn(SourceStart); + Map.byteToColumn(NewEnd) - Map.byteToColumn(SourceStart); if (NewColumns <= TargetColumns) { SourceEnd = NewEnd; ExpandedRegion = true; @@ -505,8 +496,8 @@ static void selectInterestingSourceRegion(std::string &SourceLine, break; } - CaretStart = map.byteToColumn(SourceStart); - CaretEnd = map.byteToColumn(SourceEnd) + CaretColumnsOutsideSource; + CaretStart = Map.byteToColumn(SourceStart); + CaretEnd = Map.byteToColumn(SourceEnd) + CaretColumnsOutsideSource; // [CaretStart, CaretEnd) is the slice we want. Update the various // output lines to show only this slice. @@ -516,8 +507,8 @@ static void selectInterestingSourceRegion(std::string &SourceLine, assert(CaretStart <= CaretEnd); Columns BackColumnsRemoved = - map.byteToColumn(Bytes{static_cast(SourceLine.size())}) - - map.byteToColumn(SourceEnd); + Map.byteToColumn(Bytes{static_cast(SourceLine.size())}) - + Map.byteToColumn(SourceEnd); Columns FrontColumnsRemoved = CaretStart; Columns ColumnsKept = CaretEnd - CaretStart; @@ -527,19 +518,19 @@ static void selectInterestingSourceRegion(std::string &SourceLine, // The line needs some truncation, and we'd prefer to keep the front // if possible, so remove the back - if (BackColumnsRemoved > Columns(strlen(back_ellipse))) - SourceLine.replace(SourceEnd.V, std::string::npos, back_ellipse); + if (BackColumnsRemoved > Columns(BackEllipse.size())) + SourceLine.replace(SourceEnd.V, std::string::npos, BackEllipse); // If that's enough then we're done if (FrontColumnsRemoved + ColumnsKept <= Columns(NonGutterColumns)) return; // Otherwise remove the front as well - if (FrontColumnsRemoved > Columns(strlen(front_ellipse))) { - SourceLine.replace(0, 
SourceStart.V, front_ellipse); - CaretLine.replace(0, CaretStart.V, front_space); + if (FrontColumnsRemoved > Columns(FrontEllipse.size())) { + SourceLine.replace(0, SourceStart.V, FrontEllipse); + CaretLine.replace(0, CaretStart.V, FrontSpace); if (!FixItInsertionLine.empty()) - FixItInsertionLine.replace(0, CaretStart.V, front_space); + FixItInsertionLine.replace(0, CaretStart.V, FrontSpace); } } @@ -733,11 +724,21 @@ TextDiagnostic::printDiagnosticLevel(raw_ostream &OS, switch (Level) { case DiagnosticsEngine::Ignored: llvm_unreachable("Invalid diagnostic type"); - case DiagnosticsEngine::Note: OS.changeColor(noteColor, true); break; - case DiagnosticsEngine::Remark: OS.changeColor(remarkColor, true); break; - case DiagnosticsEngine::Warning: OS.changeColor(warningColor, true); break; - case DiagnosticsEngine::Error: OS.changeColor(errorColor, true); break; - case DiagnosticsEngine::Fatal: OS.changeColor(fatalColor, true); break; + case DiagnosticsEngine::Note: + OS.changeColor(NoteColor, true); + break; + case DiagnosticsEngine::Remark: + OS.changeColor(RemarkColor, true); + break; + case DiagnosticsEngine::Warning: + OS.changeColor(WarningColor, true); + break; + case DiagnosticsEngine::Error: + OS.changeColor(ErrorColor, true); + break; + case DiagnosticsEngine::Fatal: + OS.changeColor(FatalColor, true); + break; } } @@ -765,7 +766,7 @@ void TextDiagnostic::printDiagnosticMessage(raw_ostream &OS, if (ShowColors && !IsSupplemental) { // Print primary diagnostic messages in bold and without color, to visually // indicate the transition from continuation notes and other output. 
- OS.changeColor(savedColor, true); + OS.changeColor(SavedColor, true); Bold = true; } @@ -843,7 +844,7 @@ void TextDiagnostic::emitDiagnosticLoc(FullSourceLoc Loc, PresumedLoc PLoc, return; if (DiagOpts.ShowColors) - OS.changeColor(savedColor, true); + OS.changeColor(SavedColor, true); emitFilename(PLoc.getFilename(), Loc.getManager()); switch (DiagOpts.getFormat()) { @@ -1470,7 +1471,7 @@ void TextDiagnostic::emitSnippetAndCaret( if (!CaretLine.empty()) { indentForLineNumbers(); if (DiagOpts.ShowColors) - OS.changeColor(caretColor, true); + OS.changeColor(CaretColor, true); OS << CaretLine << '\n'; if (DiagOpts.ShowColors) OS.resetColor(); @@ -1480,7 +1481,7 @@ void TextDiagnostic::emitSnippetAndCaret( indentForLineNumbers(); if (DiagOpts.ShowColors) // Print fixit line in color - OS.changeColor(fixitColor, false); + OS.changeColor(FixitColor, false); if (DiagOpts.ShowSourceRanges) OS << ' '; OS << FixItInsertionLine << '\n'; From 25ece5ba925347a5688f180af3131659948c3828 Mon Sep 17 00:00:00 2001 From: Ritanya-B-Bharadwaj Date: Thu, 30 Oct 2025 15:48:13 +0530 Subject: [PATCH 16/21] [clang][OpenMP] New OpenMP 6.0 threadset clause (#135807) Initial parsing/sema/codegen support for threadset clause in task and taskloop directives [Section 14.8 in OpenMP 6.0 spec] --------- --- clang/docs/OpenMPSupport.rst | 1324 ++++++++--------- clang/docs/ReleaseNotes.rst | 1 + clang/include/clang/AST/OpenMPClause.h | 80 + clang/include/clang/AST/RecursiveASTVisitor.h | 6 + clang/include/clang/Basic/OpenMPKinds.def | 8 +- clang/include/clang/Basic/OpenMPKinds.h | 7 + clang/include/clang/Sema/SemaOpenMP.h | 6 + clang/lib/AST/OpenMPClause.cpp | 8 + clang/lib/AST/StmtProfile.cpp | 2 + clang/lib/Basic/OpenMPKinds.cpp | 19 + clang/lib/CodeGen/CGOpenMPRuntime.cpp | 6 + clang/lib/Parse/ParseOpenMP.cpp | 1 + clang/lib/Sema/SemaOpenMP.cpp | 21 + clang/lib/Sema/TreeTransform.h | 7 + clang/lib/Serialization/ASTReader.cpp | 14 + clang/lib/Serialization/ASTWriter.cpp | 6 + 
clang/test/OpenMP/task_ast_print.cpp | 26 +- clang/test/OpenMP/task_codegen.cpp | 33 + clang/test/OpenMP/task_threadset_messages.cpp | 99 ++ clang/test/OpenMP/taskloop_ast_print.cpp | 16 + clang/test/OpenMP/taskloop_codegen.cpp | 53 + clang/tools/libclang/CIndex.cpp | 2 + flang/include/flang/Lower/OpenMP/Clauses.h | 1 + flang/include/flang/Parser/dump-parse-tree.h | 2 + flang/include/flang/Parser/parse-tree.h | 8 + flang/lib/Lower/OpenMP/Clauses.cpp | 15 + flang/lib/Semantics/check-omp-structure.cpp | 1 + llvm/include/llvm/Frontend/OpenMP/ClauseT.h | 14 +- llvm/include/llvm/Frontend/OpenMP/OMP.td | 6 + 29 files changed, 1118 insertions(+), 674 deletions(-) create mode 100755 clang/test/OpenMP/task_threadset_messages.cpp diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index 61b5babbd18a8..10a8d095fede3 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -1,662 +1,662 @@ -.. raw:: html - - - -.. role:: none -.. role:: part -.. role:: good - -.. contents:: - :local: - -============== -OpenMP Support -============== - -Clang fully supports OpenMP 4.5, almost all of 5.0 and most of 5.1/2. -Clang supports offloading to X86_64, AArch64, PPC64[LE], NVIDIA GPUs (all models) and AMD GPUs (all models). - -In addition, the LLVM OpenMP runtime `libomp` supports the OpenMP Tools -Interface (OMPT) on x86, x86_64, AArch64, and PPC64 on Linux, Windows, and macOS. -OMPT is also supported for NVIDIA and AMD GPUs. - -For the list of supported features from OpenMP 5.0 and 5.1 -see `OpenMP implementation details`_ and `OpenMP 51 implementation details`_. - -General improvements -==================== -- New collapse clause scheme to avoid expensive remainder operations. - Compute loop index variables after collapsing a loop nest via the - collapse clause by replacing the expensive remainder operation with - multiplications and additions. 
- -- When using the collapse clause on a loop nest the default behavior - is to automatically extend the representation of the loop counter to - 64 bits for the cases where the sizes of the collapsed loops are not - known at compile time. To prevent this conservative choice and use - at most 32 bits, compile your program with the - `-fopenmp-optimistic-collapse`. - - -GPU devices support -=================== - -Data-sharing modes ------------------- - -Clang supports two data-sharing models for Cuda devices: `Generic` and `Cuda` -modes. The default mode is `Generic`. `Cuda` mode can give an additional -performance and can be activated using the `-fopenmp-cuda-mode` flag. In -`Generic` mode all local variables that can be shared in the parallel regions -are stored in the global memory. In `Cuda` mode local variables are not shared -between the threads and it is user responsibility to share the required data -between the threads in the parallel regions. Often, the optimizer is able to -reduce the cost of `Generic` mode to the level of `Cuda` mode, but the flag, -as well as other assumption flags, can be used for tuning. - -Features not supported or with limited support for Cuda devices ---------------------------------------------------------------- - -- Cancellation constructs are not supported. - -- Doacross loop nest is not supported. - -- User-defined reductions are supported only for trivial types. - -- Nested parallelism: inner parallel regions are executed sequentially. - -- Debug information for OpenMP target regions is supported, but sometimes it may - be required to manually specify the address class of the inspected variables. - In some cases the local variables are actually allocated in the global memory, - but the debug info may be not aware of it. - - -.. 
_OpenMP implementation details: - -OpenMP 5.0 Implementation Details -================================= - -The following table provides a quick overview over various OpenMP 5.0 features -and their implementation status. Please post on the -`Discourse forums (Runtimes - OpenMP category)`_ for more -information or if you want to help with the -implementation. - -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -|Category | Feature | Status | Reviews | -+==============================+==============================================================+==========================+=======================================================================+ -| loop | support != in the canonical loop form | :good:`done` | D54441 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| loop | #pragma omp loop (directive) | :part:`partial` | D145823 (combined forms) | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| loop | #pragma omp loop bind | :part:`worked on` | D144634 (needs review) | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| loop | collapse imperfectly nested loop | :good:`done` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| loop | collapse non-rectangular nested loop | :good:`done` | | 
-+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| loop | C++ range-base for loop | :good:`done` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| loop | clause: if for SIMD directives | :good:`done` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| loop | inclusive scan (matching C++17 PSTL) | :good:`done` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| memory management | memory allocators | :good:`done` | r341687,r357929 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| memory management | allocate directive and allocate clause | :good:`done` | r355614,r335952 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| OMPD | OMPD interfaces | :good:`done` | https://reviews.llvm.org/D99914 (Supports only HOST(CPU) and Linux | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| OMPT | OMPT interfaces (callback support) | :good:`done` | | 
-+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| thread affinity | thread affinity | :good:`done` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| task | taskloop reduction | :good:`done` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| task | task affinity | :part:`not upstream` | https://github.com/jklinkenberg/openmp/tree/task-affinity | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| task | clause: depend on the taskwait construct | :good:`done` | D113540 (regular codegen only) | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| task | depend objects and detachable tasks | :good:`done` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| task | mutexinoutset dependence-type for tasks | :good:`done` | D53380,D57576 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| task | combined taskloop constructs | :good:`done` | | 
-+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| task | master taskloop | :good:`done` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| task | parallel master taskloop | :good:`done` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| task | master taskloop simd | :good:`done` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| task | parallel master taskloop simd | :good:`done` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| SIMD | atomic and simd constructs inside SIMD code | :good:`done` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| SIMD | SIMD nontemporal | :good:`done` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | infer target functions from initializers | :part:`worked on` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | infer target 
variables from initializers | :good:`done` | D146418 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | OMP_TARGET_OFFLOAD environment variable | :good:`done` | D50522 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | support full 'defaultmap' functionality | :good:`done` | D69204 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | device specific functions | :good:`done` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | clause: device_type | :good:`done` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | clause: extended device | :good:`done` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | clause: uses_allocators clause | :good:`done` | https://github.com/llvm/llvm-project/pull/157025 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | clause: in_reduction | :part:`worked on` | r308768 | 
-+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | omp_get_device_num() | :good:`done` | D54342,D128347 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | structure mapping of references | :none:`unclaimed` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | nested target declare | :good:`done` | D51378 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | implicitly map 'this' (this[:1]) | :good:`done` | D55982 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | allow access to the reference count (omp_target_is_present) | :good:`done` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | requires directive | :good:`done` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | clause: unified_shared_memory | :good:`done` | D52625,D52359 | 
-+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | clause: unified_address | :part:`partial` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | clause: reverse_offload | :part:`partial` | D52780,D155003 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | clause: atomic_default_mem_order | :good:`done` | D53513 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | clause: dynamic_allocators | :part:`unclaimed parts` | D53079 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | user-defined mappers | :good:`done` | D56326,D58638,D58523,D58074,D60972,D59474 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | map array-section with implicit mapper | :good:`done` | https://github.com/llvm/llvm-project/pull/101101 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | mapping lambda expression | :good:`done` | D51107 | 
-+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | clause: use_device_addr for target data | :good:`done` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | support close modifier on map clause | :good:`done` | D55719,D55892 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | teams construct on the host device | :good:`done` | r371553 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | support non-contiguous array sections for target update | :good:`done` | https://github.com/llvm/llvm-project/pull/144635 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | pointer attachment | :part:`being repaired` | @abhinavgaba (https://github.com/llvm/llvm-project/pull/153683) | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| atomic | hints for the atomic construct | :good:`done` | D51233 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| base language | C11 support | :good:`done` | | 
-+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| base language | C++11/14/17 support | :good:`done` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| base language | lambda support | :good:`done` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| misc | array shaping | :good:`done` | D74144 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| misc | library shutdown (omp_pause_resource[_all]) | :good:`done` | D55078 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| misc | metadirectives | :part:`mostly done` | D91944, https://github.com/llvm/llvm-project/pull/128640 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| misc | conditional modifier for lastprivate clause | :good:`done` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| misc | iterator and multidependences | :good:`done` | | 
-+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| misc | depobj directive and depobj dependency kind | :good:`done` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| misc | user-defined function variants | :good:`done`. | D67294, D64095, D71847, D71830, D109635 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| misc | pointer/reference to pointer based array reductions | :good:`done` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| misc | prevent new type definitions in clauses | :good:`done` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| memory model | memory model update (seq_cst, acq_rel, release, acquire,...) | :good:`done` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ - - -.. _OpenMP 51 implementation details: - -OpenMP 5.1 Implementation Details -================================= - -The following table provides a quick overview over various OpenMP 5.1 features -and their implementation status. -Please post on the -`Discourse forums (Runtimes - OpenMP category)`_ for more -information or if you want to help with the -implementation. 
- -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -|Category | Feature | Status | Reviews | -+==============================+==============================================================+==========================+=======================================================================+ -| atomic | 'compare' clause on atomic construct | :good:`done` | D120290, D120007, D118632, D120200, D116261, D118547, D116637 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| atomic | 'fail' clause on atomic construct | :part:`worked on` | D123235 (in progress) | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| base language | C++ attribute specifier syntax | :good:`done` | D105648 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | 'present' map type modifier | :good:`done` | D83061, D83062, D84422 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | 'present' motion modifier | :good:`done` | D84711, D84712 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | 'present' in defaultmap clause | :good:`done` | D92427 | 
-+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | map clause reordering based on 'present' modifier | :none:`unclaimed` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | device-specific environment variables | :none:`unclaimed` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | omp_target_is_accessible routine | :good:`done` | https://github.com/llvm/llvm-project/pull/138294 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | omp_get_mapped_ptr routine | :good:`done` | D141545 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | new async target memory copy routines | :good:`done` | D136103 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | thread_limit clause on target construct | :part:`partial` | D141540 (offload), D152054 (host, in progress) | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | has_device_addr clause on target construct | :none:`unclaimed` | | 
-+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | iterators in map clause or motion clauses | :none:`unclaimed` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | indirect clause on declare target directive | :part:`In Progress` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | allow virtual functions calls for mapped object on device | :part:`partial` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | interop construct | :part:`partial` | parsing/sema done: D98558, D98834, D98815 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | assorted routines for querying interoperable properties | :part:`partial` | D106674 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| loop | Loop tiling transformation | :good:`done` | D76342 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| loop | Loop unrolling transformation | :good:`done` | D99459 | 
-+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| loop | 'reproducible'/'unconstrained' modifiers in 'order' clause | :part:`partial` | D127855 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| memory management | alignment for allocate directive and clause | :good:`done` | D115683 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| memory management | 'allocator' modifier for allocate clause | :good:`done` | https://github.com/llvm/llvm-project/pull/114883 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| memory management | 'align' modifier for allocate clause | :good:`done` | https://github.com/llvm/llvm-project/pull/121814 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| memory management | new memory management routines | :none:`unclaimed` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| memory management | changes to omp_alloctrait_key enum | :none:`unclaimed` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| memory 
model | seq_cst clause on flush construct | :good:`done` | https://github.com/llvm/llvm-project/pull/114072 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| misc | 'omp_all_memory' keyword and use in 'depend' clause | :good:`done` | D125828, D126321 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| misc | error directive | :good:`done` | D139166 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| misc | scope construct | :good:`done` | D157933, https://github.com/llvm/llvm-project/pull/109197 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| misc | routines for controlling and querying team regions | :part:`partial` | D95003 (libomp only) | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| misc | changes to ompt_scope_endpoint_t enum | :none:`unclaimed` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| misc | omp_display_env routine | :good:`done` | D74956 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| misc | extended 
OMP_PLACES syntax | :none:`unclaimed` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| misc | OMP_NUM_TEAMS and OMP_TEAMS_THREAD_LIMIT env vars | :good:`done` | D138769 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| misc | 'target_device' selector in context specifier | :none:`worked on` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| misc | begin/end declare variant | :good:`done` | D71179 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| misc | dispatch construct and function variant argument adjustment | :part:`worked on` | D99537, D99679 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| misc | assumes directives | :part:`worked on` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| misc | assume directive | :good:`done` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| misc | nothing directive | :good:`done` | D123286 | 
-+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| misc | masked construct and related combined constructs | :good:`done` | D99995, D100514, PR-121741(parallel_masked_taskloop) | -| | | | PR-121746(parallel_masked_task_loop_simd),PR-121914(masked_taskloop) | -| | | | PR-121916(masked_taskloop_simd) | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| misc | default(firstprivate) & default(private) | :good:`done` | D75591 (firstprivate), D125912 (private) | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| other | deprecating master construct | :none:`unclaimed` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| OMPT | new barrier types added to ompt_sync_region_t enum | :none:`unclaimed` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| OMPT | async data transfers added to ompt_target_data_op_t enum | :none:`unclaimed` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| OMPT | new barrier state values added to ompt_state_t enum | :none:`unclaimed` | | 
-+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| OMPT | new 'emi' callbacks for external monitoring interfaces | :good:`done` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| OMPT | device tracing interface | :none:`in progress` | jplehr | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| task | 'strict' modifier for taskloop construct | :none:`unclaimed` | | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| task | inoutset in depend clause | :good:`done` | D97085, D118383 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| task | nowait clause on taskwait | :part:`partial` | parsing/sema done: D131830, D141531 | -+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ - - -.. _OpenMP 5.2 implementation details: - -OpenMP 5.2 Implementation Details -================================= - -The following table provides a quick overview of various OpenMP 5.2 features -and their implementation status. Please post on the -`Discourse forums (Runtimes - OpenMP category)`_ for more -information or if you want to help with the -implementation. 
- - - -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -|Feature | C/C++ Status | Fortran Status | Reviews | -+=============================================================+===========================+===========================+==========================================================================+ -| omp_in_explicit_task() | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| semantics of explicit_task_var and implicit_task_var | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| ompx sentinel for C/C++ directive extensions | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| ompx prefix for clause extensions | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| if clause on teams construct | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| step modifier added | :none:`unclaimed` | :none:`unclaimed` | | 
-+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| declare mapper: Add iterator modifier on map clause | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| declare mapper: Add iterator modifier on map clause | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| memspace and traits modifiers to uses allocator i | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Add otherwise clause to metadirectives | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| doacross clause with support for omp_cur_iteration | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| position of interop_type in init clause on iterop | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| implicit map type for target enter/exit data | 
:none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| work OMPT type for work-sharing loop constructs | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| allocate and firstprivate on scope directive | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Change loop consistency for order clause | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Add memspace and traits modifiers to uses_allocators | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Keep original base pointer on map w/o matched candidate | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Pure procedure support for certain directives | :none:`N/A` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| ALLOCATE 
statement support for allocators | :none:`N/A` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| dispatch construct extension to support end directive | :none:`N/A` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ - - - -.. _OpenMP 5.2 Deprecations: - -OpenMP 5.2 Deprecations -======================= - - - -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| | C/C++ Status | Fortran Status | Reviews | -+=============================================================+===========================+===========================+==========================================================================+ -| Linear clause syntax | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| The minus operator | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Map clause modifiers without commas | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| The use of allocate directives with ALLOCATE statement | :good:`N/A` | :none:`unclaimed` | | 
-+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| uses_allocators list syntax | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| The default clause on metadirectives | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| The delimited form of the declare target directive | :none:`unclaimed` | :good:`N/A` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| The use of the to clause on the declare target directive | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| The syntax of the destroy clause on the depobj construct | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| keyword source and sink as task-dependence modifiers | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| interop types in any position on init clause of interop | 
:none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| ompd prefix usage for some ICVs | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ - -.. _OpenMP 6.0 implementation details: - -OpenMP 6.0 Implementation Details -================================= - -The following table provides a quick overview of various OpenMP 6.0 features -and their implementation status. Please post on the -`Discourse forums (Runtimes - OpenMP category)`_ for more -information or if you want to help with the -implementation. - - -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -|Feature | C/C++ Status | Fortran Status | Reviews | -+=============================================================+===========================+===========================+==========================================================================+ -| free-agent threads | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| threadset clause | :part:`in progress` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Recording of task graphs | :part:`in progress` | :part:`in progress` | clang: jtb20, flang: kparzysz | 
-+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Parallel inductions | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| init_complete for scan directive | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| loop interchange transformation | :good:`done` | :none:`unclaimed` | Clang (interchange): https://github.com/llvm/llvm-project/pull/93022 | -| | | | Clang (permutation): https://github.com/llvm/llvm-project/pull/92030 | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| loop reverse transformation | :good:`done` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/92916 | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| loop stripe transformation | :good:`done` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/119891 | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| loop fusion transformation | :part:`in progress` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/139293 | 
-+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| loop index set splitting transformation | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| loop transformation apply clause | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| loop fuse transformation | :good:`done` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| workdistribute construct | | :none:`in progress` | @skc7, @mjklemm | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| task_iteration | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| memscope clause for atomic and flush | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| transparent clause (hull tasks) | :none:`unclaimed` | :none:`unclaimed` | | 
-+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| rule-based compound directives | :part:`In Progress` | :part:`In Progress` | kparzysz | -| | | | Testing for Fortran missing | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| C23, C++23 | :none:`unclaimed` | | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Fortran 2023 | | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| decl attribute for declarative directives | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| C attribute syntax | :none:`unclaimed` | | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| pure directives in DO CONCURRENT | | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Optional argument for all clauses | :none:`partial` | :none:`In Progress` | Parse/Sema (nowait): https://github.com/llvm/llvm-project/pull/159628 | 
-+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Function references for locator list items | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| All clauses accept directive name modifier | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Extensions to depobj construct | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Extensions to atomic construct | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Private reductions | :good:`mostly` | :none:`unclaimed` | Parse/Sema:https://github.com/llvm/llvm-project/pull/129938 | -| | | | Codegen: https://github.com/llvm/llvm-project/pull/134709 | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Self maps | :part:`partial` | :none:`unclaimed` | parsing/sema done: https://github.com/llvm/llvm-project/pull/129888 | 
-+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Release map type for declare mapper | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Extensions to interop construct | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| no_openmp_constructs | :good:`done` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/125933 | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| safe_sync and progress with identifier and API | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| OpenMP directives in concurrent loop regions | :good:`done` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/125621 | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| atomics constructs on concurrent loop regions | :good:`done` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/125621 | 
-+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Loop construct with DO CONCURRENT | | :part:`In Progress` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| device_type clause for target construct | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| nowait for ancestor target directives | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| New API for devices' num_teams/thread_limit | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Host and device environment variables | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| num_threads ICV and clause accepts list | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Numeric names for environment variables | :none:`unclaimed` | :none:`unclaimed` | | 
-+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Increment between places for OMP_PLACES | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| OMP_AVAILABLE_DEVICES envirable | :none:`unclaimed` | :none:`unclaimed` | (should wait for "Traits for default device envirable" being done) | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Traits for default device envirable | :part:`in progress` | :none:`unclaimed` | ro-i | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Optionally omit array length expression | :good:`done` | :none:`unclaimed` | (Parse) https://github.com/llvm/llvm-project/pull/148048, | -| | | | (Sema) https://github.com/llvm/llvm-project/pull/152786 | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Canonical loop sequences | :part:`in progress` | :part:`in progress` | Clang: https://github.com/llvm/llvm-project/pull/139293 | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Clarifications to Fortran map semantics | :none:`unclaimed` | :none:`unclaimed` | | 
-+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| default clause at target construct | :part:`In Progress` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| ref count update use_device_{ptr, addr} | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Clarifications to implicit reductions | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| ref modifier for map clauses | :part:`In Progress` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| map-type modifiers in arbitrary position | :good:`done` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/90499 | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Lift nesting restriction on concurrent loop | :good:`done` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/125621 | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| priority clause for 
target constructs | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| changes to target_data construct | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Non-const do_not_sync for nowait/nogroup | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| need_device_addr modifier for adjust_args clause | :part:`partial` | :none:`unclaimed` | Parsing/Sema: https://github.com/llvm/llvm-project/pull/143442 | -| | | | https://github.com/llvm/llvm-project/pull/149586 | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Prescriptive num_threads | :good:`done` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/160659 | -| | | | https://github.com/llvm/llvm-project/pull/146403 | -| | | | https://github.com/llvm/llvm-project/pull/146404 | -| | | | https://github.com/llvm/llvm-project/pull/146405 | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Message and severity clauses | :good:`done` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/146093 | 
-+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Local clause on declare target | :part:`In Progress` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| groupprivate directive | :part:`In Progress` | :part:`partial` | Flang: kparzysz, mjklemm | -| | | | | -| | | | Flang parser: https://github.com/llvm/llvm-project/pull/153807 | -| | | | Flang sema: https://github.com/llvm/llvm-project/pull/154779 | -| | | | Clang parse/sema: https://github.com/llvm/llvm-project/pull/158134 | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| variable-category on default clause | :good:`done` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Changes to omp_target_is_accessible | :part:`In Progress` | :part:`In Progress` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| defaultmap implicit-behavior 'storage' | :good:`done` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/158336 | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| defaultmap implicit-behavior 'private' | :good:`done` | :none:`unclaimed` | 
https://github.com/llvm/llvm-project/pull/158712 | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ - -.. _OpenMP 6.1 implementation details: - -OpenMP 6.1 Implementation Details (Experimental) -================================================ - -The following table provides a quick overview over various OpenMP 6.1 features -and their implementation status. Since OpenMP 6.1 has not yet been released, the -following features are experimental and are subject to change at any time. -Please post on the `Discourse forums (Runtimes - OpenMP category)`_ for more -information or if you want to help with the -implementation. - -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -|Feature | C/C++ Status | Fortran Status | Reviews | -+=============================================================+===========================+===========================+==========================================================================+ -| dyn_groupprivate clause | :part:`In Progress` | :part:`In Progress` | C/C++: kevinsala (https://github.com/llvm/llvm-project/pull/152651 | -| | | | https://github.com/llvm/llvm-project/pull/152830 | -| | | | https://github.com/llvm/llvm-project/pull/152831) | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| loop flatten transformation | :none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| loop grid/tile modifiers for sizes clause | 
:none:`unclaimed` | :none:`unclaimed` | | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| attach map-type modifier | :part:`In Progress` | :none:`unclaimed` | C/C++: @abhinavgaba; | -| | | | RT: @abhinavgaba (https://github.com/llvm/llvm-project/pull/149036, | -| | | | https://github.com/llvm/llvm-project/pull/158370) | -+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ - - -OpenMP Extensions -================= - -The following table provides a quick overview over various OpenMP -extensions and their implementation status. These extensions are not -currently defined by any standard, so links to associated LLVM -documentation are provided. As these extensions mature, they will be -considered for standardization. Please post on the -`Discourse forums (Runtimes - OpenMP category)`_ to provide feedback. 
- -+------------------------------+-----------------------------------------------------------------------------------+--------------------------+--------------------------------------------------------+ -|Category | Feature | Status | Reviews | -+==============================+===================================================================================+==========================+========================================================+ -| atomic extension | `'atomic' strictly nested within 'teams' | :good:`prototyped` | D126323 | -| | `_ | | | -+------------------------------+-----------------------------------------------------------------------------------+--------------------------+--------------------------------------------------------+ -| device extension | `'ompx_hold' map type modifier | :good:`prototyped` | D106509, D106510 | -| | `_ | | | -+------------------------------+-----------------------------------------------------------------------------------+--------------------------+--------------------------------------------------------+ -| device extension | `'ompx_bare' clause on 'target teams' construct | :good:`prototyped` | #66844, #70612 | -| | `_ | | | -+------------------------------+-----------------------------------------------------------------------------------+--------------------------+--------------------------------------------------------+ -| device extension | Multi-dim 'num_teams' and 'thread_limit' clause on 'target teams ompx_bare' | :good:`partial` | #99732, #101407, #102715 | -| | construct | | | -+------------------------------+-----------------------------------------------------------------------------------+--------------------------+--------------------------------------------------------+ - -.. _Discourse forums (Runtimes - OpenMP category): https://discourse.llvm.org/c/runtimes/openmp/35 +.. raw:: html + + + +.. role:: none +.. role:: part +.. role:: good + +.. 
contents:: + :local: + +============== +OpenMP Support +============== + +Clang fully supports OpenMP 4.5, almost all of 5.0 and most of 5.1/2. +Clang supports offloading to X86_64, AArch64, PPC64[LE], NVIDIA GPUs (all models) and AMD GPUs (all models). + +In addition, the LLVM OpenMP runtime `libomp` supports the OpenMP Tools +Interface (OMPT) on x86, x86_64, AArch64, and PPC64 on Linux, Windows, and macOS. +OMPT is also supported for NVIDIA and AMD GPUs. + +For the list of supported features from OpenMP 5.0 and 5.1 +see `OpenMP implementation details`_ and `OpenMP 51 implementation details`_. + +General improvements +==================== +- New collapse clause scheme to avoid expensive remainder operations. + Compute loop index variables after collapsing a loop nest via the + collapse clause by replacing the expensive remainder operation with + multiplications and additions. + +- When using the collapse clause on a loop nest the default behavior + is to automatically extend the representation of the loop counter to + 64 bits for the cases where the sizes of the collapsed loops are not + known at compile time. To prevent this conservative choice and use + at most 32 bits, compile your program with the + `-fopenmp-optimistic-collapse` flag. + + +GPU devices support +=================== + +Data-sharing modes +------------------ + +Clang supports two data-sharing models for Cuda devices: `Generic` and `Cuda` +modes. The default mode is `Generic`. `Cuda` mode can give additional +performance and can be activated using the `-fopenmp-cuda-mode` flag. In +`Generic` mode all local variables that can be shared in the parallel regions +are stored in the global memory. In `Cuda` mode local variables are not shared +between the threads and it is the user's responsibility to share the required data +between the threads in the parallel regions.
Often, the optimizer is able to +reduce the cost of `Generic` mode to the level of `Cuda` mode, but the flag, +as well as other assumption flags, can be used for tuning. + +Features not supported or with limited support for Cuda devices +--------------------------------------------------------------- + +- Cancellation constructs are not supported. + +- Doacross loop nest is not supported. + +- User-defined reductions are supported only for trivial types. + +- Nested parallelism: inner parallel regions are executed sequentially. + +- Debug information for OpenMP target regions is supported, but sometimes it may + be required to manually specify the address class of the inspected variables. + In some cases the local variables are actually allocated in the global memory, + but the debug info may not be aware of it. + + +.. _OpenMP implementation details: + +OpenMP 5.0 Implementation Details +================================= + +The following table provides a quick overview of various OpenMP 5.0 features +and their implementation status. Please post on the +`Discourse forums (Runtimes - OpenMP category)`_ for more +information or if you want to help with the +implementation.
+ ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +|Category | Feature | Status | Reviews | ++==============================+==============================================================+==========================+=======================================================================+ +| loop | support != in the canonical loop form | :good:`done` | D54441 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| loop | #pragma omp loop (directive) | :part:`partial` | D145823 (combined forms) | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| loop | #pragma omp loop bind | :part:`worked on` | D144634 (needs review) | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| loop | collapse imperfectly nested loop | :good:`done` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| loop | collapse non-rectangular nested loop | :good:`done` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| loop | C++ range-base for loop | :good:`done` | | 
++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| loop | clause: if for SIMD directives | :good:`done` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| loop | inclusive scan (matching C++17 PSTL) | :good:`done` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| memory management | memory allocators | :good:`done` | r341687,r357929 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| memory management | allocate directive and allocate clause | :good:`done` | r355614,r335952 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| OMPD | OMPD interfaces | :good:`done` | https://reviews.llvm.org/D99914 (Supports only HOST(CPU) and Linux | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| OMPT | OMPT interfaces (callback support) | :good:`done` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| thread affinity | thread affinity | :good:`done` | | 
++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| task | taskloop reduction | :good:`done` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| task | task affinity | :part:`not upstream` | https://github.com/jklinkenberg/openmp/tree/task-affinity | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| task | clause: depend on the taskwait construct | :good:`done` | D113540 (regular codegen only) | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| task | depend objects and detachable tasks | :good:`done` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| task | mutexinoutset dependence-type for tasks | :good:`done` | D53380,D57576 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| task | combined taskloop constructs | :good:`done` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| task | master taskloop | :good:`done` | | 
++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| task | parallel master taskloop | :good:`done` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| task | master taskloop simd | :good:`done` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| task | parallel master taskloop simd | :good:`done` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| SIMD | atomic and simd constructs inside SIMD code | :good:`done` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| SIMD | SIMD nontemporal | :good:`done` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | infer target functions from initializers | :part:`worked on` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | infer target variables from initializers | :good:`done` | D146418 | 
++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | OMP_TARGET_OFFLOAD environment variable | :good:`done` | D50522 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | support full 'defaultmap' functionality | :good:`done` | D69204 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | device specific functions | :good:`done` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | clause: device_type | :good:`done` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | clause: extended device | :good:`done` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | clause: uses_allocators clause | :good:`done` | https://github.com/llvm/llvm-project/pull/157025 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | clause: in_reduction | :part:`worked on` | r308768 | 
++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | omp_get_device_num() | :good:`done` | D54342,D128347 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | structure mapping of references | :none:`unclaimed` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | nested target declare | :good:`done` | D51378 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | implicitly map 'this' (this[:1]) | :good:`done` | D55982 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | allow access to the reference count (omp_target_is_present) | :good:`done` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | requires directive | :good:`done` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | clause: unified_shared_memory | :good:`done` | D52625,D52359 | 
++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | clause: unified_address | :part:`partial` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | clause: reverse_offload | :part:`partial` | D52780,D155003 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | clause: atomic_default_mem_order | :good:`done` | D53513 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | clause: dynamic_allocators | :part:`unclaimed parts` | D53079 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | user-defined mappers | :good:`done` | D56326,D58638,D58523,D58074,D60972,D59474 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | map array-section with implicit mapper | :good:`done` | https://github.com/llvm/llvm-project/pull/101101 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | mapping lambda expression | :good:`done` | D51107 | 
++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | clause: use_device_addr for target data | :good:`done` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | support close modifier on map clause | :good:`done` | D55719,D55892 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | teams construct on the host device | :good:`done` | r371553 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | support non-contiguous array sections for target update | :good:`done` | https://github.com/llvm/llvm-project/pull/144635 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | pointer attachment | :part:`being repaired` | @abhinavgaba (https://github.com/llvm/llvm-project/pull/153683) | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| atomic | hints for the atomic construct | :good:`done` | D51233 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| base language | C11 support | :good:`done` | | 
++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| base language | C++11/14/17 support | :good:`done` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| base language | lambda support | :good:`done` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| misc | array shaping | :good:`done` | D74144 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| misc | library shutdown (omp_pause_resource[_all]) | :good:`done` | D55078 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| misc | metadirectives | :part:`mostly done` | D91944, https://github.com/llvm/llvm-project/pull/128640 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| misc | conditional modifier for lastprivate clause | :good:`done` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| misc | iterator and multidependences | :good:`done` | | 
++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| misc | depobj directive and depobj dependency kind | :good:`done` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| misc | user-defined function variants | :good:`done` | D67294, D64095, D71847, D71830, D109635 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| misc | pointer/reference to pointer based array reductions | :good:`done` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| misc | prevent new type definitions in clauses | :good:`done` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| memory model | memory model update (seq_cst, acq_rel, release, acquire,...) | :good:`done` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ + + +.. _OpenMP 51 implementation details: + +OpenMP 5.1 Implementation Details +================================= + +The following table provides a quick overview of various OpenMP 5.1 features +and their implementation status. +Please post on the +`Discourse forums (Runtimes - OpenMP category)`_ for more +information or if you want to help with the +implementation. 
+ ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +|Category | Feature | Status | Reviews | ++==============================+==============================================================+==========================+=======================================================================+ +| atomic | 'compare' clause on atomic construct | :good:`done` | D120290, D120007, D118632, D120200, D116261, D118547, D116637 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| atomic | 'fail' clause on atomic construct | :part:`worked on` | D123235 (in progress) | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| base language | C++ attribute specifier syntax | :good:`done` | D105648 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | 'present' map type modifier | :good:`done` | D83061, D83062, D84422 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | 'present' motion modifier | :good:`done` | D84711, D84712 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | 'present' in defaultmap clause | :good:`done` | D92427 | 
++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | map clause reordering based on 'present' modifier | :none:`unclaimed` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | device-specific environment variables | :none:`unclaimed` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | omp_target_is_accessible routine | :good:`done` | https://github.com/llvm/llvm-project/pull/138294 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | omp_get_mapped_ptr routine | :good:`done` | D141545 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | new async target memory copy routines | :good:`done` | D136103 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | thread_limit clause on target construct | :part:`partial` | D141540 (offload), D152054 (host, in progress) | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | has_device_addr clause on target construct | :none:`unclaimed` | | 
++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | iterators in map clause or motion clauses | :none:`unclaimed` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | indirect clause on declare target directive | :part:`In Progress` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | allow virtual functions calls for mapped object on device | :part:`partial` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | interop construct | :part:`partial` | parsing/sema done: D98558, D98834, D98815 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| device | assorted routines for querying interoperable properties | :part:`partial` | D106674 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| loop | Loop tiling transformation | :good:`done` | D76342 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| loop | Loop unrolling transformation | :good:`done` | D99459 | 
++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| loop | 'reproducible'/'unconstrained' modifiers in 'order' clause | :part:`partial` | D127855 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| memory management | alignment for allocate directive and clause | :good:`done` | D115683 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| memory management | 'allocator' modifier for allocate clause | :good:`done` | https://github.com/llvm/llvm-project/pull/114883 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| memory management | 'align' modifier for allocate clause | :good:`done` | https://github.com/llvm/llvm-project/pull/121814 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| memory management | new memory management routines | :none:`unclaimed` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| memory management | changes to omp_alloctrait_key enum | :none:`unclaimed` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| memory 
model | seq_cst clause on flush construct | :good:`done` | https://github.com/llvm/llvm-project/pull/114072 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| misc | 'omp_all_memory' keyword and use in 'depend' clause | :good:`done` | D125828, D126321 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| misc | error directive | :good:`done` | D139166 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| misc | scope construct | :good:`done` | D157933, https://github.com/llvm/llvm-project/pull/109197 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| misc | routines for controlling and querying team regions | :part:`partial` | D95003 (libomp only) | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| misc | changes to ompt_scope_endpoint_t enum | :none:`unclaimed` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| misc | omp_display_env routine | :good:`done` | D74956 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| misc | extended 
OMP_PLACES syntax | :none:`unclaimed` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| misc | OMP_NUM_TEAMS and OMP_TEAMS_THREAD_LIMIT env vars | :good:`done` | D138769 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| misc | 'target_device' selector in context specifier | :part:`worked on` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| misc | begin/end declare variant | :good:`done` | D71179 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| misc | dispatch construct and function variant argument adjustment | :part:`worked on` | D99537, D99679 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| misc | assumes directives | :part:`worked on` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| misc | assume directive | :good:`done` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| misc | nothing directive | :good:`done` | D123286 | 
++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| misc | masked construct and related combined constructs | :good:`done` | D99995, D100514, PR-121741(parallel_masked_taskloop) | +| | | | PR-121746(parallel_masked_task_loop_simd),PR-121914(masked_taskloop) | +| | | | PR-121916(masked_taskloop_simd) | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| misc | default(firstprivate) & default(private) | :good:`done` | D75591 (firstprivate), D125912 (private) | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| other | deprecating master construct | :none:`unclaimed` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| OMPT | new barrier types added to ompt_sync_region_t enum | :none:`unclaimed` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| OMPT | async data transfers added to ompt_target_data_op_t enum | :none:`unclaimed` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| OMPT | new barrier state values added to ompt_state_t enum | :none:`unclaimed` | | 
++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| OMPT | new 'emi' callbacks for external monitoring interfaces | :good:`done` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| OMPT | device tracing interface | :part:`in progress` | jplehr | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| task | 'strict' modifier for taskloop construct | :none:`unclaimed` | | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| task | inoutset in depend clause | :good:`done` | D97085, D118383 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ +| task | nowait clause on taskwait | :part:`partial` | parsing/sema done: D131830, D141531 | ++------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ + + +.. _OpenMP 5.2 implementation details: + +OpenMP 5.2 Implementation Details +================================= + +The following table provides a quick overview of various OpenMP 5.2 features +and their implementation status. Please post on the +`Discourse forums (Runtimes - OpenMP category)`_ for more +information or if you want to help with the +implementation. 
+ + + ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +|Feature | C/C++ Status | Fortran Status | Reviews | ++=============================================================+===========================+===========================+==========================================================================+ +| omp_in_explicit_task() | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| semantics of explicit_task_var and implicit_task_var | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| ompx sentinel for C/C++ directive extensions | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| ompx prefix for clause extensions | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| if clause on teams construct | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| step modifier added | :none:`unclaimed` | :none:`unclaimed` | | 
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| declare mapper: Add iterator modifier on map clause | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| declare mapper: Add iterator modifier on map clause | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| memspace and traits modifiers to uses allocator i | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Add otherwise clause to metadirectives | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| doacross clause with support for omp_cur_iteration | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| position of interop_type in init clause on interop | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| implicit map type for target enter/exit data | 
:none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| work OMPT type for work-sharing loop constructs | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| allocate and firstprivate on scope directive | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Change loop consistency for order clause | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Add memspace and traits modifiers to uses_allocators | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Keep original base pointer on map w/o matched candidate | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Pure procedure support for certain directives | :none:`N/A` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| ALLOCATE 
statement support for allocators | :none:`N/A` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| dispatch construct extension to support end directive | :none:`N/A` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ + + + +.. _OpenMP 5.2 Deprecations: + +OpenMP 5.2 Deprecations +======================= + + + ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| | C/C++ Status | Fortran Status | Reviews | ++=============================================================+===========================+===========================+==========================================================================+ +| Linear clause syntax | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| The minus operator | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Map clause modifiers without commas | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| The use of allocate directives with ALLOCATE statement | :good:`N/A` | :none:`unclaimed` | | 
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| uses_allocators list syntax | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| The default clause on metadirectives | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| The delimited form of the declare target directive | :none:`unclaimed` | :good:`N/A` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| The use of the to clause on the declare target directive | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| The syntax of the destroy clause on the depobj construct | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| keyword source and sink as task-dependence modifiers | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| interop types in any position on init clause of interop | 
:none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| ompd prefix usage for some ICVs | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ + +.. _OpenMP 6.0 implementation details: + +OpenMP 6.0 Implementation Details +================================= + +The following table provides a quick overview of various OpenMP 6.0 features +and their implementation status. Please post on the +`Discourse forums (Runtimes - OpenMP category)`_ for more +information or if you want to help with the +implementation. + + ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +|Feature | C/C++ Status | Fortran Status | Reviews | ++=============================================================+===========================+===========================+==========================================================================+ +| free-agent threads | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| threadset clause | :part:`partial` | :none:`unclaimed` | Parse/Sema/Codegen : https://github.com/llvm/llvm-project/pull/13580 | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Recording of task graphs | :part:`in progress` | :part:`in progress` | 
clang: jtb20, flang: kparzysz | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Parallel inductions | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| init_complete for scan directive | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| loop interchange transformation | :good:`done` | :none:`unclaimed` | Clang (interchange): https://github.com/llvm/llvm-project/pull/93022 | +| | | | Clang (permutation): https://github.com/llvm/llvm-project/pull/92030 | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| loop reverse transformation | :good:`done` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/92916 | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| loop stripe transformation | :good:`done` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/119891 | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| loop fusion transformation | :part:`in progress` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/139293 | 
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| loop index set splitting transformation | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| loop transformation apply clause | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| loop fuse transformation | :good:`done` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| workdistribute construct | | :none:`in progress` | @skc7, @mjklemm | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| task_iteration | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| memscope clause for atomic and flush | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| transparent clause (hull tasks) | :none:`unclaimed` | :none:`unclaimed` | | 
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| rule-based compound directives | :part:`In Progress` | :part:`In Progress` | kparzysz | +| | | | Testing for Fortran missing | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| C23, C++23 | :none:`unclaimed` | | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Fortran 2023 | | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| decl attribute for declarative directives | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| C attribute syntax | :none:`unclaimed` | | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| pure directives in DO CONCURRENT | | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Optional argument for all clauses | :none:`partial` | :none:`In Progress` | Parse/Sema (nowait): https://github.com/llvm/llvm-project/pull/159628 | 
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Function references for locator list items | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| All clauses accept directive name modifier | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Extensions to depobj construct | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Extensions to atomic construct | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Private reductions | :good:`mostly` | :none:`unclaimed` | Parse/Sema:https://github.com/llvm/llvm-project/pull/129938 | +| | | | Codegen: https://github.com/llvm/llvm-project/pull/134709 | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Self maps | :part:`partial` | :none:`unclaimed` | parsing/sema done: https://github.com/llvm/llvm-project/pull/129888 | 
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Release map type for declare mapper | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Extensions to interop construct | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| no_openmp_constructs | :good:`done` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/125933 | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| safe_sync and progress with identifier and API | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| OpenMP directives in concurrent loop regions | :good:`done` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/125621 | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| atomics constructs on concurrent loop regions | :good:`done` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/125621 | 
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Loop construct with DO CONCURRENT | | :part:`In Progress` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| device_type clause for target construct | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| nowait for ancestor target directives | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| New API for devices' num_teams/thread_limit | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Host and device environment variables | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| num_threads ICV and clause accepts list | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Numeric names for environment variables | :none:`unclaimed` | :none:`unclaimed` | | 
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Increment between places for OMP_PLACES | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| OMP_AVAILABLE_DEVICES envirable | :none:`unclaimed` | :none:`unclaimed` | (should wait for "Traits for default device envirable" being done) | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Traits for default device envirable | :part:`in progress` | :none:`unclaimed` | ro-i | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Optionally omit array length expression | :good:`done` | :none:`unclaimed` | (Parse) https://github.com/llvm/llvm-project/pull/148048, | +| | | | (Sema) https://github.com/llvm/llvm-project/pull/152786 | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Canonical loop sequences | :part:`in progress` | :part:`in progress` | Clang: https://github.com/llvm/llvm-project/pull/139293 | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Clarifications to Fortran map semantics | :none:`unclaimed` | :none:`unclaimed` | | 
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| default clause at target construct | :part:`In Progress` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| ref count update use_device_{ptr, addr} | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Clarifications to implicit reductions | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| ref modifier for map clauses | :part:`In Progress` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| map-type modifiers in arbitrary position | :good:`done` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/90499 | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Lift nesting restriction on concurrent loop | :good:`done` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/125621 | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| priority clause for 
target constructs | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| changes to target_data construct | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Non-const do_not_sync for nowait/nogroup | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| need_device_addr modifier for adjust_args clause | :part:`partial` | :none:`unclaimed` | Parsing/Sema: https://github.com/llvm/llvm-project/pull/143442 | +| | | | https://github.com/llvm/llvm-project/pull/149586 | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Prescriptive num_threads | :good:`done` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/160659 | +| | | | https://github.com/llvm/llvm-project/pull/146403 | +| | | | https://github.com/llvm/llvm-project/pull/146404 | +| | | | https://github.com/llvm/llvm-project/pull/146405 | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Message and severity clauses | :good:`done` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/146093 | 
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Local clause on declare target | :part:`In Progress` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| groupprivate directive | :part:`In Progress` | :part:`partial` | Flang: kparzysz, mjklemm | +| | | | | +| | | | Flang parser: https://github.com/llvm/llvm-project/pull/153807 | +| | | | Flang sema: https://github.com/llvm/llvm-project/pull/154779 | +| | | | Clang parse/sema: https://github.com/llvm/llvm-project/pull/158134 | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| variable-category on default clause | :good:`done` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| Changes to omp_target_is_accessible | :part:`In Progress` | :part:`In Progress` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| defaultmap implicit-behavior 'storage' | :good:`done` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/158336 | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| defaultmap implicit-behavior 'private' | :good:`done` | :none:`unclaimed` | 
https://github.com/llvm/llvm-project/pull/158712 | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ + +.. _OpenMP 6.1 implementation details: + +OpenMP 6.1 Implementation Details (Experimental) +================================================ + +The following table provides a quick overview of various OpenMP 6.1 features +and their implementation status. Since OpenMP 6.1 has not yet been released, the +following features are experimental and are subject to change at any time. +Please post on the `Discourse forums (Runtimes - OpenMP category)`_ for more +information or if you want to help with the +implementation. + ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +|Feature | C/C++ Status | Fortran Status | Reviews | ++=============================================================+===========================+===========================+==========================================================================+ +| dyn_groupprivate clause | :part:`In Progress` | :part:`In Progress` | C/C++: kevinsala (https://github.com/llvm/llvm-project/pull/152651 | +| | | | https://github.com/llvm/llvm-project/pull/152830 | +| | | | https://github.com/llvm/llvm-project/pull/152831) | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| loop flatten transformation | :none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| loop grid/tile modifiers for sizes clause |
:none:`unclaimed` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| attach map-type modifier | :part:`In Progress` | :none:`unclaimed` | C/C++: @abhinavgaba; | +| | | | RT: @abhinavgaba (https://github.com/llvm/llvm-project/pull/149036, | +| | | | https://github.com/llvm/llvm-project/pull/158370) | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ + + +OpenMP Extensions +================= + +The following table provides a quick overview of various OpenMP +extensions and their implementation status. These extensions are not +currently defined by any standard, so links to associated LLVM +documentation are provided. As these extensions mature, they will be +considered for standardization. Please post on the +`Discourse forums (Runtimes - OpenMP category)`_ to provide feedback.
+ ++------------------------------+-----------------------------------------------------------------------------------+--------------------------+--------------------------------------------------------+ +|Category | Feature | Status | Reviews | ++==============================+===================================================================================+==========================+========================================================+ +| atomic extension | `'atomic' strictly nested within 'teams' | :good:`prototyped` | D126323 | +| | `_ | | | ++------------------------------+-----------------------------------------------------------------------------------+--------------------------+--------------------------------------------------------+ +| device extension | `'ompx_hold' map type modifier | :good:`prototyped` | D106509, D106510 | +| | `_ | | | ++------------------------------+-----------------------------------------------------------------------------------+--------------------------+--------------------------------------------------------+ +| device extension | `'ompx_bare' clause on 'target teams' construct | :good:`prototyped` | #66844, #70612 | +| | `_ | | | ++------------------------------+-----------------------------------------------------------------------------------+--------------------------+--------------------------------------------------------+ +| device extension | Multi-dim 'num_teams' and 'thread_limit' clause on 'target teams ompx_bare' | :good:`partial` | #99732, #101407, #102715 | +| | construct | | | ++------------------------------+-----------------------------------------------------------------------------------+--------------------------+--------------------------------------------------------+ + +.. 
_Discourse forums (Runtimes - OpenMP category): https://discourse.llvm.org/c/runtimes/openmp/35 diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index add1582344a0e..8435f367029a5 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -661,6 +661,7 @@ OpenMP Support modifier in the ``adjust_args`` clause. - Allow array length to be omitted in array section subscript expression. - Fixed non-contiguous strided update in the ``omp target update`` directive with the ``from`` clause. +- Added support for threadset clause in task and taskloop directives. - Properly handle array section/assumed-size array privatization in C/C++. - Added support to handle new syntax of the ``uses_allocators`` clause. - Added support for ``variable-category`` modifier in ``default clause``. diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index bc791e46e7c92..4f507485968cd 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -1424,6 +1424,86 @@ class OMPDefaultClause : public OMPClause { } }; +/// This represents 'threadset' clause in the '#pragma omp task ...' directive. +/// +/// \code +/// #pragma omp task threadset(omp_pool) +/// \endcode +/// In this example directive '#pragma omp task' has simple 'threadset' +/// clause with kind 'omp_pool'. +class OMPThreadsetClause final : public OMPClause { + friend class OMPClauseReader; + + /// Location of '('. + SourceLocation LParenLoc; + + /// A kind of the 'threadset' clause. + OpenMPThreadsetKind Kind = OMPC_THREADSET_unknown; + + /// Start location of the kind in source code. + SourceLocation KindLoc; + + /// Set kind of the clauses. + /// + /// \param K Argument of clause. + void setThreadsetKind(OpenMPThreadsetKind K) { Kind = K; } + + /// Set argument location. + /// + /// \param KLoc Argument location. 
+ void setThreadsetKindLoc(SourceLocation KLoc) { KindLoc = KLoc; } + +public: + /// Build 'threadset' clause with argument \a A ('omp_team' or 'omp_pool'). + /// + /// \param A Argument of the clause ('omp_team' or 'omp_pool'). + /// \param ALoc Starting location of the argument. + /// \param StartLoc Starting location of the clause. + /// \param LParenLoc Location of '('. + /// \param EndLoc Ending location of the clause. + OMPThreadsetClause(OpenMPThreadsetKind A, SourceLocation ALoc, + SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation EndLoc) + : OMPClause(llvm::omp::OMPC_threadset, StartLoc, EndLoc), + LParenLoc(LParenLoc), Kind(A), KindLoc(ALoc) {} + + /// Build an empty clause. + OMPThreadsetClause() + : OMPClause(llvm::omp::OMPC_threadset, SourceLocation(), + SourceLocation()) {} + + /// Sets the location of '('. + void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; } + + /// Returns the location of '('. + SourceLocation getLParenLoc() const { return LParenLoc; } + + /// Returns kind of the clause. + OpenMPThreadsetKind getThreadsetKind() const { return Kind; } + + /// Returns location of clause kind. + SourceLocation getThreadsetKindLoc() const { return KindLoc; } + + child_range children() { + return child_range(child_iterator(), child_iterator()); + } + + const_child_range children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } + + child_range used_children() { + return child_range(child_iterator(), child_iterator()); + } + const_child_range used_children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } + + static bool classof(const OMPClause *T) { + return T->getClauseKind() == llvm::omp::OMPC_threadset; + } +}; + /// This represents 'proc_bind' clause in the '#pragma omp ...' /// directive. 
/// diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 32b2b6bdb989c..8cb0a657023b4 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3523,6 +3523,12 @@ bool RecursiveASTVisitor::VisitOMPDefaultClause(OMPDefaultClause *) { return true; } +template +bool RecursiveASTVisitor::VisitOMPThreadsetClause( + OMPThreadsetClause *) { + return true; +} + template bool RecursiveASTVisitor::VisitOMPProcBindClause(OMPProcBindClause *) { return true; diff --git a/clang/include/clang/Basic/OpenMPKinds.def b/clang/include/clang/Basic/OpenMPKinds.def index 202d06fa1fcaa..328a0747a82a8 100644 --- a/clang/include/clang/Basic/OpenMPKinds.def +++ b/clang/include/clang/Basic/OpenMPKinds.def @@ -98,6 +98,9 @@ #ifndef OPENMP_ALLOCATE_MODIFIER #define OPENMP_ALLOCATE_MODIFIER(Name) #endif +#ifndef OPENMP_THREADSET_KIND +#define OPENMP_THREADSET_KIND(Name) +#endif // Static attributes for 'schedule' clause. OPENMP_SCHEDULE_KIND(static) @@ -255,6 +258,9 @@ OPENMP_DOACROSS_MODIFIER(sink) OPENMP_DOACROSS_MODIFIER(sink_omp_cur_iteration) OPENMP_DOACROSS_MODIFIER(source_omp_cur_iteration) +OPENMP_THREADSET_KIND(omp_pool) +OPENMP_THREADSET_KIND(omp_team) + #undef OPENMP_NUMTASKS_MODIFIER #undef OPENMP_NUMTHREADS_MODIFIER #undef OPENMP_GRAINSIZE_MODIFIER @@ -284,4 +290,4 @@ OPENMP_DOACROSS_MODIFIER(source_omp_cur_iteration) #undef OPENMP_DEFAULTMAP_MODIFIER #undef OPENMP_DOACROSS_MODIFIER #undef OPENMP_ALLOCATE_MODIFIER - +#undef OPENMP_THREADSET_KIND diff --git a/clang/include/clang/Basic/OpenMPKinds.h b/clang/include/clang/Basic/OpenMPKinds.h index ed89a31e2684b..c9ddbcd6d46c1 100644 --- a/clang/include/clang/Basic/OpenMPKinds.h +++ b/clang/include/clang/Basic/OpenMPKinds.h @@ -250,6 +250,13 @@ enum OpenMPAllocateClauseModifier { OMPC_ALLOCATE_unknown }; +/// OpenMP modifiers for 'threadset' clause. 
+enum OpenMPThreadsetKind { +#define OPENMP_THREADSET_KIND(Name) OMPC_THREADSET_##Name, +#include "clang/Basic/OpenMPKinds.def" + OMPC_THREADSET_unknown +}; + /// Number of allowed allocate-modifiers. static constexpr unsigned NumberOfOMPAllocateClauseModifiers = OMPC_ALLOCATE_unknown; diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h index f9baeed03c347..ba12b403d9b9a 100644 --- a/clang/include/clang/Sema/SemaOpenMP.h +++ b/clang/include/clang/Sema/SemaOpenMP.h @@ -975,6 +975,12 @@ class SemaOpenMP : public SemaBase { OpenMPDefaultClauseVariableCategory VCKind, SourceLocation VCKindLoc, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc); + /// Called on well-formed 'threadset' clause. + OMPClause *ActOnOpenMPThreadsetClause(OpenMPThreadsetKind Kind, + SourceLocation KindLoc, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); /// Called on well-formed 'proc_bind' clause. OMPClause *ActOnOpenMPProcBindClause(llvm::omp::ProcBindKind Kind, SourceLocation KindLoc, diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index 791df7ee1c3d4..59d94590e04d1 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -124,6 +124,7 @@ const OMPClauseWithPreInit *OMPClauseWithPreInit::get(const OMPClause *C) { case OMPC_nowait: case OMPC_untied: case OMPC_mergeable: + case OMPC_threadset: case OMPC_threadprivate: case OMPC_groupprivate: case OMPC_flush: @@ -2035,6 +2036,13 @@ void OMPClausePrinter::VisitOMPDefaultClause(OMPDefaultClause *Node) { OS << ")"; } +void OMPClausePrinter::VisitOMPThreadsetClause(OMPThreadsetClause *Node) { + OS << "threadset(" + << getOpenMPSimpleClauseTypeName(OMPC_threadset, + unsigned(Node->getThreadsetKind())) + << ")"; +} + void OMPClausePrinter::VisitOMPProcBindClause(OMPProcBindClause *Node) { OS << "proc_bind(" << getOpenMPSimpleClauseTypeName(OMPC_proc_bind, diff --git a/clang/lib/AST/StmtProfile.cpp 
b/clang/lib/AST/StmtProfile.cpp index 05b64ccda0d01..c909e1bcecd38 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -546,6 +546,8 @@ void OMPClauseProfiler::VisitOMPNocontextClause(const OMPNocontextClause *C) { void OMPClauseProfiler::VisitOMPDefaultClause(const OMPDefaultClause *C) { } +void OMPClauseProfiler::VisitOMPThreadsetClause(const OMPThreadsetClause *C) {} + void OMPClauseProfiler::VisitOMPProcBindClause(const OMPProcBindClause *C) { } void OMPClauseProfiler::VisitOMPUnifiedAddressClause( diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 64b2bff063340..3d41f2d197b81 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -210,6 +210,15 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, StringRef Str, #define OPENMP_ALLOCATE_MODIFIER(Name) .Case(#Name, OMPC_ALLOCATE_##Name) #include "clang/Basic/OpenMPKinds.def" .Default(OMPC_ALLOCATE_unknown); + case OMPC_threadset: { + unsigned Type = llvm::StringSwitch(Str) +#define OPENMP_THREADSET_KIND(Name) .Case(#Name, OMPC_THREADSET_##Name) +#include "clang/Basic/OpenMPKinds.def" + .Default(OMPC_THREADSET_unknown); + if (LangOpts.OpenMP < 60) + return OMPC_THREADSET_unknown; + return Type; + } case OMPC_num_threads: { unsigned Type = llvm::StringSwitch(Str) #define OPENMP_NUMTHREADS_MODIFIER(Name) .Case(#Name, OMPC_NUMTHREADS_##Name) @@ -565,6 +574,16 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind, #include "clang/Basic/OpenMPKinds.def" } llvm_unreachable("Invalid OpenMP 'num_threads' clause modifier"); + case OMPC_threadset: + switch (Type) { + case OMPC_THREADSET_unknown: + return "unknown"; +#define OPENMP_THREADSET_KIND(Name) \ + case OMPC_THREADSET_##Name: \ + return #Name; +#include "clang/Basic/OpenMPKinds.def" + } + llvm_unreachable("Invalid OpenMP 'threadset' clause modifier"); case OMPC_unknown: case OMPC_threadprivate: case OMPC_groupprivate: diff --git 
a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 66fea920812c2..121de42248e3b 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -3731,6 +3731,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, DestructorsFlag = 0x8, PriorityFlag = 0x20, DetachableFlag = 0x40, + FreeAgentFlag = 0x80, }; unsigned Flags = Data.Tied ? TiedFlag : 0; bool NeedsCleanup = false; @@ -3740,6 +3741,11 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, if (NeedsCleanup) Flags = Flags | DestructorsFlag; } + if (const auto *Clause = D.getSingleClause()) { + OpenMPThreadsetKind Kind = Clause->getThreadsetKind(); + if (Kind == OMPC_THREADSET_omp_pool) + Flags = Flags | FreeAgentFlag; + } if (Data.Priority.getInt()) Flags = Flags | PriorityFlag; if (D.hasClausesOfKind()) diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 25199c739ace9..31bc941e6a015 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -3221,6 +3221,7 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, else Clause = ParseOpenMPSingleExprClause(CKind, WrongDirective); break; + case OMPC_threadset: case OMPC_fail: case OMPC_proc_bind: case OMPC_atomic_default_mem_order: diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 6d5cb0fcaea24..256f9521b3a7e 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -17216,6 +17216,10 @@ OMPClause *SemaOpenMP::ActOnOpenMPSimpleClause( static_cast(Argument), ArgumentLoc, StartLoc, LParenLoc, EndLoc); break; + case OMPC_threadset: + Res = ActOnOpenMPThreadsetClause(static_cast(Argument), + ArgumentLoc, StartLoc, LParenLoc, EndLoc); + break; case OMPC_if: case OMPC_final: case OMPC_num_threads: @@ -17355,6 +17359,23 @@ OMPClause *SemaOpenMP::ActOnOpenMPDefaultClause( OMPDefaultClause(M, MLoc, VCKind, VCKindLoc, StartLoc, LParenLoc, 
EndLoc); } +OMPClause *SemaOpenMP::ActOnOpenMPThreadsetClause(OpenMPThreadsetKind Kind, + SourceLocation KindLoc, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { + if (Kind == OMPC_THREADSET_unknown) { + Diag(KindLoc, diag::err_omp_unexpected_clause_value) + << getListOfPossibleValues(OMPC_threadset, /*First=*/0, + /*Last=*/unsigned(OMPC_THREADSET_unknown)) + << getOpenMPClauseName(OMPC_threadset); + return nullptr; + } + + return new (getASTContext()) + OMPThreadsetClause(Kind, KindLoc, StartLoc, LParenLoc, EndLoc); +} + OMPClause *SemaOpenMP::ActOnOpenMPProcBindClause(ProcBindKind Kind, SourceLocation KindKwLoc, SourceLocation StartLoc, diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 0c8c1d18d317e..8c20078e97a13 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -10622,6 +10622,13 @@ TreeTransform::TransformOMPDefaultClause(OMPDefaultClause *C) { C->getEndLoc()); } +template +OMPClause * +TreeTransform::TransformOMPThreadsetClause(OMPThreadsetClause *C) { + // No need to rebuild this clause, no template-dependent parameters. + return C; +} + template OMPClause * TreeTransform::TransformOMPProcBindClause(OMPProcBindClause *C) { diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index c1b5cb730e4a4..e3106f8d8e13c 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -11255,6 +11255,9 @@ OMPClause *OMPClauseReader::readClause() { case llvm::omp::OMPC_mergeable: C = new (Context) OMPMergeableClause(); break; + case llvm::omp::OMPC_threadset: + C = new (Context) OMPThreadsetClause(); + break; case llvm::omp::OMPC_read: C = new (Context) OMPReadClause(); break; @@ -11658,6 +11661,17 @@ void OMPClauseReader::VisitOMPDefaultClause(OMPDefaultClause *C) { C->setDefaultVariableCategoryLocation(Record.readSourceLocation()); } +// Read the parameter of threadset clause. 
This will have been saved when +// OMPClauseWriter is called. +void OMPClauseReader::VisitOMPThreadsetClause(OMPThreadsetClause *C) { + C->setLParenLoc(Record.readSourceLocation()); + SourceLocation ThreadsetKindLoc = Record.readSourceLocation(); + C->setThreadsetKindLoc(ThreadsetKindLoc); + OpenMPThreadsetKind TKind = + static_cast(Record.readInt()); + C->setThreadsetKind(TKind); +} + void OMPClauseReader::VisitOMPProcBindClause(OMPProcBindClause *C) { C->setProcBindKind(static_cast(Record.readInt())); C->setLParenLoc(Record.readSourceLocation()); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 377e3966874f3..3ac338e013deb 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -7913,6 +7913,12 @@ void OMPClauseWriter::VisitOMPDefaultClause(OMPDefaultClause *C) { Record.AddSourceLocation(C->getDefaultVCLoc()); } +void OMPClauseWriter::VisitOMPThreadsetClause(OMPThreadsetClause *C) { + Record.AddSourceLocation(C->getLParenLoc()); + Record.AddSourceLocation(C->getThreadsetKindLoc()); + Record.writeEnum(C->getThreadsetKind()); +} + void OMPClauseWriter::VisitOMPProcBindClause(OMPProcBindClause *C) { Record.push_back(unsigned(C->getProcBindKind())); Record.AddSourceLocation(C->getLParenLoc()); diff --git a/clang/test/OpenMP/task_ast_print.cpp b/clang/test/OpenMP/task_ast_print.cpp index 30fb7ab75cc87..b059f187156ee 100644 --- a/clang/test/OpenMP/task_ast_print.cpp +++ b/clang/test/OpenMP/task_ast_print.cpp @@ -1,8 +1,10 @@ // RUN: %clang_cc1 -verify -Wno-vla -fopenmp -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -verify -Wno-vla -fopenmp -fopenmp-version=60 -DOMP60 -ast-print %s | FileCheck %s --check-prefix=CHECK60 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -verify -Wno-vla %s -ast-print | FileCheck %s // RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -ast-print %s | FileCheck %s +// RUN: %clang_cc1 
-verify -Wno-vla -fopenmp-simd -fopenmp-version=60 -DOMP60 -ast-print %s | FileCheck %s --check-prefix=CHECK60 // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -verify -Wno-vla %s -ast-print | FileCheck %s // RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -ast-dump %s | FileCheck %s --check-prefix=DUMP @@ -101,8 +103,8 @@ T tmain(T argc, T *argv) { a = 2; #pragma omp task default(none), private(argc, b) firstprivate(argv) shared(d) if (argc > 0) final(S::TS > 0) priority(argc) affinity(argc, argv[b:argc], arr[:], ([argc][sizeof(T)])argv) foo(); -#pragma omp taskgroup task_reduction(-: argc) -#pragma omp task if (C) mergeable priority(C) in_reduction(-: argc) +#pragma omp taskgroup task_reduction(+: argc) +#pragma omp task if (C) mergeable priority(C) in_reduction(+: argc) foo(); return 0; } @@ -119,8 +121,8 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: a = 2; // CHECK-NEXT: #pragma omp task default(none) private(argc,b) firstprivate(argv) shared(d) if(argc > 0) final(S::TS > 0) priority(argc) affinity(argc,argv[b:argc],arr[:],([argc][sizeof(T)])argv) // CHECK-NEXT: foo() -// CHECK-NEXT: #pragma omp taskgroup task_reduction(-: argc) -// CHECK-NEXT: #pragma omp task if(C) mergeable priority(C) in_reduction(-: argc) +// CHECK-NEXT: #pragma omp taskgroup task_reduction(+: argc) +// CHECK-NEXT: #pragma omp task if(C) mergeable priority(C) in_reduction(+: argc) // CHECK-NEXT: foo() // CHECK: template<> int tmain(int argc, int *argv) { // CHECK-NEXT: int b = argc, c, d, e, f, g; @@ -134,8 +136,8 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: a = 2; // CHECK-NEXT: #pragma omp task default(none) private(argc,b) firstprivate(argv) shared(d) if(argc > 0) final(S::TS > 0) priority(argc) affinity(argc,argv[b:argc],arr[:],([argc][sizeof(int)])argv) // CHECK-NEXT: foo() -// CHECK-NEXT: #pragma omp taskgroup task_reduction(-: argc) -// CHECK-NEXT: #pragma omp task if(5) mergeable priority(5) 
in_reduction(-: argc) +// CHECK-NEXT: #pragma omp taskgroup task_reduction(+: argc) +// CHECK-NEXT: #pragma omp task if(5) mergeable priority(5) in_reduction(+: argc) // CHECK-NEXT: foo() // CHECK: template<> long tmain(long argc, long *argv) { // CHECK-NEXT: long b = argc, c, d, e, f, g; @@ -149,8 +151,8 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: a = 2; // CHECK-NEXT: #pragma omp task default(none) private(argc,b) firstprivate(argv) shared(d) if(argc > 0) final(S::TS > 0) priority(argc) affinity(argc,argv[b:argc],arr[:],([argc][sizeof(long)])argv) // CHECK-NEXT: foo() -// CHECK-NEXT: #pragma omp taskgroup task_reduction(-: argc) -// CHECK-NEXT: #pragma omp task if(1) mergeable priority(1) in_reduction(-: argc) +// CHECK-NEXT: #pragma omp taskgroup task_reduction(+: argc) +// CHECK-NEXT: #pragma omp task if(1) mergeable priority(1) in_reduction(+: argc) // CHECK-NEXT: foo() enum Enum {}; @@ -199,6 +201,14 @@ int main(int argc, char **argv) { #pragma omp task depend(inout: omp_all_memory) foo(); // CHECK-NEXT: foo(); +#ifdef OMP60 +#pragma omp task threadset(omp_pool) +#pragma omp task threadset(omp_team) + foo(); +#endif + // CHECK60: #pragma omp task threadset(omp_pool) + // CHECK60: #pragma omp task threadset(omp_team) + // CHECK60-NEXT: foo(); return tmain(b, &b) + tmain(x, &x); } diff --git a/clang/test/OpenMP/task_codegen.cpp b/clang/test/OpenMP/task_codegen.cpp index c3e6d9e6b1cf7..ba8e6945de9d0 100644 --- a/clang/test/OpenMP/task_codegen.cpp +++ b/clang/test/OpenMP/task_codegen.cpp @@ -41,6 +41,9 @@ // RUN: -emit-llvm -o - -DOMP51 | FileCheck %s \ // RUN: --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -verify -Wno-vla -triple x86_64-apple-darwin10 -fopenmp -fopenmp-version=60 -DOMP60 -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK6 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -DOMP60 -fopenmp-enable-irbuilder -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp 
-fopenmp-version=60 -DOMP60 -fopenmp-enable-irbuilder -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK6 // expected-no-diagnostics #ifndef HEADER @@ -65,6 +68,7 @@ struct S { S(const S &s) : a(s.a) {} ~S() {} }; + int a; int main() { char b; @@ -147,6 +151,7 @@ int main() { + // s1 = S(); @@ -215,6 +220,19 @@ void test_omp_all_memory() } } #endif // OMP51 + +#ifdef OMP60 +void test_threadset() +{ +#pragma omp task threadset(omp_team) + { + } +#pragma omp task threadset(omp_pool) + { + } +} +#endif // OMP60 + #endif // CHECK1-LABEL: define {{[^@]+}}@main // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { @@ -10243,3 +10261,18 @@ void test_omp_all_memory() // CHECK4-51-NEXT: call void @__cxx_global_var_init() // CHECK4-51-NEXT: ret void // +// CHECK6-LABEL: define void @_Z14test_threadsetv() +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 1 +// CHECK6-NEXT: [[AGG_CAPTURED2:%.*]] = alloca [[STRUCT_ANON_25:%.*]], align 1 +// CHECK6-NEXT: call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR:[0-9]+]]) +// CHECK6-NEXT: [[TMP0:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %omp_global_thread_num, i32 1, i64 40, i64 1, ptr @.omp_task_entry..[[ENTRY1:[0-9]+]]) +// CHECK6-NEXT: getelementptr inbounds nuw %struct.kmp_task_t_with_privates{{.*}}, ptr %0, i32 0, i32 0 +// CHECK6-NEXT: call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR:[0-9]+]]) +// CHECK6-NEXT: call i32 @__kmpc_omp_task(ptr @1, i32 %omp_global_thread_num1, ptr %0) +// CHECK6-NEXT: call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR2:[0-9]+]]) +// CHECK6-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %omp_global_thread_num3, i32 129, i64 40, i64 1, ptr @.omp_task_entry..[[ENTRY2:[0-9]+]]) +// CHECK6-NEXT: getelementptr inbounds nuw %struct.kmp_task_t_with_privates{{.*}}, ptr %3, i32 0, i32 0 +// CHECK6-NEXT: call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR2:[0-9]+]]) +// 
CHECK6-NEXT: call i32 @__kmpc_omp_task(ptr @1, i32 %omp_global_thread_num4, ptr %3) +// CHECK6-NEXT: ret void diff --git a/clang/test/OpenMP/task_threadset_messages.cpp b/clang/test/OpenMP/task_threadset_messages.cpp new file mode 100755 index 0000000000000..f553a2da17ab9 --- /dev/null +++ b/clang/test/OpenMP/task_threadset_messages.cpp @@ -0,0 +1,99 @@ +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 -ferror-limit 200 -o - %s +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 -ferror-limit 200 -o - %s +// RUN: %clang_cc1 -verify=expected,omp51 -fopenmp -fopenmp-version=51 -std=c++11 -ferror-limit 200 -o - %s +// RUN: %clang_cc1 -verify=expected -DOMP60 -fopenmp -fopenmp-version=60 -std=c++11 -ferror-limit 200 -o - %s + +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 -ferror-limit 200 -o - %s +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 -ferror-limit 200 -o - %s +// RUN: %clang_cc1 -verify=expected,omp51 -fopenmp-simd -fopenmp-version=51 -std=c++11 -ferror-limit 200 -o - %s +// RUN: %clang_cc1 -verify=expected -DOMP60 -fopenmp-simd -fopenmp-version=60 -std=c++11 -ferror-limit 200 -o - %s + +#ifdef OMP60 +struct ComplexStruct { + int data[10]; + struct InnerStruct { + float value; + } inner; +}; + +// Template class with member functions using 'threadset'. +template +class TemplateClass { +public: + void foo() { + #pragma omp task threadset(omp_pool) + { + T temp; + } + } + void bar() { + #pragma omp taskloop threadset(omp_team) + for (int i = 0; i < 10; ++i) {} + } +}; + +// Valid uses of 'threadset' with 'omp_pool' and 'omp_team' in task directive. 
+void test_task_threadset_valid() { + int a; + #pragma omp task threadset(omp_pool) + #pragma omp task threadset(omp_team) + #pragma omp task threadset(omp_pool) if(1) + #pragma omp task threadset(omp_team) priority(5) + #pragma omp task threadset(omp_pool) depend(out: a) + #pragma omp parallel + { + #pragma omp task threadset(omp_pool) + { + #pragma omp taskloop threadset(omp_team) + for (int i = 0; i < 5; ++i) {} + } + } + + TemplateClass obj; + obj.foo(); + obj.bar(); +} + +// Invalid uses of 'threadset' with incorrect arguments in task directive. +void test_task_threadset_invalid_args() { + #pragma omp task threadset(invalid_arg) // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}} + #pragma omp task threadset(123) // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}} + #pragma omp task threadset(omp_pool, omp_team) // expected-error {{expected ')'}} expected-note {{to match this '('}} + #pragma omp task threadset() // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}} + {} +} + +// Valid uses of 'threadset' with 'omp_pool' and 'omp_team' in taskloop directive. +void test_taskloop_threadset_valid() { + #pragma omp taskloop threadset(omp_pool) + for (int i = 0; i < 10; ++i) {} + #pragma omp taskloop threadset(omp_team) + for (int i = 0; i < 10; ++i) {} + #pragma omp taskloop threadset(omp_pool) grainsize(5) + for (int i = 0; i < 10; ++i) {} + #pragma omp taskloop threadset(omp_team) num_tasks(2) + for (int i = 0; i < 10; ++i) {} +} + +// Invalid uses of 'threadset' with incorrect arguments in taskloop directive. 
+void test_taskloop_threadset_invalid_args() { + #pragma omp taskloop threadset(invalid_arg) // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}} + for (int i = 0; i < 10; ++i) {} + #pragma omp taskloop threadset(123) // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}} + for (int i = 0; i < 10; ++i) {} + #pragma omp taskloop threadset(omp_pool, omp_team) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) {} + #pragma omp taskloop threadset() // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}} + for (int i = 0; i < 10; ++i) {} +} + +#else +void test_threadset_not_supported() { + #pragma omp task threadset(omp_pool) // omp45-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} omp50-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} omp51-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} + #pragma omp task threadset(omp_team) // omp45-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} omp50-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} omp51-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} + #pragma omp taskloop threadset(omp_team) // omp45-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} omp50-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} omp51-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} + for (int i = 0; i < 10; ++i) {} + #pragma omp taskloop threadset(omp_pool) // omp45-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} omp50-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} omp51-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} + for (int i 
= 0; i < 10; ++i) {} +} +#endif diff --git a/clang/test/OpenMP/taskloop_ast_print.cpp b/clang/test/OpenMP/taskloop_ast_print.cpp index 1b6d7240fa66c..e4bf20af5d78e 100644 --- a/clang/test/OpenMP/taskloop_ast_print.cpp +++ b/clang/test/OpenMP/taskloop_ast_print.cpp @@ -1,8 +1,10 @@ // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -DOMP60 -ast-print %s | FileCheck %s --check-prefix=CHECK60 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -verify %s -ast-print | FileCheck %s // RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=60 -DOMP60 -ast-print %s | FileCheck %s --check-prefix=CHECK60 // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -verify %s -ast-print | FileCheck %s // expected-no-diagnostics @@ -87,6 +89,20 @@ int main(int argc, char **argv) { // CHECK-NEXT: #pragma omp cancel taskgroup // CHECK-NEXT: #pragma omp cancellation point taskgroup // CHECK-NEXT: foo(); +#ifdef OMP60 +#pragma omp taskloop threadset(omp_team) + for (int i = 0; i < 10; ++i) { +#pragma omp taskloop threadset(omp_pool) + for (int j = 0; j < 10; ++j) { + foo(); + } +} +#endif + // CHECK60: #pragma omp taskloop threadset(omp_team) + // CHECK60-NEXT: for (int i = 0; i < 10; ++i) { + // CHECK60: #pragma omp taskloop threadset(omp_pool) + // CHECK60-NEXT: for (int j = 0; j < 10; ++j) { + // CHECK60-NEXT: foo(); return (tmain(argc) + tmain(argv[0][0])); } diff --git a/clang/test/OpenMP/taskloop_codegen.cpp b/clang/test/OpenMP/taskloop_codegen.cpp index 69f8d3b160bfd..d1197607a2684 100644 --- a/clang/test/OpenMP/taskloop_codegen.cpp +++ b/clang/test/OpenMP/taskloop_codegen.cpp @@ -5,7 +5,12 @@ // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c++ -emit-llvm %s -o - | 
FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s + // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} + +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK6 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK6 // expected-no-diagnostics #ifndef HEADER #define HEADER @@ -241,4 +246,52 @@ void taskloop_with_class() { } } +#ifdef OMP60 +void test_threadset() +{ +#pragma omp taskloop threadset(omp_team) + for (int i = 0; i < 10; ++i) { + } +#pragma omp taskloop threadset(omp_pool) + for (int i = 0; i < 10; ++i) { + } +} +#endif // OMP60 +// CHECK6-LABEL: define void @_Z14test_threadsetv() +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 1 +// CHECK6-NEXT: %[[TMP:.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 1 +// CHECK6-NEXT: %[[TMP2:.*]] = alloca i32, align 4 +// CHECK6-NEXT: %[[TID0:.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR:[0-9]+]]) +// CHECK6-NEXT: call void @__kmpc_taskgroup(ptr @1, i32 %[[TID0:.*]]) +// CHECK6-NEXT: %[[TID1:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[TID0:.*]], i32 1, i64 80, i64 1, ptr @.omp_task_entry..[[ENTRY1:[0-9]+]]) +// CHECK6-NEXT: %[[TID2:.*]] = getelementptr inbounds nuw %struct.kmp_task_t_with_privates{{.*}}, ptr %[[TID1:.*]], i32 0, i32 0 +// CHECK6-NEXT: %[[TID3:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID2:.*]], i32 0, i32 5 
+// CHECK6-NEXT: store i64 0, ptr %[[TID3:.*]], align 8 +// CHECK6-NEXT: %[[TID4:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID2:.*]], i32 0, i32 6 +// CHECK6-NEXT: store i64 9, ptr %[[TID4:.*]], align 8 +// CHECK6-NEXT: %[[TID5:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID2:.*]], i32 0, i32 7 +// CHECK6-NEXT: store i64 1, ptr %[[TID5:.*]], align 8 +// CHECK6-NEXT: %[[TID6:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID2:.*]], i32 0, i32 9 +// CHECK6-NEXT: call void @llvm.memset.p0.i64(ptr align 8 %[[TID6:.*]], i8 0, i64 8, i1 false) +// CHECK6-NEXT: %[[TID7:.*]] = load i64, ptr %[[TID5:.*]], align 8 +// CHECK6-NEXT: call void @__kmpc_taskloop(ptr @1, i32 %[[TID0:.*]], ptr %[[TID1:.*]], i32 1, ptr %[[TID3:.*]], ptr %4, i64 %[[TID7:.*]], i32 1, i32 0, i64 0, ptr null) +// CHECK6-NEXT: call void @__kmpc_end_taskgroup(ptr @1, i32 %[[TID0:.*]]) +// CHECK6-NEXT: call void @__kmpc_taskgroup(ptr @1, i32 %[[TID0:.*]]) +// CHECK6-NEXT: %[[TID8:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[TID0:.*]], i32 129, i64 80, i64 1, ptr @.omp_task_entry..[[ENTRY1:[0-9]+]]) +// CHECK6-NEXT: %[[TID9:.*]] = getelementptr inbounds nuw %struct.kmp_task_t_with_privates{{.*}}, ptr %[[TID8:.*]], i32 0, i32 0 +// CHECK6-NEXT: %[[TID10:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID9:.*]], i32 0, i32 5 +// CHECK6-NEXT: store i64 0, ptr %[[TID10:.*]], align 8 +// CHECK6-NEXT: %[[TID11:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID9:.*]], i32 0, i32 6 +// CHECK6-NEXT: store i64 9, ptr %[[TID11:.*]], align 8 +// CHECK6-NEXT: %[[TID12:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID9:.*]], i32 0, i32 7 +// CHECK6-NEXT: store i64 1, ptr %[[TID12:.*]], align 8 +// CHECK6-NEXT: %[[TID13:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID9:.*]], i32 0, i32 9 +// CHECK6-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TID13:.*]], i8 0, 
i64 8, i1 false) +// CHECK6-NEXT: %[[TID14:.*]] = load i64, ptr [[TID12:.*]], align 8 +// CHECK6-NEXT: call void @__kmpc_taskloop(ptr @1, i32 %[[TID0:.*]], ptr %[[TID8:.*]], i32 1, ptr %[[TID10:.*]], ptr %[[TID11:.*]], i64 %[[TID14:.*]], i32 1, i32 0, i64 0, ptr null) +// CHECK6-NEXT: call void @__kmpc_end_taskgroup(ptr @1, i32 %[[TID0:.*]]) +// CHECK6-NEXT: ret void + #endif diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index fc27fd29da933..08776d9bcabfc 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2406,6 +2406,8 @@ void OMPClauseEnqueue::VisitOMPCompareClause(const OMPCompareClause *) {} void OMPClauseEnqueue::VisitOMPFailClause(const OMPFailClause *) {} +void OMPClauseEnqueue::VisitOMPThreadsetClause(const OMPThreadsetClause *) {} + void OMPClauseEnqueue::VisitOMPAbsentClause(const OMPAbsentClause *) {} void OMPClauseEnqueue::VisitOMPHoldsClause(const OMPHoldsClause *) {} diff --git a/flang/include/flang/Lower/OpenMP/Clauses.h b/flang/include/flang/Lower/OpenMP/Clauses.h index 74924661d9a03..688d01704370d 100644 --- a/flang/include/flang/Lower/OpenMP/Clauses.h +++ b/flang/include/flang/Lower/OpenMP/Clauses.h @@ -294,6 +294,7 @@ using Permutation = tomp::clause::PermutationT; using TaskReduction = tomp::clause::TaskReductionT; using ThreadLimit = tomp::clause::ThreadLimitT; using Threads = tomp::clause::ThreadsT; +using Threadset = tomp::clause::ThreadsetT; using Transparent = tomp::clause::TransparentT; using To = tomp::clause::ToT; using UnifiedAddress = tomp::clause::UnifiedAddressT; diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index bb970691c85c9..a7398a4ef970f 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -685,6 +685,8 @@ class ParseTreeDumper { NODE_ENUM(OmpTaskDependenceType, Value) NODE(parser, OmpTaskReductionClause) NODE(OmpTaskReductionClause, Modifier) + 
NODE(parser, OmpThreadsetClause) + NODE_ENUM(OmpThreadsetClause, ThreadsetPolicy) NODE(parser, OmpToClause) NODE(OmpToClause, Modifier) NODE(parser, OmpTraitProperty) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index c3a8c2eab15f2..375790af90b74 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -4825,6 +4825,14 @@ struct OmpTaskReductionClause { std::tuple t; }; +// Ref: [6.0:442] +// threadset-clause -> +// THREADSET(omp_pool|omp_team) +struct OmpThreadsetClause { + ENUM_CLASS(ThreadsetPolicy, Omp_Pool, Omp_Team) + WRAPPER_CLASS_BOILERPLATE(OmpThreadsetClause, ThreadsetPolicy); +}; + // Ref: [4.5:107-109], [5.0:176-180], [5.1:205-210], [5.2:167-168] // // to-clause (in DECLARE TARGET) -> diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp index d39f9dda92a28..0f60b47991004 100644 --- a/flang/lib/Lower/OpenMP/Clauses.cpp +++ b/flang/lib/Lower/OpenMP/Clauses.cpp @@ -1482,6 +1482,21 @@ ThreadLimit make(const parser::OmpClause::ThreadLimit &inp, return ThreadLimit{/*Threadlim=*/makeExpr(inp.v, semaCtx)}; } +Threadset make(const parser::OmpClause::Threadset &inp, + semantics::SemanticsContext &semaCtx) { + // inp.v -> parser::OmpThreadsetClause + using wrapped = parser::OmpThreadsetClause; + + CLAUSET_ENUM_CONVERT( // + convert, wrapped::ThreadsetPolicy, Threadset::ThreadsetPolicy, + // clang-format off + MS(Omp_Pool, Omp_Pool) + MS(Omp_Team, Omp_Team) + // clang-format on + ); + return Threadset{/*ThreadsetPolicy=*/convert(inp.v.v)}; +} + // Threadprivate: empty // Threads: empty diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index e094458f001e3..aaaf1ec5d4626 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -3390,6 +3390,7 @@ CHECK_SIMPLE_CLAUSE(Read, OMPC_read) CHECK_SIMPLE_CLAUSE(Threadprivate, OMPC_threadprivate) 
CHECK_SIMPLE_CLAUSE(Groupprivate, OMPC_groupprivate) CHECK_SIMPLE_CLAUSE(Threads, OMPC_threads) +CHECK_SIMPLE_CLAUSE(Threadset, OMPC_threadset) CHECK_SIMPLE_CLAUSE(Inbranch, OMPC_inbranch) CHECK_SIMPLE_CLAUSE(Link, OMPC_link) CHECK_SIMPLE_CLAUSE(Indirect, OMPC_indirect) diff --git a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h index 87b95200b2459..d7f0e3a3d49da 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h @@ -1167,6 +1167,14 @@ struct ThreadsT { using EmptyTrait = std::true_type; }; +// V6.0: [14.8] `threadset` clause +template // +struct ThreadsetT { + ENUM(ThreadsetPolicy, Omp_Pool, Omp_Team); + using WrapperTrait = std::true_type; + ThreadsetPolicy v; +}; + // V5.2: [5.9.1] `to` clause template // struct ToT { @@ -1352,9 +1360,9 @@ using WrapperClausesT = std::variant< ProcBindT, ReverseOffloadT, SafelenT, SelfMapsT, SeverityT, SharedT, SimdlenT, SizesT, PermutationT, ThreadLimitT, - UnifiedAddressT, UnifiedSharedMemoryT, UniformT, - UpdateT, UseDeviceAddrT, UseDevicePtrT, - UsesAllocatorsT>; + ThreadsetT, UnifiedAddressT, + UnifiedSharedMemoryT, UniformT, UpdateT, + UseDeviceAddrT, UseDevicePtrT, UsesAllocatorsT>; template using UnionOfAllClausesT = typename type::Union< // diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 61a1a05f6e904..208609f64f418 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -539,6 +539,10 @@ def OMPC_GroupPrivate : Clause<[Spelling<"groupprivate">]> { def OMPC_Threads : Clause<[Spelling<"threads">]> { let clangClass = "OMPThreadsClause"; } +def OMPC_Threadset : Clause<[Spelling<"threadset">]> { + let clangClass = "OMPThreadsetClause"; + let flangClass = "OmpThreadsetClause"; +} def OMPC_To : Clause<[Spelling<"to">]> { let clangClass = "OMPToClause"; let flangClass = "OmpToClause"; @@ -1254,6 +1258,7 @@ def OMP_Task : 
Directive<[Spelling<"task">]> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, ]; @@ -1297,6 +1302,7 @@ def OMP_TaskLoop : Directive<[Spelling<"taskloop">]> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, ]; From f205be095609aa61dfac3ae729406e0af2dcd15f Mon Sep 17 00:00:00 2001 From: David Spickett Date: Thu, 30 Oct 2025 10:26:27 +0000 Subject: [PATCH 17/21] Revert "[lldb-dap] Improving consistency of tests by removing concurrency." (#165688) Reverts llvm/llvm-project#165496 Due to flaky failures on Arm 32-bit since this change. Detailed in https://github.com/llvm/llvm-project/pull/165496#issuecomment-3467209089. --- .../test/tools/lldb-dap/dap_server.py | 206 +++++++++++------- .../test/tools/lldb-dap/lldbdap_testcase.py | 2 +- .../TestDAP_breakpointEvents.py | 30 +-- .../tools/lldb-dap/launch/TestDAP_launch.py | 2 +- .../module-event/TestDAP_module_event.py | 88 ++++---- .../tools/lldb-dap/module/TestDAP_module.py | 8 +- .../restart/TestDAP_restart_console.py | 24 +- .../lldb-dap/send-event/TestDAP_sendEvent.py | 2 +- 8 files changed, 203 insertions(+), 159 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py index 8f3652172dfdf..d892c01f0bc71 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py @@ -10,8 +10,8 @@ import subprocess import signal import sys +import threading import warnings -import selectors import time from typing import ( Any, @@ -139,6 +139,35 @@ def dump_memory(base_addr, data, num_per_line, outfile): outfile.write("\n") +def read_packet( + f: IO[bytes], trace_file: Optional[IO[str]] = None +) -> Optional[ProtocolMessage]: + """Decode a JSON packet that starts with the content length and is + followed by the JSON bytes from a file 
'f'. Returns None on EOF. + """ + line = f.readline().decode("utf-8") + if len(line) == 0: + return None # EOF. + + # Watch for line that starts with the prefix + prefix = "Content-Length: " + if line.startswith(prefix): + # Decode length of JSON bytes + length = int(line[len(prefix) :]) + # Skip empty line + separator = f.readline().decode() + if separator != "": + Exception("malformed DAP content header, unexpected line: " + separator) + # Read JSON bytes + json_str = f.read(length).decode() + if trace_file: + trace_file.write("from adapter:\n%s\n" % (json_str)) + # Decode the JSON bytes into a python dictionary + return json.loads(json_str) + + raise Exception("unexpected malformed message from lldb-dap: " + line) + + def packet_type_is(packet, packet_type): return "type" in packet and packet["type"] == packet_type @@ -170,8 +199,16 @@ def __init__( self.log_file = log_file self.send = send self.recv = recv - self.selector = selectors.DefaultSelector() - self.selector.register(recv, selectors.EVENT_READ) + + # Packets that have been received and processed but have not yet been + # requested by a test case. + self._pending_packets: List[Optional[ProtocolMessage]] = [] + # Received packets that have not yet been processed. + self._recv_packets: List[Optional[ProtocolMessage]] = [] + # Used as a mutex for _recv_packets and for notify when _recv_packets + # changes. 
+ self._recv_condition = threading.Condition() + self._recv_thread = threading.Thread(target=self._read_packet_thread) # session state self.init_commands = init_commands @@ -197,6 +234,9 @@ def __init__( # keyed by breakpoint id self.resolved_breakpoints: dict[str, Breakpoint] = {} + # trigger enqueue thread + self._recv_thread.start() + @classmethod def encode_content(cls, s: str) -> bytes: return ("Content-Length: %u\r\n\r\n%s" % (len(s), s)).encode("utf-8") @@ -212,46 +252,17 @@ def validate_response(cls, command, response): f"seq mismatch in response {command['seq']} != {response['request_seq']}" ) - def _read_packet( - self, - timeout: float = DEFAULT_TIMEOUT, - ) -> Optional[ProtocolMessage]: - """Decode a JSON packet that starts with the content length and is - followed by the JSON bytes from self.recv. Returns None on EOF. - """ - - ready = self.selector.select(timeout) - if not ready: - warnings.warn( - "timeout occurred waiting for a packet, check if the test has a" - " negative assertion and see if it can be inverted.", - stacklevel=4, - ) - return None # timeout - - line = self.recv.readline().decode("utf-8") - if len(line) == 0: - return None # EOF. - - # Watch for line that starts with the prefix - prefix = "Content-Length: " - if line.startswith(prefix): - # Decode length of JSON bytes - length = int(line[len(prefix) :]) - # Skip empty line - separator = self.recv.readline().decode() - if separator != "": - Exception("malformed DAP content header, unexpected line: " + separator) - # Read JSON bytes - json_str = self.recv.read(length).decode() - if self.trace_file: - self.trace_file.write( - "%s from adapter:\n%s\n" % (time.time(), json_str) - ) - # Decode the JSON bytes into a python dictionary - return json.loads(json_str) - - raise Exception("unexpected malformed message from lldb-dap: " + line) + def _read_packet_thread(self): + try: + while True: + packet = read_packet(self.recv, trace_file=self.trace_file) + # `packet` will be `None` on EOF. 
We want to pass it down to + # handle_recv_packet anyway so the main thread can handle unexpected + # termination of lldb-dap and stop waiting for new packets. + if not self._handle_recv_packet(packet): + break + finally: + dump_dap_log(self.log_file) def get_modules( self, start_module: Optional[int] = None, module_count: Optional[int] = None @@ -299,6 +310,34 @@ def collect_output( output += self.get_output(category, clear=clear) return output + def _enqueue_recv_packet(self, packet: Optional[ProtocolMessage]): + with self.recv_condition: + self.recv_packets.append(packet) + self.recv_condition.notify() + + def _handle_recv_packet(self, packet: Optional[ProtocolMessage]) -> bool: + """Handles an incoming packet. + + Called by the read thread that is waiting for all incoming packets + to store the incoming packet in "self._recv_packets" in a thread safe + way. This function will then signal the "self._recv_condition" to + indicate a new packet is available. + + Args: + packet: A new packet to store. + + Returns: + True if the caller should keep calling this function for more + packets. + """ + with self._recv_condition: + self._recv_packets.append(packet) + self._recv_condition.notify() + # packet is None on EOF + return packet is not None and not ( + packet["type"] == "response" and packet["command"] == "disconnect" + ) + def _recv_packet( self, *, @@ -322,34 +361,46 @@ def _recv_packet( The first matching packet for the given predicate, if specified, otherwise None. 
""" - deadline = time.time() + timeout - - while time.time() < deadline: - packet = self._read_packet(timeout=deadline - time.time()) - if packet is None: - return None - self._process_recv_packet(packet) - if not predicate or predicate(packet): - return packet - - def _process_recv_packet(self, packet) -> None: + assert ( + threading.current_thread != self._recv_thread + ), "Must not be called from the _recv_thread" + + def process_until_match(): + self._process_recv_packets() + for i, packet in enumerate(self._pending_packets): + if packet is None: + # We need to return a truthy value to break out of the + # wait_for, use `EOFError` as an indicator of EOF. + return EOFError() + if predicate and predicate(packet): + self._pending_packets.pop(i) + return packet + + with self._recv_condition: + packet = self._recv_condition.wait_for(process_until_match, timeout) + return None if isinstance(packet, EOFError) else packet + + def _process_recv_packets(self) -> None: """Process received packets, updating the session state.""" - if packet and ("seq" not in packet or packet["seq"] == 0): - warnings.warn( - f"received a malformed packet, expected 'seq != 0' for {packet!r}" - ) - # Handle events that may modify any stateful properties of - # the DAP session. - if packet and packet["type"] == "event": - self._handle_event(packet) - elif packet and packet["type"] == "request": - # Handle reverse requests and keep processing. - self._handle_reverse_request(packet) + with self._recv_condition: + for packet in self._recv_packets: + if packet and ("seq" not in packet or packet["seq"] == 0): + warnings.warn( + f"received a malformed packet, expected 'seq != 0' for {packet!r}" + ) + # Handle events that may modify any stateful properties of + # the DAP session. + if packet and packet["type"] == "event": + self._handle_event(packet) + elif packet and packet["type"] == "request": + # Handle reverse requests and keep processing. 
+ self._handle_reverse_request(packet) + # Move the packet to the pending queue. + self._pending_packets.append(packet) + self._recv_packets.clear() def _handle_event(self, packet: Event) -> None: """Handle any events that modify debug session state we track.""" - self.events.append(packet) - event = packet["event"] body: Optional[Dict] = packet.get("body", None) @@ -402,8 +453,6 @@ def _handle_event(self, packet: Event) -> None: self.invalidated_event = packet elif event == "memory": self.memory_event = packet - elif event == "module": - self.module_events.append(packet) def _handle_reverse_request(self, request: Request) -> None: if request in self.reverse_requests: @@ -472,14 +521,18 @@ def send_packet(self, packet: ProtocolMessage) -> int: Returns the seq number of the request. """ - packet["seq"] = self.sequence - self.sequence += 1 + # Set the seq for requests. + if packet["type"] == "request": + packet["seq"] = self.sequence + self.sequence += 1 + else: + packet["seq"] = 0 # Encode our command dictionary as a JSON string json_str = json.dumps(packet, separators=(",", ":")) if self.trace_file: - self.trace_file.write("%s to adapter:\n%s\n" % (time.time(), json_str)) + self.trace_file.write("to adapter:\n%s\n" % (json_str)) length = len(json_str) if length > 0: @@ -860,8 +913,6 @@ def request_restart(self, restartArguments=None): if restartArguments: command_dict["arguments"] = restartArguments - # Clear state, the process is about to restart... - self._process_continued(True) response = self._send_recv(command_dict) # Caller must still call wait_for_stopped. 
return response @@ -1428,10 +1479,8 @@ def request_testGetTargetBreakpoints(self): def terminate(self): self.send.close() - self.recv.close() - self.selector.close() - if self.log_file: - dump_dap_log(self.log_file) + if self._recv_thread.is_alive(): + self._recv_thread.join() def request_setInstructionBreakpoints(self, memory_reference=[]): breakpoints = [] @@ -1528,7 +1577,6 @@ def launch( stdout=subprocess.PIPE, stderr=sys.stderr, env=adapter_env, - bufsize=0, ) if connection is None: diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py index fd07324d2ddda..29935bb8046ff 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py @@ -416,7 +416,7 @@ def continue_to_next_stop(self): return self.dap_server.wait_for_stopped() def continue_to_breakpoint(self, breakpoint_id: str): - self.continue_to_breakpoints([breakpoint_id]) + self.continue_to_breakpoints((breakpoint_id)) def continue_to_breakpoints(self, breakpoint_ids): self.do_continue() diff --git a/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py b/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py index 7b78541fb4f8e..beab4d6c1f5a6 100644 --- a/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py +++ b/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py @@ -81,20 +81,24 @@ def test_breakpoint_events(self): breakpoint["verified"], "expect foo breakpoint to not be verified" ) + # Flush the breakpoint events. 
+ self.dap_server.wait_for_breakpoint_events() + # Continue to the breakpoint - self.continue_to_breakpoint(foo_bp_id) - self.continue_to_next_stop() # foo_bp2 - self.continue_to_breakpoint(main_bp_id) - self.continue_to_exit() + self.continue_to_breakpoints(dap_breakpoint_ids) - bp_events = [e for e in self.dap_server.events if e["event"] == "breakpoint"] + verified_breakpoint_ids = [] + unverified_breakpoint_ids = [] + for breakpoint_event in self.dap_server.wait_for_breakpoint_events(): + breakpoint = breakpoint_event["body"]["breakpoint"] + id = breakpoint["id"] + if breakpoint["verified"]: + verified_breakpoint_ids.append(id) + else: + unverified_breakpoint_ids.append(id) - main_bp_events = [ - e for e in bp_events if e["body"]["breakpoint"]["id"] == main_bp_id - ] - foo_bp_events = [ - e for e in bp_events if e["body"]["breakpoint"]["id"] == foo_bp_id - ] + self.assertIn(main_bp_id, unverified_breakpoint_ids) + self.assertIn(foo_bp_id, unverified_breakpoint_ids) - self.assertTrue(main_bp_events) - self.assertTrue(foo_bp_events) + self.assertIn(main_bp_id, verified_breakpoint_ids) + self.assertIn(foo_bp_id, verified_breakpoint_ids) diff --git a/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py b/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py index dc6bf38303204..8db2316e73fc8 100644 --- a/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py +++ b/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py @@ -156,7 +156,6 @@ def test_debuggerRoot(self): self.build_and_launch( program, debuggerRoot=program_parent_dir, initCommands=commands ) - self.continue_to_exit() output = self.get_console() self.assertTrue(output and len(output) > 0, "expect console output") lines = output.splitlines() @@ -172,6 +171,7 @@ def test_debuggerRoot(self): % (program_parent_dir, line[len(prefix) :]), ) self.assertTrue(found, "verified lldb-dap working directory") + self.continue_to_exit() def test_sourcePath(self): """ diff --git 
a/lldb/test/API/tools/lldb-dap/module-event/TestDAP_module_event.py b/lldb/test/API/tools/lldb-dap/module-event/TestDAP_module_event.py index 9d1d17b704f76..1f4afabbd161e 100644 --- a/lldb/test/API/tools/lldb-dap/module-event/TestDAP_module_event.py +++ b/lldb/test/API/tools/lldb-dap/module-event/TestDAP_module_event.py @@ -1,58 +1,58 @@ -""" -Test 'module' events for dynamically loaded libraries. -""" - +import dap_server from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil import lldbdap_testcase +import re class TestDAP_module_event(lldbdap_testcase.DAPTestCaseBase): - def lookup_module_id(self, name): - """Returns the identifier for the first module event starting with the given name.""" - for event in self.dap_server.module_events: - if self.get_dict_value(event, ["body", "module", "name"]).startswith(name): - return self.get_dict_value(event, ["body", "module", "id"]) - self.fail(f"No module events matching name={name}") - - def module_events(self, id): - """Finds all module events by identifier.""" - return [ - event - for event in self.dap_server.module_events - if self.get_dict_value(event, ["body", "module", "id"]) == id - ] - - def module_reasons(self, events): - """Returns the list of 'reason' values from the given events.""" - return [event["body"]["reason"] for event in events] - @skipIfWindows def test_module_event(self): - """ - Test that module events are fired on target load and when the list of - dynamic libraries updates while running. - """ program = self.getBuildArtifact("a.out") self.build_and_launch(program) - # We can analyze the order of events after the process exits. 
- self.continue_to_exit() - a_out_id = self.lookup_module_id("a.out") - a_out_events = self.module_events(id=a_out_id) + source = "main.cpp" + breakpoint1_line = line_number(source, "// breakpoint 1") + breakpoint2_line = line_number(source, "// breakpoint 2") + breakpoint3_line = line_number(source, "// breakpoint 3") - self.assertIn( - "new", - self.module_reasons(a_out_events), - "Expected a.out to load during the debug session.", + breakpoint_ids = self.set_source_breakpoints( + source, [breakpoint1_line, breakpoint2_line, breakpoint3_line] ) + self.continue_to_breakpoints(breakpoint_ids) + + # We're now stopped at breakpoint 1 before the dlopen. Flush all the module events. + event = self.dap_server.wait_for_event(["module"]) + while event is not None: + event = self.dap_server.wait_for_event(["module"]) + + # Continue to the second breakpoint, before the dlclose. + self.continue_to_breakpoints(breakpoint_ids) + + # Make sure we got a module event for libother. + event = self.dap_server.wait_for_event(["module"]) + self.assertIsNotNone(event, "didn't get a module event") + module_name = event["body"]["module"]["name"] + module_id = event["body"]["module"]["id"] + self.assertEqual(event["body"]["reason"], "new") + self.assertIn("libother", module_name) + + # Continue to the third breakpoint, after the dlclose. + self.continue_to_breakpoints(breakpoint_ids) + + # Make sure we got a module event for libother. + event = self.dap_server.wait_for_event(["module"]) + self.assertIsNotNone(event, "didn't get a module event") + reason = event["body"]["reason"] + self.assertEqual(reason, "removed") + self.assertEqual(event["body"]["module"]["id"], module_id) + + # The removed module event should omit everything but the module id and name + # as they are required fields. 
+ module_data = event["body"]["module"] + required_keys = ["id", "name"] + self.assertListEqual(list(module_data.keys()), required_keys) + self.assertEqual(module_data["name"], "", "expects empty name.") - libother_id = self.lookup_module_id( - "libother." # libother.so or libother.dylib based on OS. - ) - libother_events = self.module_events(id=libother_id) - self.assertEqual( - self.module_reasons(libother_events), - ["new", "removed"], - "Expected libother to be loaded then unloaded during the debug session.", - ) + self.continue_to_exit() diff --git a/lldb/test/API/tools/lldb-dap/module/TestDAP_module.py b/lldb/test/API/tools/lldb-dap/module/TestDAP_module.py index 2d00c512721c6..0ed53dac5d869 100644 --- a/lldb/test/API/tools/lldb-dap/module/TestDAP_module.py +++ b/lldb/test/API/tools/lldb-dap/module/TestDAP_module.py @@ -64,18 +64,19 @@ def check_symbols_loaded_with_size(): self.assertEqual(program, program_module["path"]) self.assertIn("addressRange", program_module) - self.continue_to_exit() - # Collect all the module names we saw as events. module_new_names = [] module_changed_names = [] - for module_event in self.dap_server.module_events: + module_event = self.dap_server.wait_for_event(["module"]) + while module_event is not None: reason = module_event["body"]["reason"] if reason == "new": module_new_names.append(module_event["body"]["module"]["name"]) elif reason == "changed": module_changed_names.append(module_event["body"]["module"]["name"]) + module_event = self.dap_server.wait_for_event(["module"]) + # Make sure we got an event for every active module. self.assertNotEqual(len(module_new_names), 0) for module in active_modules: @@ -85,6 +86,7 @@ def check_symbols_loaded_with_size(): # symbols got added. 
self.assertNotEqual(len(module_changed_names), 0) self.assertIn(program_module["name"], module_changed_names) + self.continue_to_exit() @skipIfWindows def test_modules(self): diff --git a/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_console.py b/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_console.py index fa62ec243f5c5..e1ad1425a993d 100644 --- a/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_console.py +++ b/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_console.py @@ -30,11 +30,7 @@ def verify_stopped_on_entry(self, stopped_events: List[Dict[str, Any]]): if reason == "entry": seen_stopped_event += 1 - self.assertEqual( - seen_stopped_event, - 1, - f"expect only one stopped entry event in {stopped_events}", - ) + self.assertEqual(seen_stopped_event, 1, "expect only one stopped entry event.") @skipIfAsan @skipIfWindows @@ -96,13 +92,11 @@ def test_stopOnEntry(self): self.build_and_launch(program, console="integratedTerminal", stopOnEntry=True) [bp_main] = self.set_function_breakpoints(["main"]) - self.dap_server.request_configurationDone() - stopped_threads = list(self.dap_server.thread_stop_reasons.values()) + self.dap_server.request_continue() # sends configuration done + stopped_events = self.dap_server.wait_for_stopped() # We should be stopped at the entry point. - self.assertEqual( - len(stopped_threads), 1, "Expected the main thread to be stopped on entry." - ) - self.assertEqual(stopped_threads[0]["reason"], "entry") + self.assertGreaterEqual(len(stopped_events), 0, "expect stopped events") + self.verify_stopped_on_entry(stopped_events) # Then, if we continue, we should hit the breakpoint at main. self.dap_server.request_continue() @@ -111,12 +105,8 @@ def test_stopOnEntry(self): # Restart and check that we still get a stopped event before reaching # main. self.dap_server.request_restart() - stopped_threads = list(self.dap_server.thread_stop_reasons.values()) - # We should be stopped at the entry point. 
- self.assertEqual( - len(stopped_threads), 1, "Expected the main thread to be stopped on entry." - ) - self.assertEqual(stopped_threads[0]["reason"], "entry") + stopped_events = self.dap_server.wait_for_stopped() + self.verify_stopped_on_entry(stopped_events) # continue to main self.dap_server.request_continue() diff --git a/lldb/test/API/tools/lldb-dap/send-event/TestDAP_sendEvent.py b/lldb/test/API/tools/lldb-dap/send-event/TestDAP_sendEvent.py index 0184020589176..a01845669666f 100644 --- a/lldb/test/API/tools/lldb-dap/send-event/TestDAP_sendEvent.py +++ b/lldb/test/API/tools/lldb-dap/send-event/TestDAP_sendEvent.py @@ -32,7 +32,7 @@ def test_send_event(self): ], ) self.set_source_breakpoints(source, [breakpoint_line]) - self.do_continue() + self.continue_to_next_stop() custom_event = self.dap_server.wait_for_event( filter=["my-custom-event-no-body"] From 838f643ebb4083b34ac4671541188754ac3b0c50 Mon Sep 17 00:00:00 2001 From: Ebuka Ezike Date: Thu, 30 Oct 2025 10:41:17 +0000 Subject: [PATCH 18/21] [lldb-dap][test] skip io_redirection in debug builds (#165593) Currently all `runInTerminal` test are skipped in debug builds because, when attaching it times out parsing the debug symbols of lldb-dap. Add this test since it is running in teminal. --- lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py b/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py index 8db2316e73fc8..ca881f1d817c5 100644 --- a/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py +++ b/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py @@ -642,6 +642,7 @@ def test_stdio_redirection(self): @skipIfAsan @skipIfWindows @skipIf(oslist=["linux"], archs=no_match(["x86_64"])) + @skipIfBuildType(["debug"]) def test_stdio_redirection_and_console(self): """ Test stdio redirection and console. 
From d929146b3fcd7bafe364a053355bfe35b5e1fdbf Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Thu, 30 Oct 2025 10:46:37 +0000 Subject: [PATCH 19/21] [Clang][AArch64] Lower NEON vaddv/vminv/vmaxv builtins to llvm.vector.reduce intrinsics. (#165400) This is the first step in removing some NEON reduction intrinsics that duplicate the behaviour of their llvm.vector.reduce counterpart. NOTE: The i8/i16 variants differ in that the NEON versions return an i32 result. However, this looks more about making their code generation convenient with SelectionDAG disgarding the extra bits. This is only relevant for the next phase because the Clang usage always truncate their result, making llvm.vector.reduce a drop in replacement. --- clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 249 +++---------------- clang/test/CodeGen/AArch64/neon-across.c | 132 ++++------ clang/test/CodeGen/AArch64/neon-intrinsics.c | 20 +- 3 files changed, 104 insertions(+), 297 deletions(-) diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index 60f9b86333670..15fa78ddba715 100644 --- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -1193,14 +1193,22 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = { NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), - NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), - NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), + NEONMAP1(vaddv_s16, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddv_s32, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddv_s8, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddv_u16, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddv_u32, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddv_u8, vector_reduce_add, Add1ArgType), NEONMAP1(vaddvq_f32, aarch64_neon_faddv, 
AddRetType | Add1ArgType), NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType), - NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), - NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType), - NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), - NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), + NEONMAP1(vaddvq_s16, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddvq_s32, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddvq_s64, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddvq_s8, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddvq_u16, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddvq_u32, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddvq_u64, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddvq_u8, vector_reduce_add, Add1ArgType), NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType), NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType), NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), @@ -1243,27 +1251,43 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = { NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), - NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), - NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), + NEONMAP1(vmaxv_s16, vector_reduce_smax, Add1ArgType), + NEONMAP1(vmaxv_s32, vector_reduce_smax, Add1ArgType), + NEONMAP1(vmaxv_s8, vector_reduce_smax, Add1ArgType), + NEONMAP1(vmaxv_u16, vector_reduce_umax, Add1ArgType), + NEONMAP1(vmaxv_u32, vector_reduce_umax, Add1ArgType), + NEONMAP1(vmaxv_u8, vector_reduce_umax, Add1ArgType), NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), - NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | 
Add1ArgType), - NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), + NEONMAP1(vmaxvq_s16, vector_reduce_smax, Add1ArgType), + NEONMAP1(vmaxvq_s32, vector_reduce_smax, Add1ArgType), + NEONMAP1(vmaxvq_s8, vector_reduce_smax, Add1ArgType), + NEONMAP1(vmaxvq_u16, vector_reduce_umax, Add1ArgType), + NEONMAP1(vmaxvq_u32, vector_reduce_umax, Add1ArgType), + NEONMAP1(vmaxvq_u8, vector_reduce_umax, Add1ArgType), NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), - NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), - NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), + NEONMAP1(vminv_s16, vector_reduce_smin, Add1ArgType), + NEONMAP1(vminv_s32, vector_reduce_smin, Add1ArgType), + NEONMAP1(vminv_s8, vector_reduce_smin, Add1ArgType), + NEONMAP1(vminv_u16, vector_reduce_umin, Add1ArgType), + NEONMAP1(vminv_u32, vector_reduce_umin, Add1ArgType), + NEONMAP1(vminv_u8, vector_reduce_umin, Add1ArgType), NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), - NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), - NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), + NEONMAP1(vminvq_s16, vector_reduce_smin, Add1ArgType), + NEONMAP1(vminvq_s32, vector_reduce_smin, Add1ArgType), + NEONMAP1(vminvq_s8, vector_reduce_smin, Add1ArgType), + NEONMAP1(vminvq_u16, vector_reduce_umin, Add1ArgType), + NEONMAP1(vminvq_u32, vector_reduce_umin, Add1ArgType), + NEONMAP1(vminvq_u8, vector_reduce_umin, Add1ArgType), NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0), NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType), NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType), - NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, 
AddRetType | Add1ArgType), - NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), + NEONMAP1(vpaddd_s64, vector_reduce_add, Add1ArgType), + NEONMAP1(vpaddd_u64, vector_reduce_add, Add1ArgType), NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), @@ -7067,127 +7091,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Int = Intrinsic::bitreverse; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit"); } - case NEON::BI__builtin_neon_vaddv_u8: - // FIXME: These are handled by the AArch64 scalar code. - usgn = true; - [[fallthrough]]; - case NEON::BI__builtin_neon_vaddv_s8: { - Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; - Ty = Int32Ty; - VTy = llvm::FixedVectorType::get(Int8Ty, 8); - llvm::Type *Tys[2] = { Ty, VTy }; - Ops.push_back(EmitScalarExpr(E->getArg(0))); - Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); - return Builder.CreateTrunc(Ops[0], Int8Ty); - } - case NEON::BI__builtin_neon_vaddv_u16: - usgn = true; - [[fallthrough]]; - case NEON::BI__builtin_neon_vaddv_s16: { - Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; - Ty = Int32Ty; - VTy = llvm::FixedVectorType::get(Int16Ty, 4); - llvm::Type *Tys[2] = { Ty, VTy }; - Ops.push_back(EmitScalarExpr(E->getArg(0))); - Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); - return Builder.CreateTrunc(Ops[0], Int16Ty); - } - case NEON::BI__builtin_neon_vaddvq_u8: - usgn = true; - [[fallthrough]]; - case NEON::BI__builtin_neon_vaddvq_s8: { - Int = usgn ? 
Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; - Ty = Int32Ty; - VTy = llvm::FixedVectorType::get(Int8Ty, 16); - llvm::Type *Tys[2] = { Ty, VTy }; - Ops.push_back(EmitScalarExpr(E->getArg(0))); - Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); - return Builder.CreateTrunc(Ops[0], Int8Ty); - } - case NEON::BI__builtin_neon_vaddvq_u16: - usgn = true; - [[fallthrough]]; - case NEON::BI__builtin_neon_vaddvq_s16: { - Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; - Ty = Int32Ty; - VTy = llvm::FixedVectorType::get(Int16Ty, 8); - llvm::Type *Tys[2] = { Ty, VTy }; - Ops.push_back(EmitScalarExpr(E->getArg(0))); - Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); - return Builder.CreateTrunc(Ops[0], Int16Ty); - } - case NEON::BI__builtin_neon_vmaxv_u8: { - Int = Intrinsic::aarch64_neon_umaxv; - Ty = Int32Ty; - VTy = llvm::FixedVectorType::get(Int8Ty, 8); - llvm::Type *Tys[2] = { Ty, VTy }; - Ops.push_back(EmitScalarExpr(E->getArg(0))); - Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); - return Builder.CreateTrunc(Ops[0], Int8Ty); - } - case NEON::BI__builtin_neon_vmaxv_u16: { - Int = Intrinsic::aarch64_neon_umaxv; - Ty = Int32Ty; - VTy = llvm::FixedVectorType::get(Int16Ty, 4); - llvm::Type *Tys[2] = { Ty, VTy }; - Ops.push_back(EmitScalarExpr(E->getArg(0))); - Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); - return Builder.CreateTrunc(Ops[0], Int16Ty); - } - case NEON::BI__builtin_neon_vmaxvq_u8: { - Int = Intrinsic::aarch64_neon_umaxv; - Ty = Int32Ty; - VTy = llvm::FixedVectorType::get(Int8Ty, 16); - llvm::Type *Tys[2] = { Ty, VTy }; - Ops.push_back(EmitScalarExpr(E->getArg(0))); - Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); - return Builder.CreateTrunc(Ops[0], Int8Ty); - } - case NEON::BI__builtin_neon_vmaxvq_u16: { - Int = Intrinsic::aarch64_neon_umaxv; - Ty = Int32Ty; - VTy = llvm::FixedVectorType::get(Int16Ty, 8); - llvm::Type *Tys[2] = { 
Ty, VTy }; - Ops.push_back(EmitScalarExpr(E->getArg(0))); - Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); - return Builder.CreateTrunc(Ops[0], Int16Ty); - } - case NEON::BI__builtin_neon_vmaxv_s8: { - Int = Intrinsic::aarch64_neon_smaxv; - Ty = Int32Ty; - VTy = llvm::FixedVectorType::get(Int8Ty, 8); - llvm::Type *Tys[2] = { Ty, VTy }; - Ops.push_back(EmitScalarExpr(E->getArg(0))); - Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); - return Builder.CreateTrunc(Ops[0], Int8Ty); - } - case NEON::BI__builtin_neon_vmaxv_s16: { - Int = Intrinsic::aarch64_neon_smaxv; - Ty = Int32Ty; - VTy = llvm::FixedVectorType::get(Int16Ty, 4); - llvm::Type *Tys[2] = { Ty, VTy }; - Ops.push_back(EmitScalarExpr(E->getArg(0))); - Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); - return Builder.CreateTrunc(Ops[0], Int16Ty); - } - case NEON::BI__builtin_neon_vmaxvq_s8: { - Int = Intrinsic::aarch64_neon_smaxv; - Ty = Int32Ty; - VTy = llvm::FixedVectorType::get(Int8Ty, 16); - llvm::Type *Tys[2] = { Ty, VTy }; - Ops.push_back(EmitScalarExpr(E->getArg(0))); - Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); - return Builder.CreateTrunc(Ops[0], Int8Ty); - } - case NEON::BI__builtin_neon_vmaxvq_s16: { - Int = Intrinsic::aarch64_neon_smaxv; - Ty = Int32Ty; - VTy = llvm::FixedVectorType::get(Int16Ty, 8); - llvm::Type *Tys[2] = { Ty, VTy }; - Ops.push_back(EmitScalarExpr(E->getArg(0))); - Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); - return Builder.CreateTrunc(Ops[0], Int16Ty); - } case NEON::BI__builtin_neon_vmaxv_f16: { Int = Intrinsic::aarch64_neon_fmaxv; Ty = HalfTy; @@ -7206,78 +7109,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); return Builder.CreateTrunc(Ops[0], HalfTy); } - case NEON::BI__builtin_neon_vminv_u8: { - Int = Intrinsic::aarch64_neon_uminv; - Ty = Int32Ty; - VTy = llvm::FixedVectorType::get(Int8Ty, 8); - 
llvm::Type *Tys[2] = { Ty, VTy }; - Ops.push_back(EmitScalarExpr(E->getArg(0))); - Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); - return Builder.CreateTrunc(Ops[0], Int8Ty); - } - case NEON::BI__builtin_neon_vminv_u16: { - Int = Intrinsic::aarch64_neon_uminv; - Ty = Int32Ty; - VTy = llvm::FixedVectorType::get(Int16Ty, 4); - llvm::Type *Tys[2] = { Ty, VTy }; - Ops.push_back(EmitScalarExpr(E->getArg(0))); - Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); - return Builder.CreateTrunc(Ops[0], Int16Ty); - } - case NEON::BI__builtin_neon_vminvq_u8: { - Int = Intrinsic::aarch64_neon_uminv; - Ty = Int32Ty; - VTy = llvm::FixedVectorType::get(Int8Ty, 16); - llvm::Type *Tys[2] = { Ty, VTy }; - Ops.push_back(EmitScalarExpr(E->getArg(0))); - Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); - return Builder.CreateTrunc(Ops[0], Int8Ty); - } - case NEON::BI__builtin_neon_vminvq_u16: { - Int = Intrinsic::aarch64_neon_uminv; - Ty = Int32Ty; - VTy = llvm::FixedVectorType::get(Int16Ty, 8); - llvm::Type *Tys[2] = { Ty, VTy }; - Ops.push_back(EmitScalarExpr(E->getArg(0))); - Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); - return Builder.CreateTrunc(Ops[0], Int16Ty); - } - case NEON::BI__builtin_neon_vminv_s8: { - Int = Intrinsic::aarch64_neon_sminv; - Ty = Int32Ty; - VTy = llvm::FixedVectorType::get(Int8Ty, 8); - llvm::Type *Tys[2] = { Ty, VTy }; - Ops.push_back(EmitScalarExpr(E->getArg(0))); - Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); - return Builder.CreateTrunc(Ops[0], Int8Ty); - } - case NEON::BI__builtin_neon_vminv_s16: { - Int = Intrinsic::aarch64_neon_sminv; - Ty = Int32Ty; - VTy = llvm::FixedVectorType::get(Int16Ty, 4); - llvm::Type *Tys[2] = { Ty, VTy }; - Ops.push_back(EmitScalarExpr(E->getArg(0))); - Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); - return Builder.CreateTrunc(Ops[0], Int16Ty); - } - case NEON::BI__builtin_neon_vminvq_s8: { - Int = 
Intrinsic::aarch64_neon_sminv; - Ty = Int32Ty; - VTy = llvm::FixedVectorType::get(Int8Ty, 16); - llvm::Type *Tys[2] = { Ty, VTy }; - Ops.push_back(EmitScalarExpr(E->getArg(0))); - Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); - return Builder.CreateTrunc(Ops[0], Int8Ty); - } - case NEON::BI__builtin_neon_vminvq_s16: { - Int = Intrinsic::aarch64_neon_sminv; - Ty = Int32Ty; - VTy = llvm::FixedVectorType::get(Int16Ty, 8); - llvm::Type *Tys[2] = { Ty, VTy }; - Ops.push_back(EmitScalarExpr(E->getArg(0))); - Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); - return Builder.CreateTrunc(Ops[0], Int16Ty); - } case NEON::BI__builtin_neon_vminv_f16: { Int = Intrinsic::aarch64_neon_fminv; Ty = HalfTy; diff --git a/clang/test/CodeGen/AArch64/neon-across.c b/clang/test/CodeGen/AArch64/neon-across.c index aa0387d89dfef..aae5097da7789 100644 --- a/clang/test/CodeGen/AArch64/neon-across.c +++ b/clang/test/CodeGen/AArch64/neon-across.c @@ -113,9 +113,8 @@ uint64_t test_vaddlvq_u32(uint32x4_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vmaxv_s8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> [[A]]) -// CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8 -// CHECK-NEXT: ret i8 [[TMP0]] +// CHECK-NEXT: [[VMAXV_S8_I:%.*]] = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> [[A]]) +// CHECK-NEXT: ret i8 [[VMAXV_S8_I]] // int8_t test_vmaxv_s8(int8x8_t a) { return vmaxv_s8(a); @@ -124,9 +123,8 @@ int8_t test_vmaxv_s8(int8x8_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vmaxv_s16 // CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> [[A]]) -// CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i16 -// CHECK-NEXT: ret i16 [[TMP0]] +// CHECK-NEXT: [[VMAXV_S16_I:%.*]] = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> [[A]]) +// 
CHECK-NEXT: ret i16 [[VMAXV_S16_I]] // int16_t test_vmaxv_s16(int16x4_t a) { return vmaxv_s16(a); @@ -135,9 +133,8 @@ int16_t test_vmaxv_s16(int16x4_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vmaxv_u8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> [[A]]) -// CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8 -// CHECK-NEXT: ret i8 [[TMP0]] +// CHECK-NEXT: [[VMAXV_U8_I:%.*]] = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> [[A]]) +// CHECK-NEXT: ret i8 [[VMAXV_U8_I]] // uint8_t test_vmaxv_u8(uint8x8_t a) { return vmaxv_u8(a); @@ -146,9 +143,8 @@ uint8_t test_vmaxv_u8(uint8x8_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vmaxv_u16 // CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> [[A]]) -// CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i16 -// CHECK-NEXT: ret i16 [[TMP0]] +// CHECK-NEXT: [[VMAXV_U16_I:%.*]] = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> [[A]]) +// CHECK-NEXT: ret i16 [[VMAXV_U16_I]] // uint16_t test_vmaxv_u16(uint16x4_t a) { return vmaxv_u16(a); @@ -157,9 +153,8 @@ uint16_t test_vmaxv_u16(uint16x4_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_s8 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> [[A]]) -// CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8 -// CHECK-NEXT: ret i8 [[TMP0]] +// CHECK-NEXT: [[VMAXVQ_S8_I:%.*]] = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> [[A]]) +// CHECK-NEXT: ret i8 [[VMAXVQ_S8_I]] // int8_t test_vmaxvq_s8(int8x16_t a) { return vmaxvq_s8(a); @@ -168,9 +163,8 @@ int8_t test_vmaxvq_s8(int8x16_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_s16 // CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: 
[[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> [[A]]) -// CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i16 -// CHECK-NEXT: ret i16 [[TMP0]] +// CHECK-NEXT: [[VMAXVQ_S16_I:%.*]] = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> [[A]]) +// CHECK-NEXT: ret i16 [[VMAXVQ_S16_I]] // int16_t test_vmaxvq_s16(int16x8_t a) { return vmaxvq_s16(a); @@ -179,7 +173,7 @@ int16_t test_vmaxvq_s16(int16x8_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_s32 // CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VMAXVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> [[A]]) +// CHECK-NEXT: [[VMAXVQ_S32_I:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[A]]) // CHECK-NEXT: ret i32 [[VMAXVQ_S32_I]] // int32_t test_vmaxvq_s32(int32x4_t a) { @@ -189,9 +183,8 @@ int32_t test_vmaxvq_s32(int32x4_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_u8 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> [[A]]) -// CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8 -// CHECK-NEXT: ret i8 [[TMP0]] +// CHECK-NEXT: [[VMAXVQ_U8_I:%.*]] = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> [[A]]) +// CHECK-NEXT: ret i8 [[VMAXVQ_U8_I]] // uint8_t test_vmaxvq_u8(uint8x16_t a) { return vmaxvq_u8(a); @@ -200,9 +193,8 @@ uint8_t test_vmaxvq_u8(uint8x16_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_u16 // CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> [[A]]) -// CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i16 -// CHECK-NEXT: ret i16 [[TMP0]] +// CHECK-NEXT: [[VMAXVQ_U16_I:%.*]] = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> [[A]]) +// CHECK-NEXT: ret i16 [[VMAXVQ_U16_I]] // uint16_t test_vmaxvq_u16(uint16x8_t a) { return vmaxvq_u16(a); @@ -211,7 
+203,7 @@ uint16_t test_vmaxvq_u16(uint16x8_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_u32 // CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VMAXVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> [[A]]) +// CHECK-NEXT: [[VMAXVQ_U32_I:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[A]]) // CHECK-NEXT: ret i32 [[VMAXVQ_U32_I]] // uint32_t test_vmaxvq_u32(uint32x4_t a) { @@ -221,9 +213,8 @@ uint32_t test_vmaxvq_u32(uint32x4_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vminv_s8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> [[A]]) -// CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8 -// CHECK-NEXT: ret i8 [[TMP0]] +// CHECK-NEXT: [[VMINV_S8_I:%.*]] = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> [[A]]) +// CHECK-NEXT: ret i8 [[VMINV_S8_I]] // int8_t test_vminv_s8(int8x8_t a) { return vminv_s8(a); @@ -232,9 +223,8 @@ int8_t test_vminv_s8(int8x8_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vminv_s16 // CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> [[A]]) -// CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i16 -// CHECK-NEXT: ret i16 [[TMP0]] +// CHECK-NEXT: [[VMINV_S16_I:%.*]] = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> [[A]]) +// CHECK-NEXT: ret i16 [[VMINV_S16_I]] // int16_t test_vminv_s16(int16x4_t a) { return vminv_s16(a); @@ -243,9 +233,8 @@ int16_t test_vminv_s16(int16x4_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vminv_u8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> [[A]]) -// CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8 -// CHECK-NEXT: ret i8 [[TMP0]] +// CHECK-NEXT: [[VMINV_U8_I:%.*]] = call i8 
@llvm.vector.reduce.umin.v8i8(<8 x i8> [[A]]) +// CHECK-NEXT: ret i8 [[VMINV_U8_I]] // uint8_t test_vminv_u8(uint8x8_t a) { return vminv_u8(a); @@ -254,9 +243,8 @@ uint8_t test_vminv_u8(uint8x8_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vminv_u16 // CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> [[A]]) -// CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i16 -// CHECK-NEXT: ret i16 [[TMP0]] +// CHECK-NEXT: [[VMINV_U16_I:%.*]] = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> [[A]]) +// CHECK-NEXT: ret i16 [[VMINV_U16_I]] // uint16_t test_vminv_u16(uint16x4_t a) { return vminv_u16(a); @@ -265,9 +253,8 @@ uint16_t test_vminv_u16(uint16x4_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vminvq_s8 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> [[A]]) -// CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8 -// CHECK-NEXT: ret i8 [[TMP0]] +// CHECK-NEXT: [[VMINVQ_S8_I:%.*]] = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> [[A]]) +// CHECK-NEXT: ret i8 [[VMINVQ_S8_I]] // int8_t test_vminvq_s8(int8x16_t a) { return vminvq_s8(a); @@ -276,9 +263,8 @@ int8_t test_vminvq_s8(int8x16_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vminvq_s16 // CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> [[A]]) -// CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i16 -// CHECK-NEXT: ret i16 [[TMP0]] +// CHECK-NEXT: [[VMINVQ_S16_I:%.*]] = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> [[A]]) +// CHECK-NEXT: ret i16 [[VMINVQ_S16_I]] // int16_t test_vminvq_s16(int16x8_t a) { return vminvq_s16(a); @@ -287,7 +273,7 @@ int16_t test_vminvq_s16(int16x8_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vminvq_s32 // CHECK-SAME: (<4 x i32> 
noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VMINVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> [[A]]) +// CHECK-NEXT: [[VMINVQ_S32_I:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[A]]) // CHECK-NEXT: ret i32 [[VMINVQ_S32_I]] // int32_t test_vminvq_s32(int32x4_t a) { @@ -297,9 +283,8 @@ int32_t test_vminvq_s32(int32x4_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vminvq_u8 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> [[A]]) -// CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8 -// CHECK-NEXT: ret i8 [[TMP0]] +// CHECK-NEXT: [[VMINVQ_U8_I:%.*]] = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> [[A]]) +// CHECK-NEXT: ret i8 [[VMINVQ_U8_I]] // uint8_t test_vminvq_u8(uint8x16_t a) { return vminvq_u8(a); @@ -308,9 +293,8 @@ uint8_t test_vminvq_u8(uint8x16_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vminvq_u16 // CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> [[A]]) -// CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i16 -// CHECK-NEXT: ret i16 [[TMP0]] +// CHECK-NEXT: [[VMINVQ_U16_I:%.*]] = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> [[A]]) +// CHECK-NEXT: ret i16 [[VMINVQ_U16_I]] // uint16_t test_vminvq_u16(uint16x8_t a) { return vminvq_u16(a); @@ -319,7 +303,7 @@ uint16_t test_vminvq_u16(uint16x8_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vminvq_u32 // CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VMINVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> [[A]]) +// CHECK-NEXT: [[VMINVQ_U32_I:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[A]]) // CHECK-NEXT: ret i32 [[VMINVQ_U32_I]] // uint32_t test_vminvq_u32(uint32x4_t a) { @@ -329,9 +313,8 @@ uint32_t 
test_vminvq_u32(uint32x4_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vaddv_s8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> [[A]]) -// CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8 -// CHECK-NEXT: ret i8 [[TMP0]] +// CHECK-NEXT: [[VADDV_S8_I:%.*]] = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> [[A]]) +// CHECK-NEXT: ret i8 [[VADDV_S8_I]] // int8_t test_vaddv_s8(int8x8_t a) { return vaddv_s8(a); @@ -340,9 +323,8 @@ int8_t test_vaddv_s8(int8x8_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vaddv_s16 // CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> [[A]]) -// CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i16 -// CHECK-NEXT: ret i16 [[TMP0]] +// CHECK-NEXT: [[VADDV_S16_I:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[A]]) +// CHECK-NEXT: ret i16 [[VADDV_S16_I]] // int16_t test_vaddv_s16(int16x4_t a) { return vaddv_s16(a); @@ -351,9 +333,8 @@ int16_t test_vaddv_s16(int16x4_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vaddv_u8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> [[A]]) -// CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8 -// CHECK-NEXT: ret i8 [[TMP0]] +// CHECK-NEXT: [[VADDV_U8_I:%.*]] = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> [[A]]) +// CHECK-NEXT: ret i8 [[VADDV_U8_I]] // uint8_t test_vaddv_u8(uint8x8_t a) { return vaddv_u8(a); @@ -362,9 +343,8 @@ uint8_t test_vaddv_u8(uint8x8_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vaddv_u16 // CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> [[A]]) -// CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i16 -// 
CHECK-NEXT: ret i16 [[TMP0]] +// CHECK-NEXT: [[VADDV_U16_I:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[A]]) +// CHECK-NEXT: ret i16 [[VADDV_U16_I]] // uint16_t test_vaddv_u16(uint16x4_t a) { return vaddv_u16(a); @@ -373,9 +353,8 @@ uint16_t test_vaddv_u16(uint16x4_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_s8 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> [[A]]) -// CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8 -// CHECK-NEXT: ret i8 [[TMP0]] +// CHECK-NEXT: [[VADDVQ_S8_I:%.*]] = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> [[A]]) +// CHECK-NEXT: ret i8 [[VADDVQ_S8_I]] // int8_t test_vaddvq_s8(int8x16_t a) { return vaddvq_s8(a); @@ -384,9 +363,8 @@ int8_t test_vaddvq_s8(int8x16_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_s16 // CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> [[A]]) -// CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i16 -// CHECK-NEXT: ret i16 [[TMP0]] +// CHECK-NEXT: [[VADDVQ_S16_I:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[A]]) +// CHECK-NEXT: ret i16 [[VADDVQ_S16_I]] // int16_t test_vaddvq_s16(int16x8_t a) { return vaddvq_s16(a); @@ -395,7 +373,7 @@ int16_t test_vaddvq_s16(int16x8_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_s32 // CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VADDVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> [[A]]) +// CHECK-NEXT: [[VADDVQ_S32_I:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[A]]) // CHECK-NEXT: ret i32 [[VADDVQ_S32_I]] // int32_t test_vaddvq_s32(int32x4_t a) { @@ -405,9 +383,8 @@ int32_t test_vaddvq_s32(int32x4_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_u8 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] { 
// CHECK-NEXT: entry: -// CHECK-NEXT: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8> [[A]]) -// CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8 -// CHECK-NEXT: ret i8 [[TMP0]] +// CHECK-NEXT: [[VADDVQ_U8_I:%.*]] = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> [[A]]) +// CHECK-NEXT: ret i8 [[VADDVQ_U8_I]] // uint8_t test_vaddvq_u8(uint8x16_t a) { return vaddvq_u8(a); @@ -416,9 +393,8 @@ uint8_t test_vaddvq_u8(uint8x16_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_u16 // CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i16(<8 x i16> [[A]]) -// CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i16 -// CHECK-NEXT: ret i16 [[TMP0]] +// CHECK-NEXT: [[VADDVQ_U16_I:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[A]]) +// CHECK-NEXT: ret i16 [[VADDVQ_U16_I]] // uint16_t test_vaddvq_u16(uint16x8_t a) { return vaddvq_u16(a); @@ -427,7 +403,7 @@ uint16_t test_vaddvq_u16(uint16x8_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_u32 // CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VADDVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32> [[A]]) +// CHECK-NEXT: [[VADDVQ_U32_I:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[A]]) // CHECK-NEXT: ret i32 [[VADDVQ_U32_I]] // uint32_t test_vaddvq_u32(uint32x4_t a) { diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics.c b/clang/test/CodeGen/AArch64/neon-intrinsics.c index 035e1ca1b45e8..1c628bbba483f 100644 --- a/clang/test/CodeGen/AArch64/neon-intrinsics.c +++ b/clang/test/CodeGen/AArch64/neon-intrinsics.c @@ -12643,7 +12643,7 @@ uint64_t test_vqrshld_u64(uint64_t a, int64_t b) { // CHECK-LABEL: define dso_local i64 @test_vpaddd_s64( // CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VPADDD_S64_I:%.*]] = call i64 
@llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[A]]) +// CHECK-NEXT: [[VPADDD_S64_I:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[A]]) // CHECK-NEXT: ret i64 [[VPADDD_S64_I]] // int64_t test_vpaddd_s64(int64x2_t a) { @@ -23227,7 +23227,7 @@ uint64x2_t test_vpaddq_u64(uint64x2_t a, uint64x2_t b) { // CHECK-LABEL: define dso_local i64 @test_vpaddd_u64( // CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VPADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[A]]) +// CHECK-NEXT: [[VPADDD_U64_I:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[A]]) // CHECK-NEXT: ret i64 [[VPADDD_U64_I]] // uint64_t test_vpaddd_u64(uint64x2_t a) { @@ -23237,7 +23237,7 @@ uint64_t test_vpaddd_u64(uint64x2_t a) { // CHECK-LABEL: define dso_local i64 @test_vaddvq_s64( // CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VADDVQ_S64_I:%.*]] = call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> [[A]]) +// CHECK-NEXT: [[VADDVQ_S64_I:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[A]]) // CHECK-NEXT: ret i64 [[VADDVQ_S64_I]] // int64_t test_vaddvq_s64(int64x2_t a) { @@ -23247,7 +23247,7 @@ int64_t test_vaddvq_s64(int64x2_t a) { // CHECK-LABEL: define dso_local i64 @test_vaddvq_u64( // CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VADDVQ_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[A]]) +// CHECK-NEXT: [[VADDVQ_U64_I:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[A]]) // CHECK-NEXT: ret i64 [[VADDVQ_U64_I]] // uint64_t test_vaddvq_u64(uint64x2_t a) { @@ -23878,7 +23878,7 @@ float64x1_t test_vrsqrts_f64(float64x1_t a, float64x1_t b) { // CHECK-LABEL: define dso_local i32 @test_vminv_s32( // CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VMINV_S32_I:%.*]] = call i32 
@llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> [[A]]) +// CHECK-NEXT: [[VMINV_S32_I:%.*]] = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> [[A]]) // CHECK-NEXT: ret i32 [[VMINV_S32_I]] // int32_t test_vminv_s32(int32x2_t a) { @@ -23888,7 +23888,7 @@ int32_t test_vminv_s32(int32x2_t a) { // CHECK-LABEL: define dso_local i32 @test_vminv_u32( // CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VMINV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> [[A]]) +// CHECK-NEXT: [[VMINV_U32_I:%.*]] = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> [[A]]) // CHECK-NEXT: ret i32 [[VMINV_U32_I]] // uint32_t test_vminv_u32(uint32x2_t a) { @@ -23898,7 +23898,7 @@ uint32_t test_vminv_u32(uint32x2_t a) { // CHECK-LABEL: define dso_local i32 @test_vmaxv_s32( // CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VMAXV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> [[A]]) +// CHECK-NEXT: [[VMAXV_S32_I:%.*]] = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> [[A]]) // CHECK-NEXT: ret i32 [[VMAXV_S32_I]] // int32_t test_vmaxv_s32(int32x2_t a) { @@ -23908,7 +23908,7 @@ int32_t test_vmaxv_s32(int32x2_t a) { // CHECK-LABEL: define dso_local i32 @test_vmaxv_u32( // CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VMAXV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> [[A]]) +// CHECK-NEXT: [[VMAXV_U32_I:%.*]] = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> [[A]]) // CHECK-NEXT: ret i32 [[VMAXV_U32_I]] // uint32_t test_vmaxv_u32(uint32x2_t a) { @@ -23918,7 +23918,7 @@ uint32_t test_vmaxv_u32(uint32x2_t a) { // CHECK-LABEL: define dso_local i32 @test_vaddv_s32( // CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VADDV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> [[A]]) +// CHECK-NEXT: [[VADDV_S32_I:%.*]] 
= call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[A]]) // CHECK-NEXT: ret i32 [[VADDV_S32_I]] // int32_t test_vaddv_s32(int32x2_t a) { @@ -23928,7 +23928,7 @@ int32_t test_vaddv_s32(int32x2_t a) { // CHECK-LABEL: define dso_local i32 @test_vaddv_u32( // CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VADDV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> [[A]]) +// CHECK-NEXT: [[VADDV_U32_I:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[A]]) // CHECK-NEXT: ret i32 [[VADDV_U32_I]] // uint32_t test_vaddv_u32(uint32x2_t a) { From 0e2b89037a94436b9e342dd2d297119e2a39d2f3 Mon Sep 17 00:00:00 2001 From: Ryotaro Kasuga Date: Thu, 30 Oct 2025 19:49:57 +0900 Subject: [PATCH 20/21] [DA] Add tests where dependencies are missed due to overflow (NFC) (#164246) This patch adds test cases that demonstrate missing dependencies in DA caused by the lack of overflow handling. These issues will be addressed by properly inserting overflow checks and bailing out when one is detected. 
It covers the following dependence test functions: - Strong SIV - Weak-Crossing SIV - Weak-Zero SIV - Symbolic RDIV - GCD MIV It does NOT cover: - Exact SIV - Exact RDIV - Banerjee MIV --- .../DependenceAnalysis/gcd-miv-overflow.ll | 66 +++++++++ .../DependenceAnalysis/strong-siv-overflow.ll | 68 +++++++++ .../symbolic-rdiv-overflow.ll | 137 ++++++++++++++++++ .../weak-crossing-siv-overflow.ll | 125 ++++++++++++++++ .../weak-zero-siv-overflow.ll | 122 ++++++++++++++++ 5 files changed, 518 insertions(+) create mode 100644 llvm/test/Analysis/DependenceAnalysis/gcd-miv-overflow.ll create mode 100644 llvm/test/Analysis/DependenceAnalysis/strong-siv-overflow.ll create mode 100644 llvm/test/Analysis/DependenceAnalysis/symbolic-rdiv-overflow.ll create mode 100644 llvm/test/Analysis/DependenceAnalysis/weak-crossing-siv-overflow.ll create mode 100644 llvm/test/Analysis/DependenceAnalysis/weak-zero-siv-overflow.ll diff --git a/llvm/test/Analysis/DependenceAnalysis/gcd-miv-overflow.ll b/llvm/test/Analysis/DependenceAnalysis/gcd-miv-overflow.ll new file mode 100644 index 0000000000000..43f66dd7d0974 --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/gcd-miv-overflow.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-ALL +; RUN: opt < %s -disable-output "-passes=print<da>" -da-enable-dependence-test=gcd-miv 2>&1 \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-GCD-MIV + +; offset0 = 4; +; offset1 = 0; +; for (i = 0; i < 100; i++) { +; A[offset0] = 1; +; A[offset1] = 2; +; offset0 += 3*m; +; offset1 += 3; +; } +; +; FIXME: DependenceAnalysis currently detects no dependency between the two +; stores, but it does exist. E.g., consider `m` is 12297829382473034411, which +; is a modular multiplicative inverse of 3 under modulo 2^64.
Then `offset0` is +; effectively `i + 4`, so accesses will be as follows: +; +; - A[offset0] : A[4], A[5], A[6], ... +; - A[offset1] : A[0], A[3], A[6], ... +; +; The root cause is that DA interprets `3*m` in non-modular arithmetic, which +; isn't necessarily true due to overflow. +; +define void @gcdmiv_coef_ovfl(ptr %A, i64 %m) { +; CHECK-ALL-LABEL: 'gcdmiv_coef_ovfl' +; CHECK-ALL-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1 +; CHECK-ALL-NEXT: da analyze - none! +; CHECK-ALL-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1 +; CHECK-ALL-NEXT: da analyze - none! +; CHECK-ALL-NEXT: Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1 +; CHECK-ALL-NEXT: da analyze - none! +; +; CHECK-GCD-MIV-LABEL: 'gcdmiv_coef_ovfl' +; CHECK-GCD-MIV-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1 +; CHECK-GCD-MIV-NEXT: da analyze - consistent output [*]! +; CHECK-GCD-MIV-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1 +; CHECK-GCD-MIV-NEXT: da analyze - none! +; CHECK-GCD-MIV-NEXT: Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1 +; CHECK-GCD-MIV-NEXT: da analyze - consistent output [*]! +; +entry: + %step = mul i64 3, %m + br label %loop + +loop: + %i = phi i64 [ 0, %entry ], [ %i.inc, %loop ] + %offset.0 = phi i64 [ 4, %entry ] , [ %offset.0.next, %loop ] + %offset.1 = phi i64 [ 0, %entry ] , [ %offset.1.next, %loop ] + %gep.0 = getelementptr inbounds i8, ptr %A, i64 %offset.0 + %gep.1 = getelementptr inbounds i8, ptr %A, i64 %offset.1 + store i8 1, ptr %gep.0 + store i8 2, ptr %gep.1 + %i.inc = add nuw nsw i64 %i, 1 + %offset.0.next = add nsw i64 %offset.0, %step + %offset.1.next = add nsw i64 %offset.1, 3 + %ec = icmp eq i64 %i.inc, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/Analysis/DependenceAnalysis/strong-siv-overflow.ll b/llvm/test/Analysis/DependenceAnalysis/strong-siv-overflow.ll new file mode 100644 index 0000000000000..bf0fafcbfd6c9 --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/strong-siv-overflow.ll @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-ALL +; RUN: opt < %s -disable-output "-passes=print<da>" -da-enable-dependence-test=strong-siv 2>&1 \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-STRONG-SIV + +; for (i = 0; i < (1LL << 62); i++) { +; if (0 <= 2*i - 2) +; A[2*i - 2] = 1; +; +; if (0 <= 2*i - 4) +; A[2*i - 4] = 2; +; } +; +; FIXME: DependenceAnalysis currently detects no dependency between the two +; stores, but it does exist. For example, each store will access A[0] when i +; is 1 and 2 respectively. +; The root cause is that the product of the BTC and the coefficient +; ((1LL << 62) - 1 and 2) overflows in a signed sense. +define void @strongsiv_const_ovfl(ptr %A) { +; CHECK-LABEL: 'strongsiv_const_ovfl' +; CHECK-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1 +; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1 +; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1 +; CHECK-NEXT: da analyze - none!
+; +entry: + br label %loop.header + +loop.header: + %i = phi i64 [ 0, %entry ], [ %i.inc, %loop.latch ] + %offset.0 = phi i64 [ -2, %entry ], [ %offset.0.next, %loop.latch ] + %offset.1 = phi i64 [ -4, %entry ], [ %offset.1.next, %loop.latch ] + %ec = icmp eq i64 %i, 4611686018427387904 + br i1 %ec, label %exit, label %loop.body + +loop.body: + %cond.0 = icmp sge i64 %offset.0, 0 + %cond.1 = icmp sge i64 %offset.1, 0 + br i1 %cond.0, label %if.then.0, label %loop.middle + +if.then.0: + %gep.0 = getelementptr inbounds i8, ptr %A, i64 %offset.0 + store i8 1, ptr %gep.0 + br label %loop.middle + +loop.middle: + br i1 %cond.1, label %if.then.1, label %loop.latch + +if.then.1: + %gep.1 = getelementptr inbounds i8, ptr %A, i64 %offset.1 + store i8 2, ptr %gep.1 + br label %loop.latch + +loop.latch: + %i.inc = add nuw nsw i64 %i, 1 + %offset.0.next = add nsw i64 %offset.0, 2 + %offset.1.next = add nsw i64 %offset.1, 2 + br label %loop.header + +exit: + ret void +} +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK-ALL: {{.*}} +; CHECK-STRONG-SIV: {{.*}} diff --git a/llvm/test/Analysis/DependenceAnalysis/symbolic-rdiv-overflow.ll b/llvm/test/Analysis/DependenceAnalysis/symbolic-rdiv-overflow.ll new file mode 100644 index 0000000000000..c5ff9884a0c62 --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/symbolic-rdiv-overflow.ll @@ -0,0 +1,137 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-ALL +; RUN: opt < %s -disable-output "-passes=print<da>" -da-enable-dependence-test=symbolic-rdiv 2>&1 \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-SYMBOLIC-RDIV + +; for (i = 0; i < (1LL << 62); i++) { +; if (0 <= 2*i - 2) +; A[2*i - 2] = 1; +; A[i] = 2; +; } +; +; FIXME: DependenceAnalysis currently detects no dependency between the two +; stores, but it does exist. For example, each store will access A[0] when i +; is 1 and 0 respectively. +; The root cause is that the product of the BTC and the coefficient +; ((1LL << 62) - 1 and 2) overflows in a signed sense. +define void @symbolicrdiv_prod_ovfl(ptr %A) { +; CHECK-ALL-LABEL: 'symbolicrdiv_prod_ovfl' +; CHECK-ALL-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1 +; CHECK-ALL-NEXT: da analyze - none! +; CHECK-ALL-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1 +; CHECK-ALL-NEXT: da analyze - none! +; CHECK-ALL-NEXT: Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1 +; CHECK-ALL-NEXT: da analyze - none! +; +; CHECK-SYMBOLIC-RDIV-LABEL: 'symbolicrdiv_prod_ovfl' +; CHECK-SYMBOLIC-RDIV-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1 +; CHECK-SYMBOLIC-RDIV-NEXT: da analyze - none!
+; CHECK-SYMBOLIC-RDIV-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1 +; CHECK-SYMBOLIC-RDIV-NEXT: da analyze - none! +; CHECK-SYMBOLIC-RDIV-NEXT: Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1 +; CHECK-SYMBOLIC-RDIV-NEXT: da analyze - consistent output [*]! +; +entry: + br label %loop.header + +loop.header: + %i = phi i64 [ 0, %entry ], [ %i.inc, %loop.latch ] + %offset = phi i64 [ -2, %entry ], [ %offset.next, %loop.latch ] + %ec = icmp eq i64 %i, 4611686018427387904 + br i1 %ec, label %exit, label %loop.body + +loop.body: + %cond = icmp sge i64 %offset, 0 + br i1 %cond, label %if.then, label %loop.latch + +if.then: + %gep.0 = getelementptr inbounds i8, ptr %A, i64 %offset + store i8 1, ptr %gep.0 + br label %loop.latch + +loop.latch: + %gep.1 = getelementptr inbounds i8, ptr %A, i64 %i + store i8 2, ptr %gep.1 + %i.inc = add nuw nsw i64 %i, 1 + %offset.next = add nsw i64 %offset, 2 + br label %loop.header + +exit: + ret void +} + +; offset0 = -4611686018427387904; // -2^62 +; offset1 = 4611686018427387904; // 2^62 +; for (i = 0; i < (1LL << 62) - 100; i++) { +; if (0 <= offset0) +; A[offset0] = 1; +; if (0 <= offset1) +; A[offset1] = 2; +; offset0 += 2; +; offset1 -= 1; +; } +; +; FIXME: DependenceAnalysis currently detects no dependency between the two +; stores, but it does exist. For example, +; +; memory access | i == 2^61 | i == 2^61 + 2^59 | i == 2^61 + 2^60 +; -------------------------|-----------|------------------|------------------- +; A[2*i - 2^62] (offset0) | | A[2^60] | A[2^61] +; A[-i + 2^62] (offset1) | A[2^61] | | A[2^60] +; +; The root cause is that the calculation of the difference between the two +; constants (-2^62 and 2^62) overflows in a signed sense.
+define void @symbolicrdiv_delta_ovfl(ptr %A) { +; CHECK-ALL-LABEL: 'symbolicrdiv_delta_ovfl' +; CHECK-ALL-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1 +; CHECK-ALL-NEXT: da analyze - none! +; CHECK-ALL-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1 +; CHECK-ALL-NEXT: da analyze - none! +; CHECK-ALL-NEXT: Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1 +; CHECK-ALL-NEXT: da analyze - none! +; +; CHECK-SYMBOLIC-RDIV-LABEL: 'symbolicrdiv_delta_ovfl' +; CHECK-SYMBOLIC-RDIV-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1 +; CHECK-SYMBOLIC-RDIV-NEXT: da analyze - consistent output [*]! +; CHECK-SYMBOLIC-RDIV-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1 +; CHECK-SYMBOLIC-RDIV-NEXT: da analyze - none! +; CHECK-SYMBOLIC-RDIV-NEXT: Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1 +; CHECK-SYMBOLIC-RDIV-NEXT: da analyze - consistent output [*]! 
+; +entry: + br label %loop.header + +loop.header: + %i = phi i64 [ 0, %entry ], [ %i.inc, %loop.latch ] + %offset.0 = phi i64 [ -4611686018427387904, %entry ], [ %offset.0.next, %loop.latch ] + %offset.1 = phi i64 [ 4611686018427387904, %entry ], [ %offset.1.next, %loop.latch ] + %cond.0 = icmp sge i64 %offset.0, 0 + %cond.1 = icmp sge i64 %offset.1, 0 + br i1 %cond.0, label %if.then.0, label %loop.middle + +if.then.0: + %gep.0 = getelementptr inbounds i8, ptr %A, i64 %offset.0 + store i8 1, ptr %gep.0 + br label %loop.middle + +loop.middle: + br i1 %cond.1, label %if.then.1, label %loop.latch + +if.then.1: + %gep.1 = getelementptr inbounds i8, ptr %A, i64 %offset.1 + store i8 2, ptr %gep.1 + br label %loop.latch + +loop.latch: + %i.inc = add nuw nsw i64 %i, 1 + %offset.0.next = add nsw i64 %offset.0, 2 + %offset.1.next = sub nsw i64 %offset.1, 1 + %ec = icmp eq i64 %i.inc, 4611686018427387804 ; 2^62 - 100 + br i1 %ec, label %exit, label %loop.header + +exit: + ret void +} +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/Analysis/DependenceAnalysis/weak-crossing-siv-overflow.ll b/llvm/test/Analysis/DependenceAnalysis/weak-crossing-siv-overflow.ll new file mode 100644 index 0000000000000..ba57c7bf5736a --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/weak-crossing-siv-overflow.ll @@ -0,0 +1,125 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-ALL +; RUN: opt < %s -disable-output "-passes=print<da>" -da-enable-dependence-test=weak-crossing-siv 2>&1 \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-WEAK-CROSSING-SIV + +; max_i = INT64_MAX/3 // 3074457345618258602 +; for (long long i = 0; i <= max_i; i++) { +; A[-3*i + INT64_MAX] = 0; +; if (i) +; A[3*i - 2] = 1; +; } +; +; FIXME: DependenceAnalysis currently detects no dependency between +; `A[-3*i + INT64_MAX]` and `A[3*i - 2]`, but it does exist. For example, +; +; memory access | i == 1 | i == max_i +; ---------------------|------------------|------------------ +; A[-3*i + INT64_MAX] | A[INT64_MAX - 3] | A[1] +; A[3*i - 2] | A[1] | A[INT64_MAX - 3] +; +; The root cause is that the calculation of the difference between the two +; constants (INT64_MAX and -2) triggers an overflow. + +define void @weakcorssing_delta_ovfl(ptr %A) { +; CHECK-ALL-LABEL: 'weakcorssing_delta_ovfl' +; CHECK-ALL-NEXT: Src: store i8 0, ptr %idx.0, align 1 --> Dst: store i8 0, ptr %idx.0, align 1 +; CHECK-ALL-NEXT: da analyze - none! +; CHECK-ALL-NEXT: Src: store i8 0, ptr %idx.0, align 1 --> Dst: store i8 1, ptr %idx.1, align 1 +; CHECK-ALL-NEXT: da analyze - none! +; CHECK-ALL-NEXT: Src: store i8 1, ptr %idx.1, align 1 --> Dst: store i8 1, ptr %idx.1, align 1 +; CHECK-ALL-NEXT: da analyze - none!
+; +; CHECK-WEAK-CROSSING-SIV-LABEL: 'weakcorssing_delta_ovfl' +; CHECK-WEAK-CROSSING-SIV-NEXT: Src: store i8 0, ptr %idx.0, align 1 --> Dst: store i8 0, ptr %idx.0, align 1 +; CHECK-WEAK-CROSSING-SIV-NEXT: da analyze - consistent output [*]! +; CHECK-WEAK-CROSSING-SIV-NEXT: Src: store i8 0, ptr %idx.0, align 1 --> Dst: store i8 1, ptr %idx.1, align 1 +; CHECK-WEAK-CROSSING-SIV-NEXT: da analyze - none! +; CHECK-WEAK-CROSSING-SIV-NEXT: Src: store i8 1, ptr %idx.1, align 1 --> Dst: store i8 1, ptr %idx.1, align 1 +; CHECK-WEAK-CROSSING-SIV-NEXT: da analyze - consistent output [*]! +; +entry: + br label %loop.header + +loop.header: + %i = phi i64 [ 0, %entry ], [ %i.inc, %loop.latch ] + %subscript.0 = phi i64 [ 9223372036854775807, %entry ], [ %subscript.0.next, %loop.latch ] + %subscript.1 = phi i64 [ -2, %entry ], [ %subscript.1.next, %loop.latch ] + %idx.0 = getelementptr inbounds i8, ptr %A, i64 %subscript.0 + store i8 0, ptr %idx.0 + %cond.store = icmp ne i64 %i, 0 + br i1 %cond.store, label %if.store, label %loop.latch + +if.store: + %idx.1 = getelementptr inbounds i8, ptr %A, i64 %subscript.1 + store i8 1, ptr %idx.1 + br label %loop.latch + +loop.latch: + %i.inc = add nuw nsw i64 %i, 1 + %subscript.0.next = add nsw i64 %subscript.0, -3 + %subscript.1.next = add nsw i64 %subscript.1, 3 + %ec = icmp sgt i64 %i.inc, 3074457345618258602 + br i1 %ec, label %exit, label %loop.header + +exit: + ret void +} + +; max_i = INT64_MAX/3 // 3074457345618258602 +; for (long long i = 0; i <= max_i; i++) { +; A[-3*i + INT64_MAX] = 0; +; A[3*i + 1] = 1; +; } +; +; FIXME: DependenceAnalysis currently detects no dependency between +; `A[-3*i + INT64_MAX]` and `A[3*i + 1]`, but it does exist.
For example, +; +; memory access | i == 0 | i == 1 | i == max_i - 1 | i == max_i +; ---------------------|--------|------------------|----------------|------------------ +; A[-3*i + INT64_MAX] | | A[INT64_MAX - 3] | A[1] | +; A[3*i + 1] | A[1] | | | A[INT64_MAX - 3] +; +; The root cause is that the product of the BTC, the coefficient, and 2 +; triggers an overflow. +; +define void @weakcorssing_prod_ovfl(ptr %A) { +; CHECK-ALL-LABEL: 'weakcorssing_prod_ovfl' +; CHECK-ALL-NEXT: Src: store i8 0, ptr %idx.0, align 1 --> Dst: store i8 0, ptr %idx.0, align 1 +; CHECK-ALL-NEXT: da analyze - none! +; CHECK-ALL-NEXT: Src: store i8 0, ptr %idx.0, align 1 --> Dst: store i8 1, ptr %idx.1, align 1 +; CHECK-ALL-NEXT: da analyze - none! +; CHECK-ALL-NEXT: Src: store i8 1, ptr %idx.1, align 1 --> Dst: store i8 1, ptr %idx.1, align 1 +; CHECK-ALL-NEXT: da analyze - none! +; +; CHECK-WEAK-CROSSING-SIV-LABEL: 'weakcorssing_prod_ovfl' +; CHECK-WEAK-CROSSING-SIV-NEXT: Src: store i8 0, ptr %idx.0, align 1 --> Dst: store i8 0, ptr %idx.0, align 1 +; CHECK-WEAK-CROSSING-SIV-NEXT: da analyze - consistent output [*]! +; CHECK-WEAK-CROSSING-SIV-NEXT: Src: store i8 0, ptr %idx.0, align 1 --> Dst: store i8 1, ptr %idx.1, align 1 +; CHECK-WEAK-CROSSING-SIV-NEXT: da analyze - none! +; CHECK-WEAK-CROSSING-SIV-NEXT: Src: store i8 1, ptr %idx.1, align 1 --> Dst: store i8 1, ptr %idx.1, align 1 +; CHECK-WEAK-CROSSING-SIV-NEXT: da analyze - consistent output [*]! 
+; +entry: + br label %loop + +loop: + %i = phi i64 [ 0, %entry ], [ %i.inc, %loop ] + %subscript.0 = phi i64 [ 9223372036854775807, %entry ], [ %subscript.0.next, %loop ] + %subscript.1 = phi i64 [ 1, %entry ], [ %subscript.1.next, %loop ] + %idx.0 = getelementptr inbounds i8, ptr %A, i64 %subscript.0 + %idx.1 = getelementptr inbounds i8, ptr %A, i64 %subscript.1 + store i8 0, ptr %idx.0 + store i8 1, ptr %idx.1 + %i.inc = add nuw nsw i64 %i, 1 + %subscript.0.next = add nsw i64 %subscript.0, -3 + %subscript.1.next = add nsw i64 %subscript.1, 3 + %ec = icmp sgt i64 %i.inc, 3074457345618258602 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/Analysis/DependenceAnalysis/weak-zero-siv-overflow.ll b/llvm/test/Analysis/DependenceAnalysis/weak-zero-siv-overflow.ll new file mode 100644 index 0000000000000..6317c387858d3 --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/weak-zero-siv-overflow.ll @@ -0,0 +1,122 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-ALL +; RUN: opt < %s -disable-output "-passes=print<da>" -da-enable-dependence-test=weak-zero-siv 2>&1 \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-WEAK-ZERO-SIV + +; for (i = 0; i < (1LL << 62); i++) { +; if (0 <= 2*i - 2) +; A[2*i - 2] = 1; +; A[2] = 2; +; } +; +; FIXME: DependenceAnalysis currently detects no dependency between the two +; stores, but it does exist. The root cause is that the product of the BTC and +; the coefficient ((1LL << 62) - 1 and 2) overflows in a signed sense.
+; +define void @weakzero_dst_siv_prod_ovfl(ptr %A) { +; CHECK-ALL-LABEL: 'weakzero_dst_siv_prod_ovfl' +; CHECK-ALL-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1 +; CHECK-ALL-NEXT: da analyze - none! +; CHECK-ALL-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1 +; CHECK-ALL-NEXT: da analyze - none! +; CHECK-ALL-NEXT: Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1 +; CHECK-ALL-NEXT: da analyze - consistent output [S]! +; +; CHECK-WEAK-ZERO-SIV-LABEL: 'weakzero_dst_siv_prod_ovfl' +; CHECK-WEAK-ZERO-SIV-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1 +; CHECK-WEAK-ZERO-SIV-NEXT: da analyze - consistent output [*]! +; CHECK-WEAK-ZERO-SIV-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1 +; CHECK-WEAK-ZERO-SIV-NEXT: da analyze - none! +; CHECK-WEAK-ZERO-SIV-NEXT: Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1 +; CHECK-WEAK-ZERO-SIV-NEXT: da analyze - consistent output [S]! +; +entry: + br label %loop.header + +loop.header: + %i = phi i64 [ 0, %entry ], [ %i.inc, %loop.latch ] + %offset = phi i64 [ -2, %entry ], [ %offset.next, %loop.latch ] + %ec = icmp eq i64 %i, 4611686018427387904 + br i1 %ec, label %exit, label %loop.body + +loop.body: + %cond = icmp sge i64 %offset, 0 + br i1 %cond, label %if.then, label %loop.latch + +if.then: + %gep.0 = getelementptr inbounds i8, ptr %A, i64 %offset + store i8 1, ptr %gep.0 + br label %loop.latch + +loop.latch: + %gep.1 = getelementptr inbounds i8, ptr %A, i64 2 + store i8 2, ptr %gep.1 + %i.inc = add nuw nsw i64 %i, 1 + %offset.next = add nsw i64 %offset, 2 + br label %loop.header + +exit: + ret void +} + +; for (i = 0; i < n; i++) { +; if (0 <= 2*i - 1) +; A[2*i - 1] = 1; +; A[INT64_MAX] = 2; +; } +; +; FIXME: DependenceAnalysis currently detects no dependency between the two +; stores, but it does exist. 
When `%n` is 2^62, the value of `%offset` will be +; the same as INT64_MAX at the last iteration. +; The root cause is that the calculation of the difference between the two +; constants (INT64_MAX and -1) overflows in a signed sense. +; +define void @weakzero_dst_siv_delta_ovfl(ptr %A, i64 %n) { +; CHECK-ALL-LABEL: 'weakzero_dst_siv_delta_ovfl' +; CHECK-ALL-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1 +; CHECK-ALL-NEXT: da analyze - none! +; CHECK-ALL-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1 +; CHECK-ALL-NEXT: da analyze - none! +; CHECK-ALL-NEXT: Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1 +; CHECK-ALL-NEXT: da analyze - consistent output [S]! +; +; CHECK-WEAK-ZERO-SIV-LABEL: 'weakzero_dst_siv_delta_ovfl' +; CHECK-WEAK-ZERO-SIV-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1 +; CHECK-WEAK-ZERO-SIV-NEXT: da analyze - consistent output [*]! +; CHECK-WEAK-ZERO-SIV-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1 +; CHECK-WEAK-ZERO-SIV-NEXT: da analyze - none! +; CHECK-WEAK-ZERO-SIV-NEXT: Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1 +; CHECK-WEAK-ZERO-SIV-NEXT: da analyze - consistent output [S]! 
+; +entry: + %guard = icmp sgt i64 %n, 0 + br i1 %guard, label %loop.header, label %exit + +loop.header: + %i = phi i64 [ 0, %entry ], [ %i.inc, %loop.latch ] + %offset = phi i64 [ -2, %entry ], [ %offset.next, %loop.latch ] + %ec = icmp eq i64 %i, %n + br i1 %ec, label %exit, label %loop.body + +loop.body: + %cond = icmp sge i64 %offset, 0 + br i1 %cond, label %if.then, label %loop.latch + +if.then: + %gep.0 = getelementptr inbounds i8, ptr %A, i64 %offset + store i8 1, ptr %gep.0 + br label %loop.latch + +loop.latch: + %gep.1 = getelementptr inbounds i8, ptr %A, i64 9223372036854775807 + store i8 2, ptr %gep.1 + %i.inc = add nuw nsw i64 %i, 1 + %offset.next = add nsw i64 %offset, 2 + br label %loop.header + +exit: + ret void +} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} From 63d59cbe5440f8f3a7115472baa118b5b5b58f2b Mon Sep 17 00:00:00 2001 From: Ron Lieberman Date: Thu, 30 Oct 2025 06:22:13 -0500 Subject: [PATCH 21/21] Xfail: downstream test:readfirstlanes llvm/test/CodeGen/AMDGPU/fold-redundant-sgpr-vgpr-rw-across-bb.ll --- .../CodeGen/AMDGPU/fold-redundant-sgpr-vgpr-rw-across-bb.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/fold-redundant-sgpr-vgpr-rw-across-bb.ll b/llvm/test/CodeGen/AMDGPU/fold-redundant-sgpr-vgpr-rw-across-bb.ll index 2ad9c0e71c5f3..7adf218f5210e 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-redundant-sgpr-vgpr-rw-across-bb.ll +++ b/llvm/test/CodeGen/AMDGPU/fold-redundant-sgpr-vgpr-rw-across-bb.ll @@ -1,6 +1,6 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck --check-prefix=CHECK1 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -amdgpu-si-fold-operands-preheader-threshold=10 < %s | FileCheck --check-prefix=CHECK2 %s - +; XFAIL: * define protected amdgpu_kernel void @main(ptr addrspace(1) noundef %args.coerce, ptr addrspace(1) noundef %args.coerce2, ptr addrspace(1) noundef %args.coerce4, i32 noundef %args12) { ; 
CHECK1-LABEL: main: ; check that non-redundant readfirstlanes are not removed @@ -76,4 +76,4 @@ for.body.i: ; preds = %for.body.i, %for.bo add.exit: ; preds = %for.body.i, %entry ret void -} \ No newline at end of file +}