Skip to content

Commit

Permalink
[DAG] SimplifyDemandedBits - ensure we demand the high bits for shl nsw/nuw ops
Browse files Browse the repository at this point in the history

Matches the behaviour of InstCombinerImpl::SimplifyDemandedUseBits.

Exposes an issue with the AND(CTPOP(X),1) -> PARITY(X) fold, which fails to correctly demand the known-zero upper bits.

Fixes llvm#69965
  • Loading branch information
RKSimon committed Oct 24, 2023
1 parent 72561b3 commit cd17571
Show file tree
Hide file tree
Showing 15 changed files with 339 additions and 257 deletions.
16 changes: 12 additions & 4 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1785,14 +1785,22 @@ bool TargetLowering::SimplifyDemandedBits(
}

APInt InDemandedMask = DemandedBits.lshr(ShAmt);

// If the shift is NUW/NSW, then it does demand the high bits.
if (Op->getFlags().hasNoSignedWrap())
InDemandedMask.setHighBits(ShAmt + 1);
else if (Op->getFlags().hasNoUnsignedWrap())
InDemandedMask.setHighBits(ShAmt);

if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero <<= ShAmt;
Known.One <<= ShAmt;
// low bits known zero.
Known.Zero.setLowBits(ShAmt);

Known = KnownBits::shl(Known,
KnownBits::makeConstant(APInt(BitWidth, ShAmt)),
/* NUW */ Op->getFlags().hasNoUnsignedWrap(),
/* NSW */ Op->getFlags().hasNoSignedWrap());

// Attempt to avoid multi-use ops if we don't need anything from them.
if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
Expand Down
5 changes: 3 additions & 2 deletions llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,9 @@ entry:
define i32 @extendedLeftShiftshortTointBy16(i16 signext %a) nounwind readnone ssp {
; CHECK-LABEL: extendedLeftShiftshortTointBy16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: lsl w8, w0, #16
; CHECK-NEXT: add w0, w8, #16, lsl #12 ; =65536
; CHECK-NEXT: add w8, w0, #1
; CHECK-NEXT: and w8, w8, #0xffff
; CHECK-NEXT: lsl w0, w8, #16
; CHECK-NEXT: ret
entry:
%inc = add i16 %a, 1
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/load-combine.ll
Original file line number Diff line number Diff line change
Expand Up @@ -578,7 +578,7 @@ define void @short_vector_to_i32_unused_low_i8(ptr %in, ptr %out, ptr %p) {
; CHECK-NEXT: umov w10, v0.h[3]
; CHECK-NEXT: lsl w8, w8, #16
; CHECK-NEXT: bfi w8, w9, #8, #8
; CHECK-NEXT: orr w8, w8, w10, lsl #24
; CHECK-NEXT: bfi w8, w10, #24, #8
; CHECK-NEXT: str w8, [x1]
; CHECK-NEXT: ret
%ld = load <4 x i8>, ptr %in, align 4
Expand Down Expand Up @@ -609,8 +609,8 @@ define void @short_vector_to_i32_unused_high_i8(ptr %in, ptr %out, ptr %p) {
; CHECK-NEXT: ldrh w9, [x0]
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: umov w8, v0.h[2]
; CHECK-NEXT: orr w8, w9, w8, lsl #16
; CHECK-NEXT: str w8, [x1]
; CHECK-NEXT: bfi w9, w8, #16, #8
; CHECK-NEXT: str w9, [x1]
; CHECK-NEXT: ret
%ld = load <4 x i8>, ptr %in, align 4

Expand Down Expand Up @@ -640,7 +640,7 @@ define void @short_vector_to_i32_unused_low_i16(ptr %in, ptr %out, ptr %p) {
; CHECK-NEXT: umov w8, v0.h[3]
; CHECK-NEXT: umov w9, v0.h[2]
; CHECK-NEXT: lsl w8, w8, #24
; CHECK-NEXT: orr w8, w8, w9, lsl #16
; CHECK-NEXT: bfi w8, w9, #16, #8
; CHECK-NEXT: str w8, [x1]
; CHECK-NEXT: ret
%ld = load <4 x i8>, ptr %in, align 4
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AMDGPU/shl.ll
Original file line number Diff line number Diff line change
Expand Up @@ -489,6 +489,7 @@ define amdgpu_kernel void @shl_i16_i_s(ptr addrspace(1) %out, i16 zeroext %a) {
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_and_b32 s4, s4, 15
; VI-NEXT: s_lshl_b32 s4, s4, 12
; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
Expand Down
10 changes: 2 additions & 8 deletions llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,7 @@ define amdgpu_gfx void @strict_wwm_no_cfg(ptr addrspace(8) inreg %tmp14) {
; GFX9-O0-NEXT: s_mov_b64 exec, s[40:41]
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0
; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[40:41], v3, v4
; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[40:41]
; GFX9-O0-NEXT: s_mov_b32 s35, 1
; GFX9-O0-NEXT: v_lshlrev_b32_e64 v3, s35, v3
; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[40:41]
; GFX9-O0-NEXT: s_mov_b32 s35, 2
; GFX9-O0-NEXT: v_and_b32_e64 v3, v3, s35
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[36:39], s34 offset:4
Expand Down Expand Up @@ -101,7 +99,6 @@ define amdgpu_gfx void @strict_wwm_no_cfg(ptr addrspace(8) inreg %tmp14) {
; GFX9-O3-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5
; GFX9-O3-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; GFX9-O3-NEXT: v_lshlrev_b32_e32 v4, 1, v4
; GFX9-O3-NEXT: v_and_b32_e32 v4, 2, v4
; GFX9-O3-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:4
; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1
; GFX9-O3-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
Expand Down Expand Up @@ -235,9 +232,7 @@ define amdgpu_gfx void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg)
; GFX9-O0-NEXT: v_readlane_b32 s35, v0, 3
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[36:37], v3, v4
; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[36:37]
; GFX9-O0-NEXT: s_mov_b32 s36, 1
; GFX9-O0-NEXT: v_lshlrev_b32_e64 v3, s36, v3
; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[36:37]
; GFX9-O0-NEXT: s_mov_b32 s36, 2
; GFX9-O0-NEXT: v_and_b32_e64 v3, v3, s36
; GFX9-O0-NEXT: s_mov_b32 s40, s35
Expand Down Expand Up @@ -302,7 +297,6 @@ define amdgpu_gfx void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg)
; GFX9-O3-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
; GFX9-O3-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9-O3-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX9-O3-NEXT: v_and_b32_e32 v0, 2, v0
; GFX9-O3-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:4
; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1
; GFX9-O3-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
Expand Down
56 changes: 23 additions & 33 deletions llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,16 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9LE-LABEL: test64:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: add 5, 3, 4
; P9LE-NEXT: lfdx 0, 3, 4
; P9LE-NEXT: lxsdx 2, 3, 4
; P9LE-NEXT: addis 3, 2, .LCPI0_0@toc@ha
; P9LE-NEXT: xxlxor 2, 2, 2
; P9LE-NEXT: xxlxor 1, 1, 1
; P9LE-NEXT: vspltisw 4, 8
; P9LE-NEXT: lxsd 3, 4(5)
; P9LE-NEXT: addi 3, 3, .LCPI0_0@toc@l
; P9LE-NEXT: vadduwm 4, 4, 4
; P9LE-NEXT: lxv 1, 0(3)
; P9LE-NEXT: addis 3, 2, .LCPI0_1@toc@ha
; P9LE-NEXT: addi 3, 3, .LCPI0_1@toc@l
; P9LE-NEXT: xxperm 2, 0, 1
; P9LE-NEXT: lxv 0, 0(3)
; P9LE-NEXT: xxperm 3, 3, 0
; P9LE-NEXT: xxperm 3, 1, 0
; P9LE-NEXT: xxperm 2, 1, 0
; P9LE-NEXT: vnegw 3, 3
; P9LE-NEXT: vslw 3, 3, 4
; P9LE-NEXT: vsubuwm 2, 3, 2
Expand All @@ -50,11 +47,8 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9BE-NEXT: addi 3, 3, .LCPI0_0@toc@l
; P9BE-NEXT: vadduwm 4, 4, 4
; P9BE-NEXT: lxv 0, 0(3)
; P9BE-NEXT: addis 3, 2, .LCPI0_1@toc@ha
; P9BE-NEXT: addi 3, 3, .LCPI0_1@toc@l
; P9BE-NEXT: xxperm 3, 1, 0
; P9BE-NEXT: xxperm 2, 1, 0
; P9BE-NEXT: lxv 0, 0(3)
; P9BE-NEXT: xxperm 3, 3, 0
; P9BE-NEXT: vnegw 3, 3
; P9BE-NEXT: vslw 3, 3, 4
; P9BE-NEXT: vsubuwm 2, 3, 2
Expand All @@ -71,11 +65,9 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9BE-AIX-NEXT: vspltisw 4, 8
; P9BE-AIX-NEXT: lxsd 3, 4(5)
; P9BE-AIX-NEXT: lxv 0, 0(3)
; P9BE-AIX-NEXT: ld 3, L..C1(2) # %const.1
; P9BE-AIX-NEXT: vadduwm 4, 4, 4
; P9BE-AIX-NEXT: xxperm 3, 1, 0
; P9BE-AIX-NEXT: xxperm 2, 1, 0
; P9BE-AIX-NEXT: lxv 0, 0(3)
; P9BE-AIX-NEXT: xxperm 3, 3, 0
; P9BE-AIX-NEXT: vnegw 3, 3
; P9BE-AIX-NEXT: vslw 3, 3, 4
; P9BE-AIX-NEXT: vsubuwm 2, 3, 2
Expand All @@ -86,25 +78,23 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9BE-AIX32-LABEL: test64:
; P9BE-AIX32: # %bb.0: # %entry
; P9BE-AIX32-NEXT: lwzux 4, 3, 4
; P9BE-AIX32-NEXT: xxlxor 2, 2, 2
; P9BE-AIX32-NEXT: vspltisw 4, 8
; P9BE-AIX32-NEXT: stw 4, -48(1)
; P9BE-AIX32-NEXT: vadduwm 4, 4, 4
; P9BE-AIX32-NEXT: stw 4, -48(1)
; P9BE-AIX32-NEXT: lwz 4, 4(3)
; P9BE-AIX32-NEXT: lxv 0, -48(1)
; P9BE-AIX32-NEXT: stw 4, -32(1)
; P9BE-AIX32-NEXT: lwz 4, L..C0(2) # %const.0
; P9BE-AIX32-NEXT: lxv 1, -32(1)
; P9BE-AIX32-NEXT: lwz 3, 8(3)
; P9BE-AIX32-NEXT: lxv 1, -32(1)
; P9BE-AIX32-NEXT: stw 3, -16(1)
; P9BE-AIX32-NEXT: lwz 3, L..C1(2) # %const.1
; P9BE-AIX32-NEXT: lxv 2, 0(4)
; P9BE-AIX32-NEXT: lxv 3, -16(1)
; P9BE-AIX32-NEXT: xxmrghw 2, 0, 1
; P9BE-AIX32-NEXT: lxv 0, 0(4)
; P9BE-AIX32-NEXT: xxperm 2, 2, 0
; P9BE-AIX32-NEXT: lxv 0, -16(1)
; P9BE-AIX32-NEXT: xxmrghw 3, 1, 0
; P9BE-AIX32-NEXT: lxv 0, 0(3)
; P9BE-AIX32-NEXT: xxperm 3, 3, 0
; P9BE-AIX32-NEXT: xxlxor 0, 0, 0
; P9BE-AIX32-NEXT: xxperm 2, 0, 2
; P9BE-AIX32-NEXT: xxmrghw 3, 1, 3
; P9BE-AIX32-NEXT: xxperm 3, 0, 2
; P9BE-AIX32-NEXT: vnegw 3, 3
; P9BE-AIX32-NEXT: vslw 3, 3, 4
; P9BE-AIX32-NEXT: vsubuwm 2, 3, 2
Expand Down Expand Up @@ -180,7 +170,7 @@ define void @test32(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9BE-AIX: # %bb.0: # %entry
; P9BE-AIX-NEXT: add 5, 3, 4
; P9BE-AIX-NEXT: lxsiwzx 2, 3, 4
; P9BE-AIX-NEXT: ld 3, L..C2(2) # %const.0
; P9BE-AIX-NEXT: ld 3, L..C1(2) # %const.0
; P9BE-AIX-NEXT: xxlxor 0, 0, 0
; P9BE-AIX-NEXT: vspltisw 4, 8
; P9BE-AIX-NEXT: lxv 1, 0(3)
Expand All @@ -200,7 +190,7 @@ define void @test32(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9BE-AIX32: # %bb.0: # %entry
; P9BE-AIX32-NEXT: add 5, 3, 4
; P9BE-AIX32-NEXT: lxsiwzx 2, 3, 4
; P9BE-AIX32-NEXT: lwz 3, L..C2(2) # %const.0
; P9BE-AIX32-NEXT: lwz 3, L..C1(2) # %const.0
; P9BE-AIX32-NEXT: xxlxor 0, 0, 0
; P9BE-AIX32-NEXT: vspltisw 4, 8
; P9BE-AIX32-NEXT: lxv 1, 0(3)
Expand Down Expand Up @@ -297,9 +287,9 @@ define void @test16(ptr nocapture readonly %sums, i32 signext %delta, i32 signex
; P9BE-AIX-NEXT: li 7, 16
; P9BE-AIX-NEXT: add 6, 3, 4
; P9BE-AIX-NEXT: lxsihzx 1, 3, 4
; P9BE-AIX-NEXT: ld 3, L..C3(2) # %const.1
; P9BE-AIX-NEXT: ld 3, L..C2(2) # %const.1
; P9BE-AIX-NEXT: lxsihzx 2, 6, 7
; P9BE-AIX-NEXT: ld 6, L..C4(2) # %const.0
; P9BE-AIX-NEXT: ld 6, L..C3(2) # %const.0
; P9BE-AIX-NEXT: lxv 0, 0(6)
; P9BE-AIX-NEXT: li 6, 0
; P9BE-AIX-NEXT: mtvsrwz 3, 6
Expand Down Expand Up @@ -328,7 +318,7 @@ define void @test16(ptr nocapture readonly %sums, i32 signext %delta, i32 signex
; P9BE-AIX32-NEXT: sth 4, -48(1)
; P9BE-AIX32-NEXT: lxv 4, -48(1)
; P9BE-AIX32-NEXT: sth 3, -32(1)
; P9BE-AIX32-NEXT: lwz 3, L..C3(2) # %const.0
; P9BE-AIX32-NEXT: lwz 3, L..C2(2) # %const.0
; P9BE-AIX32-NEXT: lxv 3, -32(1)
; P9BE-AIX32-NEXT: vmrghh 4, 2, 4
; P9BE-AIX32-NEXT: lxv 0, 0(3)
Expand Down Expand Up @@ -437,9 +427,9 @@ define void @test8(ptr nocapture readonly %sums, i32 signext %delta, i32 signext
; P9BE-AIX-NEXT: add 6, 3, 4
; P9BE-AIX-NEXT: li 7, 8
; P9BE-AIX-NEXT: lxsibzx 3, 3, 4
; P9BE-AIX-NEXT: ld 3, L..C5(2) # %const.1
; P9BE-AIX-NEXT: ld 3, L..C4(2) # %const.1
; P9BE-AIX-NEXT: lxsibzx 0, 6, 7
; P9BE-AIX-NEXT: ld 6, L..C6(2) # %const.0
; P9BE-AIX-NEXT: ld 6, L..C5(2) # %const.0
; P9BE-AIX-NEXT: lxv 1, 0(6)
; P9BE-AIX-NEXT: li 6, 0
; P9BE-AIX-NEXT: mtvsrwz 2, 6
Expand All @@ -464,9 +454,9 @@ define void @test8(ptr nocapture readonly %sums, i32 signext %delta, i32 signext
; P9BE-AIX32-NEXT: add 6, 3, 4
; P9BE-AIX32-NEXT: li 7, 8
; P9BE-AIX32-NEXT: lxsibzx 3, 3, 4
; P9BE-AIX32-NEXT: lwz 3, L..C4(2) # %const.1
; P9BE-AIX32-NEXT: lwz 3, L..C3(2) # %const.1
; P9BE-AIX32-NEXT: lxsibzx 0, 6, 7
; P9BE-AIX32-NEXT: lwz 6, L..C5(2) # %const.0
; P9BE-AIX32-NEXT: lwz 6, L..C4(2) # %const.0
; P9BE-AIX32-NEXT: lxv 1, 0(6)
; P9BE-AIX32-NEXT: li 6, 0
; P9BE-AIX32-NEXT: mtvsrwz 2, 6
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/RISCV/rv64i-complex-float.ll
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ define i64 @complex_float_add(i64 %a.coerce, i64 %b.coerce) nounwind {
; CHECK-NEXT: mv a0, s0
; CHECK-NEXT: mv a1, s1
; CHECK-NEXT: call __addsf3@plt
; CHECK-NEXT: andi a0, a0, -1
; CHECK-NEXT: slli a0, a0, 32
; CHECK-NEXT: slli s2, s2, 32
; CHECK-NEXT: srli a1, s2, 32
Expand Down
7 changes: 3 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/pr61561.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,11 @@ define <vscale x 4 x i8> @foo(ptr %p) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: vl1re16.v v8, (a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vsll.vi v8, v8, 3
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
; CHECK-NEXT: vsll.vi v8, v10, 3
; CHECK-NEXT: li a0, 248
; CHECK-NEXT: vand.vx v8, v10, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: lui a0, 4
; CHECK-NEXT: vmv.v.x v10, a0
; CHECK-NEXT: lui a0, 1
Expand Down
2 changes: 2 additions & 0 deletions llvm/test/CodeGen/RISCV/split-store.ll
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ define void @int32_int32_pair(i32 %tmp1, i32 %tmp2, ptr %ref.tmp) {
;
; RV64-LABEL: int32_int32_pair:
; RV64: # %bb.0:
; RV64-NEXT: andi a1, a1, -1
; RV64-NEXT: slli a1, a1, 32
; RV64-NEXT: slli a0, a0, 32
; RV64-NEXT: srli a0, a0, 32
Expand All @@ -138,6 +139,7 @@ define void @int32_int32_pair(i32 %tmp1, i32 %tmp2, ptr %ref.tmp) {
;
; RV64D-LABEL: int32_int32_pair:
; RV64D: # %bb.0:
; RV64D-NEXT: andi a1, a1, -1
; RV64D-NEXT: slli a1, a1, 32
; RV64D-NEXT: slli a0, a0, 32
; RV64D-NEXT: srli a0, a0, 32
Expand Down

0 comments on commit cd17571

Please sign in to comment.