Skip to content

Commit

Permalink
[DAG] mergeStore - peek through truncates when finding dead store(trunc(load())) patterns
Browse files Browse the repository at this point in the history

Extend the existing store(load()) removal code to account for intermediate truncates that some targets won't remove with canCombineTruncStore - we only care about the load/store MemoryVT.

Fixes regression from D146121
  • Loading branch information
RKSimon committed Mar 15, 2023
1 parent 7501e53 commit c1f81e7
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 37 deletions.
4 changes: 4 additions & 0 deletions llvm/include/llvm/CodeGen/SelectionDAGNodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -1712,6 +1712,10 @@ SDValue peekThroughOneUseBitcasts(SDValue V);
/// If \p V is not an extracted subvector, it is returned as-is.
SDValue peekThroughExtractSubvectors(SDValue V);

/// Return the non-truncated source operand of \p V if it exists.
/// A chain of nested ISD::TRUNCATE nodes is looked through completely, and no
/// use-count check is made on the intermediate truncates.
/// If \p V is not a truncation, it is returned as-is.
SDValue peekThroughTruncates(SDValue V);

/// Returns true if \p V is a bitwise not operation. Assumes that an all ones
/// constant is canonicalized to be operand 1.
bool isBitwiseNot(SDValue V, bool AllowUndefs = false);
Expand Down
8 changes: 6 additions & 2 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20391,9 +20391,13 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
}

// If this is a load followed by a store to the same location, then the store
// is dead/noop.
// is dead/noop. Peek through any truncates if canCombineTruncStore failed.
// TODO: Add big-endian truncate support with test coverage.
// TODO: Can relax for unordered atomics (see D66309)
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
SDValue TruncVal = DAG.getDataLayout().isLittleEndian()
? peekThroughTruncates(Value)
: Value;
if (auto *Ld = dyn_cast<LoadSDNode>(TruncVal)) {
if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
ST->isUnindexed() && ST->isSimple() &&
Ld->getAddressSpace() == ST->getAddressSpace() &&
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11040,6 +11040,12 @@ SDValue llvm::peekThroughExtractSubvectors(SDValue V) {
return V;
}

// Walk up operand 0 for as long as the current node is an ISD::TRUNCATE and
// hand back the first value that is not a truncate (possibly V itself).
SDValue llvm::peekThroughTruncates(SDValue V) {
  for (;;) {
    if (V.getOpcode() != ISD::TRUNCATE)
      return V;
    V = V.getOperand(0);
  }
}

bool llvm::isBitwiseNot(SDValue V, bool AllowUndefs) {
if (V.getOpcode() != ISD::XOR)
return false;
Expand Down
58 changes: 23 additions & 35 deletions llvm/test/CodeGen/X86/illegal-bitfield-loadstore.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,19 @@
define void @i24_or(ptr %a) {
; X86-LABEL: i24_or:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%ecx), %edx
; X86-NEXT: movzbl 2(%ecx), %eax
; X86-NEXT: movb %al, 2(%ecx)
; X86-NEXT: shll $16, %eax
; X86-NEXT: orl %edx, %eax
; X86-NEXT: orl $384, %eax # imm = 0x180
; X86-NEXT: movw %ax, (%ecx)
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %ecx
; X86-NEXT: movzbl 2(%eax), %edx
; X86-NEXT: shll $16, %edx
; X86-NEXT: orl %ecx, %edx
; X86-NEXT: orl $384, %edx # imm = 0x180
; X86-NEXT: movw %dx, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: i24_or:
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzbl 2(%rdi), %ecx
; X64-NEXT: movb %cl, 2(%rdi)
; X64-NEXT: shll $16, %ecx
; X64-NEXT: orl %eax, %ecx
; X64-NEXT: orl $384, %ecx # imm = 0x180
Expand All @@ -35,21 +33,19 @@ define void @i24_and_or(ptr %a) {
; X86-LABEL: i24_and_or:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: movzbl 2(%eax), %ecx
; X86-NEXT: movb %cl, 2(%eax)
; X86-NEXT: shll $16, %ecx
; X86-NEXT: orl %edx, %ecx
; X86-NEXT: orl $384, %ecx # imm = 0x180
; X86-NEXT: andl $-128, %ecx
; X86-NEXT: movw %cx, (%eax)
; X86-NEXT: movzwl (%eax), %ecx
; X86-NEXT: movzbl 2(%eax), %edx
; X86-NEXT: shll $16, %edx
; X86-NEXT: orl %ecx, %edx
; X86-NEXT: orl $384, %edx # imm = 0x180
; X86-NEXT: andl $-128, %edx
; X86-NEXT: movw %dx, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: i24_and_or:
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzbl 2(%rdi), %ecx
; X64-NEXT: movb %cl, 2(%rdi)
; X64-NEXT: shll $16, %ecx
; X64-NEXT: orl %eax, %ecx
; X64-NEXT: orl $384, %ecx # imm = 0x180
Expand All @@ -66,29 +62,27 @@ define void @i24_and_or(ptr %a) {
define void @i24_insert_bit(ptr %a, i1 zeroext %bit) {
; X86-LABEL: i24_insert_bit:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %ebx, -8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: movzbl 2(%eax), %ebx
; X86-NEXT: movb %bl, 2(%eax)
; X86-NEXT: shll $16, %ebx
; X86-NEXT: orl %edx, %ebx
; X86-NEXT: movzbl 2(%eax), %esi
; X86-NEXT: shll $16, %esi
; X86-NEXT: orl %edx, %esi
; X86-NEXT: shll $13, %ecx
; X86-NEXT: andl $16769023, %ebx # imm = 0xFFDFFF
; X86-NEXT: orl %ecx, %ebx
; X86-NEXT: movw %bx, (%eax)
; X86-NEXT: popl %ebx
; X86-NEXT: andl $16769023, %esi # imm = 0xFFDFFF
; X86-NEXT: orl %ecx, %esi
; X86-NEXT: movw %si, (%eax)
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: i24_insert_bit:
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzbl 2(%rdi), %ecx
; X64-NEXT: movb %cl, 2(%rdi)
; X64-NEXT: shll $16, %ecx
; X64-NEXT: orl %eax, %ecx
; X64-NEXT: shll $13, %esi
Expand All @@ -114,8 +108,6 @@ define void @i56_or(ptr %a) {
;
; X64-LABEL: i56_or:
; X64: # %bb.0:
; X64-NEXT: movzwl 4(%rdi), %eax
; X64-NEXT: movw %ax, 4(%rdi)
; X64-NEXT: orl $384, (%rdi) # imm = 0x180
; X64-NEXT: retq
%aa = load i56, ptr %a, align 1
Expand All @@ -138,8 +130,6 @@ define void @i56_and_or(ptr %a) {
; X64: # %bb.0:
; X64-NEXT: movzwl 4(%rdi), %eax
; X64-NEXT: movzbl 6(%rdi), %ecx
; X64-NEXT: movb %cl, 6(%rdi)
; X64-NEXT: # kill: def $ecx killed $ecx def $rcx
; X64-NEXT: shll $16, %ecx
; X64-NEXT: orl %eax, %ecx
; X64-NEXT: shlq $32, %rcx
Expand Down Expand Up @@ -175,8 +165,6 @@ define void @i56_insert_bit(ptr %a, i1 zeroext %bit) {
; X64: # %bb.0:
; X64-NEXT: movzwl 4(%rdi), %eax
; X64-NEXT: movzbl 6(%rdi), %ecx
; X64-NEXT: movb %cl, 6(%rdi)
; X64-NEXT: # kill: def $ecx killed $ecx def $rcx
; X64-NEXT: shll $16, %ecx
; X64-NEXT: orl %eax, %ecx
; X64-NEXT: shlq $32, %rcx
Expand Down

0 comments on commit c1f81e7

Please sign in to comment.