[DAG] add (~a | x) & (a | y) -> (a & (x ^ y)) ^y for foldMaskedMerge #144342

woruyu · 2025-06-16T12:35:55Z

Summary

This PR resolves #143864

Add the fold (~a | x) & (a | y) -> (a & (x ^ y)) ^ y to the foldMaskedMerge function using SDPatternMatch.

After adding this pattern, I ran ninja check-llvm-codegen; all other test cases remain unchanged, so I added a test case (fold-masked-merge-demorgan.ll) for it.

llvmbot · 2025-06-16T12:36:26Z

@llvm/pr-subscribers-llvm-selectiondag

@llvm/pr-subscribers-backend-x86

Author: woruyu (woruyu)

Changes

Summary

This PR resolves #143864

Add the fold (~a | x) & (a | y) -> (a & (x ^ y)) ^ y to the foldMaskedMerge function using SDPatternMatch.

After adding this pattern, I ran ninja check-llvm-codegen; all other test cases remain unchanged, so I added a test case (fold-masked-merge-demorgan.ll) for it.

Full diff: https://github.com/llvm/llvm-project/pull/144342.diff

2 Files Affected:

(modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+36-26)
(added) llvm/test/CodeGen/X86/fold-masked-merge-demorgan.ll (+267)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5d62ded171f4f..f131822e11468 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7204,6 +7204,38 @@ static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
   return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
 }
 
+/// Fold "masked merge" expressions like `(m & x) | (~m & y)` and its DeMorgan
+/// variant `(~m | x) & (m | y)` into the equivalent `((x ^ y) & m) ^ y`, which
+/// is typically better for targets without a fused "and-not" operation.
+/// Returns an empty SDValue when the target has and-not or nothing matches.
+static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG,
+                               const TargetLowering &TLI, const SDLoc &DL) {
+  // Note that masked-merge variants using XOR or ADD expressions are
+  // normalized to OR by InstCombine so we only check for OR or AND.
+  assert((Node->getOpcode() == ISD::OR ||
+          Node->getOpcode() == ISD::AND) &&
+         "Must be called with ISD::OR or ISD::AND node");
+
+  // If the target supports and-not, don't fold this.
+  if (TLI.hasAndNot(SDValue(Node, 0)))
+    return SDValue();
+
+  SDValue M, X, Y;
+  // Try the OR-rooted form first, then its De Morgan dual rooted at an AND.
+  if (sd_match(Node,
+               m_Or(m_OneUse(m_And(m_OneUse(m_Not(m_Value(M))), m_Value(Y))),
+                    m_OneUse(m_And(m_Deferred(M), m_Value(X))))) ||
+      sd_match(Node, m_And(m_OneUse(m_Or(m_Not(m_Value(M)), m_Value(X))),
+                           m_OneUse(m_Or(m_Deferred(M), m_Value(Y)))))) {
+    EVT VT = M.getValueType();
+    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, X, Y);
+    SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor, M);
+    return DAG.getNode(ISD::XOR, DL, VT, And, Y);
+  }
+
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitAND(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -7644,6 +7676,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
     if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
       return R;
 
+  if (VT.isScalarInteger() && VT != MVT::i1)
+    if (SDValue R = foldMaskedMerge(N, DAG, TLI, DL))
+      return R;
+
   return SDValue();
 }
 
@@ -8128,32 +8164,6 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
   return SDValue();
 }
 
-/// Fold "masked merge" expressions like `(m & x) | (~m & y)` into the
-/// equivalent `((x ^ y) & m) ^ y)` pattern.
-/// This is typically a better representation for targets without a fused
-/// "and-not" operation.
-static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG,
-                               const TargetLowering &TLI, const SDLoc &DL) {
-  // Note that masked-merge variants using XOR or ADD expressions are
-  // normalized to OR by InstCombine so we only check for OR.
-  assert(Node->getOpcode() == ISD::OR && "Must be called with ISD::OR node");
-
-  // If the target supports and-not, don't fold this.
-  if (TLI.hasAndNot(SDValue(Node, 0)))
-    return SDValue();
-
-  SDValue M, X, Y;
-  if (sd_match(Node,
-               m_Or(m_OneUse(m_And(m_OneUse(m_Not(m_Value(M))), m_Value(Y))),
-                    m_OneUse(m_And(m_Deferred(M), m_Value(X)))))) {
-    EVT VT = M.getValueType();
-    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, X, Y);
-    SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor, M);
-    return DAG.getNode(ISD::XOR, DL, VT, And, Y);
-  }
-  return SDValue();
-}
-
 SDValue DAGCombiner::visitOR(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
diff --git a/llvm/test/CodeGen/X86/fold-masked-merge-demorgan.ll b/llvm/test/CodeGen/X86/fold-masked-merge-demorgan.ll
new file mode 100644
index 0000000000000..6357680f7586e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fold-masked-merge-demorgan.ll
@@ -0,0 +1,267 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -o - %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=CHECK,NOBMI
+; RUN: llc -o - %s -mtriple=x86_64-- -mattr=+bmi | FileCheck %s --check-prefixes=CHECK,BMI
+;
+; test that masked-merge code is generated as "xor;and;xor" sequence or
+; "andn ; and; or" if and-not is available.
+
+define i32 @masked_merge0_demorgan(i32 %a0, i32 %a1, i32 %a2) {
+; NOBMI-LABEL: masked_merge0_demorgan:
+; NOBMI:       # %bb.0:
+; NOBMI-NEXT:    movl %esi, %eax
+; NOBMI-NEXT:    xorl %edx, %eax
+; NOBMI-NEXT:    andl %edi, %eax
+; NOBMI-NEXT:    xorl %edx, %eax
+; NOBMI-NEXT:    retq
+;
+; BMI-LABEL: masked_merge0_demorgan:
+; BMI:       # %bb.0:
+; BMI-NEXT:    orl	%edi, %edx
+; BMI-NEXT:    andnl	%edi, %esi, %eax
+; BMI-NEXT:    andnl	%edx, %eax, %eax
+; BMI-NEXT:    retq
+  %not = xor i32 %a0, -1
+  %or0 = or i32 %not, %a1
+  %or1 = or i32 %a0, %a2
+  %and = and i32 %or0, %or1
+  ret i32 %and
+}
+
+define i16 @masked_merge1_demorgan(i16 %a0, i16 %a1, i16 %a2) {
+; NOBMI-LABEL: masked_merge1_demorgan:
+; NOBMI:       # %bb.0:
+; NOBMI-NEXT:    movl %esi, %eax
+; NOBMI-NEXT:    xorl %edx, %eax
+; NOBMI-NEXT:    andl %edi, %eax
+; NOBMI-NEXT:    xorl %edx, %eax
+; NOBMI-NEXT:    # kill: def $ax killed $ax killed $eax
+; NOBMI-NEXT:    retq
+;
+; BMI-LABEL: masked_merge1_demorgan:
+; BMI:       # %bb.0:
+; BMI-NEXT:    andnl %edx, %edi, %eax
+; BMI-NEXT:    andl %edi, %esi
+; BMI-NEXT:    orl %esi, %eax
+; BMI-NEXT:    # kill: def $ax killed $ax killed $eax
+; BMI-NEXT:    retq
+  %not = xor i16 %a0, -1
+  %or0 = or i16 %not, %a1
+  %or1 = or i16 %a0, %a2
+  %and = and i16 %or0, %or1
+  ret i16 %and
+}
+
+define i8 @masked_merge2_demorgan(i8 %a0, i8 %a1, i8 %a2) {
+; CHECK-LABEL: masked_merge2_demorgan:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
+; CHECK-NEXT:    retq
+  %not = xor i8 %a0, -1
+  %or0 = or i8 %not, %a1
+  %or1 = or i8 %a0, %a1
+  %and = and i8 %or0, %or1
+  ret i8 %and
+}
+
+define i64 @masked_merge3_demorgan(i64 %a0, i64 %a1, i64 %a2) {
+; NOBMI-LABEL: masked_merge3_demorgan:
+; NOBMI:       # %bb.0:
+; NOBMI-NEXT:    movq %rsi, %rax
+; NOBMI-NEXT:    notq %rdx
+; NOBMI-NEXT:    xorq %rdx, %rax
+; NOBMI-NEXT:    notq %rax
+; NOBMI-NEXT:    andq %rdi, %rax
+; NOBMI-NEXT:    xorq %rdx, %rax
+; NOBMI-NEXT:    retq
+;
+; BMI-LABEL: masked_merge3_demorgan:
+; BMI:       # %bb.0:
+; BMI-NEXT:    andnq	%rdx, %rdi, %rax
+; BMI-NEXT:    andq	%rdi, %rsi
+; BMI-NEXT:    notq	%rsi
+; BMI-NEXT:    andnq	%rsi, %rax, %rax
+; BMI-NEXT:    retq
+  %not_a0  = xor i64 %a0, -1
+  %not_a1  = xor i64 %a1, -1
+  %not_a2  = xor i64 %a2, -1
+  %or0     = or i64 %not_a0, %not_a1
+  %or1     = or i64 %a0, %not_a2
+  %and     = and i64 %or0, %or1
+  ret i64 %and
+}
+
+define i32 @not_a_masked_merge0_demorgan(i32 %a0, i32 %a1, i32 %a2) {
+; CHECK-LABEL: not_a_masked_merge0_demorgan:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    orl	%edi, %edx
+; CHECK-NEXT:    movl	%edi, %eax
+; CHECK-NEXT:    negl	%eax
+; CHECK-NEXT:    orl	%esi, %eax
+; CHECK-NEXT:    andl	%edx, %eax
+; CHECK-NEXT:    retq
+  %not_a_not = sub i32 0, %a0
+  %or0 = or i32 %not_a_not, %a1
+  %or1 = or i32 %a0, %a2
+  %and = and i32 %or0, %or1
+  ret i32 %and
+}
+
+; not a masked merge: `not` operand does not match another `and`-operand.
+define i32 @not_a_masked_merge1_demorgan(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
+; NOBMI-LABEL: not_a_masked_merge1_demorgan:
+; NOBMI:       # %bb.0:
+; NOBMI-NEXT:    movl	%ecx, %eax
+; NOBMI-NEXT:    orl	%edx, %edi
+; NOBMI-NEXT:    notl	%eax
+; NOBMI-NEXT:    orl	%esi, %eax
+; NOBMI-NEXT:    andl	%edi, %eax
+; NOBMI-NEXT:    retq
+;
+; BMI-LABEL: not_a_masked_merge1_demorgan:
+; BMI:       # %bb.0:
+; BMI-NEXT:    orl	%edx, %edi
+; BMI-NEXT:    andnl	%ecx, %esi, %eax
+; BMI-NEXT:    andnl	%edi, %eax, %eax
+; BMI-NEXT:    retq
+  %or1 = or i32 %a0, %a2
+  %not = xor i32 %a3, -1
+  %or0 = or i32 %not, %a1
+  %and = and i32 %or0, %or1
+  ret i32 %and
+}
+
+; not a masked merge: one of the operands of `and` is not an `or`.
+define i32 @not_a_masked_merge2_demorgan(i32 %a0, i32 %a1, i32 %a2) {
+; NOBMI-LABEL: not_a_masked_merge2_demorgan:
+; NOBMI:       # %bb.0:
+; NOBMI-NEXT:    movl	%edi, %eax
+; NOBMI-NEXT:    andl	%edi, %edx
+; NOBMI-NEXT:    notl	%eax
+; NOBMI-NEXT:    orl	%esi, %eax
+; NOBMI-NEXT:    andl	%edx, %eax
+; NOBMI-NEXT:    retq
+;
+; BMI-LABEL: not_a_masked_merge2_demorgan:
+; BMI:       # %bb.0:
+; BMI-NEXT:    andl	%edi, %edx
+; BMI-NEXT:    andnl	%edi, %esi, %eax
+; BMI-NEXT:    andnl	%edx, %eax, %eax
+; BMI-NEXT:    retq
+  %not_an_or1 = and i32 %a0, %a2
+  %not = xor i32 %a0, -1
+  %or0 = or i32 %not, %a1
+  %and = and i32 %or0, %not_an_or1
+  ret i32 %and
+}
+
+define i32 @not_a_masked_merge3_demorgan(i32 %a0, i32 %a1, i32 %a2) {
+; NOBMI-LABEL: not_a_masked_merge3_demorgan:
+; NOBMI:       # %bb.0:
+; NOBMI-NEXT:    movl	%esi, %eax
+; NOBMI-NEXT:    orl	%edi, %edx
+; NOBMI-NEXT:    xorl	%edi, %eax
+; NOBMI-NEXT:    notl	%eax
+; NOBMI-NEXT:    andl	%edx, %eax
+; NOBMI-NEXT:    retq
+;
+; BMI-LABEL: not_a_masked_merge3_demorgan:
+; BMI:       # %bb.0:
+; BMI-NEXT:    orl	%edi, %edx
+; BMI-NEXT:    xorl	%edi, %esi
+; BMI-NEXT:    andnl	%edx, %esi, %eax
+; BMI-NEXT:    retq
+  %or1 = or i32 %a0, %a2
+  %not = xor i32 %a0, -1
+  %not_an_or0 = xor i32 %not, %a1
+  %and = and i32 %not_an_or0, %or1
+  ret i32 %and
+}
+
+; not a masked merge: `not` operand must not be on same `or`.
+define i32 @not_a_masked_merge4_demorgan(i32 %a0, i32 %a1, i32 %a2) {
+; CHECK-LABEL: not_a_masked_merge4_demorgan:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl	%edi, %eax
+; CHECK-NEXT:    orl	%edx, %eax
+; CHECK-NEXT:    retq
+  %or1 = or i32 %a0, %a2
+  %not = xor i32 %a1, -1
+  %or0 = or i32 %not, %a1
+  %and = and i32 %or0, %or1
+  ret i32 %and
+}
+
+; should not transform when operands have multiple users.
+define i32 @masked_merge_no_transform0_demorgan(i32 %a0, i32 %a1, i32 %a2, ptr %p1) {
+; NOBMI-LABEL: masked_merge_no_transform0_demorgan:
+; NOBMI:       # %bb.0:
+; NOBMI-NEXT:    orl	%edi, %edx
+; NOBMI-NEXT:    movl	%edi, %eax
+; NOBMI-NEXT:    notl	%eax
+; NOBMI-NEXT:    orl	%esi, %eax
+; NOBMI-NEXT:    andl	%edx, %eax
+; NOBMI-NEXT:    movl	%edx, (%rcx)
+; NOBMI-NEXT:    retq
+;
+; BMI-LABEL: masked_merge_no_transform0_demorgan:
+; BMI:       # %bb.0:
+; BMI-NEXT:    orl	%edi, %edx
+; BMI-NEXT:    andnl	%edi, %esi, %eax
+; BMI-NEXT:    andnl	%edx, %eax, %eax
+; BMI-NEXT:    movl	%edx, (%rcx)
+; BMI-NEXT:    retq
+  %not = xor i32 %a0, -1
+  %or0 = or i32 %not, %a1
+  %or1 = or i32 %a0, %a2
+  %and = and i32 %or0, %or1
+  store i32 %or1, ptr %p1
+  ret i32 %and
+}
+
+; should not transform when operands have multiple users.
+define i32 @masked_merge_no_transform1_demorgan(i32 %a0, i32 %a1, i32 %a2, ptr %p1) {
+; NOBMI-LABEL: masked_merge_no_transform1_demorgan:
+; NOBMI:       # %bb.0:
+; NOBMI-NEXT:    movl	%esi, %eax
+; NOBMI-NEXT:    xorl	%edx, %eax
+; NOBMI-NEXT:    andl	%edi, %eax
+; NOBMI-NEXT:    notl	%edi
+; NOBMI-NEXT:    xorl	%edx, %eax
+; NOBMI-NEXT:    movl	%edi, (%rcx)
+; NOBMI-NEXT:    retq
+;
+; BMI-LABEL: masked_merge_no_transform1_demorgan:
+; BMI:       # %bb.0:
+; BMI-NEXT:    orl	%edi, %edx
+; BMI-NEXT:    andnl	%edi, %esi, %eax
+; BMI-NEXT:    notl	%edi
+; BMI-NEXT:    andnl	%edx, %eax, %eax
+; BMI-NEXT:    movl	%edi, (%rcx)
+; BMI-NEXT:    retq
+  %not = xor i32 %a0, -1
+  %or0 = or i32 %not, %a1
+  %or1 = or i32 %a0, %a2
+  %and = and i32 %or0, %or1
+  store i32 %not, ptr %p1
+  ret i32 %and
+}
+
+; should not transform when operands have multiple users.
+define i32 @masked_merge_no_transform2_demorgan(i32 %a0, i32 %a1, i32 %a2, ptr %p1) {
+; CHECK-LABEL: masked_merge_no_transform2_demorgan:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl	%edx, %eax
+; CHECK-NEXT:    orl	%edi, %eax
+; CHECK-NEXT:    notl	%edi
+; CHECK-NEXT:    orl	%esi, %edi
+; CHECK-NEXT:    andl	%edi, %eax
+; CHECK-NEXT:    movl	%edi, (%rcx)
+; CHECK-NEXT:    retq
+  %not = xor i32 %a0, -1
+  %or0 = or i32 %not, %a1
+  %or1 = or i32 %a0, %a2
+  %and = and i32 %or0, %or1
+  store i32 %or0, ptr %p1
+  ret i32 %and
+}
\ No newline at end of file

woruyu · 2025-06-16T12:41:58Z

Based on my previous experience (#143855), I know these test cases are related to (m & x) | (~m & y):

  LLVM :: CodeGen/AMDGPU/bfi_int.ll
  LLVM :: CodeGen/AMDGPU/unfold-masked-merge-scalar-variablemask.ll
  LLVM :: CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll
  LLVM :: CodeGen/PowerPC/misched.ll
  LLVM :: CodeGen/RISCV/fold-masked-merge.ll
  LLVM :: CodeGen/RISCV/unfold-masked-merge-scalar-variablemask.ll
  LLVM :: CodeGen/SystemZ/fold-masked-merge.ll
  LLVM :: CodeGen/WebAssembly/simd-arith.ll
  LLVM :: CodeGen/X86/bitselect.ll
  LLVM :: CodeGen/X86/fold-masked-merge.ll
  LLVM :: CodeGen/X86/or-lea.ll
  LLVM :: CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll
  LLVM :: CodeGen/X86/unfold-masked-merge-vector-variablemask.ll

In order to write a test case for (~a | x) & (a | y), I used fold-masked-merge.ll as a reference when writing fold-masked-merge-demorgan.ll. Any suggestions on test case coverage here?

woruyu · 2025-06-16T12:43:08Z

@RKSimon, hello, any suggestions for modifications?

RKSimon

Please can you pull out the new tests into a pre-commit patch against current trunk codegen? I'll get that committed and then you can update this patch to show the codegen changes

woruyu · 2025-06-17T02:21:02Z

@RKSimon, hello: the pre-commit patch is in the first commit, and the codegen changes are in the second.

Reduces diff in #144342

RKSimon · 2025-06-17T10:30:28Z

Thanks @woruyu - I've moved foldMaskedMerge into place so the diff is clearer - please can you merge against trunk?

Reduces diff in llvm#144342

llvmbot added the backend:X86 and llvm:SelectionDAG (SelectionDAGISel as well) labels Jun 16, 2025

woruyu mentioned this pull request Jun 16, 2025

[DAG] foldMaskedMerge - add (~a | x) & (a | y) -> (a & (x ^ y)) ^y fold #143864

Open

RKSimon self-requested a review June 16, 2025 12:49

RKSimon reviewed Jun 16, 2025

View reviewed changes

woruyu added 2 commits June 17, 2025 09:38

test: add fold-masked-merge-demorgan testcase

3f80009

[DAG] add (~a | x) & (a | y) -> (a & (x ^ y)) ^y for foldMaskedMerge

dd1ad26

woruyu force-pushed the feat/add-foldMaskedMerge-demorgan branch from 8610ae3 to dd1ad26 Compare June 17, 2025 02:18

RKSimon added a commit that referenced this pull request Jun 17, 2025

[DAG] Move foldMaskedMerge before visitAND. NFC.

71f72f4

Reduces diff in #144342

ajaden-codes pushed a commit to Jaddyen/llvm-project that referenced this pull request Jun 17, 2025

[DAG] Move foldMaskedMerge before visitAND. NFC.

a6bfc3a

Reduces diff in llvm#144342

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[DAG] add (~a | x) & (a | y) -> (a & (x ^ y)) ^y for foldMaskedMerge #144342

[DAG] add (~a | x) & (a | y) -> (a & (x ^ y)) ^y for foldMaskedMerge #144342

Uh oh!

woruyu commented Jun 16, 2025

Uh oh!

llvmbot commented Jun 16, 2025 •

edited

Loading

Summary

Uh oh!

woruyu commented Jun 16, 2025

Uh oh!

woruyu commented Jun 16, 2025

Uh oh!

RKSimon left a comment

Uh oh!

woruyu commented Jun 17, 2025

Uh oh!

RKSimon commented Jun 17, 2025

Uh oh!

Uh oh!

[DAG] add (~a | x) & (a | y) -> (a & (x ^ y)) ^y for foldMaskedMerge #144342

Are you sure you want to change the base?

[DAG] add (~a | x) & (a | y) -> (a & (x ^ y)) ^y for foldMaskedMerge #144342

Uh oh!

Conversation

woruyu commented Jun 16, 2025

Summary

Uh oh!

llvmbot commented Jun 16, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Summary

Uh oh!

woruyu commented Jun 16, 2025

Uh oh!

woruyu commented Jun 16, 2025

Uh oh!

RKSimon left a comment

Choose a reason for hiding this comment

Uh oh!

woruyu commented Jun 17, 2025

Uh oh!

RKSimon commented Jun 17, 2025

Uh oh!

Uh oh!

llvmbot commented Jun 16, 2025 •

edited

Loading