-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[InstCombine] Combine and->cmp->sel->or-disjoint into and->mul #135274
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-llvm-transforms Author: Jeffrey Byrnes (jrbyrnes) Changes: While and->cmp->sel combines into and->mul may result in worse code on some targets, this combine should be uniformly beneficial. https://alive2.llvm.org/ce/z/3Dnw2u Full diff: https://github.com/llvm/llvm-project/pull/135274.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 6cc241781d112..6dc4b97686f97 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3643,6 +3643,48 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
foldAddLikeCommutative(I.getOperand(1), I.getOperand(0),
/*NSW=*/true, /*NUW=*/true))
return R;
+
+ Value *Cond0 = nullptr, *Cond1 = nullptr;
+ ConstantInt *Op0True = nullptr, *Op0False = nullptr;
+ ConstantInt *Op1True = nullptr, *Op1False = nullptr;
+
+ // (!(A & N) ? 0 : N * C) + (!(A & M) ? 0 : M * C) -> (A & (N + M)) * C
+ if (match(I.getOperand(0), m_Select(m_Value(Cond0), m_ConstantInt(Op0True),
+ m_ConstantInt(Op0False))) &&
+ match(I.getOperand(1), m_Select(m_Value(Cond1), m_ConstantInt(Op1True),
+ m_ConstantInt(Op1False))) &&
+ Op0True->isZero() && Op1True->isZero() &&
+ Op0False->getValue().tryZExtValue() &&
+ Op1False->getValue().tryZExtValue()) {
+ CmpPredicate Pred0, Pred1;
+ Value *CmpOp0 = nullptr, *CmpOp1 = nullptr;
+ ConstantInt *Op0Cond = nullptr, *Op1Cond = nullptr;
+ if (match(Cond0,
+ m_c_ICmp(Pred0, m_Value(CmpOp0), m_ConstantInt(Op0Cond))) &&
+ match(Cond1,
+ m_c_ICmp(Pred1, m_Value(CmpOp1), m_ConstantInt(Op1Cond))) &&
+ Pred0 == ICmpInst::ICMP_EQ && Pred1 == ICmpInst::ICMP_EQ &&
+ Op0Cond->isZero() && Op1Cond->isZero()) {
+ Value *AndSrc0 = nullptr, *AndSrc1 = nullptr;
+ ConstantInt *BitSel0 = nullptr, *BitSel1 = nullptr;
+ if (match(CmpOp0, m_And(m_Value(AndSrc0), m_ConstantInt(BitSel0))) &&
+ match(CmpOp1, m_And(m_Value(AndSrc1), m_ConstantInt(BitSel1))) &&
+ AndSrc0 == AndSrc1 && BitSel0->getValue().tryZExtValue() &&
+ BitSel1->getValue().tryZExtValue()) {
+ unsigned Out0 = Op0False->getValue().getZExtValue();
+ unsigned Out1 = Op1False->getValue().getZExtValue();
+ unsigned Sel0 = BitSel0->getValue().getZExtValue();
+ unsigned Sel1 = BitSel1->getValue().getZExtValue();
+ if (!(Out0 % Sel0) && !(Out1 % Sel1) &&
+ ((Out0 / Sel0) == (Out1 / Sel1))) {
+ auto NewAnd = Builder.CreateAnd(
+ AndSrc0, ConstantInt::get(AndSrc0->getType(), Sel0 + Sel1));
+ return BinaryOperator::CreateMul(
+ NewAnd, ConstantInt::get(NewAnd->getType(), (Out1 / Sel1)));
+ }
+ }
+ }
+ }
}
Value *X, *Y;
diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll
index 95f89e4ce11cd..f2b21ca966592 100644
--- a/llvm/test/Transforms/InstCombine/or.ll
+++ b/llvm/test/Transforms/InstCombine/or.ll
@@ -1281,10 +1281,10 @@ define <16 x i1> @test51(<16 x i1> %arg, <16 x i1> %arg1) {
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i1> [[ARG:%.*]], <16 x i1> [[ARG1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 5, i32 6, i32 23, i32 24, i32 9, i32 10, i32 27, i32 28, i32 29, i32 30, i32 31>
; CHECK-NEXT: ret <16 x i1> [[TMP3]]
;
- %tmp = and <16 x i1> %arg, <i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>
- %tmp2 = and <16 x i1> %arg1, <i1 false, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true>
- %tmp3 = or <16 x i1> %tmp, %tmp2
- ret <16 x i1> %tmp3
+ %temp = and <16 x i1> %arg, <i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>
+ %temp2 = and <16 x i1> %arg1, <i1 false, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true>
+ %temp3 = or <16 x i1> %temp, %temp2
+ ret <16 x i1> %temp3
}
; This would infinite loop because it reaches a transform
@@ -2035,3 +2035,109 @@ define i32 @or_xor_and_commuted3(i32 %x, i32 %y, i32 %z) {
%or1 = or i32 %xor, %yy
ret i32 %or1
}
+
+; Positive case: single-bit masks 1 and 2 select 72 and 144, which share the
+; factor 72 (72/1 == 144/2), so the or-disjoint of the two selects is expected
+; to fold to (%in & 3) * 72.
+define i32 @add_select_cmp_and1(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_and1(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT: [[OUT:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %bitop0 = and i32 %in, 1
+ %cmp0 = icmp eq i32 %bitop0, 0
+ %bitop1 = and i32 %in, 2
+ %cmp1 = icmp eq i32 %bitop1, 0
+ %sel0 = select i1 %cmp0, i32 0, i32 72
+ %sel1 = select i1 %cmp1, i32 0, i32 144
+ %out = or disjoint i32 %sel0, %sel1
+ ret i32 %out
+}
+
+; Positive case with non-adjacent mask bits: masks 1 and 4 select 72 and 288
+; (72/1 == 288/4), so the expected result is (%in & 5) * 72.
+define i32 @add_select_cmp_and2(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_and2(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 5
+; CHECK-NEXT: [[OUT:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %bitop0 = and i32 %in, 1
+ %cmp0 = icmp eq i32 %bitop0, 0
+ %bitop1 = and i32 %in, 4
+ %cmp1 = icmp eq i32 %bitop1, 0
+ %sel0 = select i1 %cmp0, i32 0, i32 72
+ %sel1 = select i1 %cmp1, i32 0, i32 288
+ %out = or disjoint i32 %sel0, %sel1
+ ret i32 %out
+}
+
+; Chain of three selects: the inner or-disjoint (masks 1 and 2) is expected to
+; fold to (%in & 3) * 72, while the third select (mask 4, value 288) is
+; expected to stay as a select feeding the outer or-disjoint.
+define i32 @add_select_cmp_and3(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_and3(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT: [[TEMP:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT: [[BITOP2:%.*]] = and i32 [[IN]], 4
+; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[BITOP2]], 0
+; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], i32 0, i32 288
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TEMP]], [[SEL2]]
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %bitop0 = and i32 %in, 1
+ %cmp0 = icmp eq i32 %bitop0, 0
+ %bitop1 = and i32 %in, 2
+ %cmp1 = icmp eq i32 %bitop1, 0
+ %sel0 = select i1 %cmp0, i32 0, i32 72
+ %sel1 = select i1 %cmp1, i32 0, i32 144
+ %temp = or disjoint i32 %sel0, %sel1
+ %bitop2 = and i32 %in, 4
+ %cmp2 = icmp eq i32 %bitop2, 0
+ %sel2 = select i1 %cmp2, i32 0, i32 288
+ %out = or disjoint i32 %temp, %sel2
+ ret i32 %out
+}
+
+; Two independent pairs: masks 1/2 and masks 4/8 are each expected to fold to
+; an and+mul by 72 (combined masks 3 and 12); the outer or-disjoint of the two
+; muls is expected to remain.
+define i32 @add_select_cmp_and4(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_and4(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT: [[TEMP:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[IN]], 12
+; CHECK-NEXT: [[TEMP2:%.*]] = mul nuw nsw i32 [[TMP2]], 72
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TEMP]], [[TEMP2]]
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %bitop0 = and i32 %in, 1
+ %cmp0 = icmp eq i32 %bitop0, 0
+ %bitop1 = and i32 %in, 2
+ %cmp1 = icmp eq i32 %bitop1, 0
+ %sel0 = select i1 %cmp0, i32 0, i32 72
+ %sel1 = select i1 %cmp1, i32 0, i32 144
+ %temp = or disjoint i32 %sel0, %sel1
+ %bitop2 = and i32 %in, 4
+ %cmp2 = icmp eq i32 %bitop2, 0
+ %bitop3 = and i32 %in, 8
+ %cmp3 = icmp eq i32 %bitop3, 0
+ %sel2 = select i1 %cmp2, i32 0, i32 288
+ %sel3 = select i1 %cmp3, i32 0, i32 576
+ %temp2 = or disjoint i32 %sel2, %sel3
+ %out = or disjoint i32 %temp, %temp2
+ ret i32 %out
+}
+
+
+
+; Negative case: the per-mask factors differ (72/1 = 72 vs. 288/3 = 96), so the
+; selects are expected to be left unchanged.
+define i32 @add_select_cmp_and_mismatch(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_and_mismatch(
+; CHECK-NEXT: [[BITOP0:%.*]] = and i32 [[IN:%.*]], 1
+; CHECK-NEXT: [[CMP0:%.*]] = icmp eq i32 [[BITOP0]], 0
+; CHECK-NEXT: [[BITOP1:%.*]] = and i32 [[IN]], 3
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[BITOP1]], 0
+; CHECK-NEXT: [[SEL0:%.*]] = select i1 [[CMP0]], i32 0, i32 72
+; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP1]], i32 0, i32 288
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[SEL0]], [[SEL1]]
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %bitop0 = and i32 %in, 1
+ %cmp0 = icmp eq i32 %bitop0, 0
+ %bitop1 = and i32 %in, 3
+ %cmp1 = icmp eq i32 %bitop1, 0
+ %sel0 = select i1 %cmp0, i32 0, i32 72
+ %sel1 = select i1 %cmp1, i32 0, i32 288
+ %out = or disjoint i32 %sel0, %sel1
+ ret i32 %out
+}
|
Links for the other changed tests: https://alive2.llvm.org/ce/z/cDSsrr |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do not use fixed-length integers.
Can you please provide a generalized alive2 proof?
Generalized proof: https://alive2.llvm.org/ce/z/MibAcN |
✅ With the latest revision this PR passed the C/C++ code formatter. |
Change-Id: I389bba8bec937236e16eecc87688440878469472
force-push to rebase for #136367 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I thought the conclusion in a previous PR was to avoid introducing mul?
I would say it's more of an open concern than a conclusion. My thought was that
was the canonical form of
given certain conditions. In #133139 (comment) we discussed the case where %BitMask is 1 (should be lowered into trunc). And for the general case there was concern over the That said, this PR most certainly produces canonical form as it's combining 2 |
Co-authored-by: Matt Arsenault <arsenm2@gmail.com>
Ping -- any other concerns here? This is blocking a stack of reviews we would like to land asap (next PR is #136013 ) |
Co-authored-by: Yingwei Zheng <dtcxzyw@qq.com>
Change-Id: Id573c17c5abe064e75295ee8cf6d79fc0e71afad
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Thank you!
Please wait for additional approval from other reviewers :)
Ping -- wondering if I can land this? |
Ping -- seems like there are no bites on the secondary approval. I will wait a few more days for any objections to landing this. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Change-Id: I6642854a7a191ec364d4430ae70eeddf31f52685
CI Check failure does not look related to this. Thanks all for review. |
Looks like this caused an assertion failure https://lab.llvm.org/buildbot/#/builders/25/builds/8959 I've reverted for now 46828d2 Will open a reapply once it's resolved. |
While and->cmp->sel combines into and->mul may result in worse code on some targets, this combine should be uniformly beneficial. Proof: https://alive2.llvm.org/ce/z/MibAcN --------- Co-authored-by: Matt Arsenault <arsenm2@gmail.com> Co-authored-by: Yingwei Zheng <dtcxzyw@qq.com>
…135274) While and->cmp->sel combines into and->mul may result in worse code on some targets, this combine should be uniformly beneficial. Proof: https://alive2.llvm.org/ce/z/MibAcN --------- Co-authored-by: Matt Arsenault <arsenm2@gmail.com> Co-authored-by: Yingwei Zheng <dtcxzyw@qq.com>
llvm#135274)" This reverts commit c49c7dd.
#142035) Reland of #135274 The commit to land the original PR was blamelisted for two types of failures: https://lab.llvm.org/buildbot/#/builders/24/builds/8932 https://lab.llvm.org/buildbot/#/builders/198/builds/4844 The second of which seems to be unrelated to the PR and seemingly fixed by 6ee2453 I've addressed the fix to the other issue with the latest commit in this PR b24f473 . This is the only difference between this PR and the previously accepted PR. --------- Co-authored-by: Matt Arsenault <arsenm2@gmail.com> Co-authored-by: Yingwei Zheng <dtcxzyw@qq.com>
…nto and->mul (#142035) Reland of llvm/llvm-project#135274 The commit to land the original PR was blamelisted for two types of failures: https://lab.llvm.org/buildbot/#/builders/24/builds/8932 https://lab.llvm.org/buildbot/#/builders/198/builds/4844 The second of which seems to be unrelated to the PR and seemingly fixed by llvm/llvm-project@6ee2453 I've addressed the fix to the other issue with the latest commit in this PR b24f4731aaeb753c9269dbd9926cc83c7456f98e . This is the only difference between this PR and the previously accepted PR. --------- Co-authored-by: Matt Arsenault <arsenm2@gmail.com> Co-authored-by: Yingwei Zheng <dtcxzyw@qq.com>
…135274) While and->cmp->sel combines into and->mul may result in worse code on some targets, this combine should be uniformly beneficial. Proof: https://alive2.llvm.org/ce/z/MibAcN --------- Co-authored-by: Matt Arsenault <arsenm2@gmail.com> Co-authored-by: Yingwei Zheng <dtcxzyw@qq.com>
llvm#135274)" This reverts commit c49c7dd.
…136013) The canonical pattern for bitmasked mul is currently ``` %val = and %x, %bitMask // where %bitMask is some constant %cmp = icmp eq %val, 0 %sel = select %cmp, 0, %C // where %C is some constant = C' * %bitMask ``` In certain cases, where we are combining multiple of these bitmasked muls with common factors, we are able to optimize into and->mul (see #135274 ) This optimization lends itself to further optimizations. This PR addresses one of such optimizations. In cases where we have `or-disjoint ( mul(and (X, C1), D) , mul (and (X, C2), D))` we can combine into `mul( and (X, (C1 + C2)), D) ` provided C1 and C2 are disjoint. Generalized proof: https://alive2.llvm.org/ce/z/MQYMui
…o and->mul (#136013) The canonical pattern for bitmasked mul is currently ``` %val = and %x, %bitMask // where %bitMask is some constant %cmp = icmp eq %val, 0 %sel = select %cmp, 0, %C // where %C is some constant = C' * %bitMask ``` In certain cases, where we are combining multiple of these bitmasked muls with common factors, we are able to optimize into and->mul (see llvm/llvm-project#135274 ) This optimization lends itself to further optimizations. This PR addresses one of such optimizations. In cases where we have `or-disjoint ( mul(and (X, C1), D) , mul (and (X, C2), D))` we can combine into `mul( and (X, (C1 + C2)), D) ` provided C1 and C2 are disjoint. Generalized proof: https://alive2.llvm.org/ce/z/MQYMui
…lvm#136013) The canonical pattern for bitmasked mul is currently ``` %val = and %x, %bitMask // where %bitMask is some constant %cmp = icmp eq %val, 0 %sel = select %cmp, 0, %C // where %C is some constant = C' * %bitMask ``` In certain cases, where we are combining multiple of these bitmasked muls with common factors, we are able to optimize into and->mul (see llvm#135274 ) This optimization lends itself to further optimizations. This PR addresses one of such optimizations. In cases where we have `or-disjoint ( mul(and (X, C1), D) , mul (and (X, C2), D))` we can combine into `mul( and (X, (C1 + C2)), D) ` provided C1 and C2 are disjoint. Generalized proof: https://alive2.llvm.org/ce/z/MQYMui
…lvm#136013) The canonical pattern for bitmasked mul is currently ``` %val = and %x, %bitMask // where %bitMask is some constant %cmp = icmp eq %val, 0 %sel = select %cmp, 0, %C // where %C is some constant = C' * %bitMask ``` In certain cases, where we are combining multiple of these bitmasked muls with common factors, we are able to optimize into and->mul (see llvm#135274 ) This optimization lends itself to further optimizations. This PR addresses one of such optimizations. In cases where we have `or-disjoint ( mul(and (X, C1), D) , mul (and (X, C2), D))` we can combine into `mul( and (X, (C1 + C2)), D) ` provided C1 and C2 are disjoint. Generalized proof: https://alive2.llvm.org/ce/z/MQYMui
llvm#142035) Reland of llvm#135274 The commit to land the original PR was blamelisted for two types of failures: https://lab.llvm.org/buildbot/#/builders/24/builds/8932 https://lab.llvm.org/buildbot/#/builders/198/builds/4844 The second of which seems to be unrelated to the PR and seemingly fixed by llvm@6ee2453 I've addressed the fix to the other issue with the latest commit in this PR b24f473 . This is the only difference between this PR and the previously accepted PR. --------- Co-authored-by: Matt Arsenault <arsenm2@gmail.com> Co-authored-by: Yingwei Zheng <dtcxzyw@qq.com>
…lvm#136013) The canonical pattern for bitmasked mul is currently ``` %val = and %x, %bitMask // where %bitMask is some constant %cmp = icmp eq %val, 0 %sel = select %cmp, 0, %C // where %C is some constant = C' * %bitMask ``` In certain cases, where we are combining multiple of these bitmasked muls with common factors, we are able to optimize into and->mul (see llvm#135274 ) This optimization lends itself to further optimizations. This PR addresses one of such optimizations. In cases where we have `or-disjoint ( mul(and (X, C1), D) , mul (and (X, C2), D))` we can combine into `mul( and (X, (C1 + C2)), D) ` provided C1 and C2 are disjoint. Generalized proof: https://alive2.llvm.org/ce/z/MQYMui
While and->cmp->sel combines into and->mul may result in worse code on some targets, this combine should be uniformly beneficial.
Proof: https://alive2.llvm.org/ce/z/MibAcN