-
Notifications
You must be signed in to change notification settings - Fork 14k
[ValueTracking][InstCombine] Generalize ignoreSignBitOfZero/NaN to handle more cases #141015
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-llvm-transforms Author: Yingwei Zheng (dtcxzyw) ChangesThis patch was originally part of #139861. It generalizes BTW, I find it mitigates performance regressions caused by #141010 (IR diff https://github.com/dtcxzyw/llvm-opt-benchmark/pull/2365/files). We don't need to propagate FMF from fcmp into select, since we can infer demanded properties from the user of select. Full diff: https://github.com/llvm/llvm-project/pull/141015.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 2ef233bc25d72..13cbf7b215557 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -2783,15 +2783,41 @@ static bool ignoreSignBitOfZero(Instruction &I) {
if (!I.hasOneUse())
return false;
Instruction *User = I.user_back();
+ if (auto *FPOp = dyn_cast<FPMathOperator>(User)) {
+ if (FPOp->hasNoSignedZeros())
+ return true;
+ }
- // fcmp treats both positive and negative zero as equal.
- if (User->getOpcode() == Instruction::FCmp)
+ switch (User->getOpcode()) {
+ case Instruction::FPToSI:
+ case Instruction::FPToUI:
return true;
-
- if (auto *FPOp = dyn_cast<FPMathOperator>(User))
- return FPOp->hasNoSignedZeros();
-
- return false;
+ case Instruction::FCmp:
+ // fcmp treats both positive and negative zero as equal.
+ return true;
+ case Instruction::Call:
+ if (auto *II = dyn_cast<IntrinsicInst>(User)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::fabs:
+ return true;
+ case Intrinsic::copysign:
+ return II->getArgOperand(0) == &I;
+ case Intrinsic::is_fpclass:
+ case Intrinsic::vp_is_fpclass: {
+ auto Test =
+ static_cast<FPClassTest>(
+ cast<ConstantInt>(II->getArgOperand(1))->getZExtValue()) &
+ FPClassTest::fcZero;
+ return Test == FPClassTest::fcZero || Test == FPClassTest::fcNone;
+ }
+ default:
+ return false;
+ }
+ }
+ return false;
+ default:
+ return false;
+ }
}
/// Return true if the sign bit of result can be ignored when the result is NaN.
@@ -2803,15 +2829,67 @@ static bool ignoreSignBitOfNaN(Instruction &I) {
if (!I.hasOneUse())
return false;
Instruction *User = I.user_back();
+ if (auto *FPOp = dyn_cast<FPMathOperator>(User)) {
+ if (FPOp->hasNoNaNs())
+ return true;
+ }
- // fcmp ignores the sign bit of NaN.
- if (User->getOpcode() == Instruction::FCmp)
+ switch (User->getOpcode()) {
+ case Instruction::FPToSI:
+ case Instruction::FPToUI:
return true;
+ // Proper FP math operations ignore the sign bit of NaN.
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::FCmp:
+ return true;
+ // Bitwise FP operations should preserve the sign bit of NaN.
+ case Instruction::FNeg:
+ case Instruction::Select:
+ case Instruction::PHI:
+ return false;
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ if (auto *II = dyn_cast<IntrinsicInst>(User)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::fabs:
+ return true;
+ case Intrinsic::copysign:
+ return II->getArgOperand(0) == &I;
+ // Other proper FP math intrinsics ignore the sign bit of NaN.
+ case Intrinsic::maxnum:
+ case Intrinsic::minnum:
+ case Intrinsic::maximum:
+ case Intrinsic::minimum:
+ case Intrinsic::maximumnum:
+ case Intrinsic::minimumnum:
+ case Intrinsic::canonicalize:
+ case Intrinsic::fma:
+ case Intrinsic::fmuladd:
+ case Intrinsic::sqrt:
+ case Intrinsic::pow:
+ case Intrinsic::powi:
+ case Intrinsic::fptoui_sat:
+ case Intrinsic::fptosi_sat:
+ case Intrinsic::is_fpclass:
+ return true;
+ default:
+ return false;
+ }
+ }
- if (auto *FPOp = dyn_cast<FPMathOperator>(User))
- return FPOp->hasNoNaNs();
-
- return false;
+ FPClassTest NoFPClass = cast<CallBase>(User)->getParamNoFPClass(
+ I.uses().begin()->getOperandNo());
+ return NoFPClass & FPClassTest::fcNan;
+ }
+ default:
+ return false;
+ }
}
// Canonicalize select with fcmp to fabs(). -0.0 makes this tricky. We need
diff --git a/llvm/test/Transforms/InstCombine/fabs.ll b/llvm/test/Transforms/InstCombine/fabs.ll
index ab4376bf78a67..b889f6c2c028c 100644
--- a/llvm/test/Transforms/InstCombine/fabs.ll
+++ b/llvm/test/Transforms/InstCombine/fabs.ll
@@ -1329,12 +1329,9 @@ define float @test_fabs_fsub_used_by_fpop_nnan(float %x, float %y) {
ret float %add
}
-; TODO: fadd ignores the sign bit of NaN.
define float @test_fabs_used_by_fpop_nsz(float %x, float %y) {
; CHECK-LABEL: @test_fabs_used_by_fpop_nsz(
-; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[X:%.*]], 0.000000e+00
-; CHECK-NEXT: [[NEG:%.*]] = fneg float [[X]]
-; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[X]], float [[NEG]]
+; CHECK-NEXT: [[SEL:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
; CHECK-NEXT: [[ADD:%.*]] = fadd nsz float [[SEL]], [[Y:%.*]]
; CHECK-NEXT: ret float [[ADD]]
;
@@ -1345,13 +1342,9 @@ define float @test_fabs_used_by_fpop_nsz(float %x, float %y) {
ret float %add
}
-; TODO: copysign ignores the sign bit of NaN magnitude.
define float @test_fabs_used_by_fcopysign_mag(float %x, float %y) {
; CHECK-LABEL: @test_fabs_used_by_fcopysign_mag(
-; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[X1:%.*]], 0.000000e+00
-; CHECK-NEXT: [[NEG:%.*]] = fneg float [[X1]]
-; CHECK-NEXT: [[X:%.*]] = select i1 [[CMP]], float [[X1]], float [[NEG]]
-; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[X]], float [[Y:%.*]])
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[X:%.*]], float [[Y:%.*]])
; CHECK-NEXT: ret float [[COPYSIGN]]
;
%cmp = fcmp oge float %x, 0.000000e+00
@@ -1361,6 +1354,94 @@ define float @test_fabs_used_by_fcopysign_mag(float %x, float %y) {
ret float %copysign
}
+define float @test_fabs_nsz_used_by_canonicalize(float %x) {
+; CHECK-LABEL: @test_fabs_nsz_used_by_canonicalize(
+; CHECK-NEXT: [[SEL:%.*]] = call nsz float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT: [[CANON:%.*]] = call float @llvm.canonicalize.f32(float [[SEL]])
+; CHECK-NEXT: ret float [[CANON]]
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select nsz i1 %cmp, float %x, float %neg
+ %canon = call float @llvm.canonicalize.f32(float %sel)
+ ret float %canon
+}
+
+define void @test_fabs_used_by_nofpclass_nan(float %x) {
+; CHECK-LABEL: @test_fabs_used_by_nofpclass_nan(
+; CHECK-NEXT: [[SEL:%.*]] = call nsz float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT: call void @use(float nofpclass(nan) [[SEL]])
+; CHECK-NEXT: ret void
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select nsz i1 %cmp, float %x, float %neg
+ call void @use(float nofpclass(nan) %sel)
+ ret void
+}
+
+define i32 @test_fabs_used_fptosi(float %x) {
+; CHECK-LABEL: @test_fabs_used_fptosi(
+; CHECK-NEXT: [[SEL:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT: [[FPTOSI:%.*]] = fptosi float [[SEL]] to i32
+; CHECK-NEXT: ret i32 [[FPTOSI]]
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select i1 %cmp, float %x, float %neg
+ %fptosi = fptosi float %sel to i32
+ ret i32 %fptosi
+}
+
+define i32 @test_fabs_used_fptoui(float %x) {
+; CHECK-LABEL: @test_fabs_used_fptoui(
+; CHECK-NEXT: [[SEL:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT: [[FPTOSI:%.*]] = fptoui float [[SEL]] to i32
+; CHECK-NEXT: ret i32 [[FPTOSI]]
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select i1 %cmp, float %x, float %neg
+ %fptosi = fptoui float %sel to i32
+ ret i32 %fptosi
+}
+
+define float @test_fabs_nsz_used_by_maxnum(float %x, float %y) {
+; CHECK-LABEL: @test_fabs_nsz_used_by_maxnum(
+; CHECK-NEXT: [[SEL:%.*]] = call nsz float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.maxnum.f32(float [[Y:%.*]], float [[SEL]])
+; CHECK-NEXT: ret float [[MAX]]
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select nsz i1 %cmp, float %x, float %neg
+ %max = call float @llvm.maxnum.f32(float %y, float %sel)
+ ret float %max
+}
+
+define i1 @test_fabs_used_is_fpclass_pnorm_or_nan(float %x) {
+; CHECK-LABEL: @test_fabs_used_is_fpclass_pnorm_or_nan(
+; CHECK-NEXT: [[IS_FPCLASS:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 267)
+; CHECK-NEXT: ret i1 [[IS_FPCLASS]]
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select i1 %cmp, float %x, float %neg
+ %is_fpclass = call i1 @llvm.is.fpclass.f32(float %sel, i32 259)
+ ret i1 %is_fpclass
+}
+
+define i1 @test_fabs_used_is_fpclass_zero_or_pinf(float %x) {
+; CHECK-LABEL: @test_fabs_used_is_fpclass_zero_or_pinf(
+; CHECK-NEXT: [[IS_FPCLASS:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 612)
+; CHECK-NEXT: ret i1 [[IS_FPCLASS]]
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select i1 %cmp, float %x, float %neg
+ %is_fpclass = call i1 @llvm.is.fpclass.f32(float %sel, i32 608)
+ ret i1 %is_fpclass
+}
; Negative tests
@@ -1455,3 +1536,18 @@ define float @test_fabs_used_by_select(float %x, i1 %cond) {
%sel2 = select i1 %cond, float %sel, float 0.000000e+00
ret float %sel2
}
+
+define i1 @test_fabs_used_is_fpclass_pzero(float %x) {
+; CHECK-LABEL: @test_fabs_used_is_fpclass_pzero(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[NEG:%.*]] = fneg float [[X]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[X]], float [[NEG]]
+; CHECK-NEXT: [[IS_FPCLASS:%.*]] = call i1 @llvm.is.fpclass.f32(float [[SEL]], i32 64)
+; CHECK-NEXT: ret i1 [[IS_FPCLASS]]
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select i1 %cmp, float %x, float %neg
+ %is_fpclass = call i1 @llvm.is.fpclass.f32(float %sel, i32 64)
+ ret i1 %is_fpclass
+}
|
Compile-time impact: https://llvm-compile-time-tracker.com/compare.php?from=5a3776af521b8ddc14a19d1954af64e2960d2397&to=bf96e741b99107ce14ef6e32751f73d98f8eb821&stat=instructions%3Au Note that stage2-O3 compile-time overhead is high (+0.07%). It is mainly caused by moving these two helpers into ValueTracking. |
Any more comments? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Compile-time impact looks ok.
case Instruction::FPToSI: | ||
case Instruction::FPToUI: | ||
return true; | ||
// Proper FP math operations ignore the sign bit of NaN. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For reference: https://llvm.org/docs/LangRef.html#behavior-of-floating-point-nan-values
the result has a non-deterministic sign
Previously, 3d6b539 propagates FMF from fcmp to avoid performance regressions. With the help of #139861, #141015, and #141914, we can still convert SPF into fabs/minnum/maxnum intrinsics even if some flags are missing. This patch propagates FMF from select to address the long-standing issue. Closes #140994.
…141010) Previously, llvm/llvm-project@3d6b539 propagates FMF from fcmp to avoid performance regressions. With the help of llvm/llvm-project#139861, llvm/llvm-project#141015, and llvm/llvm-project#141914, we can still convert SPF into fabs/minnum/maxnum intrinsics even if some flags are missing. This patch propagates FMF from select to address the long-standing issue. Closes llvm/llvm-project#140994.
…ndle more cases (llvm#141015) This patch was originally part of llvm#139861. It generalizes `ignoreSignBitOfZero/NaN` to handle more instructions/intrinsics. BTW, I find it mitigates performance regressions caused by llvm#141010 (IR diff https://github.com/dtcxzyw/llvm-opt-benchmark/pull/2365/files). We don't need to propagate FMF from fcmp into select, since we can infer demanded properties from the user of select.
Previously, llvm@3d6b539 propagates FMF from fcmp to avoid performance regressions. With the help of llvm#139861, llvm#141015, and llvm#141914, we can still convert SPF into fabs/minnum/maxnum intrinsics even if some flags are missing. This patch propagates FMF from select to address the long-standing issue. Closes llvm#140994.
This patch was originally part of #139861. It generalizes
ignoreSignBitOfZero/NaN
to handle more instructions/intrinsics.BTW, I find it mitigates performance regressions caused by #141010 (IR diff https://github.com/dtcxzyw/llvm-opt-benchmark/pull/2365/files). We don't need to propagate FMF from fcmp into select, since we can infer demanded properties from the user of select.