[AArch64] Check for negative numbers when adjusting icmps #141151
Conversation
@llvm/pr-subscribers-backend-aarch64

Author: AZero13 (AZero13)

Changes

This relies on the fact that we can use tst and ands for comparisons, as done by emitComparison. I know there has to be a better way to do this, but this is a first draft: it works, and some feedback would be nice. Relies on #140999.

Patch is 43.04 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/141151.diff

14 Files Affected:
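For reference, the immediate-legality logic at the core of the patch, extracted as a minimal self-contained sketch (same names and bit tests as in the diff that follows; only the LLVM_DEBUG output is dropped):

```cpp
#include <cstdint>
#include <cstdlib>
#include <limits>

// A legal AArch64 arithmetic immediate is a 12-bit value, optionally
// shifted left by 12 (mirrors isLegalArithImmed in the diff below).
static bool isLegalArithImmed(uint64_t C) {
  return (C >> 12) == 0 || ((C & 0xfff) == 0 && (C >> 24) == 0);
}

// New in this patch: CMP with C and CMN with -C share an encoding, so a
// negative immediate is legal iff its magnitude is. INT64_MIN is rejected
// up front because std::abs(INT64_MIN) is undefined behavior.
static bool isLegalCmpImmed(int64_t Immed) {
  if (Immed == std::numeric_limits<int64_t>::min())
    return false;
  return isLegalArithImmed(static_cast<uint64_t>(std::abs(Immed)));
}
```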
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index b7f0bcfd015bc..15b9af83a293c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3647,6 +3647,16 @@ static bool isLegalArithImmed(uint64_t C) {
return IsLegal;
}
+static bool isLegalCmpImmed(int64_t Immed) {
+ if (Immed == std::numeric_limits<int64_t>::min()) {
+ LLVM_DEBUG(dbgs() << "Illegal add imm " << Immed
+ << ": avoid UB for INT64_MIN\n");
+ return false;
+ }
+ // Same encoding for add/sub, just flip the sign.
+ return isLegalArithImmed((uint64_t)std::abs(Immed));
+}
+
static bool cannotBeIntMin(SDValue CheckedVal, SelectionDAG &DAG) {
KnownBits KnownSrc = DAG.computeKnownBits(CheckedVal);
return !KnownSrc.getSignedMinValue().isMinSignedValue();
@@ -4072,57 +4082,84 @@ static unsigned getCmpOperandFoldingProfit(SDValue Op) {
return 0;
}
+// emitComparison() converts comparison with one or negative one to comparison
+// with 0.
+static bool shouldBeAdjustedToZero(SDValue LHS, int64_t C, ISD::CondCode CC) {
+ // Only works for signed comparisons.
+ if (isUnsignedIntSetCC(CC))
+ return false;
+ // Only works for ANDS and AND.
+ if (LHS.getOpcode() != ISD::AND && LHS.getOpcode() != AArch64ISD::ANDS)
+ return false;
+ if (C == 1 && (CC == ISD::SETLT || CC == ISD::SETGE))
+ return true;
+ if (C == -1 && (CC == ISD::SETLE || CC == ISD::SETGT))
+ return true;
+
+ return false;
+}
+
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &AArch64cc, SelectionDAG &DAG,
const SDLoc &dl) {
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
EVT VT = RHS.getValueType();
- uint64_t C = RHSC->getZExtValue();
- if (!isLegalArithImmed(C)) {
+ int64_t C = RHSC->getSExtValue();
+ // Special case: adjust compares with 1 or -1 so emitComparison sees 0.
+ if (shouldBeAdjustedToZero(LHS, C, CC)) {
+ if (C == 1) {
+ CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
+ } else {
+ // C is -1
+ CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
+ }
+ RHS = DAG.getConstant(0, dl, VT);
+ } else if (!isLegalCmpImmed(C)) {
// Constant does not fit, try adjusting it by one?
switch (CC) {
default:
break;
case ISD::SETLT:
case ISD::SETGE:
- if ((VT == MVT::i32 && C != 0x80000000 &&
- isLegalArithImmed((uint32_t)(C - 1))) ||
- (VT == MVT::i64 && C != 0x80000000ULL &&
- isLegalArithImmed(C - 1ULL))) {
+ if ((VT == MVT::i32 && C != INT32_MIN && isLegalCmpImmed(C - 1)) ||
+ (VT == MVT::i64 && C != INT64_MIN && isLegalCmpImmed(C - 1))) {
CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
- C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
+ C = C - 1;
+ if (VT == MVT::i32)
+ C &= 0xFFFFFFFF;
RHS = DAG.getConstant(C, dl, VT);
}
break;
case ISD::SETULT:
case ISD::SETUGE:
- if ((VT == MVT::i32 && C != 0 &&
- isLegalArithImmed((uint32_t)(C - 1))) ||
- (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
+ if ((VT == MVT::i32 && C != 0 && isLegalCmpImmed(C - 1)) ||
+ (VT == MVT::i64 && C != 0 && isLegalCmpImmed(C - 1))) {
CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
- C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
+ C = C - 1;
+ if (VT == MVT::i32)
+ C &= 0xFFFFFFFF;
RHS = DAG.getConstant(C, dl, VT);
}
break;
case ISD::SETLE:
case ISD::SETGT:
- if ((VT == MVT::i32 && C != INT32_MAX &&
- isLegalArithImmed((uint32_t)(C + 1))) ||
- (VT == MVT::i64 && C != INT64_MAX &&
- isLegalArithImmed(C + 1ULL))) {
+ if ((VT == MVT::i32 && C != INT32_MAX && isLegalCmpImmed(C + 1)) ||
+ (VT == MVT::i64 && C != INT64_MAX && isLegalCmpImmed(C + 1))) {
CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
- C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
+ C = C + 1;
+ if (VT == MVT::i32)
+ C &= 0xFFFFFFFF;
RHS = DAG.getConstant(C, dl, VT);
}
break;
case ISD::SETULE:
case ISD::SETUGT:
- if ((VT == MVT::i32 && C != UINT32_MAX &&
- isLegalArithImmed((uint32_t)(C + 1))) ||
- (VT == MVT::i64 && C != UINT64_MAX &&
- isLegalArithImmed(C + 1ULL))) {
+ if ((VT == MVT::i32 && C != -1 && isLegalCmpImmed(C + 1)) ||
+ (VT == MVT::i64 && C != -1 && isLegalCmpImmed(C + 1))) {
CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
- C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
+ C = C + 1;
+ if (VT == MVT::i32)
+ C &= 0xFFFFFFFF;
RHS = DAG.getConstant(C, dl, VT);
}
break;
@@ -4141,7 +4178,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
// can be turned into:
// cmp w12, w11, lsl #1
if (!isa<ConstantSDNode>(RHS) ||
- !isLegalArithImmed(RHS->getAsAPIntVal().abs().getZExtValue())) {
+ !isLegalCmpImmed(RHS->getAsAPIntVal().getSExtValue())) {
bool LHSIsCMN = isCMN(LHS, CC, DAG);
bool RHSIsCMN = isCMN(RHS, CC, DAG);
SDValue TheLHS = LHSIsCMN ? LHS.getOperand(1) : LHS;
@@ -17673,12 +17710,7 @@ bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const {
return false;
}
// Same encoding for add/sub, just flip the sign.
- Immed = std::abs(Immed);
- bool IsLegal = ((Immed >> 12) == 0 ||
- ((Immed & 0xfff) == 0 && Immed >> 24 == 0));
- LLVM_DEBUG(dbgs() << "Is " << Immed
- << " legal add imm: " << (IsLegal ? "yes" : "no") << "\n");
- return IsLegal;
+ return isLegalArithImmed((uint64_t)std::abs(Immed));
}
bool AArch64TargetLowering::isLegalAddScalableImmediate(int64_t Imm) const {
diff --git a/llvm/test/CodeGen/AArch64/arm64-csel.ll b/llvm/test/CodeGen/AArch64/arm64-csel.ll
index 1cf99d1b31a8b..69fad57a683ac 100644
--- a/llvm/test/CodeGen/AArch64/arm64-csel.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-csel.ll
@@ -100,9 +100,8 @@ define i32 @foo7(i32 %a, i32 %b) nounwind {
; CHECK-NEXT: subs w8, w0, w1
; CHECK-NEXT: cneg w9, w8, mi
; CHECK-NEXT: cmn w8, #1
-; CHECK-NEXT: csel w10, w9, w0, lt
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: csel w0, w10, w9, ge
+; CHECK-NEXT: csel w8, w9, w0, lt
+; CHECK-NEXT: csel w0, w8, w9, gt
; CHECK-NEXT: ret
entry:
%sub = sub nsw i32 %a, %b
diff --git a/llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll b/llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll
index 8fbed8bfdb3fd..1d60929f2b94c 100644
--- a/llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll
+++ b/llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll
@@ -14,8 +14,8 @@ define i32 @f_i8_sign_extend_inreg(i8 %in, i32 %a, i32 %b) nounwind {
; CHECK-LABEL: f_i8_sign_extend_inreg:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: csel w8, w1, w2, ge
+; CHECK-NEXT: cmn w8, #1
+; CHECK-NEXT: csel w8, w1, w2, gt
; CHECK-NEXT: add w0, w8, w0, uxtb
; CHECK-NEXT: ret
entry:
@@ -36,8 +36,8 @@ define i32 @f_i16_sign_extend_inreg(i16 %in, i32 %a, i32 %b) nounwind {
; CHECK-LABEL: f_i16_sign_extend_inreg:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: csel w8, w1, w2, ge
+; CHECK-NEXT: cmn w8, #1
+; CHECK-NEXT: csel w8, w1, w2, gt
; CHECK-NEXT: add w0, w8, w0, uxth
; CHECK-NEXT: ret
entry:
@@ -57,8 +57,8 @@ B:
define i64 @f_i32_sign_extend_inreg(i32 %in, i64 %a, i64 %b) nounwind {
; CHECK-LABEL: f_i32_sign_extend_inreg:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: csel x8, x1, x2, ge
+; CHECK-NEXT: cmn w0, #1
+; CHECK-NEXT: csel x8, x1, x2, gt
; CHECK-NEXT: add x0, x8, w0, uxtw
; CHECK-NEXT: ret
entry:
@@ -145,8 +145,8 @@ define i64 @f_i32_sign_extend_i64(i32 %in, i64 %a, i64 %b) nounwind {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtw x8, w0
-; CHECK-NEXT: cmp x8, #0
-; CHECK-NEXT: csel x8, x1, x2, ge
+; CHECK-NEXT: cmn x8, #1
+; CHECK-NEXT: csel x8, x1, x2, gt
; CHECK-NEXT: add x0, x8, w0, uxtw
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll b/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll
index e87d43161a895..c5fd9b63cce97 100644
--- a/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll
+++ b/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll
@@ -430,3 +430,175 @@ entry:
%cmp = icmp ne i32 %conv, %add
ret i1 %cmp
}
+
+define i1 @cmn_large_imm(i32 %a) {
+; CHECK-LABEL: cmn_large_imm:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #64765 // =0xfcfd
+; CHECK-NEXT: movk w8, #64764, lsl #16
+; CHECK-NEXT: cmp w0, w8
+; CHECK-NEXT: cset w0, gt
+; CHECK-NEXT: ret
+ %cmp = icmp sgt i32 %a, -50529027
+ ret i1 %cmp
+}
+
+define i1 @almost_immediate_neg_slt(i32 %x) {
+; CHECK-LABEL: almost_immediate_neg_slt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmn w0, #4079, lsl #12 // =16707584
+; CHECK-NEXT: cset w0, le
+; CHECK-NEXT: ret
+ %cmp = icmp slt i32 %x, -16707583
+ ret i1 %cmp
+}
+
+define i1 @almost_immediate_neg_slt_64(i64 %x) {
+; CHECK-LABEL: almost_immediate_neg_slt_64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmn x0, #4079, lsl #12 // =16707584
+; CHECK-NEXT: cset w0, le
+; CHECK-NEXT: ret
+ %cmp = icmp slt i64 %x, -16707583
+ ret i1 %cmp
+}
+
+define i1 @almost_immediate_neg_sge(i32 %x) {
+; CHECK-LABEL: almost_immediate_neg_sge:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmn w0, #4079, lsl #12 // =16707584
+; CHECK-NEXT: cset w0, gt
+; CHECK-NEXT: ret
+ %cmp = icmp sge i32 %x, -16707583
+ ret i1 %cmp
+}
+
+define i1 @almost_immediate_neg_sge_64(i64 %x) {
+; CHECK-LABEL: almost_immediate_neg_sge_64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmn x0, #4079, lsl #12 // =16707584
+; CHECK-NEXT: cset w0, gt
+; CHECK-NEXT: ret
+ %cmp = icmp sge i64 %x, -16707583
+ ret i1 %cmp
+}
+
+define i1 @almost_immediate_neg_uge(i32 %x) {
+; CHECK-LABEL: almost_immediate_neg_uge:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmn w0, #4079, lsl #12 // =16707584
+; CHECK-NEXT: cset w0, hi
+; CHECK-NEXT: ret
+ %cmp = icmp uge i32 %x, -16707583
+ ret i1 %cmp
+}
+
+define i1 @almost_immediate_neg_uge_64(i64 %x) {
+; CHECK-LABEL: almost_immediate_neg_uge_64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmn x0, #4079, lsl #12 // =16707584
+; CHECK-NEXT: cset w0, hi
+; CHECK-NEXT: ret
+ %cmp = icmp uge i64 %x, -16707583
+ ret i1 %cmp
+}
+
+define i1 @almost_immediate_neg_ult(i32 %x) {
+; CHECK-LABEL: almost_immediate_neg_ult:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmn w0, #4079, lsl #12 // =16707584
+; CHECK-NEXT: cset w0, ls
+; CHECK-NEXT: ret
+ %cmp = icmp ult i32 %x, -16707583
+ ret i1 %cmp
+}
+
+define i1 @almost_immediate_neg_ult_64(i64 %x) {
+; CHECK-LABEL: almost_immediate_neg_ult_64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmn x0, #4079, lsl #12 // =16707584
+; CHECK-NEXT: cset w0, ls
+; CHECK-NEXT: ret
+ %cmp = icmp ult i64 %x, -16707583
+ ret i1 %cmp
+}
+
+define i1 @almost_immediate_neg_sle(i32 %x) {
+; CHECK-LABEL: almost_immediate_neg_sle:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmn w0, #4095, lsl #12 // =16773120
+; CHECK-NEXT: cset w0, lt
+; CHECK-NEXT: ret
+ %cmp = icmp sle i32 %x, -16773121
+ ret i1 %cmp
+}
+
+define i1 @almost_immediate_neg_sle_64(i64 %x) {
+; CHECK-LABEL: almost_immediate_neg_sle_64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmn x0, #4095, lsl #12 // =16773120
+; CHECK-NEXT: cset w0, lt
+; CHECK-NEXT: ret
+ %cmp = icmp sle i64 %x, -16773121
+ ret i1 %cmp
+}
+
+define i1 @almost_immediate_neg_sgt(i32 %x) {
+; CHECK-LABEL: almost_immediate_neg_sgt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmn w0, #4095, lsl #12 // =16773120
+; CHECK-NEXT: cset w0, ge
+; CHECK-NEXT: ret
+ %cmp = icmp sgt i32 %x, -16773121
+ ret i1 %cmp
+}
+
+define i1 @almost_immediate_neg_sgt_64(i64 %x) {
+; CHECK-LABEL: almost_immediate_neg_sgt_64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmn x0, #4095, lsl #12 // =16773120
+; CHECK-NEXT: cset w0, ge
+; CHECK-NEXT: ret
+ %cmp = icmp sgt i64 %x, -16773121
+ ret i1 %cmp
+}
+
+define i1 @almost_immediate_neg_ule(i32 %x) {
+; CHECK-LABEL: almost_immediate_neg_ule:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmn w0, #4095, lsl #12 // =16773120
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+ %cmp = icmp ule i32 %x, -16773121
+ ret i1 %cmp
+}
+
+define i1 @almost_immediate_neg_ule_64(i64 %x) {
+; CHECK-LABEL: almost_immediate_neg_ule_64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmn x0, #4095, lsl #12 // =16773120
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+ %cmp = icmp ule i64 %x, -16773121
+ ret i1 %cmp
+}
+
+define i1 @almost_immediate_neg_ugt(i32 %x) {
+; CHECK-LABEL: almost_immediate_neg_ugt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmn w0, #4095, lsl #12 // =16773120
+; CHECK-NEXT: cset w0, hs
+; CHECK-NEXT: ret
+ %cmp = icmp ugt i32 %x, -16773121
+ ret i1 %cmp
+}
+
+define i1 @almost_immediate_neg_ugt_64(i64 %x) {
+; CHECK-LABEL: almost_immediate_neg_ugt_64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmn x0, #4095, lsl #12 // =16773120
+; CHECK-NEXT: cset w0, hs
+; CHECK-NEXT: ret
+ %cmp = icmp ugt i64 %x, -16773121
+ ret i1 %cmp
+}
diff --git a/llvm/test/CodeGen/AArch64/csel-subs-swapped.ll b/llvm/test/CodeGen/AArch64/csel-subs-swapped.ll
index 3971da27cdddc..7d2c7854baf3d 100644
--- a/llvm/test/CodeGen/AArch64/csel-subs-swapped.ll
+++ b/llvm/test/CodeGen/AArch64/csel-subs-swapped.ll
@@ -44,10 +44,8 @@ define i32 @sge_i32(i32 %x) {
; CHECK-LABEL: sge_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #-2097152 // =0xffe00000
-; CHECK-NEXT: mov w9, #-2097153 // =0xffdfffff
-; CHECK-NEXT: sub w8, w8, w0
-; CHECK-NEXT: cmp w0, w9
-; CHECK-NEXT: csel w0, w0, w8, gt
+; CHECK-NEXT: subs w8, w8, w0
+; CHECK-NEXT: csel w0, w0, w8, le
; CHECK-NEXT: ret
%cmp = icmp sge i32 %x, -2097152
%sub = sub i32 -2097152, %x
@@ -72,10 +70,8 @@ define i32 @sle_i32(i32 %x) {
; CHECK-LABEL: sle_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #-2097152 // =0xffe00000
-; CHECK-NEXT: mov w9, #-2097151 // =0xffe00001
-; CHECK-NEXT: sub w8, w8, w0
-; CHECK-NEXT: cmp w0, w9
-; CHECK-NEXT: csel w0, w0, w8, lt
+; CHECK-NEXT: subs w8, w8, w0
+; CHECK-NEXT: csel w0, w0, w8, ge
; CHECK-NEXT: ret
%cmp = icmp sle i32 %x, -2097152
%sub = sub i32 -2097152, %x
@@ -128,10 +124,8 @@ define i32 @ule_i32(i32 %x) {
; CHECK-LABEL: ule_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #-2097152 // =0xffe00000
-; CHECK-NEXT: mov w9, #-2097151 // =0xffe00001
-; CHECK-NEXT: sub w8, w8, w0
-; CHECK-NEXT: cmp w0, w9
-; CHECK-NEXT: csel w0, w0, w8, lo
+; CHECK-NEXT: subs w8, w8, w0
+; CHECK-NEXT: csel w0, w0, w8, hs
; CHECK-NEXT: ret
%cmp = icmp ule i32 %x, -2097152
%sub = sub i32 -2097152, %x
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
index 39e2db3a52d2c..b766da2a3a829 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
@@ -23,8 +23,9 @@ define i1 @test_signed_i1_f32(float %f) nounwind {
; CHECK-SD-LABEL: test_signed_i1_f32:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: fcvtzs w8, s0
-; CHECK-SD-NEXT: ands w8, w8, w8, asr #31
-; CHECK-SD-NEXT: csinv w8, w8, wzr, ge
+; CHECK-SD-NEXT: and w8, w8, w8, asr #31
+; CHECK-SD-NEXT: cmn w8, #1
+; CHECK-SD-NEXT: csinv w8, w8, wzr, gt
; CHECK-SD-NEXT: and w0, w8, #0x1
; CHECK-SD-NEXT: ret
;
@@ -268,8 +269,9 @@ define i1 @test_signed_i1_f64(double %f) nounwind {
; CHECK-SD-LABEL: test_signed_i1_f64:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: fcvtzs w8, d0
-; CHECK-SD-NEXT: ands w8, w8, w8, asr #31
-; CHECK-SD-NEXT: csinv w8, w8, wzr, ge
+; CHECK-SD-NEXT: and w8, w8, w8, asr #31
+; CHECK-SD-NEXT: cmn w8, #1
+; CHECK-SD-NEXT: csinv w8, w8, wzr, gt
; CHECK-SD-NEXT: and w0, w8, #0x1
; CHECK-SD-NEXT: ret
;
@@ -518,16 +520,18 @@ define i1 @test_signed_i1_f16(half %f) nounwind {
; CHECK-SD-CVT: // %bb.0:
; CHECK-SD-CVT-NEXT: fcvt s0, h0
; CHECK-SD-CVT-NEXT: fcvtzs w8, s0
-; CHECK-SD-CVT-NEXT: ands w8, w8, w8, asr #31
-; CHECK-SD-CVT-NEXT: csinv w8, w8, wzr, ge
+; CHECK-SD-CVT-NEXT: and w8, w8, w8, asr #31
+; CHECK-SD-CVT-NEXT: cmn w8, #1
+; CHECK-SD-CVT-NEXT: csinv w8, w8, wzr, gt
; CHECK-SD-CVT-NEXT: and w0, w8, #0x1
; CHECK-SD-CVT-NEXT: ret
;
; CHECK-SD-FP16-LABEL: test_signed_i1_f16:
; CHECK-SD-FP16: // %bb.0:
; CHECK-SD-FP16-NEXT: fcvtzs w8, h0
-; CHECK-SD-FP16-NEXT: ands w8, w8, w8, asr #31
-; CHECK-SD-FP16-NEXT: csinv w8, w8, wzr, ge
+; CHECK-SD-FP16-NEXT: and w8, w8, w8, asr #31
+; CHECK-SD-FP16-NEXT: cmn w8, #1
+; CHECK-SD-FP16-NEXT: csinv w8, w8, wzr, gt
; CHECK-SD-FP16-NEXT: and w0, w8, #0x1
; CHECK-SD-FP16-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index a33b1ef569fc3..3d7bcf6409438 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -2371,10 +2371,12 @@ define <2 x i1> @test_signed_v2f64_v2i1(<2 x double> %f) {
; CHECK-SD-NEXT: mov d1, v0.d[1]
; CHECK-SD-NEXT: fcvtzs w9, d0
; CHECK-SD-NEXT: fcvtzs w8, d1
-; CHECK-SD-NEXT: ands w8, w8, w8, asr #31
-; CHECK-SD-NEXT: csinv w8, w8, wzr, ge
-; CHECK-SD-NEXT: ands w9, w9, w9, asr #31
-; CHECK-SD-NEXT: csinv w9, w9, wzr, ge
+; CHECK-SD-NEXT: and w9, w9, w9, asr #31
+; CHECK-SD-NEXT: and w8, w8, w8, asr #31
+; CHECK-SD-NEXT: cmn w8, #1
+; CHECK-SD-NEXT: csinv w8, w8, wzr, gt
+; CHECK-SD-NEXT: cmn w9, #1
+; CHECK-SD-NEXT: csinv w9, w9, wzr, gt
; CHECK-SD-NEXT: fmov s0, w9
; CHECK-SD-NEXT: mov v0.s[1], w8
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
diff --git a/llvm/test/CodeGen/AArch64/select-constant-xor.ll b/llvm/test/CodeGen/AArch64/select-constant-xor.ll
index 6803411f66896..fe9a2c0fad830 100644
--- a/llvm/test/CodeGen/AArch64/select-constant-xor.ll
+++ b/llvm/test/CodeGen/AArch64/select-constant-xor.ll
@@ -168,8 +168,8 @@ define i32 @icmpasreq(i32 %input, i32 %a, i32 %b) {
define i32 @icmpasrne(i32 %input, i32 %a, i32 %b) {
; CHECK-SD-LABEL: icmpasrne:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmp w0, #0
-; CHECK-SD-NEXT: csel w0, w1, w2, ge
+; CHECK-SD-NEXT: cmn w0, #1
+; CHECK-SD-NEXT: csel w0, w1, w2, gt
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: icmpasrne:
diff --git a/llvm/test/CodeGen/AArch64/signbit-shift.ll b/llvm/test/CodeGen/AArch64/signbit-shift.ll
index 253ea1cab91fb..0e6da326a31f4 100644
--- a/llvm/test/CodeGen/AArch64/signbit-shift.ll
+++ b/llvm/test/CodeGen/AArch64/signbit-shift.ll
@@ -43,8 +43,8 @@ define i32 @sel_ifpos_tval_bigger(i32 %x) {
; CHECK-LABEL: sel_ifpos_tval_bigger:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #41 // =0x29
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cinc w0, w8, ge
+; CHECK-NEXT: cmn w0, #1
+; CHECK-NEXT: cinc w0, w8, gt
; CHECK-NEXT: ret
%c = icmp sgt i32 %x, -1
%r = select i1 %c, i32 42, i32 41
@@ -91,8 +91,8 @@ define i32 @sel_ifpos_fval_bigger(i32 %x) {
; CHECK-LABEL: sel_ifpos_fval_bigger:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #41 // =0x29
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cinc w0, w8, lt
+; CHECK-NEXT: cmn w0, #1
+; CHECK-NEXT: cinc w0, w8, le
; CHECK-NEXT: ret
%c = icmp sgt i32 %x, -1
%r = select i1 %c, i32 41, i32 42
diff --git a/llvm/test/CodeGen/AArch64/signbit-test.ll b/llvm/test/CodeGen/AArch64/signbit-test.ll
index f5eaf80cf7f8d..c74a934ee09d8 100644
--- a/llvm/test/CodeGen/AArch64/signbit-test.ll
+++ b/llvm/test/CodeGen/AArch64/signbit-test.ll
@@ -4,9 +4,9 @@
define i64 @test...
[truncated]
Note: The patch works as-is, which is why it is not a draft; it is just written in a way that still needs improvement.
We can catch negatives that can be encoded in cmn this way!
This relies on the fact that we can use tst and ands for comparisons, as done by emitComparison. Relies on llvm#140999. Fixes: llvm#141137.
Thanks - LGTM
I don't have merge perms. Can you do it please, @davemgreen?
There are SOME regressions here, but this isn't the patch's fault; overall, the improvements outweigh the regressions. Or rather, the regressions come from something the original code was never doing for the sake of optimization, but out of luck. You see, rewriting cmn #1 as cmp was in some places allowing better CSE to happen; with this patch that's undone, but it also means CSE now happens in more places. The fix is not to revert this patch, but it does expose a real problem with the design of this code: we should be taking CSE into account too. But how? Especially when CSE is probably not looking at this from the DAG level and is a different pass?
This is definitely a result of CSE no longer firing: the mistaken transforms we were doing just happened to result in better codegen for some functions.
Do not revert the patch: that is not going to fix this issue, because there are improvements in other places, and the proper fix might not involve modifying the patch at all. Rather, I'm thinking of seeing if I can get CSE to group these together.
@davemgreen @arsenm, wondering if you have any advice.
Yeah the patch was OK - could it use getNodeIfExists to check for the duplicate nodes? It looks like just using...
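As a rough, untested sketch of that suggestion (assuming getAArch64Cmp's locals DAG, LHS, RHS, and VT are in scope, and that AArch64ISD::SUBS with a (VT, i32) value list is the right node to probe for):

```cpp
// Sketch only: before adjusting the immediate (C -> C-1 / C+1), probe the
// DAG for an identical flag-setting subtract with the original operands.
// If one already exists, adjusting would split it into two compares, so
// keep the original immediate and let CSE share the node.
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
bool HasDuplicate =
    DAG.getNodeIfExists(AArch64ISD::SUBS, VTs, {LHS, RHS}) != nullptr;
if (!HasDuplicate) {
  // ... perform the existing immediate adjustment ...
}
```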
#144380: another PR to address the issue.
This relies on the fact that we can use tst and ands for comparisons, as done by emitComparison.
Also has mitigations for when comparing with -1 and 1 to avoid regressions.
Fixes: #141137
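To make the 1 / -1 mitigation concrete, here is an illustrative restatement of shouldBeAdjustedToZero's condition flipping outside of LLVM; the enum and helper names are hypothetical, not LLVM's:

```cpp
#include <cstdint>

enum class Cond { SLT, SGE, SLE, SGT }; // signed predicates only

// For a signed compare of an AND/ANDS result against 1 or -1, flip the
// predicate so the RHS becomes 0; ANDS already sets NZCV against zero,
// so the separate compare disappears. Returns true if adjusted.
bool adjustToZero(Cond &CC, int64_t &C) {
  if (C == 1 && (CC == Cond::SLT || CC == Cond::SGE)) {
    CC = (CC == Cond::SLT) ? Cond::SLE : Cond::SGT; // x<1 -> x<=0, x>=1 -> x>0
    C = 0;
    return true;
  }
  if (C == -1 && (CC == Cond::SLE || CC == Cond::SGT)) {
    CC = (CC == Cond::SLE) ? Cond::SLT : Cond::SGE; // x<=-1 -> x<0, x>-1 -> x>=0
    C = 0;
    return true;
  }
  return false;
}
```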