-
Notifications
You must be signed in to change notification settings - Fork 14k
[SimplifyCFG] Emit SelectInst when folding branches to common dest with different PHI incoming values #144434
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-powerpc Author: None (HighW4y2H3ll) Changes
For example:
After: (
Patch is 150.60 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/144434.diff 28 Files Affected:
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 975ce3bef5176..3c4d14b169bfd 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1170,6 +1170,9 @@ static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
// Note that we expect to be in a block-closed SSA form for this to work!
for (Use &U : make_early_inc_range(BonusInst.uses())) {
auto *UI = cast<Instruction>(U.getUser());
+ // Avoid dangling select instructions
+ if (!UI->getParent())
+ continue;
auto *PN = dyn_cast<PHINode>(UI);
if (!PN) {
assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
@@ -3965,7 +3968,8 @@ shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI,
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
DomTreeUpdater *DTU,
MemorySSAUpdater *MSSAU,
- const TargetTransformInfo *TTI) {
+ const TargetTransformInfo *TTI,
+ SmallDenseMap<PHINode*, SelectInst*, 8> &InsertNewPHIs) {
BasicBlock *BB = BI->getParent();
BasicBlock *PredBlock = PBI->getParent();
@@ -4052,6 +4056,28 @@ static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
ValueToValueMapTy VMap; // maps original values to cloned values
cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BB, PredBlock, VMap);
+ if (!InsertNewPHIs.empty()) {
+ // Fix up the PHINode in the common successor
+ for (PHINode &PN : CommonSucc->phis()) {
+ auto It = InsertNewPHIs.find(&PN);
+ if (It != InsertNewPHIs.end() && It->first == &PN) {
+ Instruction *SI = It->second;
+ // Operands might have been promoted to bonus instructions
+ RemapInstruction(SI, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+ // Insert SelectInst as the new PHINode incoming value
+ SI->insertBefore(PredBlock->getTerminator()->getIterator());
+ // Fix PHINode
+ PN.removeIncomingValue(PredBlock);
+ PN.addIncoming(SI, PredBlock);
+ // Remove map entry
+ InsertNewPHIs.erase(It);
+ }
+ }
+ // Cleanup dangling SelectInst
+ for (SelectInst *SI : InsertNewPHIs.values())
+ delete SI;
+ }
Module *M = BB->getModule();
@@ -4111,15 +4137,48 @@ bool llvm::foldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
// With which predecessors will we want to deal with?
SmallVector<BasicBlock *, 8> Preds;
+ struct InsertPointTy {
+ InstructionCost Cost;
+ Value *TValue; // True Value
+ Value *FValue; // False Value
+ PHINode *Phi;
+ };
+ SmallDenseMap<BranchInst*, SmallVector<InsertPointTy, 8>, 8> InsertPts;
for (BasicBlock *PredBlock : predecessors(BB)) {
BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
- // Check that we have two conditional branches. If there is a PHI node in
- // the common successor, verify that the same value flows in from both
- // blocks.
- if (!PBI || PBI->isUnconditional() || !safeToMergeTerminators(BI, PBI))
+ // Check that we have two conditional branches.
+ if (!PBI || PBI->isUnconditional())
continue;
+ // If there is a PHI node in the common successor, verify that the same value flows in from both
+ // blocks. Otherwise, check whether we can create a SelectInst to combine the incoming values
+ if (!safeToMergeTerminators(BI, PBI)) {
+ if (BI == PBI)
+ continue;
+ for (BasicBlock *Succ : BI->successors()) {
+ if (llvm::is_contained(PBI->successors(), Succ)) {
+ for (PHINode &Phi : Succ->phis()) {
+ Value *IV0 = Phi.getIncomingValueForBlock(BB);
+ Value *IV1 = Phi.getIncomingValueForBlock(PredBlock);
+ InstructionCost PCost;
+ if (TTI) {
+ PCost = TTI->getCmpSelInstrCost(Instruction::Select, Phi.getType(),
+ CmpInst::makeCmpResultType(Phi.getType()),
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ }
+ auto &IP = InsertPts[PBI];
+ if (PBI->getSuccessor(0) == BB)
+ IP.emplace_back(InsertPointTy{PCost, IV0, IV1, &Phi});
+ else
+ IP.emplace_back(InsertPointTy{PCost, IV1, IV0, &Phi});
+ }
+ }
+ }
+ if (InsertPts.empty())
+ continue;
+ }
+
// Determine if the two branches share a common destination.
BasicBlock *CommonSucc;
Instruction::BinaryOps Opc;
@@ -4138,6 +4197,9 @@ bool llvm::foldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
!isa<CmpInst>(PBI->getCondition())))
Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
+ for (auto const &InsertPoints : InsertPts.values())
+ for (auto &InsertInfo : InsertPoints)
+ Cost += InsertInfo.Cost;
if (Cost > BranchFoldThreshold)
continue;
}
@@ -4203,7 +4265,15 @@ bool llvm::foldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
// Ok, we have the budget. Perform the transformation.
for (BasicBlock *PredBlock : Preds) {
auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
- return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
+ SmallDenseMap<PHINode *, SelectInst *, 8> newPhis;
+ if (InsertPts.contains(PBI)) {
+ Value *PC = PBI->getCondition();
+ for (auto const InsertInfo: InsertPts[PBI]) {
+ SelectInst *newPhi = SelectInst::Create(PC, InsertInfo.TValue, InsertInfo.FValue);
+ newPhis.insert(std::make_pair(InsertInfo.Phi, newPhi));
+ }
+ }
+ return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI, newPhis);
}
return false;
}
diff --git a/llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll b/llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll
index 9e3bb8ce8efc0..3240ef0a40ddc 100644
--- a/llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll
+++ b/llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll
@@ -49,16 +49,16 @@ define void @test_i16_2cmp_signed_2() {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, :got:cost_s_i8_i16
; CHECK-NEXT: ldr x8, [x8, :got_lo12:cost_s_i8_i16]
-; CHECK-NEXT: ldrsh w9, [x8, #2]
-; CHECK-NEXT: ldrsh w10, [x8, #4]
-; CHECK-NEXT: cmp w9, w10
-; CHECK-NEXT: b.gt .LBB1_2
-; CHECK-NEXT: // %bb.1: // %if.else
-; CHECK-NEXT: mov w9, w10
-; CHECK-NEXT: b.ge .LBB1_3
-; CHECK-NEXT: .LBB1_2: // %if.end8.sink.split
+; CHECK-NEXT: ldrh w10, [x8, #2]
+; CHECK-NEXT: ldrh w11, [x8, #4]
+; CHECK-NEXT: sxth w9, w10
+; CHECK-NEXT: cmp w9, w11, sxth
+; CHECK-NEXT: csel w9, w10, w11, gt
+; CHECK-NEXT: cmp w10, w11
+; CHECK-NEXT: b.eq .LBB1_2
+; CHECK-NEXT: // %bb.1: // %if.end8.sink.split
; CHECK-NEXT: strh w9, [x8]
-; CHECK-NEXT: .LBB1_3: // %if.end8
+; CHECK-NEXT: .LBB1_2: // %if.end8
; CHECK-NEXT: ret
entry:
%0 = load i16, ptr getelementptr inbounds (%struct.s_signed_i16, ptr @cost_s_i8_i16, i64 0, i32 1), align 2
@@ -125,13 +125,11 @@ define void @test_i16_2cmp_unsigned_2() {
; CHECK-NEXT: ldrh w9, [x8, #2]
; CHECK-NEXT: ldrh w10, [x8, #4]
; CHECK-NEXT: cmp w9, w10
-; CHECK-NEXT: b.hi .LBB3_2
-; CHECK-NEXT: // %bb.1: // %if.else
-; CHECK-NEXT: mov w9, w10
-; CHECK-NEXT: b.hs .LBB3_3
-; CHECK-NEXT: .LBB3_2: // %if.end8.sink.split
+; CHECK-NEXT: csel w9, w9, w10, hi
+; CHECK-NEXT: b.eq .LBB3_2
+; CHECK-NEXT: // %bb.1: // %if.end8.sink.split
; CHECK-NEXT: strh w9, [x8]
-; CHECK-NEXT: .LBB3_3: // %if.end8
+; CHECK-NEXT: .LBB3_2: // %if.end8
; CHECK-NEXT: ret
entry:
%0 = load i16, ptr getelementptr inbounds (%struct.s_unsigned_i16, ptr @cost_u_i16, i64 0, i32 1), align 2
@@ -204,16 +202,16 @@ define void @test_i8_2cmp_signed_2() {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, :got:cost_s
; CHECK-NEXT: ldr x8, [x8, :got_lo12:cost_s]
-; CHECK-NEXT: ldrsb w9, [x8, #1]
-; CHECK-NEXT: ldrsb w10, [x8, #2]
-; CHECK-NEXT: cmp w9, w10
-; CHECK-NEXT: b.gt .LBB5_2
-; CHECK-NEXT: // %bb.1: // %if.else
-; CHECK-NEXT: mov w9, w10
-; CHECK-NEXT: b.ge .LBB5_3
-; CHECK-NEXT: .LBB5_2: // %if.end8.sink.split
+; CHECK-NEXT: ldrb w10, [x8, #1]
+; CHECK-NEXT: ldrb w11, [x8, #2]
+; CHECK-NEXT: sxtb w9, w10
+; CHECK-NEXT: cmp w9, w11, sxtb
+; CHECK-NEXT: csel w9, w10, w11, gt
+; CHECK-NEXT: cmp w10, w11
+; CHECK-NEXT: b.eq .LBB5_2
+; CHECK-NEXT: // %bb.1: // %if.end8.sink.split
; CHECK-NEXT: strb w9, [x8]
-; CHECK-NEXT: .LBB5_3: // %if.end8
+; CHECK-NEXT: .LBB5_2: // %if.end8
; CHECK-NEXT: ret
entry:
%0 = load i8, ptr getelementptr inbounds (%struct.s_signed_i8, ptr @cost_s, i64 0, i32 1), align 2
@@ -280,13 +278,11 @@ define void @test_i8_2cmp_unsigned_2() {
; CHECK-NEXT: ldrb w9, [x8, #1]
; CHECK-NEXT: ldrb w10, [x8, #2]
; CHECK-NEXT: cmp w9, w10
-; CHECK-NEXT: b.hi .LBB7_2
-; CHECK-NEXT: // %bb.1: // %if.else
-; CHECK-NEXT: mov w9, w10
-; CHECK-NEXT: b.hs .LBB7_3
-; CHECK-NEXT: .LBB7_2: // %if.end8.sink.split
+; CHECK-NEXT: csel w9, w9, w10, hi
+; CHECK-NEXT: b.eq .LBB7_2
+; CHECK-NEXT: // %bb.1: // %if.end8.sink.split
; CHECK-NEXT: strb w9, [x8]
-; CHECK-NEXT: .LBB7_3: // %if.end8
+; CHECK-NEXT: .LBB7_2: // %if.end8
; CHECK-NEXT: ret
entry:
%0 = load i8, ptr getelementptr inbounds (%struct.s_unsigned_i8, ptr @cost_u_i8, i64 0, i32 1), align 2
diff --git a/llvm/test/CodeGen/AArch64/tailmerging_in_mbp.ll b/llvm/test/CodeGen/AArch64/tailmerging_in_mbp.ll
index 675380787af4d..75791f6589128 100644
--- a/llvm/test/CodeGen/AArch64/tailmerging_in_mbp.ll
+++ b/llvm/test/CodeGen/AArch64/tailmerging_in_mbp.ll
@@ -2,10 +2,10 @@
; CHECK-LABEL: test:
; CHECK-LABEL: %cond.false12.i
-; CHECK: b.gt
-; CHECK-NEXT: LBB0_8:
+; CHECK: b.le
+; CHECK-LABEL: LBB0_9:
; CHECK-NEXT: mov x8, x9
-; CHECK-NEXT: LBB0_9:
+; CHECK-NEXT: LBB0_10:
define i64 @test(i64 %n, ptr %a, ptr %b, ptr %c, ptr %d, ptr %e, ptr %f) {
entry:
%cmp28 = icmp sgt i64 %n, 1
diff --git a/llvm/test/CodeGen/AArch64/typepromotion-cost.ll b/llvm/test/CodeGen/AArch64/typepromotion-cost.ll
index 3aed4cb671c02..945588e5d2824 100644
--- a/llvm/test/CodeGen/AArch64/typepromotion-cost.ll
+++ b/llvm/test/CodeGen/AArch64/typepromotion-cost.ll
@@ -6,41 +6,28 @@
define i32 @needless_promotion(ptr nocapture noundef readonly %S, i64 noundef %red_cost) {
; CHECK-O2-LABEL: needless_promotion:
; CHECK-O2: // %bb.0: // %entry
-; CHECK-O2-NEXT: ldrsh w8, [x0, #4]
-; CHECK-O2-NEXT: tbnz w8, #31, .LBB0_3
-; CHECK-O2-NEXT: // %bb.1: // %lor.rhs
-; CHECK-O2-NEXT: cbz x1, .LBB0_5
-; CHECK-O2-NEXT: // %bb.2:
-; CHECK-O2-NEXT: mov w9, #2
-; CHECK-O2-NEXT: b .LBB0_4
-; CHECK-O2-NEXT: .LBB0_3:
-; CHECK-O2-NEXT: mov w9, #1
-; CHECK-O2-NEXT: .LBB0_4: // %lor.end.sink.split
-; CHECK-O2-NEXT: cmp w8, w9
-; CHECK-O2-NEXT: cset w0, eq
-; CHECK-O2-NEXT: ret
-; CHECK-O2-NEXT: .LBB0_5:
-; CHECK-O2-NEXT: mov w0, wzr
+; CHECK-O2-NEXT: ldrsh w9, [x0, #4]
+; CHECK-O2-NEXT: mov w8, #1 // =0x1
+; CHECK-O2-NEXT: cmp w9, #0
+; CHECK-O2-NEXT: cinc w8, w8, ge
+; CHECK-O2-NEXT: cmp w8, w9, uxth
+; CHECK-O2-NEXT: cset w8, eq
+; CHECK-O2-NEXT: cmp x1, #0
+; CHECK-O2-NEXT: ccmn w9, #1, #4, eq
+; CHECK-O2-NEXT: csel w0, wzr, w8, gt
; CHECK-O2-NEXT: ret
;
; CHECK-O3-LABEL: needless_promotion:
; CHECK-O3: // %bb.0: // %entry
-; CHECK-O3-NEXT: ldrsh w8, [x0, #4]
-; CHECK-O3-NEXT: tbnz w8, #31, .LBB0_3
-; CHECK-O3-NEXT: // %bb.1: // %lor.rhs
-; CHECK-O3-NEXT: cbz x1, .LBB0_4
-; CHECK-O3-NEXT: // %bb.2:
-; CHECK-O3-NEXT: mov w9, #2
-; CHECK-O3-NEXT: cmp w8, w9
-; CHECK-O3-NEXT: cset w0, eq
-; CHECK-O3-NEXT: ret
-; CHECK-O3-NEXT: .LBB0_3:
-; CHECK-O3-NEXT: mov w9, #1
-; CHECK-O3-NEXT: cmp w8, w9
-; CHECK-O3-NEXT: cset w0, eq
-; CHECK-O3-NEXT: ret
-; CHECK-O3-NEXT: .LBB0_4:
-; CHECK-O3-NEXT: mov w0, wzr
+; CHECK-O3-NEXT: ldrsh w9, [x0, #4]
+; CHECK-O3-NEXT: mov w8, #1 // =0x1
+; CHECK-O3-NEXT: cmp w9, #0
+; CHECK-O3-NEXT: cinc w8, w8, ge
+; CHECK-O3-NEXT: cmp w8, w9, uxth
+; CHECK-O3-NEXT: cset w8, eq
+; CHECK-O3-NEXT: cmp x1, #0
+; CHECK-O3-NEXT: ccmn w9, #1, #4, eq
+; CHECK-O3-NEXT: csel w0, wzr, w8, gt
; CHECK-O3-NEXT: ret
entry:
%ident = getelementptr inbounds %struct.S, ptr %S, i64 0, i32 1
diff --git a/llvm/test/CodeGen/PowerPC/ppc-ctr-dead-code.ll b/llvm/test/CodeGen/PowerPC/ppc-ctr-dead-code.ll
index bb8337d237f51..aceac8bb69b82 100644
--- a/llvm/test/CodeGen/PowerPC/ppc-ctr-dead-code.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc-ctr-dead-code.ll
@@ -8,25 +8,27 @@
define signext i32 @limit_loop(i32 signext %iters, ptr nocapture readonly %vec, i32 signext %limit) local_unnamed_addr {
; CHECK-LABEL: limit_loop:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: mr 6, 3
-; CHECK-NEXT: li 3, 0
-; CHECK-NEXT: cmpwi 6, 0
-; CHECK-NEXT: blelr 0
+; CHECK-NEXT: cmpwi 3, 0
+; CHECK-NEXT: ble 0, .LBB0_4
; CHECK-NEXT: # %bb.1: # %for.body.preheader
-; CHECK-NEXT: mtctr 6
; CHECK-NEXT: addi 4, 4, -4
-; CHECK-NEXT: b .LBB0_3
-; CHECK-NEXT: .p2align 4
-; CHECK-NEXT: .LBB0_2: # %for.cond
-; CHECK-NEXT: #
-; CHECK-NEXT: bdzlr
-; CHECK-NEXT: .LBB0_3: # %for.body
+; CHECK-NEXT: li 6, 1
+; CHECK-NEXT: .p2align 5
+; CHECK-NEXT: .LBB0_2: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: lwzu 6, 4(4)
-; CHECK-NEXT: cmpw 6, 5
-; CHECK-NEXT: blt 0, .LBB0_2
-; CHECK-NEXT: # %bb.4:
+; CHECK-NEXT: lwzu 7, 4(4)
+; CHECK-NEXT: cmpd 1, 6, 3
+; CHECK-NEXT: addi 6, 6, 1
+; CHECK-NEXT: cmpw 7, 5
+; CHECK-NEXT: crand 20, 0, 4
+; CHECK-NEXT: bc 12, 20, .LBB0_2
+; CHECK-NEXT: # %bb.3: # %cleanup.loopexit
; CHECK-NEXT: li 3, 1
+; CHECK-NEXT: isellt 3, 0, 3
+; CHECK-NEXT: clrldi 3, 3, 32
+; CHECK-NEXT: blr
+; CHECK-NEXT: .LBB0_4:
+; CHECK-NEXT: li 3, 0
; CHECK-NEXT: blr
entry:
%cmp5 = icmp sgt i32 %iters, 0
diff --git a/llvm/test/CodeGen/X86/loop-search.ll b/llvm/test/CodeGen/X86/loop-search.ll
index 0d5f97d21fb3a..4fe5c48fcd26a 100644
--- a/llvm/test/CodeGen/X86/loop-search.ll
+++ b/llvm/test/CodeGen/X86/loop-search.ll
@@ -10,24 +10,29 @@ define zeroext i1 @search(i32 %needle, ptr nocapture readonly %haystack, i32 %co
; CHECK-NEXT: testl %edx, %edx
; CHECK-NEXT: jle LBB0_5
; CHECK-NEXT: ## %bb.1: ## %for.body.preheader
-; CHECK-NEXT: movslq %edx, %rax
-; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: movslq %edx, %rcx
+; CHECK-NEXT: movl $1, %edx
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: LBB0_2: ## %for.body
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: cmpl %edi, (%rsi,%rcx,4)
-; CHECK-NEXT: je LBB0_6
-; CHECK-NEXT: ## %bb.3: ## %for.cond
+; CHECK-NEXT: movl -4(%rsi,%rdx,4), %r8d
+; CHECK-NEXT: cmpl %edi, %r8d
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: negb %al
+; CHECK-NEXT: cmpl %edi, %r8d
+; CHECK-NEXT: je LBB0_4
+; CHECK-NEXT: ## %bb.3: ## %for.body
; CHECK-NEXT: ## in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT: incq %rcx
-; CHECK-NEXT: cmpq %rax, %rcx
+; CHECK-NEXT: cmpq %rcx, %rdx
+; CHECK-NEXT: leaq 1(%rdx), %rdx
; CHECK-NEXT: jl LBB0_2
-; CHECK-NEXT: LBB0_5:
-; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: LBB0_4: ## %cleanup
+; CHECK-NEXT: andb $1, %al
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
-; CHECK-NEXT: LBB0_6:
-; CHECK-NEXT: movb $1, %al
+; CHECK-NEXT: LBB0_5:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: andb $1, %al
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
entry:
diff --git a/llvm/test/Transforms/LICM/hoist-phi.ll b/llvm/test/Transforms/LICM/hoist-phi.ll
index bf999b98a1dac..3cedb14edfa30 100644
--- a/llvm/test/Transforms/LICM/hoist-phi.ll
+++ b/llvm/test/Transforms/LICM/hoist-phi.ll
@@ -629,16 +629,16 @@ define void @triangle_phi_loopexit(i32 %x, ptr %p) {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[ADD:%.*]] = add i32 [[X:%.*]], 1
; CHECK-DISABLED-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[X]], 0
+; CHECK-DISABLED-NEXT: [[CMP1_NOT:%.*]] = xor i1 [[CMP1]], true
; CHECK-DISABLED-NEXT: [[CMP2:%.*]] = icmp sgt i32 10, [[ADD]]
+; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = select i1 [[CMP1]], i32 [[ADD]], i32 [[X]]
+; CHECK-DISABLED-NEXT: [[OR_COND:%.*]] = or i1 [[CMP1_NOT]], [[CMP2]]
+; CHECK-DISABLED-NEXT: [[CMP3:%.*]] = icmp ne i32 [[TMP0]], 0
; CHECK-DISABLED-NEXT: br label [[LOOP:%.*]]
; CHECK-DISABLED: loop:
-; CHECK-DISABLED-NEXT: br i1 [[CMP1]], label [[IF:%.*]], label [[THEN:%.*]]
-; CHECK-DISABLED: if:
-; CHECK-DISABLED-NEXT: br i1 [[CMP2]], label [[THEN]], label [[END:%.*]]
+; CHECK-DISABLED-NEXT: br i1 [[OR_COND]], label [[THEN:%.*]], label [[END:%.*]]
; CHECK-DISABLED: then:
-; CHECK-DISABLED-NEXT: [[PHI:%.*]] = phi i32 [ [[ADD]], [[IF]] ], [ [[X]], [[LOOP]] ]
-; CHECK-DISABLED-NEXT: store i32 [[PHI]], ptr [[P:%.*]], align 4
-; CHECK-DISABLED-NEXT: [[CMP3:%.*]] = icmp ne i32 [[PHI]], 0
+; CHECK-DISABLED-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4
; CHECK-DISABLED-NEXT: br i1 [[CMP3]], label [[LOOP]], label [[END]]
; CHECK-DISABLED: end:
; CHECK-DISABLED-NEXT: ret void
@@ -647,20 +647,16 @@ define void @triangle_phi_loopexit(i32 %x, ptr %p) {
; CHECK-ENABLED-NEXT: entry:
; CHECK-ENABLED-NEXT: [[ADD:%.*]] = add i32 [[X:%.*]], 1
; CHECK-ENABLED-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[X]], 0
+; CHECK-ENABLED-NEXT: [[CMP1_NOT:%.*]] = xor i1 [[CMP1]], true
; CHECK-ENABLED-NEXT: [[CMP2:%.*]] = icmp sgt i32 10, [[ADD]]
-; CHECK-ENABLED-NEXT: br i1 [[CMP1]], label [[IF_LICM:%.*]], label [[THEN_LICM:%.*]]
-; CHECK-ENABLED: if.licm:
-; CHECK-ENABLED-NEXT: br label [[THEN_LICM]]
-; CHECK-ENABLED: then.licm:
-; CHECK-ENABLED-NEXT: [[PHI:%.*]] = phi i32 [ [[ADD]], [[IF_LICM]] ], [ [[X]], [[ENTRY:%.*]] ]
-; CHECK-ENABLED-NEXT: [[CMP3:%.*]] = icmp ne i32 [[PHI]], 0
+; CHECK-ENABLED-NEXT: [[TMP0:%.*]] = select i1 [[CMP1]], i32 [[ADD]], i32 [[X]]
+; CHECK-ENABLED-NEXT: [[OR_COND:%.*]] = or i1 [[CMP1_NOT]], [[CMP2]]
+; CHECK-ENABLED-NEXT: [[CMP3:%.*]] = icmp ne i32 [[TMP0]], 0
; CHECK-ENABLED-NEXT: br label [[LOOP:%.*]]
; CHECK-ENABLED: loop:
-; CHECK-ENABLED-NEXT: br i1 [[CMP1]], label [[IF:%.*]], label [[THEN:%.*]]
-; CHECK-ENABLED: if:
-; CHECK-ENABLED-NEXT: br i1 [[CMP2]], label [[THEN]], label [[END:%.*]]
+; CHECK-ENABLED-NEXT: br i1 [[OR_COND]], label [[THEN:%.*]], label [[END:%.*]]
; CHECK-ENABLED: then:
-; CHECK-ENABLED-NEXT: store i32 [[PHI]], ptr [[P:%.*]], align 4
+; CHECK-ENABLED-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4
; CHECK-ENABLED-NEXT: br i1 [[CMP3]], label [[LOOP]], label [[END]]
; CHECK-ENABLED: end:
; CHECK-ENABLED-NEXT: ret void
diff --git a/llvm/test/Transforms/LICM/sinking.ll b/llvm/test/Transforms/LICM/sinking.ll
index e7ac07b50625a..5d369afcd2578 100644
--- a/llvm/test/Transforms/LICM/sinking.ll
+++ b/llvm/test/Transforms/LICM/sinking.ll
@@ -80,7 +80,7 @@ define double @test2c(ptr %P) {
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: br i1 true, label [[LOOP]], label [[OUT:%.*]]
; CHECK: Out:
-; CHECK-NEXT: [[A_LE:%.*]] = load double, ptr [[P:%.*]], align 8, !invariant.load !0
+; CHECK-NEXT: [[A_LE:%.*]] = load double, ptr [[P:%.*]], align 8, !invariant.load [[META0:![0-9]+]]
; CHECK-NEXT: ret double [[A_LE]]
;
br label %Loop
@@ -535,23 +535,16 @@ define i32 @test14(i32 %N, i32 %N2, i1 %C) {
; CHECK-NEXT: Entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: Loop:
-; CHECK-NEXT: [[N_ADDR_0_PN:%.*]] = phi i32 [ [[DEC:%.*]], [[CONTLOOP:%.*]] ], [ [[N:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[N_ADDR_0_PN:%.*]] = phi i32 [ [[N:%.*]], [[ENTRY:%.*]] ], [ [[DEC:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[DEC]] = add i32 [[N_ADDR_0_PN]], -1
-; CHECK-NEXT: br i1 [[C:%.*]], label [[CONTLOOP]], label [[OUT12_SPLIT_LOOP_EXIT1:%.*]]
-; CHECK: ContLoop:
; CHECK-NEXT: [[TMP_1:%.*]] = icmp ...
[truncated]
|
@llvm/pr-subscribers-backend-x86 Author: None (HighW4y2H3ll) Changes
For example:
After: (
Patch is 150.60 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/144434.diff 28 Files Affected:
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 975ce3bef5176..3c4d14b169bfd 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1170,6 +1170,9 @@ static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
// Note that we expect to be in a block-closed SSA form for this to work!
for (Use &U : make_early_inc_range(BonusInst.uses())) {
auto *UI = cast<Instruction>(U.getUser());
+ // Avoid dangling select instructions
+ if (!UI->getParent())
+ continue;
auto *PN = dyn_cast<PHINode>(UI);
if (!PN) {
assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
@@ -3965,7 +3968,8 @@ shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI,
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
DomTreeUpdater *DTU,
MemorySSAUpdater *MSSAU,
- const TargetTransformInfo *TTI) {
+ const TargetTransformInfo *TTI,
+ SmallDenseMap<PHINode*, SelectInst*, 8> &InsertNewPHIs) {
BasicBlock *BB = BI->getParent();
BasicBlock *PredBlock = PBI->getParent();
@@ -4052,6 +4056,28 @@ static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
ValueToValueMapTy VMap; // maps original values to cloned values
cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BB, PredBlock, VMap);
+ if (!InsertNewPHIs.empty()) {
+ // Fix up the PHINode in the common successor
+ for (PHINode &PN : CommonSucc->phis()) {
+ auto It = InsertNewPHIs.find(&PN);
+ if (It != InsertNewPHIs.end() && It->first == &PN) {
+ Instruction *SI = It->second;
+ // Operands might have been promoted to bonus instructions
+ RemapInstruction(SI, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+ // Insert SelectInst as the new PHINode incoming value
+ SI->insertBefore(PredBlock->getTerminator()->getIterator());
+ // Fix PHINode
+ PN.removeIncomingValue(PredBlock);
+ PN.addIncoming(SI, PredBlock);
+ // Remove map entry
+ InsertNewPHIs.erase(It);
+ }
+ }
+ // Cleanup dangling SelectInst
+ for (SelectInst *SI : InsertNewPHIs.values())
+ delete SI;
+ }
Module *M = BB->getModule();
@@ -4111,15 +4137,48 @@ bool llvm::foldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
// With which predecessors will we want to deal with?
SmallVector<BasicBlock *, 8> Preds;
+ struct InsertPointTy {
+ InstructionCost Cost;
+ Value *TValue; // True Value
+ Value *FValue; // False Value
+ PHINode *Phi;
+ };
+ SmallDenseMap<BranchInst*, SmallVector<InsertPointTy, 8>, 8> InsertPts;
for (BasicBlock *PredBlock : predecessors(BB)) {
BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
- // Check that we have two conditional branches. If there is a PHI node in
- // the common successor, verify that the same value flows in from both
- // blocks.
- if (!PBI || PBI->isUnconditional() || !safeToMergeTerminators(BI, PBI))
+ // Check that we have two conditional branches.
+ if (!PBI || PBI->isUnconditional())
continue;
+ // If there is a PHI node in the common successor, verify that the same value flows in from both
+ // blocks. Otherwise, check whether we can create a SelectInst to combine the incoming values
+ if (!safeToMergeTerminators(BI, PBI)) {
+ if (BI == PBI)
+ continue;
+ for (BasicBlock *Succ : BI->successors()) {
+ if (llvm::is_contained(PBI->successors(), Succ)) {
+ for (PHINode &Phi : Succ->phis()) {
+ Value *IV0 = Phi.getIncomingValueForBlock(BB);
+ Value *IV1 = Phi.getIncomingValueForBlock(PredBlock);
+ InstructionCost PCost;
+ if (TTI) {
+ PCost = TTI->getCmpSelInstrCost(Instruction::Select, Phi.getType(),
+ CmpInst::makeCmpResultType(Phi.getType()),
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ }
+ auto &IP = InsertPts[PBI];
+ if (PBI->getSuccessor(0) == BB)
+ IP.emplace_back(InsertPointTy{PCost, IV0, IV1, &Phi});
+ else
+ IP.emplace_back(InsertPointTy{PCost, IV1, IV0, &Phi});
+ }
+ }
+ }
+ if (InsertPts.empty())
+ continue;
+ }
+
// Determine if the two branches share a common destination.
BasicBlock *CommonSucc;
Instruction::BinaryOps Opc;
@@ -4138,6 +4197,9 @@ bool llvm::foldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
!isa<CmpInst>(PBI->getCondition())))
Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
+ for (auto const &InsertPoints : InsertPts.values())
+ for (auto &InsertInfo : InsertPoints)
+ Cost += InsertInfo.Cost;
if (Cost > BranchFoldThreshold)
continue;
}
@@ -4203,7 +4265,15 @@ bool llvm::foldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
// Ok, we have the budget. Perform the transformation.
for (BasicBlock *PredBlock : Preds) {
auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
- return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
+ SmallDenseMap<PHINode *, SelectInst *, 8> newPhis;
+ if (InsertPts.contains(PBI)) {
+ Value *PC = PBI->getCondition();
+ for (auto const InsertInfo: InsertPts[PBI]) {
+ SelectInst *newPhi = SelectInst::Create(PC, InsertInfo.TValue, InsertInfo.FValue);
+ newPhis.insert(std::make_pair(InsertInfo.Phi, newPhi));
+ }
+ }
+ return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI, newPhis);
}
return false;
}
diff --git a/llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll b/llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll
index 9e3bb8ce8efc0..3240ef0a40ddc 100644
--- a/llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll
+++ b/llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll
@@ -49,16 +49,16 @@ define void @test_i16_2cmp_signed_2() {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, :got:cost_s_i8_i16
; CHECK-NEXT: ldr x8, [x8, :got_lo12:cost_s_i8_i16]
-; CHECK-NEXT: ldrsh w9, [x8, #2]
-; CHECK-NEXT: ldrsh w10, [x8, #4]
-; CHECK-NEXT: cmp w9, w10
-; CHECK-NEXT: b.gt .LBB1_2
-; CHECK-NEXT: // %bb.1: // %if.else
-; CHECK-NEXT: mov w9, w10
-; CHECK-NEXT: b.ge .LBB1_3
-; CHECK-NEXT: .LBB1_2: // %if.end8.sink.split
+; CHECK-NEXT: ldrh w10, [x8, #2]
+; CHECK-NEXT: ldrh w11, [x8, #4]
+; CHECK-NEXT: sxth w9, w10
+; CHECK-NEXT: cmp w9, w11, sxth
+; CHECK-NEXT: csel w9, w10, w11, gt
+; CHECK-NEXT: cmp w10, w11
+; CHECK-NEXT: b.eq .LBB1_2
+; CHECK-NEXT: // %bb.1: // %if.end8.sink.split
; CHECK-NEXT: strh w9, [x8]
-; CHECK-NEXT: .LBB1_3: // %if.end8
+; CHECK-NEXT: .LBB1_2: // %if.end8
; CHECK-NEXT: ret
entry:
%0 = load i16, ptr getelementptr inbounds (%struct.s_signed_i16, ptr @cost_s_i8_i16, i64 0, i32 1), align 2
@@ -125,13 +125,11 @@ define void @test_i16_2cmp_unsigned_2() {
; CHECK-NEXT: ldrh w9, [x8, #2]
; CHECK-NEXT: ldrh w10, [x8, #4]
; CHECK-NEXT: cmp w9, w10
-; CHECK-NEXT: b.hi .LBB3_2
-; CHECK-NEXT: // %bb.1: // %if.else
-; CHECK-NEXT: mov w9, w10
-; CHECK-NEXT: b.hs .LBB3_3
-; CHECK-NEXT: .LBB3_2: // %if.end8.sink.split
+; CHECK-NEXT: csel w9, w9, w10, hi
+; CHECK-NEXT: b.eq .LBB3_2
+; CHECK-NEXT: // %bb.1: // %if.end8.sink.split
; CHECK-NEXT: strh w9, [x8]
-; CHECK-NEXT: .LBB3_3: // %if.end8
+; CHECK-NEXT: .LBB3_2: // %if.end8
; CHECK-NEXT: ret
entry:
%0 = load i16, ptr getelementptr inbounds (%struct.s_unsigned_i16, ptr @cost_u_i16, i64 0, i32 1), align 2
@@ -204,16 +202,16 @@ define void @test_i8_2cmp_signed_2() {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, :got:cost_s
; CHECK-NEXT: ldr x8, [x8, :got_lo12:cost_s]
-; CHECK-NEXT: ldrsb w9, [x8, #1]
-; CHECK-NEXT: ldrsb w10, [x8, #2]
-; CHECK-NEXT: cmp w9, w10
-; CHECK-NEXT: b.gt .LBB5_2
-; CHECK-NEXT: // %bb.1: // %if.else
-; CHECK-NEXT: mov w9, w10
-; CHECK-NEXT: b.ge .LBB5_3
-; CHECK-NEXT: .LBB5_2: // %if.end8.sink.split
+; CHECK-NEXT: ldrb w10, [x8, #1]
+; CHECK-NEXT: ldrb w11, [x8, #2]
+; CHECK-NEXT: sxtb w9, w10
+; CHECK-NEXT: cmp w9, w11, sxtb
+; CHECK-NEXT: csel w9, w10, w11, gt
+; CHECK-NEXT: cmp w10, w11
+; CHECK-NEXT: b.eq .LBB5_2
+; CHECK-NEXT: // %bb.1: // %if.end8.sink.split
; CHECK-NEXT: strb w9, [x8]
-; CHECK-NEXT: .LBB5_3: // %if.end8
+; CHECK-NEXT: .LBB5_2: // %if.end8
; CHECK-NEXT: ret
entry:
%0 = load i8, ptr getelementptr inbounds (%struct.s_signed_i8, ptr @cost_s, i64 0, i32 1), align 2
@@ -280,13 +278,11 @@ define void @test_i8_2cmp_unsigned_2() {
; CHECK-NEXT: ldrb w9, [x8, #1]
; CHECK-NEXT: ldrb w10, [x8, #2]
; CHECK-NEXT: cmp w9, w10
-; CHECK-NEXT: b.hi .LBB7_2
-; CHECK-NEXT: // %bb.1: // %if.else
-; CHECK-NEXT: mov w9, w10
-; CHECK-NEXT: b.hs .LBB7_3
-; CHECK-NEXT: .LBB7_2: // %if.end8.sink.split
+; CHECK-NEXT: csel w9, w9, w10, hi
+; CHECK-NEXT: b.eq .LBB7_2
+; CHECK-NEXT: // %bb.1: // %if.end8.sink.split
; CHECK-NEXT: strb w9, [x8]
-; CHECK-NEXT: .LBB7_3: // %if.end8
+; CHECK-NEXT: .LBB7_2: // %if.end8
; CHECK-NEXT: ret
entry:
%0 = load i8, ptr getelementptr inbounds (%struct.s_unsigned_i8, ptr @cost_u_i8, i64 0, i32 1), align 2
diff --git a/llvm/test/CodeGen/AArch64/tailmerging_in_mbp.ll b/llvm/test/CodeGen/AArch64/tailmerging_in_mbp.ll
index 675380787af4d..75791f6589128 100644
--- a/llvm/test/CodeGen/AArch64/tailmerging_in_mbp.ll
+++ b/llvm/test/CodeGen/AArch64/tailmerging_in_mbp.ll
@@ -2,10 +2,10 @@
; CHECK-LABEL: test:
; CHECK-LABEL: %cond.false12.i
-; CHECK: b.gt
-; CHECK-NEXT: LBB0_8:
+; CHECK: b.le
+; CHECK-LABEL: LBB0_9:
; CHECK-NEXT: mov x8, x9
-; CHECK-NEXT: LBB0_9:
+; CHECK-NEXT: LBB0_10:
define i64 @test(i64 %n, ptr %a, ptr %b, ptr %c, ptr %d, ptr %e, ptr %f) {
entry:
%cmp28 = icmp sgt i64 %n, 1
diff --git a/llvm/test/CodeGen/AArch64/typepromotion-cost.ll b/llvm/test/CodeGen/AArch64/typepromotion-cost.ll
index 3aed4cb671c02..945588e5d2824 100644
--- a/llvm/test/CodeGen/AArch64/typepromotion-cost.ll
+++ b/llvm/test/CodeGen/AArch64/typepromotion-cost.ll
@@ -6,41 +6,28 @@
define i32 @needless_promotion(ptr nocapture noundef readonly %S, i64 noundef %red_cost) {
; CHECK-O2-LABEL: needless_promotion:
; CHECK-O2: // %bb.0: // %entry
-; CHECK-O2-NEXT: ldrsh w8, [x0, #4]
-; CHECK-O2-NEXT: tbnz w8, #31, .LBB0_3
-; CHECK-O2-NEXT: // %bb.1: // %lor.rhs
-; CHECK-O2-NEXT: cbz x1, .LBB0_5
-; CHECK-O2-NEXT: // %bb.2:
-; CHECK-O2-NEXT: mov w9, #2
-; CHECK-O2-NEXT: b .LBB0_4
-; CHECK-O2-NEXT: .LBB0_3:
-; CHECK-O2-NEXT: mov w9, #1
-; CHECK-O2-NEXT: .LBB0_4: // %lor.end.sink.split
-; CHECK-O2-NEXT: cmp w8, w9
-; CHECK-O2-NEXT: cset w0, eq
-; CHECK-O2-NEXT: ret
-; CHECK-O2-NEXT: .LBB0_5:
-; CHECK-O2-NEXT: mov w0, wzr
+; CHECK-O2-NEXT: ldrsh w9, [x0, #4]
+; CHECK-O2-NEXT: mov w8, #1 // =0x1
+; CHECK-O2-NEXT: cmp w9, #0
+; CHECK-O2-NEXT: cinc w8, w8, ge
+; CHECK-O2-NEXT: cmp w8, w9, uxth
+; CHECK-O2-NEXT: cset w8, eq
+; CHECK-O2-NEXT: cmp x1, #0
+; CHECK-O2-NEXT: ccmn w9, #1, #4, eq
+; CHECK-O2-NEXT: csel w0, wzr, w8, gt
; CHECK-O2-NEXT: ret
;
; CHECK-O3-LABEL: needless_promotion:
; CHECK-O3: // %bb.0: // %entry
-; CHECK-O3-NEXT: ldrsh w8, [x0, #4]
-; CHECK-O3-NEXT: tbnz w8, #31, .LBB0_3
-; CHECK-O3-NEXT: // %bb.1: // %lor.rhs
-; CHECK-O3-NEXT: cbz x1, .LBB0_4
-; CHECK-O3-NEXT: // %bb.2:
-; CHECK-O3-NEXT: mov w9, #2
-; CHECK-O3-NEXT: cmp w8, w9
-; CHECK-O3-NEXT: cset w0, eq
-; CHECK-O3-NEXT: ret
-; CHECK-O3-NEXT: .LBB0_3:
-; CHECK-O3-NEXT: mov w9, #1
-; CHECK-O3-NEXT: cmp w8, w9
-; CHECK-O3-NEXT: cset w0, eq
-; CHECK-O3-NEXT: ret
-; CHECK-O3-NEXT: .LBB0_4:
-; CHECK-O3-NEXT: mov w0, wzr
+; CHECK-O3-NEXT: ldrsh w9, [x0, #4]
+; CHECK-O3-NEXT: mov w8, #1 // =0x1
+; CHECK-O3-NEXT: cmp w9, #0
+; CHECK-O3-NEXT: cinc w8, w8, ge
+; CHECK-O3-NEXT: cmp w8, w9, uxth
+; CHECK-O3-NEXT: cset w8, eq
+; CHECK-O3-NEXT: cmp x1, #0
+; CHECK-O3-NEXT: ccmn w9, #1, #4, eq
+; CHECK-O3-NEXT: csel w0, wzr, w8, gt
; CHECK-O3-NEXT: ret
entry:
%ident = getelementptr inbounds %struct.S, ptr %S, i64 0, i32 1
diff --git a/llvm/test/CodeGen/PowerPC/ppc-ctr-dead-code.ll b/llvm/test/CodeGen/PowerPC/ppc-ctr-dead-code.ll
index bb8337d237f51..aceac8bb69b82 100644
--- a/llvm/test/CodeGen/PowerPC/ppc-ctr-dead-code.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc-ctr-dead-code.ll
@@ -8,25 +8,27 @@
define signext i32 @limit_loop(i32 signext %iters, ptr nocapture readonly %vec, i32 signext %limit) local_unnamed_addr {
; CHECK-LABEL: limit_loop:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: mr 6, 3
-; CHECK-NEXT: li 3, 0
-; CHECK-NEXT: cmpwi 6, 0
-; CHECK-NEXT: blelr 0
+; CHECK-NEXT: cmpwi 3, 0
+; CHECK-NEXT: ble 0, .LBB0_4
; CHECK-NEXT: # %bb.1: # %for.body.preheader
-; CHECK-NEXT: mtctr 6
; CHECK-NEXT: addi 4, 4, -4
-; CHECK-NEXT: b .LBB0_3
-; CHECK-NEXT: .p2align 4
-; CHECK-NEXT: .LBB0_2: # %for.cond
-; CHECK-NEXT: #
-; CHECK-NEXT: bdzlr
-; CHECK-NEXT: .LBB0_3: # %for.body
+; CHECK-NEXT: li 6, 1
+; CHECK-NEXT: .p2align 5
+; CHECK-NEXT: .LBB0_2: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: lwzu 6, 4(4)
-; CHECK-NEXT: cmpw 6, 5
-; CHECK-NEXT: blt 0, .LBB0_2
-; CHECK-NEXT: # %bb.4:
+; CHECK-NEXT: lwzu 7, 4(4)
+; CHECK-NEXT: cmpd 1, 6, 3
+; CHECK-NEXT: addi 6, 6, 1
+; CHECK-NEXT: cmpw 7, 5
+; CHECK-NEXT: crand 20, 0, 4
+; CHECK-NEXT: bc 12, 20, .LBB0_2
+; CHECK-NEXT: # %bb.3: # %cleanup.loopexit
; CHECK-NEXT: li 3, 1
+; CHECK-NEXT: isellt 3, 0, 3
+; CHECK-NEXT: clrldi 3, 3, 32
+; CHECK-NEXT: blr
+; CHECK-NEXT: .LBB0_4:
+; CHECK-NEXT: li 3, 0
; CHECK-NEXT: blr
entry:
%cmp5 = icmp sgt i32 %iters, 0
diff --git a/llvm/test/CodeGen/X86/loop-search.ll b/llvm/test/CodeGen/X86/loop-search.ll
index 0d5f97d21fb3a..4fe5c48fcd26a 100644
--- a/llvm/test/CodeGen/X86/loop-search.ll
+++ b/llvm/test/CodeGen/X86/loop-search.ll
@@ -10,24 +10,29 @@ define zeroext i1 @search(i32 %needle, ptr nocapture readonly %haystack, i32 %co
; CHECK-NEXT: testl %edx, %edx
; CHECK-NEXT: jle LBB0_5
; CHECK-NEXT: ## %bb.1: ## %for.body.preheader
-; CHECK-NEXT: movslq %edx, %rax
-; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: movslq %edx, %rcx
+; CHECK-NEXT: movl $1, %edx
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: LBB0_2: ## %for.body
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: cmpl %edi, (%rsi,%rcx,4)
-; CHECK-NEXT: je LBB0_6
-; CHECK-NEXT: ## %bb.3: ## %for.cond
+; CHECK-NEXT: movl -4(%rsi,%rdx,4), %r8d
+; CHECK-NEXT: cmpl %edi, %r8d
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: negb %al
+; CHECK-NEXT: cmpl %edi, %r8d
+; CHECK-NEXT: je LBB0_4
+; CHECK-NEXT: ## %bb.3: ## %for.body
; CHECK-NEXT: ## in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT: incq %rcx
-; CHECK-NEXT: cmpq %rax, %rcx
+; CHECK-NEXT: cmpq %rcx, %rdx
+; CHECK-NEXT: leaq 1(%rdx), %rdx
; CHECK-NEXT: jl LBB0_2
-; CHECK-NEXT: LBB0_5:
-; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: LBB0_4: ## %cleanup
+; CHECK-NEXT: andb $1, %al
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
-; CHECK-NEXT: LBB0_6:
-; CHECK-NEXT: movb $1, %al
+; CHECK-NEXT: LBB0_5:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: andb $1, %al
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
entry:
diff --git a/llvm/test/Transforms/LICM/hoist-phi.ll b/llvm/test/Transforms/LICM/hoist-phi.ll
index bf999b98a1dac..3cedb14edfa30 100644
--- a/llvm/test/Transforms/LICM/hoist-phi.ll
+++ b/llvm/test/Transforms/LICM/hoist-phi.ll
@@ -629,16 +629,16 @@ define void @triangle_phi_loopexit(i32 %x, ptr %p) {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[ADD:%.*]] = add i32 [[X:%.*]], 1
; CHECK-DISABLED-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[X]], 0
+; CHECK-DISABLED-NEXT: [[CMP1_NOT:%.*]] = xor i1 [[CMP1]], true
; CHECK-DISABLED-NEXT: [[CMP2:%.*]] = icmp sgt i32 10, [[ADD]]
+; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = select i1 [[CMP1]], i32 [[ADD]], i32 [[X]]
+; CHECK-DISABLED-NEXT: [[OR_COND:%.*]] = or i1 [[CMP1_NOT]], [[CMP2]]
+; CHECK-DISABLED-NEXT: [[CMP3:%.*]] = icmp ne i32 [[TMP0]], 0
; CHECK-DISABLED-NEXT: br label [[LOOP:%.*]]
; CHECK-DISABLED: loop:
-; CHECK-DISABLED-NEXT: br i1 [[CMP1]], label [[IF:%.*]], label [[THEN:%.*]]
-; CHECK-DISABLED: if:
-; CHECK-DISABLED-NEXT: br i1 [[CMP2]], label [[THEN]], label [[END:%.*]]
+; CHECK-DISABLED-NEXT: br i1 [[OR_COND]], label [[THEN:%.*]], label [[END:%.*]]
; CHECK-DISABLED: then:
-; CHECK-DISABLED-NEXT: [[PHI:%.*]] = phi i32 [ [[ADD]], [[IF]] ], [ [[X]], [[LOOP]] ]
-; CHECK-DISABLED-NEXT: store i32 [[PHI]], ptr [[P:%.*]], align 4
-; CHECK-DISABLED-NEXT: [[CMP3:%.*]] = icmp ne i32 [[PHI]], 0
+; CHECK-DISABLED-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4
; CHECK-DISABLED-NEXT: br i1 [[CMP3]], label [[LOOP]], label [[END]]
; CHECK-DISABLED: end:
; CHECK-DISABLED-NEXT: ret void
@@ -647,20 +647,16 @@ define void @triangle_phi_loopexit(i32 %x, ptr %p) {
; CHECK-ENABLED-NEXT: entry:
; CHECK-ENABLED-NEXT: [[ADD:%.*]] = add i32 [[X:%.*]], 1
; CHECK-ENABLED-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[X]], 0
+; CHECK-ENABLED-NEXT: [[CMP1_NOT:%.*]] = xor i1 [[CMP1]], true
; CHECK-ENABLED-NEXT: [[CMP2:%.*]] = icmp sgt i32 10, [[ADD]]
-; CHECK-ENABLED-NEXT: br i1 [[CMP1]], label [[IF_LICM:%.*]], label [[THEN_LICM:%.*]]
-; CHECK-ENABLED: if.licm:
-; CHECK-ENABLED-NEXT: br label [[THEN_LICM]]
-; CHECK-ENABLED: then.licm:
-; CHECK-ENABLED-NEXT: [[PHI:%.*]] = phi i32 [ [[ADD]], [[IF_LICM]] ], [ [[X]], [[ENTRY:%.*]] ]
-; CHECK-ENABLED-NEXT: [[CMP3:%.*]] = icmp ne i32 [[PHI]], 0
+; CHECK-ENABLED-NEXT: [[TMP0:%.*]] = select i1 [[CMP1]], i32 [[ADD]], i32 [[X]]
+; CHECK-ENABLED-NEXT: [[OR_COND:%.*]] = or i1 [[CMP1_NOT]], [[CMP2]]
+; CHECK-ENABLED-NEXT: [[CMP3:%.*]] = icmp ne i32 [[TMP0]], 0
; CHECK-ENABLED-NEXT: br label [[LOOP:%.*]]
; CHECK-ENABLED: loop:
-; CHECK-ENABLED-NEXT: br i1 [[CMP1]], label [[IF:%.*]], label [[THEN:%.*]]
-; CHECK-ENABLED: if:
-; CHECK-ENABLED-NEXT: br i1 [[CMP2]], label [[THEN]], label [[END:%.*]]
+; CHECK-ENABLED-NEXT: br i1 [[OR_COND]], label [[THEN:%.*]], label [[END:%.*]]
; CHECK-ENABLED: then:
-; CHECK-ENABLED-NEXT: store i32 [[PHI]], ptr [[P:%.*]], align 4
+; CHECK-ENABLED-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4
; CHECK-ENABLED-NEXT: br i1 [[CMP3]], label [[LOOP]], label [[END]]
; CHECK-ENABLED: end:
; CHECK-ENABLED-NEXT: ret void
diff --git a/llvm/test/Transforms/LICM/sinking.ll b/llvm/test/Transforms/LICM/sinking.ll
index e7ac07b50625a..5d369afcd2578 100644
--- a/llvm/test/Transforms/LICM/sinking.ll
+++ b/llvm/test/Transforms/LICM/sinking.ll
@@ -80,7 +80,7 @@ define double @test2c(ptr %P) {
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: br i1 true, label [[LOOP]], label [[OUT:%.*]]
; CHECK: Out:
-; CHECK-NEXT: [[A_LE:%.*]] = load double, ptr [[P:%.*]], align 8, !invariant.load !0
+; CHECK-NEXT: [[A_LE:%.*]] = load double, ptr [[P:%.*]], align 8, !invariant.load [[META0:![0-9]+]]
; CHECK-NEXT: ret double [[A_LE]]
;
br label %Loop
@@ -535,23 +535,16 @@ define i32 @test14(i32 %N, i32 %N2, i1 %C) {
; CHECK-NEXT: Entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: Loop:
-; CHECK-NEXT: [[N_ADDR_0_PN:%.*]] = phi i32 [ [[DEC:%.*]], [[CONTLOOP:%.*]] ], [ [[N:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[N_ADDR_0_PN:%.*]] = phi i32 [ [[N:%.*]], [[ENTRY:%.*]] ], [ [[DEC:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[DEC]] = add i32 [[N_ADDR_0_PN]], -1
-; CHECK-NEXT: br i1 [[C:%.*]], label [[CONTLOOP]], label [[OUT12_SPLIT_LOOP_EXIT1:%.*]]
-; CHECK: ContLoop:
; CHECK-NEXT: [[TMP_1:%.*]] = icmp ...
[truncated]
|
@llvm/pr-subscribers-llvm-transforms Author: None (HighW4y2H3ll) Changes
For example:
After: (
Patch is 150.60 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/144434.diff 28 Files Affected:
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 975ce3bef5176..3c4d14b169bfd 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1170,6 +1170,9 @@ static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
// Note that we expect to be in a block-closed SSA form for this to work!
for (Use &U : make_early_inc_range(BonusInst.uses())) {
auto *UI = cast<Instruction>(U.getUser());
+ // Avoid dangling select instructions
+ if (!UI->getParent())
+ continue;
auto *PN = dyn_cast<PHINode>(UI);
if (!PN) {
assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
@@ -3965,7 +3968,8 @@ shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI,
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
DomTreeUpdater *DTU,
MemorySSAUpdater *MSSAU,
- const TargetTransformInfo *TTI) {
+ const TargetTransformInfo *TTI,
+ SmallDenseMap<PHINode*, SelectInst*, 8> &InsertNewPHIs) {
BasicBlock *BB = BI->getParent();
BasicBlock *PredBlock = PBI->getParent();
@@ -4052,6 +4056,28 @@ static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
ValueToValueMapTy VMap; // maps original values to cloned values
cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BB, PredBlock, VMap);
+ if (!InsertNewPHIs.empty()) {
+ // Fixup PHINode in the commong successor
+ for (PHINode &PN : CommonSucc->phis()) {
+ auto It = InsertNewPHIs.find(&PN);
+ if (It != InsertNewPHIs.end() && It->first == &PN) {
+ Instruction *SI = It->second;
+ // Oprands might have been promoted to bonous inst
+ RemapInstruction(SI, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+ // Insert SelectInst as the new PHINode incoming value
+ SI->insertBefore(PredBlock->getTerminator()->getIterator());
+ // Fix PHINode
+ PN.removeIncomingValue(PredBlock);
+ PN.addIncoming(SI, PredBlock);
+ // Remove map entry
+ InsertNewPHIs.erase(It);
+ }
+ }
+ // Cleanup dangling SelectInst
+ for (SelectInst *SI : InsertNewPHIs.values())
+ delete SI;
+ }
Module *M = BB->getModule();
@@ -4111,15 +4137,48 @@ bool llvm::foldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
// With which predecessors will we want to deal with?
SmallVector<BasicBlock *, 8> Preds;
+ struct InsertPointTy {
+ InstructionCost Cost;
+ Value *TValue; // True Value
+ Value *FValue; // False Value
+ PHINode *Phi;
+ };
+ SmallDenseMap<BranchInst*, SmallVector<InsertPointTy, 8>, 8> InsertPts;
for (BasicBlock *PredBlock : predecessors(BB)) {
BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
- // Check that we have two conditional branches. If there is a PHI node in
- // the common successor, verify that the same value flows in from both
- // blocks.
- if (!PBI || PBI->isUnconditional() || !safeToMergeTerminators(BI, PBI))
+ // Check that we have two conditional branches.
+ if (!PBI || PBI->isUnconditional())
continue;
+ // If there is a PHI node in the common successor, verify that the same value flows in from both
+ // blocks. Otherwise, check whether we can create a SelectInst to combine the incoming values
+ if (!safeToMergeTerminators(BI, PBI)) {
+ if (BI == PBI)
+ continue;
+ for (BasicBlock *Succ : BI->successors()) {
+ if (llvm::is_contained(PBI->successors(), Succ)) {
+ for (PHINode &Phi : Succ->phis()) {
+ Value *IV0 = Phi.getIncomingValueForBlock(BB);
+ Value *IV1 = Phi.getIncomingValueForBlock(PredBlock);
+ InstructionCost PCost;
+ if (TTI) {
+ PCost = TTI->getCmpSelInstrCost(Instruction::Select, Phi.getType(),
+ CmpInst::makeCmpResultType(Phi.getType()),
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ }
+ auto &IP = InsertPts[PBI];
+ if (PBI->getSuccessor(0) == BB)
+ IP.emplace_back(InsertPointTy{PCost, IV0, IV1, &Phi});
+ else
+ IP.emplace_back(InsertPointTy{PCost, IV1, IV0, &Phi});
+ }
+ }
+ }
+ if (InsertPts.empty())
+ continue;
+ }
+
// Determine if the two branches share a common destination.
BasicBlock *CommonSucc;
Instruction::BinaryOps Opc;
@@ -4138,6 +4197,9 @@ bool llvm::foldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
!isa<CmpInst>(PBI->getCondition())))
Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
+ for (auto const &InsertPoints : InsertPts.values())
+ for (auto &InsertInfo : InsertPoints)
+ Cost += InsertInfo.Cost;
if (Cost > BranchFoldThreshold)
continue;
}
@@ -4203,7 +4265,15 @@ bool llvm::foldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
// Ok, we have the budget. Perform the transformation.
for (BasicBlock *PredBlock : Preds) {
auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
- return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
+ SmallDenseMap<PHINode *, SelectInst *, 8> newPhis;
+ if (InsertPts.contains(PBI)) {
+ Value *PC = PBI->getCondition();
+ for (auto const InsertInfo: InsertPts[PBI]) {
+ SelectInst *newPhi = SelectInst::Create(PC, InsertInfo.TValue, InsertInfo.FValue);
+ newPhis.insert(std::make_pair(InsertInfo.Phi, newPhi));
+ }
+ }
+ return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI, newPhis);
}
return false;
}
diff --git a/llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll b/llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll
index 9e3bb8ce8efc0..3240ef0a40ddc 100644
--- a/llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll
+++ b/llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll
@@ -49,16 +49,16 @@ define void @test_i16_2cmp_signed_2() {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, :got:cost_s_i8_i16
; CHECK-NEXT: ldr x8, [x8, :got_lo12:cost_s_i8_i16]
-; CHECK-NEXT: ldrsh w9, [x8, #2]
-; CHECK-NEXT: ldrsh w10, [x8, #4]
-; CHECK-NEXT: cmp w9, w10
-; CHECK-NEXT: b.gt .LBB1_2
-; CHECK-NEXT: // %bb.1: // %if.else
-; CHECK-NEXT: mov w9, w10
-; CHECK-NEXT: b.ge .LBB1_3
-; CHECK-NEXT: .LBB1_2: // %if.end8.sink.split
+; CHECK-NEXT: ldrh w10, [x8, #2]
+; CHECK-NEXT: ldrh w11, [x8, #4]
+; CHECK-NEXT: sxth w9, w10
+; CHECK-NEXT: cmp w9, w11, sxth
+; CHECK-NEXT: csel w9, w10, w11, gt
+; CHECK-NEXT: cmp w10, w11
+; CHECK-NEXT: b.eq .LBB1_2
+; CHECK-NEXT: // %bb.1: // %if.end8.sink.split
; CHECK-NEXT: strh w9, [x8]
-; CHECK-NEXT: .LBB1_3: // %if.end8
+; CHECK-NEXT: .LBB1_2: // %if.end8
; CHECK-NEXT: ret
entry:
%0 = load i16, ptr getelementptr inbounds (%struct.s_signed_i16, ptr @cost_s_i8_i16, i64 0, i32 1), align 2
@@ -125,13 +125,11 @@ define void @test_i16_2cmp_unsigned_2() {
; CHECK-NEXT: ldrh w9, [x8, #2]
; CHECK-NEXT: ldrh w10, [x8, #4]
; CHECK-NEXT: cmp w9, w10
-; CHECK-NEXT: b.hi .LBB3_2
-; CHECK-NEXT: // %bb.1: // %if.else
-; CHECK-NEXT: mov w9, w10
-; CHECK-NEXT: b.hs .LBB3_3
-; CHECK-NEXT: .LBB3_2: // %if.end8.sink.split
+; CHECK-NEXT: csel w9, w9, w10, hi
+; CHECK-NEXT: b.eq .LBB3_2
+; CHECK-NEXT: // %bb.1: // %if.end8.sink.split
; CHECK-NEXT: strh w9, [x8]
-; CHECK-NEXT: .LBB3_3: // %if.end8
+; CHECK-NEXT: .LBB3_2: // %if.end8
; CHECK-NEXT: ret
entry:
%0 = load i16, ptr getelementptr inbounds (%struct.s_unsigned_i16, ptr @cost_u_i16, i64 0, i32 1), align 2
@@ -204,16 +202,16 @@ define void @test_i8_2cmp_signed_2() {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, :got:cost_s
; CHECK-NEXT: ldr x8, [x8, :got_lo12:cost_s]
-; CHECK-NEXT: ldrsb w9, [x8, #1]
-; CHECK-NEXT: ldrsb w10, [x8, #2]
-; CHECK-NEXT: cmp w9, w10
-; CHECK-NEXT: b.gt .LBB5_2
-; CHECK-NEXT: // %bb.1: // %if.else
-; CHECK-NEXT: mov w9, w10
-; CHECK-NEXT: b.ge .LBB5_3
-; CHECK-NEXT: .LBB5_2: // %if.end8.sink.split
+; CHECK-NEXT: ldrb w10, [x8, #1]
+; CHECK-NEXT: ldrb w11, [x8, #2]
+; CHECK-NEXT: sxtb w9, w10
+; CHECK-NEXT: cmp w9, w11, sxtb
+; CHECK-NEXT: csel w9, w10, w11, gt
+; CHECK-NEXT: cmp w10, w11
+; CHECK-NEXT: b.eq .LBB5_2
+; CHECK-NEXT: // %bb.1: // %if.end8.sink.split
; CHECK-NEXT: strb w9, [x8]
-; CHECK-NEXT: .LBB5_3: // %if.end8
+; CHECK-NEXT: .LBB5_2: // %if.end8
; CHECK-NEXT: ret
entry:
%0 = load i8, ptr getelementptr inbounds (%struct.s_signed_i8, ptr @cost_s, i64 0, i32 1), align 2
@@ -280,13 +278,11 @@ define void @test_i8_2cmp_unsigned_2() {
; CHECK-NEXT: ldrb w9, [x8, #1]
; CHECK-NEXT: ldrb w10, [x8, #2]
; CHECK-NEXT: cmp w9, w10
-; CHECK-NEXT: b.hi .LBB7_2
-; CHECK-NEXT: // %bb.1: // %if.else
-; CHECK-NEXT: mov w9, w10
-; CHECK-NEXT: b.hs .LBB7_3
-; CHECK-NEXT: .LBB7_2: // %if.end8.sink.split
+; CHECK-NEXT: csel w9, w9, w10, hi
+; CHECK-NEXT: b.eq .LBB7_2
+; CHECK-NEXT: // %bb.1: // %if.end8.sink.split
; CHECK-NEXT: strb w9, [x8]
-; CHECK-NEXT: .LBB7_3: // %if.end8
+; CHECK-NEXT: .LBB7_2: // %if.end8
; CHECK-NEXT: ret
entry:
%0 = load i8, ptr getelementptr inbounds (%struct.s_unsigned_i8, ptr @cost_u_i8, i64 0, i32 1), align 2
diff --git a/llvm/test/CodeGen/AArch64/tailmerging_in_mbp.ll b/llvm/test/CodeGen/AArch64/tailmerging_in_mbp.ll
index 675380787af4d..75791f6589128 100644
--- a/llvm/test/CodeGen/AArch64/tailmerging_in_mbp.ll
+++ b/llvm/test/CodeGen/AArch64/tailmerging_in_mbp.ll
@@ -2,10 +2,10 @@
; CHECK-LABEL: test:
; CHECK-LABEL: %cond.false12.i
-; CHECK: b.gt
-; CHECK-NEXT: LBB0_8:
+; CHECK: b.le
+; CHECK-LABEL: LBB0_9:
; CHECK-NEXT: mov x8, x9
-; CHECK-NEXT: LBB0_9:
+; CHECK-NEXT: LBB0_10:
define i64 @test(i64 %n, ptr %a, ptr %b, ptr %c, ptr %d, ptr %e, ptr %f) {
entry:
%cmp28 = icmp sgt i64 %n, 1
diff --git a/llvm/test/CodeGen/AArch64/typepromotion-cost.ll b/llvm/test/CodeGen/AArch64/typepromotion-cost.ll
index 3aed4cb671c02..945588e5d2824 100644
--- a/llvm/test/CodeGen/AArch64/typepromotion-cost.ll
+++ b/llvm/test/CodeGen/AArch64/typepromotion-cost.ll
@@ -6,41 +6,28 @@
define i32 @needless_promotion(ptr nocapture noundef readonly %S, i64 noundef %red_cost) {
; CHECK-O2-LABEL: needless_promotion:
; CHECK-O2: // %bb.0: // %entry
-; CHECK-O2-NEXT: ldrsh w8, [x0, #4]
-; CHECK-O2-NEXT: tbnz w8, #31, .LBB0_3
-; CHECK-O2-NEXT: // %bb.1: // %lor.rhs
-; CHECK-O2-NEXT: cbz x1, .LBB0_5
-; CHECK-O2-NEXT: // %bb.2:
-; CHECK-O2-NEXT: mov w9, #2
-; CHECK-O2-NEXT: b .LBB0_4
-; CHECK-O2-NEXT: .LBB0_3:
-; CHECK-O2-NEXT: mov w9, #1
-; CHECK-O2-NEXT: .LBB0_4: // %lor.end.sink.split
-; CHECK-O2-NEXT: cmp w8, w9
-; CHECK-O2-NEXT: cset w0, eq
-; CHECK-O2-NEXT: ret
-; CHECK-O2-NEXT: .LBB0_5:
-; CHECK-O2-NEXT: mov w0, wzr
+; CHECK-O2-NEXT: ldrsh w9, [x0, #4]
+; CHECK-O2-NEXT: mov w8, #1 // =0x1
+; CHECK-O2-NEXT: cmp w9, #0
+; CHECK-O2-NEXT: cinc w8, w8, ge
+; CHECK-O2-NEXT: cmp w8, w9, uxth
+; CHECK-O2-NEXT: cset w8, eq
+; CHECK-O2-NEXT: cmp x1, #0
+; CHECK-O2-NEXT: ccmn w9, #1, #4, eq
+; CHECK-O2-NEXT: csel w0, wzr, w8, gt
; CHECK-O2-NEXT: ret
;
; CHECK-O3-LABEL: needless_promotion:
; CHECK-O3: // %bb.0: // %entry
-; CHECK-O3-NEXT: ldrsh w8, [x0, #4]
-; CHECK-O3-NEXT: tbnz w8, #31, .LBB0_3
-; CHECK-O3-NEXT: // %bb.1: // %lor.rhs
-; CHECK-O3-NEXT: cbz x1, .LBB0_4
-; CHECK-O3-NEXT: // %bb.2:
-; CHECK-O3-NEXT: mov w9, #2
-; CHECK-O3-NEXT: cmp w8, w9
-; CHECK-O3-NEXT: cset w0, eq
-; CHECK-O3-NEXT: ret
-; CHECK-O3-NEXT: .LBB0_3:
-; CHECK-O3-NEXT: mov w9, #1
-; CHECK-O3-NEXT: cmp w8, w9
-; CHECK-O3-NEXT: cset w0, eq
-; CHECK-O3-NEXT: ret
-; CHECK-O3-NEXT: .LBB0_4:
-; CHECK-O3-NEXT: mov w0, wzr
+; CHECK-O3-NEXT: ldrsh w9, [x0, #4]
+; CHECK-O3-NEXT: mov w8, #1 // =0x1
+; CHECK-O3-NEXT: cmp w9, #0
+; CHECK-O3-NEXT: cinc w8, w8, ge
+; CHECK-O3-NEXT: cmp w8, w9, uxth
+; CHECK-O3-NEXT: cset w8, eq
+; CHECK-O3-NEXT: cmp x1, #0
+; CHECK-O3-NEXT: ccmn w9, #1, #4, eq
+; CHECK-O3-NEXT: csel w0, wzr, w8, gt
; CHECK-O3-NEXT: ret
entry:
%ident = getelementptr inbounds %struct.S, ptr %S, i64 0, i32 1
diff --git a/llvm/test/CodeGen/PowerPC/ppc-ctr-dead-code.ll b/llvm/test/CodeGen/PowerPC/ppc-ctr-dead-code.ll
index bb8337d237f51..aceac8bb69b82 100644
--- a/llvm/test/CodeGen/PowerPC/ppc-ctr-dead-code.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc-ctr-dead-code.ll
@@ -8,25 +8,27 @@
define signext i32 @limit_loop(i32 signext %iters, ptr nocapture readonly %vec, i32 signext %limit) local_unnamed_addr {
; CHECK-LABEL: limit_loop:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: mr 6, 3
-; CHECK-NEXT: li 3, 0
-; CHECK-NEXT: cmpwi 6, 0
-; CHECK-NEXT: blelr 0
+; CHECK-NEXT: cmpwi 3, 0
+; CHECK-NEXT: ble 0, .LBB0_4
; CHECK-NEXT: # %bb.1: # %for.body.preheader
-; CHECK-NEXT: mtctr 6
; CHECK-NEXT: addi 4, 4, -4
-; CHECK-NEXT: b .LBB0_3
-; CHECK-NEXT: .p2align 4
-; CHECK-NEXT: .LBB0_2: # %for.cond
-; CHECK-NEXT: #
-; CHECK-NEXT: bdzlr
-; CHECK-NEXT: .LBB0_3: # %for.body
+; CHECK-NEXT: li 6, 1
+; CHECK-NEXT: .p2align 5
+; CHECK-NEXT: .LBB0_2: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: lwzu 6, 4(4)
-; CHECK-NEXT: cmpw 6, 5
-; CHECK-NEXT: blt 0, .LBB0_2
-; CHECK-NEXT: # %bb.4:
+; CHECK-NEXT: lwzu 7, 4(4)
+; CHECK-NEXT: cmpd 1, 6, 3
+; CHECK-NEXT: addi 6, 6, 1
+; CHECK-NEXT: cmpw 7, 5
+; CHECK-NEXT: crand 20, 0, 4
+; CHECK-NEXT: bc 12, 20, .LBB0_2
+; CHECK-NEXT: # %bb.3: # %cleanup.loopexit
; CHECK-NEXT: li 3, 1
+; CHECK-NEXT: isellt 3, 0, 3
+; CHECK-NEXT: clrldi 3, 3, 32
+; CHECK-NEXT: blr
+; CHECK-NEXT: .LBB0_4:
+; CHECK-NEXT: li 3, 0
; CHECK-NEXT: blr
entry:
%cmp5 = icmp sgt i32 %iters, 0
diff --git a/llvm/test/CodeGen/X86/loop-search.ll b/llvm/test/CodeGen/X86/loop-search.ll
index 0d5f97d21fb3a..4fe5c48fcd26a 100644
--- a/llvm/test/CodeGen/X86/loop-search.ll
+++ b/llvm/test/CodeGen/X86/loop-search.ll
@@ -10,24 +10,29 @@ define zeroext i1 @search(i32 %needle, ptr nocapture readonly %haystack, i32 %co
; CHECK-NEXT: testl %edx, %edx
; CHECK-NEXT: jle LBB0_5
; CHECK-NEXT: ## %bb.1: ## %for.body.preheader
-; CHECK-NEXT: movslq %edx, %rax
-; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: movslq %edx, %rcx
+; CHECK-NEXT: movl $1, %edx
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: LBB0_2: ## %for.body
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: cmpl %edi, (%rsi,%rcx,4)
-; CHECK-NEXT: je LBB0_6
-; CHECK-NEXT: ## %bb.3: ## %for.cond
+; CHECK-NEXT: movl -4(%rsi,%rdx,4), %r8d
+; CHECK-NEXT: cmpl %edi, %r8d
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: negb %al
+; CHECK-NEXT: cmpl %edi, %r8d
+; CHECK-NEXT: je LBB0_4
+; CHECK-NEXT: ## %bb.3: ## %for.body
; CHECK-NEXT: ## in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT: incq %rcx
-; CHECK-NEXT: cmpq %rax, %rcx
+; CHECK-NEXT: cmpq %rcx, %rdx
+; CHECK-NEXT: leaq 1(%rdx), %rdx
; CHECK-NEXT: jl LBB0_2
-; CHECK-NEXT: LBB0_5:
-; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: LBB0_4: ## %cleanup
+; CHECK-NEXT: andb $1, %al
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
-; CHECK-NEXT: LBB0_6:
-; CHECK-NEXT: movb $1, %al
+; CHECK-NEXT: LBB0_5:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: andb $1, %al
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
entry:
diff --git a/llvm/test/Transforms/LICM/hoist-phi.ll b/llvm/test/Transforms/LICM/hoist-phi.ll
index bf999b98a1dac..3cedb14edfa30 100644
--- a/llvm/test/Transforms/LICM/hoist-phi.ll
+++ b/llvm/test/Transforms/LICM/hoist-phi.ll
@@ -629,16 +629,16 @@ define void @triangle_phi_loopexit(i32 %x, ptr %p) {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[ADD:%.*]] = add i32 [[X:%.*]], 1
; CHECK-DISABLED-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[X]], 0
+; CHECK-DISABLED-NEXT: [[CMP1_NOT:%.*]] = xor i1 [[CMP1]], true
; CHECK-DISABLED-NEXT: [[CMP2:%.*]] = icmp sgt i32 10, [[ADD]]
+; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = select i1 [[CMP1]], i32 [[ADD]], i32 [[X]]
+; CHECK-DISABLED-NEXT: [[OR_COND:%.*]] = or i1 [[CMP1_NOT]], [[CMP2]]
+; CHECK-DISABLED-NEXT: [[CMP3:%.*]] = icmp ne i32 [[TMP0]], 0
; CHECK-DISABLED-NEXT: br label [[LOOP:%.*]]
; CHECK-DISABLED: loop:
-; CHECK-DISABLED-NEXT: br i1 [[CMP1]], label [[IF:%.*]], label [[THEN:%.*]]
-; CHECK-DISABLED: if:
-; CHECK-DISABLED-NEXT: br i1 [[CMP2]], label [[THEN]], label [[END:%.*]]
+; CHECK-DISABLED-NEXT: br i1 [[OR_COND]], label [[THEN:%.*]], label [[END:%.*]]
; CHECK-DISABLED: then:
-; CHECK-DISABLED-NEXT: [[PHI:%.*]] = phi i32 [ [[ADD]], [[IF]] ], [ [[X]], [[LOOP]] ]
-; CHECK-DISABLED-NEXT: store i32 [[PHI]], ptr [[P:%.*]], align 4
-; CHECK-DISABLED-NEXT: [[CMP3:%.*]] = icmp ne i32 [[PHI]], 0
+; CHECK-DISABLED-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4
; CHECK-DISABLED-NEXT: br i1 [[CMP3]], label [[LOOP]], label [[END]]
; CHECK-DISABLED: end:
; CHECK-DISABLED-NEXT: ret void
@@ -647,20 +647,16 @@ define void @triangle_phi_loopexit(i32 %x, ptr %p) {
; CHECK-ENABLED-NEXT: entry:
; CHECK-ENABLED-NEXT: [[ADD:%.*]] = add i32 [[X:%.*]], 1
; CHECK-ENABLED-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[X]], 0
+; CHECK-ENABLED-NEXT: [[CMP1_NOT:%.*]] = xor i1 [[CMP1]], true
; CHECK-ENABLED-NEXT: [[CMP2:%.*]] = icmp sgt i32 10, [[ADD]]
-; CHECK-ENABLED-NEXT: br i1 [[CMP1]], label [[IF_LICM:%.*]], label [[THEN_LICM:%.*]]
-; CHECK-ENABLED: if.licm:
-; CHECK-ENABLED-NEXT: br label [[THEN_LICM]]
-; CHECK-ENABLED: then.licm:
-; CHECK-ENABLED-NEXT: [[PHI:%.*]] = phi i32 [ [[ADD]], [[IF_LICM]] ], [ [[X]], [[ENTRY:%.*]] ]
-; CHECK-ENABLED-NEXT: [[CMP3:%.*]] = icmp ne i32 [[PHI]], 0
+; CHECK-ENABLED-NEXT: [[TMP0:%.*]] = select i1 [[CMP1]], i32 [[ADD]], i32 [[X]]
+; CHECK-ENABLED-NEXT: [[OR_COND:%.*]] = or i1 [[CMP1_NOT]], [[CMP2]]
+; CHECK-ENABLED-NEXT: [[CMP3:%.*]] = icmp ne i32 [[TMP0]], 0
; CHECK-ENABLED-NEXT: br label [[LOOP:%.*]]
; CHECK-ENABLED: loop:
-; CHECK-ENABLED-NEXT: br i1 [[CMP1]], label [[IF:%.*]], label [[THEN:%.*]]
-; CHECK-ENABLED: if:
-; CHECK-ENABLED-NEXT: br i1 [[CMP2]], label [[THEN]], label [[END:%.*]]
+; CHECK-ENABLED-NEXT: br i1 [[OR_COND]], label [[THEN:%.*]], label [[END:%.*]]
; CHECK-ENABLED: then:
-; CHECK-ENABLED-NEXT: store i32 [[PHI]], ptr [[P:%.*]], align 4
+; CHECK-ENABLED-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4
; CHECK-ENABLED-NEXT: br i1 [[CMP3]], label [[LOOP]], label [[END]]
; CHECK-ENABLED: end:
; CHECK-ENABLED-NEXT: ret void
diff --git a/llvm/test/Transforms/LICM/sinking.ll b/llvm/test/Transforms/LICM/sinking.ll
index e7ac07b50625a..5d369afcd2578 100644
--- a/llvm/test/Transforms/LICM/sinking.ll
+++ b/llvm/test/Transforms/LICM/sinking.ll
@@ -80,7 +80,7 @@ define double @test2c(ptr %P) {
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: br i1 true, label [[LOOP]], label [[OUT:%.*]]
; CHECK: Out:
-; CHECK-NEXT: [[A_LE:%.*]] = load double, ptr [[P:%.*]], align 8, !invariant.load !0
+; CHECK-NEXT: [[A_LE:%.*]] = load double, ptr [[P:%.*]], align 8, !invariant.load [[META0:![0-9]+]]
; CHECK-NEXT: ret double [[A_LE]]
;
br label %Loop
@@ -535,23 +535,16 @@ define i32 @test14(i32 %N, i32 %N2, i1 %C) {
; CHECK-NEXT: Entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: Loop:
-; CHECK-NEXT: [[N_ADDR_0_PN:%.*]] = phi i32 [ [[DEC:%.*]], [[CONTLOOP:%.*]] ], [ [[N:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[N_ADDR_0_PN:%.*]] = phi i32 [ [[N:%.*]], [[ENTRY:%.*]] ], [ [[DEC:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[DEC]] = add i32 [[N_ADDR_0_PN]], -1
-; CHECK-NEXT: br i1 [[C:%.*]], label [[CONTLOOP]], label [[OUT12_SPLIT_LOOP_EXIT1:%.*]]
-; CHECK: ContLoop:
; CHECK-NEXT: [[TMP_1:%.*]] = icmp ...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
ab4054d
to
c02abd9
Compare
foldBranchToCommonDest
currently skips the case where the PHI node in the common successor takes different incoming values from the predecessors. We can fix this by adding a SelectInst in the predecessor. The extra instruction cost of the added SelectInst is also counted. For example:
Before:
After: (
./bin/opt -passes=simplifycfg .test.ll -S
)