Skip to content

Commit 17eb6b6

Browse files
committed
Revert "[Taildup] Don't tail-duplicate loop header with multiple successors as its latches"
This reverts commit 1f9fa54.
1 parent 0733381 commit 17eb6b6

File tree

2 files changed

+52
-53
lines changed

2 files changed

+52
-53
lines changed

llvm/lib/CodeGen/TailDuplicator.cpp

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -70,12 +70,6 @@ static cl::opt<unsigned> TailDupIndirectBranchSize(
7070
"end with indirect branches."), cl::init(20),
7171
cl::Hidden);
7272

73-
static cl::opt<unsigned> TailDupJmpTableLoopSize(
74-
"tail-dup-jmptable-loop-size",
75-
cl::desc("Maximum loop latches to consider tail duplication that are "
76-
"successors of loop header."),
77-
cl::init(128), cl::Hidden);
78-
7973
static cl::opt<bool>
8074
TailDupVerify("tail-dup-verify",
8175
cl::desc("Verify sanity of PHI instructions during taildup"),
@@ -569,29 +563,6 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
569563
if (TailBB.isSuccessor(&TailBB))
570564
return false;
571565

572-
// When doing tail-duplication with jumptable loops like:
573-
// 1 -> 2 <-> 3 |
574-
// \ <-> 4 |
575-
// \ <-> 5 |
576-
// \ <-> ... |
577-
// \---> rest |
578-
// quadratic number of edges and many more loops are added to CFG. This
579-
// may cause compile time regression when the jumptable is quite large.
580-
// So set the limit on jumptable cases.
581-
auto isLargeJumpTableLoop = [](const MachineBasicBlock &TailBB) {
582-
const SmallPtrSet<const MachineBasicBlock *, 8> Preds(TailBB.pred_begin(),
583-
TailBB.pred_end());
584-
// Check the basic block has a large number of successors, all of them only
585-
// have one successor which is the basic block itself.
586-
return llvm::count_if(
587-
TailBB.successors(), [&](const MachineBasicBlock *SuccBB) {
588-
return Preds.count(SuccBB) && SuccBB->succ_size() == 1;
589-
}) > TailDupJmpTableLoopSize;
590-
};
591-
592-
if (isLargeJumpTableLoop(TailBB))
593-
return false;
594-
595566
// Set the limit on the cost to duplicate. When optimizing for size,
596567
// duplicate only one, because one branch instruction can be eliminated to
597568
// compensate for the duplication.

llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll

Lines changed: 52 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,48 +1,76 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -tail-dup-jmptable-loop-size=5 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
2+
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
33
define i8* @large_loop_switch(i8* %p) {
44
; CHECK-LABEL: large_loop_switch:
55
; CHECK: # %bb.0: # %entry
66
; CHECK-NEXT: pushq %rbx
77
; CHECK-NEXT: .cfi_def_cfa_offset 16
88
; CHECK-NEXT: .cfi_offset %rbx, -16
9-
; CHECK-NEXT: movq %rdi, %rsi
9+
; CHECK-NEXT: movq %rdi, %rax
1010
; CHECK-NEXT: movl $6, %ebx
11-
; CHECK-NEXT: movl %ebx, %eax
12-
; CHECK-NEXT: jmpq *.LJTI0_0(,%rax,8)
11+
; CHECK-NEXT: movl %ebx, %ecx
12+
; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8)
13+
; CHECK-NEXT: .LBB0_1: # %for.cond.cleanup
14+
; CHECK-NEXT: movl $530, %edi # imm = 0x212
15+
; CHECK-NEXT: movq %rax, %rsi
16+
; CHECK-NEXT: popq %rbx
17+
; CHECK-NEXT: .cfi_def_cfa_offset 8
18+
; CHECK-NEXT: jmp ccc@PLT # TAILCALL
19+
; CHECK-NEXT: .p2align 4, 0x90
1320
; CHECK-NEXT: .LBB0_2: # %sw.bb1
21+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
22+
; CHECK-NEXT: .cfi_def_cfa_offset 16
1423
; CHECK-NEXT: movl $531, %edi # imm = 0x213
15-
; CHECK-NEXT: .LBB0_3: # %for.body
16-
; CHECK-NEXT: callq ccc@PLT
17-
; CHECK-NEXT: .LBB0_4: # %for.body
1824
; CHECK-NEXT: movq %rax, %rsi
25+
; CHECK-NEXT: callq ccc@PLT
1926
; CHECK-NEXT: decl %ebx
20-
; CHECK-NEXT: movl %ebx, %eax
21-
; CHECK-NEXT: jmpq *.LJTI0_0(,%rax,8)
22-
; CHECK-NEXT: .LBB0_5: # %sw.bb3
27+
; CHECK-NEXT: movl %ebx, %ecx
28+
; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8)
29+
; CHECK-NEXT: .p2align 4, 0x90
30+
; CHECK-NEXT: .LBB0_3: # %sw.bb3
31+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2332
; CHECK-NEXT: movl $532, %edi # imm = 0x214
33+
; CHECK-NEXT: movq %rax, %rsi
2434
; CHECK-NEXT: callq bbb@PLT
25-
; CHECK-NEXT: jmp .LBB0_4
26-
; CHECK-NEXT: .LBB0_7: # %sw.bb5
35+
; CHECK-NEXT: decl %ebx
36+
; CHECK-NEXT: movl %ebx, %ecx
37+
; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8)
38+
; CHECK-NEXT: .p2align 4, 0x90
39+
; CHECK-NEXT: .LBB0_4: # %sw.bb5
40+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2741
; CHECK-NEXT: movl $533, %edi # imm = 0x215
42+
; CHECK-NEXT: movq %rax, %rsi
2843
; CHECK-NEXT: callq bbb@PLT
29-
; CHECK-NEXT: jmp .LBB0_4
30-
; CHECK-NEXT: .LBB0_8: # %sw.bb7
44+
; CHECK-NEXT: decl %ebx
45+
; CHECK-NEXT: movl %ebx, %ecx
46+
; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8)
47+
; CHECK-NEXT: .p2align 4, 0x90
48+
; CHECK-NEXT: .LBB0_5: # %sw.bb7
49+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3150
; CHECK-NEXT: movl $535, %edi # imm = 0x217
51+
; CHECK-NEXT: movq %rax, %rsi
3252
; CHECK-NEXT: callq bbb@PLT
33-
; CHECK-NEXT: jmp .LBB0_4
34-
; CHECK-NEXT: .LBB0_9: # %sw.bb9
53+
; CHECK-NEXT: decl %ebx
54+
; CHECK-NEXT: movl %ebx, %ecx
55+
; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8)
56+
; CHECK-NEXT: .p2align 4, 0x90
57+
; CHECK-NEXT: .LBB0_6: # %sw.bb9
58+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3559
; CHECK-NEXT: movl $536, %edi # imm = 0x218
36-
; CHECK-NEXT: jmp .LBB0_3
37-
; CHECK-NEXT: .LBB0_10: # %sw.bb11
60+
; CHECK-NEXT: movq %rax, %rsi
61+
; CHECK-NEXT: callq ccc@PLT
62+
; CHECK-NEXT: decl %ebx
63+
; CHECK-NEXT: movl %ebx, %ecx
64+
; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8)
65+
; CHECK-NEXT: .p2align 4, 0x90
66+
; CHECK-NEXT: .LBB0_7: # %sw.bb11
67+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3868
; CHECK-NEXT: movl $658, %edi # imm = 0x292
69+
; CHECK-NEXT: movq %rax, %rsi
3970
; CHECK-NEXT: callq bbb@PLT
40-
; CHECK-NEXT: jmp .LBB0_4
41-
; CHECK-NEXT: .LBB0_11: # %for.cond.cleanup
42-
; CHECK-NEXT: movl $530, %edi # imm = 0x212
43-
; CHECK-NEXT: popq %rbx
44-
; CHECK-NEXT: .cfi_def_cfa_offset 8
45-
; CHECK-NEXT: jmp ccc@PLT # TAILCALL
71+
; CHECK-NEXT: decl %ebx
72+
; CHECK-NEXT: movl %ebx, %ecx
73+
; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8)
4674
entry:
4775
br label %for.body
4876

0 commit comments

Comments
 (0)