Skip to content

Commit 1f9fa54

Browse files
committed
[Taildup] Don't tail-duplicate loop header with multiple successors as its latches
When Taildup encounters a loop with multiple latches like: // 1 -> 2 <-> 3 | // \ <-> 4 | // \ <-> 5 | // \---> rest | it may transform the loop into multiple loops by duplicating the loop header. However, this transformation may bring little benefit while making the CFG much more complex. In some uncommon cases, it causes a large compile-time regression (case provided by @alexfh in D106056). This patch disables tail duplication in such cases. TestPlan: check-llvm Differential Revision: https://reviews.llvm.org/D110613
1 parent f8833ba commit 1f9fa54

File tree

2 files changed

+53
-52
lines changed

2 files changed

+53
-52
lines changed

llvm/lib/CodeGen/TailDuplicator.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,12 @@ static cl::opt<unsigned> TailDupIndirectBranchSize(
7070
"end with indirect branches."), cl::init(20),
7171
cl::Hidden);
7272

73+
static cl::opt<unsigned> TailDupJmpTableLoopSize(
74+
"tail-dup-jmptable-loop-size",
75+
cl::desc("Maximum loop latches to consider tail duplication that are "
76+
"successors of loop header."),
77+
cl::init(128), cl::Hidden);
78+
7379
static cl::opt<bool>
7480
TailDupVerify("tail-dup-verify",
7581
cl::desc("Verify sanity of PHI instructions during taildup"),
@@ -563,6 +569,29 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
563569
if (TailBB.isSuccessor(&TailBB))
564570
return false;
565571

572+
// When doing tail-duplication with jumptable loops like:
573+
// 1 -> 2 <-> 3 |
574+
// \ <-> 4 |
575+
// \ <-> 5 |
576+
// \ <-> ... |
577+
// \---> rest |
578+
// a quadratic number of edges and many more loops are added to the CFG. This
579+
// may cause a compile-time regression when the jump table is quite large.
580+
// So set the limit on jumptable cases.
581+
auto isLargeJumpTableLoop = [](const MachineBasicBlock &TailBB) {
582+
const SmallPtrSet<const MachineBasicBlock *, 8> Preds(TailBB.pred_begin(),
583+
TailBB.pred_end());
584+
// Check whether the basic block has a large number of successors that are
585+
// also its predecessors and whose only successor is the basic block itself.
586+
return llvm::count_if(
587+
TailBB.successors(), [&](const MachineBasicBlock *SuccBB) {
588+
return Preds.count(SuccBB) && SuccBB->succ_size() == 1;
589+
}) > TailDupJmpTableLoopSize;
590+
};
591+
592+
if (isLargeJumpTableLoop(TailBB))
593+
return false;
594+
566595
// Set the limit on the cost to duplicate. When optimizing for size,
567596
// duplicate only one, because one branch instruction can be eliminated to
568597
// compensate for the duplication.

llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll

Lines changed: 24 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,76 +1,48 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
2+
; RUN: llc < %s -tail-dup-jmptable-loop-size=5 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
33
define i8* @large_loop_switch(i8* %p) {
44
; CHECK-LABEL: large_loop_switch:
55
; CHECK: # %bb.0: # %entry
66
; CHECK-NEXT: pushq %rbx
77
; CHECK-NEXT: .cfi_def_cfa_offset 16
88
; CHECK-NEXT: .cfi_offset %rbx, -16
9-
; CHECK-NEXT: movq %rdi, %rax
9+
; CHECK-NEXT: movq %rdi, %rsi
1010
; CHECK-NEXT: movl $6, %ebx
11-
; CHECK-NEXT: movl %ebx, %ecx
12-
; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8)
13-
; CHECK-NEXT: .LBB0_1: # %for.cond.cleanup
14-
; CHECK-NEXT: movl $530, %edi # imm = 0x212
15-
; CHECK-NEXT: movq %rax, %rsi
16-
; CHECK-NEXT: popq %rbx
17-
; CHECK-NEXT: .cfi_def_cfa_offset 8
18-
; CHECK-NEXT: jmp ccc@PLT # TAILCALL
19-
; CHECK-NEXT: .p2align 4, 0x90
11+
; CHECK-NEXT: movl %ebx, %eax
12+
; CHECK-NEXT: jmpq *.LJTI0_0(,%rax,8)
2013
; CHECK-NEXT: .LBB0_2: # %sw.bb1
21-
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
22-
; CHECK-NEXT: .cfi_def_cfa_offset 16
2314
; CHECK-NEXT: movl $531, %edi # imm = 0x213
24-
; CHECK-NEXT: movq %rax, %rsi
15+
; CHECK-NEXT: .LBB0_3: # %for.body
2516
; CHECK-NEXT: callq ccc@PLT
17+
; CHECK-NEXT: .LBB0_4: # %for.body
18+
; CHECK-NEXT: movq %rax, %rsi
2619
; CHECK-NEXT: decl %ebx
27-
; CHECK-NEXT: movl %ebx, %ecx
28-
; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8)
29-
; CHECK-NEXT: .p2align 4, 0x90
30-
; CHECK-NEXT: .LBB0_3: # %sw.bb3
31-
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
20+
; CHECK-NEXT: movl %ebx, %eax
21+
; CHECK-NEXT: jmpq *.LJTI0_0(,%rax,8)
22+
; CHECK-NEXT: .LBB0_5: # %sw.bb3
3223
; CHECK-NEXT: movl $532, %edi # imm = 0x214
33-
; CHECK-NEXT: movq %rax, %rsi
3424
; CHECK-NEXT: callq bbb@PLT
35-
; CHECK-NEXT: decl %ebx
36-
; CHECK-NEXT: movl %ebx, %ecx
37-
; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8)
38-
; CHECK-NEXT: .p2align 4, 0x90
39-
; CHECK-NEXT: .LBB0_4: # %sw.bb5
40-
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
25+
; CHECK-NEXT: jmp .LBB0_4
26+
; CHECK-NEXT: .LBB0_7: # %sw.bb5
4127
; CHECK-NEXT: movl $533, %edi # imm = 0x215
42-
; CHECK-NEXT: movq %rax, %rsi
4328
; CHECK-NEXT: callq bbb@PLT
44-
; CHECK-NEXT: decl %ebx
45-
; CHECK-NEXT: movl %ebx, %ecx
46-
; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8)
47-
; CHECK-NEXT: .p2align 4, 0x90
48-
; CHECK-NEXT: .LBB0_5: # %sw.bb7
49-
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
29+
; CHECK-NEXT: jmp .LBB0_4
30+
; CHECK-NEXT: .LBB0_8: # %sw.bb7
5031
; CHECK-NEXT: movl $535, %edi # imm = 0x217
51-
; CHECK-NEXT: movq %rax, %rsi
5232
; CHECK-NEXT: callq bbb@PLT
53-
; CHECK-NEXT: decl %ebx
54-
; CHECK-NEXT: movl %ebx, %ecx
55-
; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8)
56-
; CHECK-NEXT: .p2align 4, 0x90
57-
; CHECK-NEXT: .LBB0_6: # %sw.bb9
58-
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
33+
; CHECK-NEXT: jmp .LBB0_4
34+
; CHECK-NEXT: .LBB0_9: # %sw.bb9
5935
; CHECK-NEXT: movl $536, %edi # imm = 0x218
60-
; CHECK-NEXT: movq %rax, %rsi
61-
; CHECK-NEXT: callq ccc@PLT
62-
; CHECK-NEXT: decl %ebx
63-
; CHECK-NEXT: movl %ebx, %ecx
64-
; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8)
65-
; CHECK-NEXT: .p2align 4, 0x90
66-
; CHECK-NEXT: .LBB0_7: # %sw.bb11
67-
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
36+
; CHECK-NEXT: jmp .LBB0_3
37+
; CHECK-NEXT: .LBB0_10: # %sw.bb11
6838
; CHECK-NEXT: movl $658, %edi # imm = 0x292
69-
; CHECK-NEXT: movq %rax, %rsi
7039
; CHECK-NEXT: callq bbb@PLT
71-
; CHECK-NEXT: decl %ebx
72-
; CHECK-NEXT: movl %ebx, %ecx
73-
; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8)
40+
; CHECK-NEXT: jmp .LBB0_4
41+
; CHECK-NEXT: .LBB0_11: # %for.cond.cleanup
42+
; CHECK-NEXT: movl $530, %edi # imm = 0x212
43+
; CHECK-NEXT: popq %rbx
44+
; CHECK-NEXT: .cfi_def_cfa_offset 8
45+
; CHECK-NEXT: jmp ccc@PLT # TAILCALL
7446
entry:
7547
br label %for.body
7648

0 commit comments

Comments
 (0)