Skip to content

Commit da8a5f1

Browse files
ruilingsivan-shani
authored and committed
MachineScheduler: Reset next cluster candidate for each node (llvm#139513)
When a node is picked, we should reset its next cluster candidate to null before releasing its successors/predecessors.
1 parent 08375b9 commit da8a5f1

File tree

120 files changed

+10485
-10557
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

120 files changed

+10485
-10557
lines changed

llvm/lib/CodeGen/MachineScheduler.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -967,6 +967,12 @@ void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
967967

968968
/// releaseSuccessors - Call releaseSucc on each of SU's successors.
969969
void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
970+
// Reset the next successor. For example, we want to cluster A B C.
971+
// After A is picked, we will set B as next cluster succ, but if we pick
972+
// D instead of B after A, then we need to reset the next cluster succ because
973+
// we have decided to not pick the cluster candidate B during pickNode().
974+
// Leaving B as the NextClusterSucc just makes things messy.
975+
NextClusterSucc = nullptr;
970976
for (SDep &Succ : SU->Succs)
971977
releaseSucc(SU, &Succ);
972978
}
@@ -1004,6 +1010,7 @@ void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
10041010

10051011
/// releasePredecessors - Call releasePred on each of SU's predecessors.
10061012
void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
1013+
NextClusterPred = nullptr;
10071014
for (SDep &Pred : SU->Preds)
10081015
releasePred(SU, &Pred);
10091016
}

llvm/test/CodeGen/AArch64/expand-select.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,11 @@ define void @foo(i32 %In1, <2 x i128> %In2, <2 x i128> %In3, ptr %Out) {
88
; CHECK-NEXT: fmov s0, wzr
99
; CHECK-NEXT: ldr x11, [sp]
1010
; CHECK-NEXT: fmov s1, w8
11-
; CHECK-NEXT: ldp x9, x10, [sp, #8]
11+
; CHECK-NEXT: ldp x8, x10, [sp, #8]
1212
; CHECK-NEXT: cmeq v0.4s, v1.4s, v0.4s
13-
; CHECK-NEXT: fmov w8, s0
14-
; CHECK-NEXT: tst w8, #0x1
15-
; CHECK-NEXT: csel x8, x5, x9, ne
13+
; CHECK-NEXT: fmov w9, s0
14+
; CHECK-NEXT: tst w9, #0x1
15+
; CHECK-NEXT: csel x8, x5, x8, ne
1616
; CHECK-NEXT: csel x9, x4, x11, ne
1717
; CHECK-NEXT: stp x9, x8, [x10, #16]
1818
; CHECK-NEXT: csel x8, x3, x7, ne
@@ -36,14 +36,14 @@ define void @bar(i32 %In1, <2 x i96> %In2, <2 x i96> %In3, ptr %Out) {
3636
; CHECK-NEXT: ldr x10, [sp, #16]
3737
; CHECK-NEXT: fmov s1, w8
3838
; CHECK-NEXT: cmeq v0.4s, v1.4s, v0.4s
39-
; CHECK-NEXT: fmov w8, s0
40-
; CHECK-NEXT: tst w8, #0x1
41-
; CHECK-NEXT: ldp x9, x8, [sp]
39+
; CHECK-NEXT: fmov w9, s0
40+
; CHECK-NEXT: tst w9, #0x1
41+
; CHECK-NEXT: ldp x8, x9, [sp]
4242
; CHECK-NEXT: csel x11, x2, x6, ne
4343
; CHECK-NEXT: str x11, [x10]
44-
; CHECK-NEXT: csel x9, x4, x9, ne
45-
; CHECK-NEXT: csel x8, x5, x8, ne
46-
; CHECK-NEXT: stur x9, [x10, #12]
44+
; CHECK-NEXT: csel x8, x4, x8, ne
45+
; CHECK-NEXT: stur x8, [x10, #12]
46+
; CHECK-NEXT: csel x8, x5, x9, ne
4747
; CHECK-NEXT: csel x9, x3, x7, ne
4848
; CHECK-NEXT: str w8, [x10, #20]
4949
; CHECK-NEXT: str w9, [x10, #8]

llvm/test/CodeGen/AArch64/extbinopload.ll

Lines changed: 43 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -667,30 +667,30 @@ define <16 x i32> @extrause_load(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) {
667667
; CHECK-NEXT: add x10, x3, #12
668668
; CHECK-NEXT: bic v1.8h, #255, lsl #8
669669
; CHECK-NEXT: ld1 { v0.s }[3], [x3], #4
670-
; CHECK-NEXT: ldr s3, [x0, #12]
671-
; CHECK-NEXT: ldp s2, s7, [x0, #4]
670+
; CHECK-NEXT: ldr s4, [x0, #12]
671+
; CHECK-NEXT: ldp s5, s2, [x2, #4]
672672
; CHECK-NEXT: ldr s6, [x2, #12]
673-
; CHECK-NEXT: ldp s5, s4, [x2, #4]
674-
; CHECK-NEXT: ld1 { v3.s }[1], [x11]
673+
; CHECK-NEXT: ldp s3, s7, [x0, #4]
674+
; CHECK-NEXT: ld1 { v4.s }[1], [x11]
675675
; CHECK-NEXT: ld1 { v6.s }[1], [x10]
676-
; CHECK-NEXT: ld1 { v2.s }[1], [x9]
677-
; CHECK-NEXT: ld1 { v4.s }[1], [x8]
676+
; CHECK-NEXT: ld1 { v2.s }[1], [x8]
678677
; CHECK-NEXT: ld1 { v5.s }[1], [x3]
679678
; CHECK-NEXT: add x8, x1, #8
679+
; CHECK-NEXT: ld1 { v3.s }[1], [x9]
680680
; CHECK-NEXT: ld1 { v7.s }[1], [x8]
681-
; CHECK-NEXT: uaddl v2.8h, v2.8b, v3.8b
682-
; CHECK-NEXT: ushll v4.8h, v4.8b, #0
683-
; CHECK-NEXT: uaddl v3.8h, v5.8b, v6.8b
681+
; CHECK-NEXT: ushll v2.8h, v2.8b, #0
682+
; CHECK-NEXT: uaddl v3.8h, v3.8b, v4.8b
683+
; CHECK-NEXT: uaddl v4.8h, v5.8b, v6.8b
684684
; CHECK-NEXT: uaddw v1.8h, v1.8h, v7.8b
685-
; CHECK-NEXT: uaddw2 v4.8h, v4.8h, v0.16b
686-
; CHECK-NEXT: ushll v0.4s, v2.4h, #3
687-
; CHECK-NEXT: ushll v5.4s, v3.4h, #3
685+
; CHECK-NEXT: uaddw2 v2.8h, v2.8h, v0.16b
686+
; CHECK-NEXT: ushll v0.4s, v3.4h, #3
687+
; CHECK-NEXT: ushll v5.4s, v4.4h, #3
688+
; CHECK-NEXT: ushll2 v4.4s, v4.8h, #3
688689
; CHECK-NEXT: ushll2 v3.4s, v3.8h, #3
689-
; CHECK-NEXT: ushll2 v2.4s, v2.8h, #3
690690
; CHECK-NEXT: uaddw v0.4s, v0.4s, v1.4h
691-
; CHECK-NEXT: uaddw2 v1.4s, v2.4s, v1.8h
692-
; CHECK-NEXT: uaddw2 v3.4s, v3.4s, v4.8h
693-
; CHECK-NEXT: uaddw v2.4s, v5.4s, v4.4h
691+
; CHECK-NEXT: uaddw2 v1.4s, v3.4s, v1.8h
692+
; CHECK-NEXT: uaddw2 v3.4s, v4.4s, v2.8h
693+
; CHECK-NEXT: uaddw v2.4s, v5.4s, v2.4h
694694
; CHECK-NEXT: ret
695695
%lp1 = load <4 x i8>, ptr %p
696696
store <4 x i8> %lp1, ptr %z
@@ -1073,24 +1073,24 @@ define <16 x i32> @extrause_ext2(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) {
10731073
; CHECK-NEXT: ld1 { v6.s }[1], [x10]
10741074
; CHECK-NEXT: ld1 { v5.s }[1], [x9]
10751075
; CHECK-NEXT: ld1 { v7.s }[1], [x8]
1076-
; CHECK-NEXT: uaddl v16.8h, v2.8b, v3.8b
1077-
; CHECK-NEXT: uaddl v3.8h, v1.8b, v6.8b
1078-
; CHECK-NEXT: uaddl v2.8h, v4.8b, v5.8b
1076+
; CHECK-NEXT: uaddl v2.8h, v2.8b, v3.8b
1077+
; CHECK-NEXT: uaddl v1.8h, v1.8b, v6.8b
1078+
; CHECK-NEXT: uaddl v3.8h, v4.8b, v5.8b
10791079
; CHECK-NEXT: uaddl v4.8h, v0.8b, v7.8b
1080-
; CHECK-NEXT: ushll v0.4s, v16.4h, #3
1081-
; CHECK-NEXT: ushll2 v1.4s, v16.8h, #3
1082-
; CHECK-NEXT: ushll2 v18.4s, v16.8h, #0
1083-
; CHECK-NEXT: ushll v6.4s, v2.4h, #3
1084-
; CHECK-NEXT: ushll2 v7.4s, v2.8h, #3
1085-
; CHECK-NEXT: ushll2 v5.4s, v2.8h, #0
1080+
; CHECK-NEXT: ushll2 v0.4s, v2.8h, #0
1081+
; CHECK-NEXT: ushll v5.4s, v2.4h, #3
1082+
; CHECK-NEXT: ushll2 v16.4s, v2.8h, #3
1083+
; CHECK-NEXT: ushll v6.4s, v3.4h, #3
1084+
; CHECK-NEXT: ushll2 v7.4s, v3.8h, #3
10861085
; CHECK-NEXT: ushll v17.4s, v2.4h, #0
1087-
; CHECK-NEXT: uaddw2 v1.4s, v1.4s, v3.8h
1088-
; CHECK-NEXT: uaddw v0.4s, v0.4s, v3.4h
1086+
; CHECK-NEXT: ushll2 v18.4s, v3.8h, #0
1087+
; CHECK-NEXT: ushll v19.4s, v3.4h, #0
1088+
; CHECK-NEXT: stp q17, q0, [x4]
1089+
; CHECK-NEXT: uaddw v0.4s, v5.4s, v1.4h
1090+
; CHECK-NEXT: uaddw2 v1.4s, v16.4s, v1.8h
10891091
; CHECK-NEXT: uaddw2 v3.4s, v7.4s, v4.8h
10901092
; CHECK-NEXT: uaddw v2.4s, v6.4s, v4.4h
1091-
; CHECK-NEXT: ushll v4.4s, v16.4h, #0
1092-
; CHECK-NEXT: stp q17, q5, [x4, #32]
1093-
; CHECK-NEXT: stp q4, q18, [x4]
1093+
; CHECK-NEXT: stp q19, q18, [x4, #32]
10941094
; CHECK-NEXT: ret
10951095
%lp1 = load <4 x i8>, ptr %p
10961096
%p2 = getelementptr i8, ptr %p, i32 4
@@ -1176,19 +1176,20 @@ define <16 x i32> @extrause_shl(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) {
11761176
; CHECK-NEXT: ld1 { v5.s }[1], [x9]
11771177
; CHECK-NEXT: ld1 { v7.s }[1], [x8]
11781178
; CHECK-NEXT: uaddl v2.8h, v2.8b, v3.8b
1179+
; CHECK-NEXT: uaddl v1.8h, v1.8b, v6.8b
11791180
; CHECK-NEXT: uaddl v3.8h, v4.8b, v5.8b
1180-
; CHECK-NEXT: uaddl v4.8h, v1.8b, v6.8b
1181-
; CHECK-NEXT: ushll v5.4s, v2.4h, #3
1182-
; CHECK-NEXT: ushll2 v6.4s, v2.8h, #3
1183-
; CHECK-NEXT: uaddl v2.8h, v0.8b, v7.8b
1184-
; CHECK-NEXT: ushll v7.4s, v3.4h, #3
1185-
; CHECK-NEXT: ushll2 v16.4s, v3.8h, #3
1186-
; CHECK-NEXT: uaddw2 v1.4s, v6.4s, v4.8h
1187-
; CHECK-NEXT: uaddw v0.4s, v5.4s, v4.4h
1188-
; CHECK-NEXT: stp q5, q6, [x4]
1189-
; CHECK-NEXT: uaddw2 v3.4s, v16.4s, v2.8h
1190-
; CHECK-NEXT: uaddw v2.4s, v7.4s, v2.4h
1191-
; CHECK-NEXT: stp q7, q16, [x4, #32]
1181+
; CHECK-NEXT: uaddl v5.8h, v0.8b, v7.8b
1182+
; CHECK-NEXT: ushll v4.4s, v2.4h, #3
1183+
; CHECK-NEXT: ushll2 v2.4s, v2.8h, #3
1184+
; CHECK-NEXT: ushll v6.4s, v3.4h, #3
1185+
; CHECK-NEXT: ushll2 v7.4s, v3.8h, #3
1186+
; CHECK-NEXT: uaddw v0.4s, v4.4s, v1.4h
1187+
; CHECK-NEXT: uaddw2 v1.4s, v2.4s, v1.8h
1188+
; CHECK-NEXT: str q4, [x4]
1189+
; CHECK-NEXT: stp q2, q6, [x4, #16]
1190+
; CHECK-NEXT: uaddw2 v3.4s, v7.4s, v5.8h
1191+
; CHECK-NEXT: uaddw v2.4s, v6.4s, v5.4h
1192+
; CHECK-NEXT: str q7, [x4, #48]
11921193
; CHECK-NEXT: ret
11931194
%lp1 = load <4 x i8>, ptr %p
11941195
%p2 = getelementptr i8, ptr %p, i32 4

0 commit comments

Comments
 (0)