Skip to content

Commit c60c13f

Browse files
committed
[SLP] Improve reordering in the presence of constant-only nodes.
We can skip the analysis of the constant nodes, since their order should not affect the ordering of the trees/subtrees. Differential Revision: https://reviews.llvm.org/D127775
1 parent 60e3599 commit c60c13f

File tree

3 files changed

+17
-19
lines changed

3 files changed

+17
-19
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3717,10 +3717,7 @@ void BoUpSLP::reorderTopToBottom() {
37173717
EI.UserTE->isAltShuffle() && EI.UserTE->Idx != 0;
37183718
}))
37193719
return;
3720-
if (UserTE->UserTreeIndices.empty())
3721-
UserTE = nullptr;
3722-
else
3723-
UserTE = UserTE->UserTreeIndices.back().UserTE;
3720+
UserTE = UserTE->UserTreeIndices.back().UserTE;
37243721
++Cnt;
37253722
}
37263723
VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
@@ -3885,15 +3882,17 @@ bool BoUpSLP::canReorderOperands(
38853882
}
38863883
ArrayRef<Value *> VL = UserTE->getOperand(I);
38873884
TreeEntry *Gather = nullptr;
3888-
if (count_if(ReorderableGathers, [VL, &Gather](TreeEntry *TE) {
3889-
assert(TE->State != TreeEntry::Vectorize &&
3890-
"Only non-vectorized nodes are expected.");
3891-
if (TE->isSame(VL)) {
3892-
Gather = TE;
3893-
return true;
3894-
}
3895-
return false;
3896-
}) > 1)
3885+
if (count_if(ReorderableGathers,
3886+
[VL, &Gather](TreeEntry *TE) {
3887+
assert(TE->State != TreeEntry::Vectorize &&
3888+
"Only non-vectorized nodes are expected.");
3889+
if (TE->isSame(VL)) {
3890+
Gather = TE;
3891+
return true;
3892+
}
3893+
return false;
3894+
}) > 1 &&
3895+
!all_of(VL, isConstant))
38973896
return false;
38983897
if (Gather)
38993898
GatherOps.push_back(Gather);

llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,13 @@ define void @test() {
88
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP1]] to <4 x i8>*
99
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, <4 x i8>* [[TMP3]], align 1
1010
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
11-
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
12-
; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <4 x i32> zeroinitializer, [[SHUFFLE]]
11+
; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP5]]
1312
; CHECK-NEXT: [[TMP7:%.*]] = shl nsw <4 x i32> [[TMP6]], zeroinitializer
1413
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP7]], zeroinitializer
1514
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
1615
; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP9]]
1716
; CHECK-NEXT: [[TMP11:%.*]] = sub nsw <4 x i32> [[TMP8]], [[TMP9]]
18-
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
17+
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> <i32 1, i32 4, i32 3, i32 6>
1918
; CHECK-NEXT: [[TMP13:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP12]]
2019
; CHECK-NEXT: [[TMP14:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP12]]
2120
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>

llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,14 @@ define void @test() {
1717
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP2]], i32 1
1818
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> <float poison, float 0.000000e+00>, float [[TMP2]], i32 0
1919
; CHECK-NEXT: [[TMP8:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP6]], <2 x float> [[TMP7]])
20-
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
2120
; CHECK-NEXT: br i1 false, label [[BB2:%.*]], label [[BB3:%.*]]
2221
; CHECK: bb2:
23-
; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x float> [[SHUFFLE]], zeroinitializer
22+
; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x float> [[TMP8]], zeroinitializer
2423
; CHECK-NEXT: br label [[BB3]]
2524
; CHECK: bb3:
2625
; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x float> [ [[TMP9]], [[BB2]] ], [ zeroinitializer, [[BB1]] ]
27-
; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x float> [[TMP1]], [[TMP10]]
26+
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
27+
; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x float> [[TMP1]], [[SHUFFLE]]
2828
; CHECK-NEXT: [[TMP12:%.*]] = fadd <2 x float> [[TMP11]], zeroinitializer
2929
; CHECK-NEXT: [[TMP13:%.*]] = fsub <2 x float> [[TMP12]], zeroinitializer
3030
; CHECK-NEXT: [[TMP14:%.*]] = fsub <2 x float> [[TMP13]], zeroinitializer

0 commit comments

Comments
 (0)