Skip to content

Commit 37edd2c

Browse files
authored
[X86] combineEXTRACT_SUBVECTOR - generalize extract_subvector(broadcast(x),c) fold with IsElementEquivalent (#141963)
Instead of matching the broadcast nodes directly, let IsElementEquivalent handle them. This also allows broadcasts hidden behind a BITCAST to be folded, since IsElementEquivalent already has BITCAST handling.
1 parent 417e43a commit 37edd2c

File tree

3 files changed

+22
-41
lines changed

3 files changed

+22
-41
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -59549,6 +59549,7 @@ static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
5954959549
unsigned SizeInBits = VT.getSizeInBits();
5955059550
unsigned InSizeInBits = InVecVT.getSizeInBits();
5955159551
unsigned NumSubElts = VT.getVectorNumElements();
59552+
unsigned NumInElts = InVecVT.getVectorNumElements();
5955259553
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5955359554
SDLoc DL(N);
5955459555

@@ -59615,22 +59616,22 @@ static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
5961559616
}
5961659617
}
5961759618

59618-
// If we're extracting an upper subvector from a broadcast we should just
59619-
// extract the lowest subvector instead which should allow
59619+
// If we're extracting an upper subvector see if we'd get the same elements if
59620+
// we extracted the lowest subvector instead which should allow
5962059621
// SimplifyDemandedVectorElts to do more simplifications.
59621-
if (IdxVal != 0 && (InVec.getOpcode() == X86ISD::VBROADCAST ||
59622-
InVec.getOpcode() == X86ISD::VBROADCAST_LOAD ||
59623-
DAG.isSplatValue(InVec, /*AllowUndefs*/ false)))
59624-
return extractSubVector(InVec, 0, DAG, DL, SizeInBits);
59622+
if (IdxVal != 0) {
59623+
bool AllEquiv = all_of(seq<unsigned>(NumSubElts), [&](unsigned I) {
59624+
return IsElementEquivalent(NumInElts, InVec, InVec, I, I + IdxVal);
59625+
});
59626+
if (AllEquiv)
59627+
return extractSubVector(InVec, 0, DAG, DL, SizeInBits);
59628+
}
5962559629

5962659630
// Check if we're extracting a whole broadcasted subvector.
5962759631
if (InVec.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) {
5962859632
auto *MemIntr = cast<MemIntrinsicSDNode>(InVec);
5962959633
EVT MemVT = MemIntr->getMemoryVT();
5963059634
if (MemVT == VT) {
59631-
// Just use the lowest subvector.
59632-
if (IdxVal != 0)
59633-
return extractSubVector(InVec, 0, DAG, DL, SizeInBits);
5963459635
// If this is the only use, we can replace with a regular load (this may
5963559636
// have been missed by SimplifyDemandedVectorElts due to extra uses of the
5963659637
// memory chain).

llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll

Lines changed: 8 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3951,7 +3951,6 @@ define void @vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32(ptr %in
39513951
; AVX512F-LABEL: vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32:
39523952
; AVX512F: # %bb.0:
39533953
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm0
3954-
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
39553954
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
39563955
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
39573956
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
@@ -3962,7 +3961,6 @@ define void @vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32(ptr %in
39623961
; AVX512DQ-LABEL: vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32:
39633962
; AVX512DQ: # %bb.0:
39643963
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm0
3965-
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
39663964
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
39673965
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
39683966
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4004,7 +4002,7 @@ define void @vec512_i8_widen_to_i32_factor4_broadcast_to_v16i32_factor16(ptr %in
40044002
;
40054003
; AVX-LABEL: vec512_i8_widen_to_i32_factor4_broadcast_to_v16i32_factor16:
40064004
; AVX: # %bb.0:
4007-
; AVX-NEXT: vbroadcastss (%rdi), %ymm0
4005+
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,0,0,0]
40084006
; AVX-NEXT: vpaddb 48(%rsi), %xmm0, %xmm1
40094007
; AVX-NEXT: vpaddb 32(%rsi), %xmm0, %xmm2
40104008
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm3
@@ -4013,7 +4011,6 @@ define void @vec512_i8_widen_to_i32_factor4_broadcast_to_v16i32_factor16(ptr %in
40134011
; AVX-NEXT: vmovdqa %xmm3, 16(%rdx)
40144012
; AVX-NEXT: vmovdqa %xmm2, 32(%rdx)
40154013
; AVX-NEXT: vmovdqa %xmm1, 48(%rdx)
4016-
; AVX-NEXT: vzeroupper
40174014
; AVX-NEXT: retq
40184015
;
40194016
; AVX2-LABEL: vec512_i8_widen_to_i32_factor4_broadcast_to_v16i32_factor16:
@@ -4029,7 +4026,6 @@ define void @vec512_i8_widen_to_i32_factor4_broadcast_to_v16i32_factor16(ptr %in
40294026
; AVX512F-LABEL: vec512_i8_widen_to_i32_factor4_broadcast_to_v16i32_factor16:
40304027
; AVX512F: # %bb.0:
40314028
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm0
4032-
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
40334029
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
40344030
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
40354031
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4040,7 +4036,6 @@ define void @vec512_i8_widen_to_i32_factor4_broadcast_to_v16i32_factor16(ptr %in
40404036
; AVX512DQ-LABEL: vec512_i8_widen_to_i32_factor4_broadcast_to_v16i32_factor16:
40414037
; AVX512DQ: # %bb.0:
40424038
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm0
4043-
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
40444039
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
40454040
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
40464041
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4082,7 +4077,7 @@ define void @vec512_i8_widen_to_i64_factor8_broadcast_to_v8i64_factor8(ptr %in.e
40824077
;
40834078
; AVX-LABEL: vec512_i8_widen_to_i64_factor8_broadcast_to_v8i64_factor8:
40844079
; AVX: # %bb.0:
4085-
; AVX-NEXT: vbroadcastsd (%rdi), %ymm0
4080+
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,1,0,1]
40864081
; AVX-NEXT: vpaddb 48(%rsi), %xmm0, %xmm1
40874082
; AVX-NEXT: vpaddb 32(%rsi), %xmm0, %xmm2
40884083
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm3
@@ -4091,7 +4086,6 @@ define void @vec512_i8_widen_to_i64_factor8_broadcast_to_v8i64_factor8(ptr %in.e
40914086
; AVX-NEXT: vmovdqa %xmm3, 16(%rdx)
40924087
; AVX-NEXT: vmovdqa %xmm2, 32(%rdx)
40934088
; AVX-NEXT: vmovdqa %xmm1, 48(%rdx)
4094-
; AVX-NEXT: vzeroupper
40954089
; AVX-NEXT: retq
40964090
;
40974091
; AVX2-LABEL: vec512_i8_widen_to_i64_factor8_broadcast_to_v8i64_factor8:
@@ -4107,7 +4101,6 @@ define void @vec512_i8_widen_to_i64_factor8_broadcast_to_v8i64_factor8(ptr %in.e
41074101
; AVX512F-LABEL: vec512_i8_widen_to_i64_factor8_broadcast_to_v8i64_factor8:
41084102
; AVX512F: # %bb.0:
41094103
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm0
4110-
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
41114104
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
41124105
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
41134106
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4118,7 +4111,6 @@ define void @vec512_i8_widen_to_i64_factor8_broadcast_to_v8i64_factor8(ptr %in.e
41184111
; AVX512DQ-LABEL: vec512_i8_widen_to_i64_factor8_broadcast_to_v8i64_factor8:
41194112
; AVX512DQ: # %bb.0:
41204113
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm0
4121-
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
41224114
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
41234115
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
41244116
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4184,7 +4176,6 @@ define void @vec512_i8_widen_to_i128_factor16_broadcast_to_v4i128_factor4(ptr %i
41844176
; AVX512F-LABEL: vec512_i8_widen_to_i128_factor16_broadcast_to_v4i128_factor4:
41854177
; AVX512F: # %bb.0:
41864178
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm0
4187-
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
41884179
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
41894180
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
41904181
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4195,7 +4186,6 @@ define void @vec512_i8_widen_to_i128_factor16_broadcast_to_v4i128_factor4(ptr %i
41954186
; AVX512DQ-LABEL: vec512_i8_widen_to_i128_factor16_broadcast_to_v4i128_factor4:
41964187
; AVX512DQ: # %bb.0:
41974188
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm0
4198-
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
41994189
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
42004190
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
42014191
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4338,7 +4328,6 @@ define void @vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16(ptr %i
43384328
; AVX512F-LABEL: vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16:
43394329
; AVX512F: # %bb.0:
43404330
; AVX512F-NEXT: vpbroadcastw (%rdi), %ymm0
4341-
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
43424331
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
43434332
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
43444333
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4349,7 +4338,6 @@ define void @vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16(ptr %i
43494338
; AVX512DQ-LABEL: vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16:
43504339
; AVX512DQ: # %bb.0:
43514340
; AVX512DQ-NEXT: vpbroadcastw (%rdi), %ymm0
4352-
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
43534341
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
43544342
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
43554343
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4418,7 +4406,6 @@ define void @vec512_i16_widen_to_i64_factor4_broadcast_to_v8i64_factor8(ptr %in.
44184406
; AVX512F-LABEL: vec512_i16_widen_to_i64_factor4_broadcast_to_v8i64_factor8:
44194407
; AVX512F: # %bb.0:
44204408
; AVX512F-NEXT: vpbroadcastw (%rdi), %ymm0
4421-
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
44224409
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
44234410
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
44244411
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4429,7 +4416,6 @@ define void @vec512_i16_widen_to_i64_factor4_broadcast_to_v8i64_factor8(ptr %in.
44294416
; AVX512DQ-LABEL: vec512_i16_widen_to_i64_factor4_broadcast_to_v8i64_factor8:
44304417
; AVX512DQ: # %bb.0:
44314418
; AVX512DQ-NEXT: vpbroadcastw (%rdi), %ymm0
4432-
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
44334419
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
44344420
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
44354421
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4497,7 +4483,6 @@ define void @vec512_i16_widen_to_i128_factor8_broadcast_to_v4i128_factor4(ptr %i
44974483
; AVX512F-LABEL: vec512_i16_widen_to_i128_factor8_broadcast_to_v4i128_factor4:
44984484
; AVX512F: # %bb.0:
44994485
; AVX512F-NEXT: vpbroadcastw (%rdi), %ymm0
4500-
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
45014486
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
45024487
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
45034488
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4508,7 +4493,6 @@ define void @vec512_i16_widen_to_i128_factor8_broadcast_to_v4i128_factor4(ptr %i
45084493
; AVX512DQ-LABEL: vec512_i16_widen_to_i128_factor8_broadcast_to_v4i128_factor4:
45094494
; AVX512DQ: # %bb.0:
45104495
; AVX512DQ-NEXT: vpbroadcastw (%rdi), %ymm0
4511-
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
45124496
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
45134497
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
45144498
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4654,7 +4638,7 @@ define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in.
46544638
;
46554639
; AVX512F-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
46564640
; AVX512F: # %bb.0:
4657-
; AVX512F-NEXT: vpbroadcastd (%rdi), %zmm0
4641+
; AVX512F-NEXT: vpbroadcastd (%rdi), %ymm0
46584642
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
46594643
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
46604644
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4664,7 +4648,7 @@ define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in.
46644648
;
46654649
; AVX512DQ-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
46664650
; AVX512DQ: # %bb.0:
4667-
; AVX512DQ-NEXT: vpbroadcastd (%rdi), %zmm0
4651+
; AVX512DQ-NEXT: vpbroadcastd (%rdi), %ymm0
46684652
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
46694653
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
46704654
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4731,7 +4715,7 @@ define void @vec512_i32_widen_to_i128_factor4_broadcast_to_v4i128_factor4(ptr %i
47314715
;
47324716
; AVX512F-LABEL: vec512_i32_widen_to_i128_factor4_broadcast_to_v4i128_factor4:
47334717
; AVX512F: # %bb.0:
4734-
; AVX512F-NEXT: vpbroadcastd (%rdi), %zmm0
4718+
; AVX512F-NEXT: vpbroadcastd (%rdi), %ymm0
47354719
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
47364720
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
47374721
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4741,7 +4725,7 @@ define void @vec512_i32_widen_to_i128_factor4_broadcast_to_v4i128_factor4(ptr %i
47414725
;
47424726
; AVX512DQ-LABEL: vec512_i32_widen_to_i128_factor4_broadcast_to_v4i128_factor4:
47434727
; AVX512DQ: # %bb.0:
4744-
; AVX512DQ-NEXT: vpbroadcastd (%rdi), %zmm0
4728+
; AVX512DQ-NEXT: vpbroadcastd (%rdi), %ymm0
47454729
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
47464730
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
47474731
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4886,7 +4870,7 @@ define void @vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4(ptr %i
48864870
;
48874871
; AVX512F-LABEL: vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4:
48884872
; AVX512F: # %bb.0:
4889-
; AVX512F-NEXT: vpbroadcastq (%rdi), %zmm0
4873+
; AVX512F-NEXT: vpbroadcastq (%rdi), %ymm0
48904874
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
48914875
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
48924876
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
@@ -4896,7 +4880,7 @@ define void @vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4(ptr %i
48964880
;
48974881
; AVX512DQ-LABEL: vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4:
48984882
; AVX512DQ: # %bb.0:
4899-
; AVX512DQ-NEXT: vpbroadcastq (%rdi), %zmm0
4883+
; AVX512DQ-NEXT: vpbroadcastq (%rdi), %ymm0
49004884
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
49014885
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
49024886
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)

llvm/test/CodeGen/X86/avx512-vbroadcasti128.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -134,9 +134,8 @@ define <16 x i32> @test_broadcast_4i32_16i32(ptr%p) nounwind {
134134
define <32 x i16> @test_broadcast_8i16_32i16(ptr%p) nounwind {
135135
; X64-AVX512VL-LABEL: test_broadcast_8i16_32i16:
136136
; X64-AVX512VL: ## %bb.0:
137-
; X64-AVX512VL-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
137+
; X64-AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
138138
; X64-AVX512VL-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
139-
; X64-AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
140139
; X64-AVX512VL-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
141140
; X64-AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
142141
; X64-AVX512VL-NEXT: retq
@@ -149,9 +148,8 @@ define <32 x i16> @test_broadcast_8i16_32i16(ptr%p) nounwind {
149148
;
150149
; X64-AVX512DQVL-LABEL: test_broadcast_8i16_32i16:
151150
; X64-AVX512DQVL: ## %bb.0:
152-
; X64-AVX512DQVL-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
151+
; X64-AVX512DQVL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
153152
; X64-AVX512DQVL-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
154-
; X64-AVX512DQVL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
155153
; X64-AVX512DQVL-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
156154
; X64-AVX512DQVL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
157155
; X64-AVX512DQVL-NEXT: retq
@@ -164,9 +162,8 @@ define <32 x i16> @test_broadcast_8i16_32i16(ptr%p) nounwind {
164162
define <64 x i8> @test_broadcast_16i8_64i8(ptr%p) nounwind {
165163
; X64-AVX512VL-LABEL: test_broadcast_16i8_64i8:
166164
; X64-AVX512VL: ## %bb.0:
167-
; X64-AVX512VL-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
165+
; X64-AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
168166
; X64-AVX512VL-NEXT: vpaddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
169-
; X64-AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
170167
; X64-AVX512VL-NEXT: vpaddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
171168
; X64-AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
172169
; X64-AVX512VL-NEXT: retq
@@ -179,9 +176,8 @@ define <64 x i8> @test_broadcast_16i8_64i8(ptr%p) nounwind {
179176
;
180177
; X64-AVX512DQVL-LABEL: test_broadcast_16i8_64i8:
181178
; X64-AVX512DQVL: ## %bb.0:
182-
; X64-AVX512DQVL-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
179+
; X64-AVX512DQVL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
183180
; X64-AVX512DQVL-NEXT: vpaddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
184-
; X64-AVX512DQVL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
185181
; X64-AVX512DQVL-NEXT: vpaddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
186182
; X64-AVX512DQVL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
187183
; X64-AVX512DQVL-NEXT: retq

0 commit comments

Comments
 (0)