Skip to content

Commit 6d88343

Browse files
authored
[IA] Add support for [de]interleave{4,6,8} (#141512)
This teaches the interleaved access pass to the lower the intrinsics for factors 4,6 and 8 added in #139893 to target intrinsics. Because factors 4 and 8 could either have been recursively [de]interleaved or have just been a single intrinsic, we need to check that it's the former it before reshuffling around the values via interleaveLeafValues. After this patch, we can teach the loop vectorizer to emit a single interleave intrinsic for factors 2 through to 8, and then we can remove the recursive interleaving matching in interleaved access pass.
1 parent 5ab944a commit 6d88343

File tree

6 files changed

+531
-42
lines changed

6 files changed

+531
-42
lines changed

llvm/lib/CodeGen/InterleavedAccessPass.cpp

Lines changed: 57 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -571,6 +571,36 @@ bool InterleavedAccessImpl::lowerInterleavedStore(
571571
return true;
572572
}
573573

574+
static bool isInterleaveIntrinsic(Intrinsic::ID IID) {
575+
switch (IID) {
576+
case Intrinsic::vector_interleave2:
577+
case Intrinsic::vector_interleave3:
578+
case Intrinsic::vector_interleave4:
579+
case Intrinsic::vector_interleave5:
580+
case Intrinsic::vector_interleave6:
581+
case Intrinsic::vector_interleave7:
582+
case Intrinsic::vector_interleave8:
583+
return true;
584+
default:
585+
return false;
586+
}
587+
}
588+
589+
static bool isDeinterleaveIntrinsic(Intrinsic::ID IID) {
590+
switch (IID) {
591+
case Intrinsic::vector_deinterleave2:
592+
case Intrinsic::vector_deinterleave3:
593+
case Intrinsic::vector_deinterleave4:
594+
case Intrinsic::vector_deinterleave5:
595+
case Intrinsic::vector_deinterleave6:
596+
case Intrinsic::vector_deinterleave7:
597+
case Intrinsic::vector_deinterleave8:
598+
return true;
599+
default:
600+
return false;
601+
}
602+
}
603+
574604
static unsigned getIntrinsicFactor(const IntrinsicInst *II) {
575605
switch (II->getIntrinsicID()) {
576606
case Intrinsic::vector_deinterleave2:
@@ -579,12 +609,21 @@ static unsigned getIntrinsicFactor(const IntrinsicInst *II) {
579609
case Intrinsic::vector_deinterleave3:
580610
case Intrinsic::vector_interleave3:
581611
return 3;
612+
case Intrinsic::vector_deinterleave4:
613+
case Intrinsic::vector_interleave4:
614+
return 4;
582615
case Intrinsic::vector_deinterleave5:
583616
case Intrinsic::vector_interleave5:
584617
return 5;
618+
case Intrinsic::vector_deinterleave6:
619+
case Intrinsic::vector_interleave6:
620+
return 6;
585621
case Intrinsic::vector_deinterleave7:
586622
case Intrinsic::vector_interleave7:
587623
return 7;
624+
case Intrinsic::vector_deinterleave8:
625+
case Intrinsic::vector_interleave8:
626+
return 8;
588627
default:
589628
llvm_unreachable("Unexpected intrinsic");
590629
}
@@ -605,10 +644,9 @@ static unsigned getIntrinsicFactor(const IntrinsicInst *II) {
605644
// to reorder them by interleaving these values.
606645
static void interleaveLeafValues(MutableArrayRef<Value *> SubLeaves) {
607646
unsigned NumLeaves = SubLeaves.size();
608-
if (NumLeaves == 2 || !isPowerOf2_64(NumLeaves))
609-
return;
610-
611647
assert(isPowerOf2_32(NumLeaves) && NumLeaves > 1);
648+
if (NumLeaves == 2)
649+
return;
612650

613651
const unsigned HalfLeaves = NumLeaves / 2;
614652
// Visit the sub-trees.
@@ -627,10 +665,7 @@ static void interleaveLeafValues(MutableArrayRef<Value *> SubLeaves) {
627665
static bool
628666
getVectorInterleaveFactor(IntrinsicInst *II, SmallVectorImpl<Value *> &Operands,
629667
SmallVectorImpl<Instruction *> &DeadInsts) {
630-
assert(II->getIntrinsicID() == Intrinsic::vector_interleave2 ||
631-
II->getIntrinsicID() == Intrinsic::vector_interleave3 ||
632-
II->getIntrinsicID() == Intrinsic::vector_interleave5 ||
633-
II->getIntrinsicID() == Intrinsic::vector_interleave7);
668+
assert(isInterleaveIntrinsic(II->getIntrinsicID()));
634669

635670
// Visit with BFS
636671
SmallVector<IntrinsicInst *, 8> Queue;
@@ -660,24 +695,25 @@ getVectorInterleaveFactor(IntrinsicInst *II, SmallVectorImpl<Value *> &Operands,
660695
}
661696

662697
const unsigned Factor = Operands.size();
663-
// Currently we only recognize factors of 3, 5, 7, and powers of 2.
698+
// Currently we only recognize factors 2...8 and other powers of 2.
664699
// FIXME: should we assert here instead?
665700
if (Factor <= 1 ||
666701
(!isPowerOf2_32(Factor) && Factor != getIntrinsicFactor(II)))
667702
return false;
668703

669-
interleaveLeafValues(Operands);
704+
// Recursively interleaved factors need to have their values reordered
705+
// TODO: Remove once the loop vectorizer no longer recursively interleaves
706+
// factors 4 + 8
707+
if (isPowerOf2_32(Factor) && getIntrinsicFactor(II) == 2)
708+
interleaveLeafValues(Operands);
670709
return true;
671710
}
672711

673712
static bool
674713
getVectorDeinterleaveFactor(IntrinsicInst *II,
675714
SmallVectorImpl<Value *> &Results,
676715
SmallVectorImpl<Instruction *> &DeadInsts) {
677-
assert(II->getIntrinsicID() == Intrinsic::vector_deinterleave2 ||
678-
II->getIntrinsicID() == Intrinsic::vector_deinterleave3 ||
679-
II->getIntrinsicID() == Intrinsic::vector_deinterleave5 ||
680-
II->getIntrinsicID() == Intrinsic::vector_deinterleave7);
716+
assert(isDeinterleaveIntrinsic(II->getIntrinsicID()));
681717
using namespace PatternMatch;
682718
if (!II->hasNUses(getIntrinsicFactor(II)))
683719
return false;
@@ -737,13 +773,17 @@ getVectorDeinterleaveFactor(IntrinsicInst *II,
737773
}
738774

739775
const unsigned Factor = Results.size();
740-
// Currently we only recognize factors of 3, 5, 7, and powers of 2.
776+
// Currently we only recognize factors of 2...8 and other powers of 2.
741777
// FIXME: should we assert here instead?
742778
if (Factor <= 1 ||
743779
(!isPowerOf2_32(Factor) && Factor != getIntrinsicFactor(II)))
744780
return 0;
745781

746-
interleaveLeafValues(Results);
782+
// Recursively interleaved factors need to have their values reordered
783+
// TODO: Remove once the loop vectorizer no longer recursively interleaves
784+
// factors 4 + 8
785+
if (isPowerOf2_32(Factor) && getIntrinsicFactor(II) == 2)
786+
interleaveLeafValues(Results);
747787
return true;
748788
}
749789

@@ -902,24 +942,10 @@ bool InterleavedAccessImpl::runOnFunction(Function &F) {
902942
Changed |= lowerInterleavedStore(&I, DeadInsts);
903943

904944
if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
905-
// At present, we only have intrinsics to represent (de)interleaving
906-
// with a factor of 2,3,5 and 7.
907-
switch (II->getIntrinsicID()) {
908-
case Intrinsic::vector_deinterleave2:
909-
case Intrinsic::vector_deinterleave3:
910-
case Intrinsic::vector_deinterleave5:
911-
case Intrinsic::vector_deinterleave7:
945+
if (isDeinterleaveIntrinsic(II->getIntrinsicID()))
912946
Changed |= lowerDeinterleaveIntrinsic(II, DeadInsts);
913-
break;
914-
case Intrinsic::vector_interleave2:
915-
case Intrinsic::vector_interleave3:
916-
case Intrinsic::vector_interleave5:
917-
case Intrinsic::vector_interleave7:
947+
else if (isInterleaveIntrinsic(II->getIntrinsicID()))
918948
Changed |= lowerInterleaveIntrinsic(II, DeadInsts);
919-
break;
920-
default:
921-
break;
922-
}
923949
}
924950
}
925951

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll

Lines changed: 75 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,26 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_fact
279279
; CHECK: # %bb.0:
280280
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
281281
; CHECK-NEXT: vlseg4e8.v v8, (a0)
282+
; CHECK-NEXT: ret
283+
%vec = load <32 x i8>, ptr %p
284+
%d0 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.vector.deinterleave4(<32 x i8> %vec)
285+
%t0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 0
286+
%t1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 1
287+
%t2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 2
288+
%t3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 3
289+
%res0 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } poison, <8 x i8> %t0, 0
290+
%res1 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res0, <8 x i8> %t1, 1
291+
%res2 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res1, <8 x i8> %t2, 2
292+
%res3 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res2, <8 x i8> %t3, 3
293+
ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3
294+
}
295+
296+
; TODO: Remove once recursive deinterleaving support is removed
297+
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor4_recursive(ptr %p) {
298+
; CHECK-LABEL: vector_deinterleave_load_factor4_recursive:
299+
; CHECK: # %bb.0:
300+
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
301+
; CHECK-NEXT: vlseg4e8.v v8, (a0)
282302
; CHECK-NEXT: ret
283303
%vec = load <32 x i8>, ptr %p
284304
%d0 = call {<16 x i8>, <16 x i8>} @llvm.vector.deinterleave2.v32i8(<32 x i8> %vec)
@@ -319,6 +339,29 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave
319339
ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res4
320340
}
321341

342+
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor6(ptr %p) {
343+
; CHECK-LABEL: vector_deinterleave_load_factor6:
344+
; CHECK: # %bb.0:
345+
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
346+
; CHECK-NEXT: vlseg6e8.v v8, (a0)
347+
; CHECK-NEXT: ret
348+
%vec = load <48 x i8>, ptr %p
349+
%d0 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.vector.deinterleave6(<48 x i8> %vec)
350+
%t0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 0
351+
%t1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 1
352+
%t2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 2
353+
%t3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 3
354+
%t4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 4
355+
%t5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 5
356+
%res0 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } poison, <8 x i8> %t0, 0
357+
%res1 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res0, <8 x i8> %t1, 1
358+
%res2 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res1, <8 x i8> %t2, 2
359+
%res3 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res2, <8 x i8> %t3, 3
360+
%res4 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3, <8 x i8> %t4, 4
361+
%res5 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res4, <8 x i8> %t5, 5
362+
ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res5
363+
}
364+
322365
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor7(ptr %p) {
323366
; CHECK-LABEL: vector_deinterleave_load_factor7:
324367
; CHECK: # %bb.0:
@@ -339,14 +382,43 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
339382
%res2 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res1, <8 x i8> %t2, 2
340383
%res3 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res2, <8 x i8> %t3, 3
341384
%res4 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3, <8 x i8> %t4, 4
342-
%res5 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3, <8 x i8> %t5, 5
343-
%res6 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3, <8 x i8> %t6, 6
385+
%res5 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res4, <8 x i8> %t5, 5
386+
%res6 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res5, <8 x i8> %t6, 6
344387
ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res6
345388
}
346389

347-
define {<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>} @vector_deinterleave_load_factor8(ptr %ptr) {
390+
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor8(ptr %p) {
348391
; CHECK-LABEL: vector_deinterleave_load_factor8:
349392
; CHECK: # %bb.0:
393+
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
394+
; CHECK-NEXT: vlseg8e8.v v8, (a0)
395+
; CHECK-NEXT: vmv1r.v v15, v14
396+
; CHECK-NEXT: ret
397+
%vec = load <64 x i8>, ptr %p
398+
%d0 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.vector.deinterleave8(<64 x i8> %vec)
399+
%t0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 0
400+
%t1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 1
401+
%t2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 2
402+
%t3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 3
403+
%t4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 4
404+
%t5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 5
405+
%t6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 6
406+
%t7 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 7
407+
%res0 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } poison, <8 x i8> %t0, 0
408+
%res1 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res0, <8 x i8> %t1, 1
409+
%res2 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res1, <8 x i8> %t2, 2
410+
%res3 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res2, <8 x i8> %t3, 3
411+
%res4 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3, <8 x i8> %t4, 4
412+
%res5 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res4, <8 x i8> %t5, 5
413+
%res6 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res5, <8 x i8> %t6, 6
414+
%res7 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res6, <8 x i8> %t6, 7
415+
ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res7
416+
}
417+
418+
; TODO: Remove once recursive deinterleaving support is removed
419+
define {<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>} @vector_deinterleave_load_factor8_recursive(ptr %ptr) {
420+
; CHECK-LABEL: vector_deinterleave_load_factor8_recursive:
421+
; CHECK: # %bb.0:
350422
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
351423
; CHECK-NEXT: vlseg8e32.v v8, (a0)
352424
; CHECK-NEXT: ret

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,18 @@ define void @vector_interleave_store_factor4(<4 x i32> %a, <4 x i32> %b, <4 x i3
197197
; CHECK: # %bb.0:
198198
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
199199
; CHECK-NEXT: vsseg4e32.v v8, (a0)
200+
; CHECK-NEXT: ret
201+
%v = call <16 x i32> @llvm.vector.interleave4(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d)
202+
store <16 x i32> %v, ptr %p
203+
ret void
204+
}
205+
206+
; TODO: Remove once recursive interleaving support is removed
207+
define void @vector_interleave_store_factor4_recursive(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, ptr %p) {
208+
; CHECK-LABEL: vector_interleave_store_factor4_recursive:
209+
; CHECK: # %bb.0:
210+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
211+
; CHECK-NEXT: vsseg4e32.v v8, (a0)
200212
; CHECK-NEXT: ret
201213
%v0 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %a, <4 x i32> %c)
202214
%v1 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %b, <4 x i32> %d)
@@ -216,6 +228,17 @@ define void @vector_interleave_store_factor5(<4 x i32> %a, <4 x i32> %b, <4 x i3
216228
ret void
217229
}
218230

231+
define void @vector_interleave_store_factor6(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, <4 x i32> %f, ptr %p) {
232+
; CHECK-LABEL: vector_interleave_store_factor6:
233+
; CHECK: # %bb.0:
234+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
235+
; CHECK-NEXT: vsseg6e32.v v8, (a0)
236+
; CHECK-NEXT: ret
237+
%v = call <24 x i32> @llvm.vector.interleave6(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, <4 x i32> %f)
238+
store <24 x i32> %v, ptr %p
239+
ret void
240+
}
241+
219242
define void @vector_interleave_store_factor7(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, <4 x i32> %f, <4 x i32> %g, ptr %p) {
220243
; CHECK-LABEL: vector_interleave_store_factor7:
221244
; CHECK: # %bb.0:
@@ -232,6 +255,18 @@ define void @vector_interleave_store_factor8(<4 x i32> %a, <4 x i32> %b, <4 x i3
232255
; CHECK: # %bb.0:
233256
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
234257
; CHECK-NEXT: vsseg8e32.v v8, (a0)
258+
; CHECK-NEXT: ret
259+
%v = call <32 x i32> @llvm.vector.interleave8(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, <4 x i32> %f, <4 x i32> %g, <4 x i32> %h)
260+
store <32 x i32> %v, ptr %p
261+
ret void
262+
}
263+
264+
; TODO: Remove once recursive interleaving support is removed
265+
define void @vector_interleave_store_factor8_recursive(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, <4 x i32> %f, <4 x i32> %g, <4 x i32> %h, ptr %p) {
266+
; CHECK-LABEL: vector_interleave_store_factor8_recursive:
267+
; CHECK: # %bb.0:
268+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
269+
; CHECK-NEXT: vsseg8e32.v v8, (a0)
235270
; CHECK-NEXT: ret
236271
%v0 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %a, <4 x i32> %e)
237272
%v1 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %c, <4 x i32> %g)

0 commit comments

Comments
 (0)