Skip to content

Commit ad77735

Browse files
committed
[IA] Remove recursive [de]interleaving support
Now that the loop vectorizer emits just a single llvm.vector.[de]interleaveN intrinsic, we no longer need to recognise recursively [de]interleaved intrinsics. No in-tree target currently has instructions to emit an interleaved access with a factor > 8, and I'm not aware of any other passes that emit recursive interleave patterns, so this code is effectively dead. Some tests have been converted from the recursive form to a single intrinsic, and others that are no longer needed (e.g. those exercising the recursive tree) have been deleted. This closes off the work started in llvm#139893.
1 parent 7ef77eb commit ad77735

10 files changed

+127
-905
lines changed

llvm/lib/CodeGen/InterleavedAccessPass.cpp

Lines changed: 21 additions & 178 deletions
Original file line numberDiff line numberDiff line change
@@ -629,173 +629,12 @@ static unsigned getIntrinsicFactor(const IntrinsicInst *II) {
629629
}
630630
}
631631

632-
// For an (de)interleave tree like this:
633-
//
634-
// A C B D
635-
// |___| |___|
636-
// |_____|
637-
// |
638-
// A B C D
639-
//
640-
// We will get ABCD at the end while the leaf operands/results
641-
// are ACBD, which are also what we initially collected in
642-
// getVectorInterleaveFactor / getVectorDeinterleaveFactor. But TLI
643-
// hooks (e.g. lowerDeinterleaveIntrinsicToLoad) expect ABCD, so we need
644-
// to reorder them by interleaving these values.
645-
static void interleaveLeafValues(MutableArrayRef<Value *> SubLeaves) {
646-
unsigned NumLeaves = SubLeaves.size();
647-
assert(isPowerOf2_32(NumLeaves) && NumLeaves > 1);
648-
if (NumLeaves == 2)
649-
return;
650-
651-
const unsigned HalfLeaves = NumLeaves / 2;
652-
// Visit the sub-trees.
653-
interleaveLeafValues(SubLeaves.take_front(HalfLeaves));
654-
interleaveLeafValues(SubLeaves.drop_front(HalfLeaves));
655-
656-
SmallVector<Value *, 8> Buffer;
657-
// a0 a1 a2 a3 b0 b1 b2 b3
658-
// -> a0 b0 a1 b1 a2 b2 a3 b3
659-
for (unsigned i = 0U; i < NumLeaves; ++i)
660-
Buffer.push_back(SubLeaves[i / 2 + (i % 2 ? HalfLeaves : 0)]);
661-
662-
llvm::copy(Buffer, SubLeaves.begin());
663-
}
664-
665-
static bool
666-
getVectorInterleaveFactor(IntrinsicInst *II, SmallVectorImpl<Value *> &Operands,
667-
SmallVectorImpl<Instruction *> &DeadInsts) {
668-
assert(isInterleaveIntrinsic(II->getIntrinsicID()));
669-
670-
// Visit with BFS
671-
SmallVector<IntrinsicInst *, 8> Queue;
672-
Queue.push_back(II);
673-
while (!Queue.empty()) {
674-
IntrinsicInst *Current = Queue.front();
675-
Queue.erase(Queue.begin());
676-
677-
// All the intermediate intrinsics will be deleted.
678-
DeadInsts.push_back(Current);
679-
680-
for (unsigned I = 0; I < getIntrinsicFactor(Current); ++I) {
681-
Value *Op = Current->getOperand(I);
682-
if (auto *OpII = dyn_cast<IntrinsicInst>(Op))
683-
if (OpII->getIntrinsicID() == Intrinsic::vector_interleave2) {
684-
Queue.push_back(OpII);
685-
continue;
686-
}
687-
688-
// If this is not a perfectly balanced tree, the leaf
689-
// result types would be different.
690-
if (!Operands.empty() && Op->getType() != Operands.back()->getType())
691-
return false;
692-
693-
Operands.push_back(Op);
694-
}
695-
}
696-
697-
const unsigned Factor = Operands.size();
698-
// Currently we only recognize factors 2...8 and other powers of 2.
699-
// FIXME: should we assert here instead?
700-
if (Factor <= 1 ||
701-
(!isPowerOf2_32(Factor) && Factor != getIntrinsicFactor(II)))
702-
return false;
703-
704-
// Recursively interleaved factors need to have their values reordered
705-
// TODO: Remove once the loop vectorizer no longer recursively interleaves
706-
// factors 4 + 8
707-
if (isPowerOf2_32(Factor) && getIntrinsicFactor(II) == 2)
708-
interleaveLeafValues(Operands);
709-
return true;
710-
}
711-
712-
static bool
713-
getVectorDeinterleaveFactor(IntrinsicInst *II,
714-
SmallVectorImpl<Value *> &Results,
715-
SmallVectorImpl<Instruction *> &DeadInsts) {
716-
assert(isDeinterleaveIntrinsic(II->getIntrinsicID()));
717-
using namespace PatternMatch;
718-
if (!II->hasNUses(getIntrinsicFactor(II)))
719-
return false;
720-
721-
// Visit with BFS
722-
SmallVector<IntrinsicInst *, 8> Queue;
723-
Queue.push_back(II);
724-
while (!Queue.empty()) {
725-
IntrinsicInst *Current = Queue.front();
726-
Queue.erase(Queue.begin());
727-
assert(Current->hasNUses(getIntrinsicFactor(Current)));
728-
729-
// All the intermediate intrinsics will be deleted from the bottom-up.
730-
DeadInsts.insert(DeadInsts.begin(), Current);
731-
732-
SmallVector<ExtractValueInst *> EVs(getIntrinsicFactor(Current), nullptr);
733-
for (User *Usr : Current->users()) {
734-
if (!isa<ExtractValueInst>(Usr))
735-
return 0;
736-
737-
auto *EV = cast<ExtractValueInst>(Usr);
738-
// Intermediate ExtractValue instructions will also be deleted.
739-
DeadInsts.insert(DeadInsts.begin(), EV);
740-
ArrayRef<unsigned> Indices = EV->getIndices();
741-
if (Indices.size() != 1)
742-
return false;
743-
744-
if (!EVs[Indices[0]])
745-
EVs[Indices[0]] = EV;
746-
else
747-
return false;
748-
}
749-
750-
// We have legal indices. At this point we're either going
751-
// to continue the traversal or push the leaf values into Results.
752-
for (ExtractValueInst *EV : EVs) {
753-
// Continue the traversal. We're playing safe here and matching only the
754-
// expression consisting of a perfectly balanced binary tree in which all
755-
// intermediate values are only used once.
756-
if (EV->hasOneUse() &&
757-
match(EV->user_back(),
758-
m_Intrinsic<Intrinsic::vector_deinterleave2>()) &&
759-
EV->user_back()->hasNUses(2)) {
760-
auto *EVUsr = cast<IntrinsicInst>(EV->user_back());
761-
Queue.push_back(EVUsr);
762-
continue;
763-
}
764-
765-
// If this is not a perfectly balanced tree, the leaf
766-
// result types would be different.
767-
if (!Results.empty() && EV->getType() != Results.back()->getType())
768-
return false;
769-
770-
// Save the leaf value.
771-
Results.push_back(EV);
772-
}
773-
}
774-
775-
const unsigned Factor = Results.size();
776-
// Currently we only recognize factors of 2...8 and other powers of 2.
777-
// FIXME: should we assert here instead?
778-
if (Factor <= 1 ||
779-
(!isPowerOf2_32(Factor) && Factor != getIntrinsicFactor(II)))
780-
return 0;
781-
782-
// Recursively interleaved factors need to have their values reordered
783-
// TODO: Remove once the loop vectorizer no longer recursively interleaves
784-
// factors 4 + 8
785-
if (isPowerOf2_32(Factor) && getIntrinsicFactor(II) == 2)
786-
interleaveLeafValues(Results);
787-
return true;
788-
}
789-
790632
static Value *getMask(Value *WideMask, unsigned Factor,
791633
ElementCount LeafValueEC) {
792634
if (auto *IMI = dyn_cast<IntrinsicInst>(WideMask)) {
793-
SmallVector<Value *, 8> Operands;
794-
SmallVector<Instruction *, 8> DeadInsts;
795-
if (getVectorInterleaveFactor(IMI, Operands, DeadInsts)) {
796-
assert(!Operands.empty());
797-
if (Operands.size() == Factor && llvm::all_equal(Operands))
798-
return Operands[0];
635+
if (isInterleaveIntrinsic(IMI->getIntrinsicID()) &&
636+
getIntrinsicFactor(IMI) == Factor && llvm::all_equal(IMI->args())) {
637+
return IMI->getArgOperand(0);
799638
}
800639
}
801640

@@ -830,13 +669,19 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
830669
if (!LoadedVal->hasOneUse() || !isa<LoadInst, VPIntrinsic>(LoadedVal))
831670
return false;
832671

833-
SmallVector<Value *, 8> DeinterleaveValues;
834-
SmallVector<Instruction *, 8> DeinterleaveDeadInsts;
835-
if (!getVectorDeinterleaveFactor(DI, DeinterleaveValues,
836-
DeinterleaveDeadInsts))
672+
const unsigned Factor = getIntrinsicFactor(DI);
673+
if (!DI->hasNUses(Factor))
837674
return false;
838-
839-
const unsigned Factor = DeinterleaveValues.size();
675+
SmallVector<Value *, 8> DeinterleaveValues(Factor);
676+
for (auto *User : DI->users()) {
677+
auto *Extract = dyn_cast<ExtractValueInst>(User);
678+
if (!Extract || Extract->getNumIndices() != 1)
679+
return false;
680+
unsigned Idx = Extract->getIndices()[0];
681+
if (DeinterleaveValues[Idx])
682+
return false;
683+
DeinterleaveValues[Idx] = Extract;
684+
}
840685

841686
if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadedVal)) {
842687
if (VPLoad->getIntrinsicID() != Intrinsic::vp_load)
@@ -869,7 +714,9 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
869714
return false;
870715
}
871716

872-
DeadInsts.insert_range(DeinterleaveDeadInsts);
717+
for (Value *V : DeinterleaveValues)
718+
DeadInsts.insert(cast<Instruction>(V));
719+
DeadInsts.insert(DI);
873720
// We now have a target-specific load, so delete the old one.
874721
DeadInsts.insert(cast<Instruction>(LoadedVal));
875722
return true;
@@ -883,12 +730,8 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
883730
if (!isa<StoreInst, VPIntrinsic>(StoredBy))
884731
return false;
885732

886-
SmallVector<Value *, 8> InterleaveValues;
887-
SmallVector<Instruction *, 8> InterleaveDeadInsts;
888-
if (!getVectorInterleaveFactor(II, InterleaveValues, InterleaveDeadInsts))
889-
return false;
890-
891-
const unsigned Factor = InterleaveValues.size();
733+
SmallVector<Value *, 8> InterleaveValues(II->args());
734+
const unsigned Factor = getIntrinsicFactor(II);
892735

893736
if (auto *VPStore = dyn_cast<VPIntrinsic>(StoredBy)) {
894737
if (VPStore->getIntrinsicID() != Intrinsic::vp_store)
@@ -922,7 +765,7 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
922765

923766
// We now have a target-specific store, so delete the old one.
924767
DeadInsts.insert(cast<Instruction>(StoredBy));
925-
DeadInsts.insert_range(InterleaveDeadInsts);
768+
DeadInsts.insert(II);
926769
return true;
927770
}
928771

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll

Lines changed: 0 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -293,31 +293,6 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_fact
293293
ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3
294294
}
295295

296-
; TODO: Remove once recursive deinterleaving support is removed
297-
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor4_recursive(ptr %p) {
298-
; CHECK-LABEL: vector_deinterleave_load_factor4_recursive:
299-
; CHECK: # %bb.0:
300-
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
301-
; CHECK-NEXT: vlseg4e8.v v8, (a0)
302-
; CHECK-NEXT: ret
303-
%vec = load <32 x i8>, ptr %p
304-
%d0 = call {<16 x i8>, <16 x i8>} @llvm.vector.deinterleave2.v32i8(<32 x i8> %vec)
305-
%d0.0 = extractvalue { <16 x i8>, <16 x i8> } %d0, 0
306-
%d0.1 = extractvalue { <16 x i8>, <16 x i8> } %d0, 1
307-
%d1 = call {<8 x i8>, <8 x i8>} @llvm.vector.deinterleave2.v16i8(<16 x i8> %d0.0)
308-
%t0 = extractvalue { <8 x i8>, <8 x i8> } %d1, 0
309-
%t2 = extractvalue { <8 x i8>, <8 x i8> } %d1, 1
310-
%d2 = call {<8 x i8>, <8 x i8>} @llvm.vector.deinterleave2.v16i8(<16 x i8> %d0.1)
311-
%t1 = extractvalue { <8 x i8>, <8 x i8> } %d2, 0
312-
%t3 = extractvalue { <8 x i8>, <8 x i8> } %d2, 1
313-
314-
%res0 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } undef, <8 x i8> %t0, 0
315-
%res1 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res0, <8 x i8> %t1, 1
316-
%res2 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res1, <8 x i8> %t2, 2
317-
%res3 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res2, <8 x i8> %t3, 3
318-
ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3
319-
}
320-
321296
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor5(ptr %p) {
322297
; CHECK-LABEL: vector_deinterleave_load_factor5:
323298
; CHECK: # %bb.0:
@@ -414,45 +389,3 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <
414389
%res7 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res6, <8 x i8> %t6, 7
415390
ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res7
416391
}
417-
418-
; TODO: Remove once recursive deinterleaving support is removed
419-
define {<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>} @vector_deinterleave_load_factor8_recursive(ptr %ptr) {
420-
; CHECK-LABEL: vector_deinterleave_load_factor8_recursive:
421-
; CHECK: # %bb.0:
422-
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
423-
; CHECK-NEXT: vlseg8e32.v v8, (a0)
424-
; CHECK-NEXT: ret
425-
%vec = load <16 x i32>, ptr %ptr
426-
%d0 = call { <8 x i32>, <8 x i32> } @llvm.vector.deinterleave2.v16i32(<16 x i32> %vec)
427-
%d0.0 = extractvalue { <8 x i32>, <8 x i32> } %d0, 0
428-
%d0.1 = extractvalue { <8 x i32>, <8 x i32> } %d0, 1
429-
%d1 = call { <4 x i32>, <4 x i32> } @llvm.vector.deinterleave2.v8i32(<8 x i32> %d0.0)
430-
%d1.0 = extractvalue { <4 x i32>, <4 x i32> } %d1, 0
431-
%d1.1 = extractvalue { <4 x i32>, <4 x i32> } %d1, 1
432-
%d2 = call { <4 x i32>, <4 x i32> } @llvm.vector.deinterleave2.v8i32(<8 x i32> %d0.1)
433-
%d2.0 = extractvalue { <4 x i32>, <4 x i32> } %d2, 0
434-
%d2.1 = extractvalue { <4 x i32>, <4 x i32> } %d2, 1
435-
436-
%d3 = call { <2 x i32>, <2 x i32> } @llvm.vector.deinterleave2.v4i32(<4 x i32> %d1.0)
437-
%t0 = extractvalue { <2 x i32>, <2 x i32> } %d3, 0
438-
%t4 = extractvalue { <2 x i32>, <2 x i32> } %d3, 1
439-
%d4 = call { <2 x i32>, <2 x i32> } @llvm.vector.deinterleave2.v4i32(<4 x i32> %d1.1)
440-
%t2 = extractvalue { <2 x i32>, <2 x i32> } %d4, 0
441-
%t6 = extractvalue { <2 x i32>, <2 x i32> } %d4, 1
442-
%d5 = call { <2 x i32>, <2 x i32> } @llvm.vector.deinterleave2.v4i32(<4 x i32> %d2.0)
443-
%t1 = extractvalue { <2 x i32>, <2 x i32> } %d5, 0
444-
%t5 = extractvalue { <2 x i32>, <2 x i32> } %d5, 1
445-
%d6 = call { <2 x i32>, <2 x i32> } @llvm.vector.deinterleave2.v4i32(<4 x i32> %d2.1)
446-
%t3 = extractvalue { <2 x i32>, <2 x i32> } %d6, 0
447-
%t7 = extractvalue { <2 x i32>, <2 x i32> } %d6, 1
448-
449-
%res0 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } undef, <2 x i32> %t0, 0
450-
%res1 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res0, <2 x i32> %t1, 1
451-
%res2 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res1, <2 x i32> %t2, 2
452-
%res3 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res2, <2 x i32> %t3, 3
453-
%res4 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res3, <2 x i32> %t4, 4
454-
%res5 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res4, <2 x i32> %t5, 5
455-
%res6 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res5, <2 x i32> %t6, 6
456-
%res7 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res6, <2 x i32> %t7, 7
457-
ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res7
458-
}

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll

Lines changed: 0 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -203,20 +203,6 @@ define void @vector_interleave_store_factor4(<4 x i32> %a, <4 x i32> %b, <4 x i3
203203
ret void
204204
}
205205

206-
; TODO: Remove once recursive interleaving support is removed
207-
define void @vector_interleave_store_factor4_recursive(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, ptr %p) {
208-
; CHECK-LABEL: vector_interleave_store_factor4_recursive:
209-
; CHECK: # %bb.0:
210-
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
211-
; CHECK-NEXT: vsseg4e32.v v8, (a0)
212-
; CHECK-NEXT: ret
213-
%v0 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %a, <4 x i32> %c)
214-
%v1 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %b, <4 x i32> %d)
215-
%v2 = call <16 x i32> @llvm.vector.interleave2.v16i32(<8 x i32> %v0, <8 x i32> %v1)
216-
store <16 x i32> %v2, ptr %p
217-
ret void
218-
}
219-
220206
define void @vector_interleave_store_factor5(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, ptr %p) {
221207
; CHECK-LABEL: vector_interleave_store_factor5:
222208
; CHECK: # %bb.0:
@@ -260,23 +246,3 @@ define void @vector_interleave_store_factor8(<4 x i32> %a, <4 x i32> %b, <4 x i3
260246
store <32 x i32> %v, ptr %p
261247
ret void
262248
}
263-
264-
; TODO: Remove once recursive interleaving support is removed
265-
define void @vector_interleave_store_factor8_recursive(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, <4 x i32> %f, <4 x i32> %g, <4 x i32> %h, ptr %p) {
266-
; CHECK-LABEL: vector_interleave_store_factor8_recursive:
267-
; CHECK: # %bb.0:
268-
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
269-
; CHECK-NEXT: vsseg8e32.v v8, (a0)
270-
; CHECK-NEXT: ret
271-
%v0 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %a, <4 x i32> %e)
272-
%v1 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %c, <4 x i32> %g)
273-
%v2 = call <16 x i32> @llvm.vector.interleave2.v16i32(<8 x i32> %v0, <8 x i32> %v1)
274-
275-
%v3 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %b, <4 x i32> %f)
276-
%v4 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %d, <4 x i32> %h)
277-
%v5 = call <16 x i32> @llvm.vector.interleave2.v16i32(<8 x i32> %v3, <8 x i32> %v4)
278-
279-
%v6 = call <32 x i32> @llvm.vector.interleave2.v32i32(<16 x i32> %v2, <16 x i32> %v5)
280-
store <32 x i32> %v6, ptr %p
281-
ret void
282-
}

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -302,15 +302,11 @@ define {<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>} @vpload_factor4_intrinsics(p
302302
; CHECK-NEXT: vlseg4e32.v v8, (a0)
303303
; CHECK-NEXT: ret
304304
%wide.masked.load = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 8)
305-
%d0 = call { <4 x i32>, <4 x i32> } @llvm.vector.deinterleave2.v8i32(<8 x i32> %wide.masked.load)
306-
%d0.0 = extractvalue { <4 x i32>, <4 x i32> } %d0, 0
307-
%d0.1 = extractvalue { <4 x i32>, <4 x i32> } %d0, 1
308-
%d1 = call { <2 x i32>, <2 x i32> } @llvm.vector.deinterleave2.v4i32(<4 x i32> %d0.0)
309-
%t0 = extractvalue { <2 x i32>, <2 x i32> } %d1, 0
310-
%t2 = extractvalue { <2 x i32>, <2 x i32> } %d1, 1
311-
%d2 = call { <2 x i32>, <2 x i32> } @llvm.vector.deinterleave2.v4i32(<4 x i32> %d0.1)
312-
%t1 = extractvalue { <2 x i32>, <2 x i32> } %d2, 0
313-
%t3 = extractvalue { <2 x i32>, <2 x i32> } %d2, 1
305+
%d = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.vector.deinterleave4.v8i32(<8 x i32> %wide.masked.load)
306+
%t0 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %d, 0
307+
%t1 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %d, 1
308+
%t2 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %d, 2
309+
%t3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %d, 3
314310

315311
%res0 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } poison, <2 x i32> %t0, 0
316312
%res1 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res0, <2 x i32> %t1, 1

0 commit comments

Comments
 (0)