Skip to content

[SelectionDAG][AArch64] Legalize power of 2 vector.[de]interleaveN #141513

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jun 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3472,6 +3472,59 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(TLI.expandVectorSplice(Node, DAG));
break;
}
case ISD::VECTOR_DEINTERLEAVE: {
  // Expands a deinterleave whose operand count is a power of two greater than
  // two into two half-factor deinterleaves followed by factor-2 deinterleaves.
  // For any other operand count this case adds nothing to Results.
  const unsigned NumFactors = Node->getNumOperands();
  if (!(NumFactors > 2 && isPowerOf2_32(NumFactors)))
    break;
  const unsigned HalfFactor = NumFactors / 2;
  EVT ResultVT = Node->getValueType(0);
  SmallVector<EVT> HalfResultVTs(HalfFactor, ResultVT);
  SmallVector<SDValue, 8> Operands(Node->op_begin(), Node->op_end());
  ArrayRef<SDValue> AllOperands(Operands);
  // Stage 1: deinterleave the front and back halves of the operands at
  // HalfFactor, so every intermediate value still holds two factors
  // interleaved:
  //   a0b0 c0d0 a1b1 c1d1 -> [a0c0 b0d0] [a1c1 b1d1]
  SDValue FrontHalf =
      DAG.getNode(ISD::VECTOR_DEINTERLEAVE, dl, HalfResultVTs,
                  AllOperands.take_front(HalfFactor));
  SDValue BackHalf =
      DAG.getNode(ISD::VECTOR_DEINTERLEAVE, dl, HalfResultVTs,
                  AllOperands.take_back(HalfFactor));
  // Stage 2: pair up matching results of the two halves and split the
  // remaining two factors apart:
  //   [a0c0 a1c1] [b0d0 b1d1] -> a0a1 b0b1 c0c1 d0d1
  // Results are written by index (not appended) because each factor-2
  // deinterleave yields result Idx and result Idx + HalfFactor.
  Results.resize(NumFactors);
  for (unsigned Idx = 0; Idx != HalfFactor; ++Idx) {
    SDValue Pair =
        DAG.getNode(ISD::VECTOR_DEINTERLEAVE, dl, {ResultVT, ResultVT},
                    {FrontHalf.getValue(Idx), BackHalf.getValue(Idx)});
    Results[Idx] = Pair.getValue(0);
    Results[Idx + HalfFactor] = Pair.getValue(1);
  }
  break;
}
case ISD::VECTOR_INTERLEAVE: {
  // Expands an interleave whose operand count is a power of two greater than
  // two into factor-2 interleaves followed by two half-factor interleaves.
  // For any other operand count this case adds nothing to Results.
  const unsigned NumFactors = Node->getNumOperands();
  if (!(NumFactors > 2 && isPowerOf2_32(NumFactors)))
    break;
  const unsigned HalfFactor = NumFactors / 2;
  EVT ResultVT = Node->getValueType(0);
  SmallVector<EVT> HalfResultVTs(HalfFactor, ResultVT);
  SmallVector<SDValue, 8> LowParts, HighParts;
  // Stage 1: interleave operand Idx with operand Idx + HalfFactor so each
  // intermediate value carries two factors:
  //   a0a1 b0b1 c0c1 d0d1 -> [a0c0 b0d0] [a1c1 b1d1]
  for (unsigned Idx = 0; Idx != HalfFactor; ++Idx) {
    SDValue Pair = DAG.getNode(
        ISD::VECTOR_INTERLEAVE, dl, {ResultVT, ResultVT},
        {Node->getOperand(Idx), Node->getOperand(Idx + HalfFactor)});
    LowParts.push_back(Pair.getValue(0));
    HighParts.push_back(Pair.getValue(1));
  }
  // Stage 2: interleave each group of intermediates at HalfFactor:
  //   [a0c0 b0d0] [a1c1 b1d1] -> a0b0 c0d0 a1b1 c1d1
  SDValue FrontHalf =
      DAG.getNode(ISD::VECTOR_INTERLEAVE, dl, HalfResultVTs, LowParts);
  SDValue BackHalf =
      DAG.getNode(ISD::VECTOR_INTERLEAVE, dl, HalfResultVTs, HighParts);
  // Append every result of the front half, then every result of the back half.
  for (SDValue Half : {FrontHalf, BackHalf})
    for (unsigned Idx = 0; Idx != HalfFactor; ++Idx)
      Results.push_back(Half.getValue(Idx));
  break;
}
case ISD::EXTRACT_ELEMENT: {
EVT OpTy = Node->getOperand(0).getValueType();
if (Node->getConstantOperandVal(1)) {
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29208,6 +29208,10 @@ AArch64TargetLowering::LowerVECTOR_DEINTERLEAVE(SDValue Op,
EVT OpVT = Op.getValueType();
assert(OpVT.isScalableVector() &&
"Expected scalable vector in LowerVECTOR_DEINTERLEAVE.");

if (Op->getNumOperands() != 2)
return SDValue();

SDValue Even = DAG.getNode(AArch64ISD::UZP1, DL, OpVT, Op.getOperand(0),
Op.getOperand(1));
SDValue Odd = DAG.getNode(AArch64ISD::UZP2, DL, OpVT, Op.getOperand(0),
Expand All @@ -29222,6 +29226,9 @@ SDValue AArch64TargetLowering::LowerVECTOR_INTERLEAVE(SDValue Op,
assert(OpVT.isScalableVector() &&
"Expected scalable vector in LowerVECTOR_INTERLEAVE.");

if (Op->getNumOperands() != 2)
return SDValue();

SDValue Lo = DAG.getNode(AArch64ISD::ZIP1, DL, OpVT, Op.getOperand(0),
Op.getOperand(1));
SDValue Hi = DAG.getNode(AArch64ISD::ZIP2, DL, OpVT, Op.getOperand(0),
Expand Down
97 changes: 96 additions & 1 deletion llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,102 @@ define {<vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv
ret {<vscale x 2 x i64>, <vscale x 2 x i64>} %retval
}

; Factor-4 deinterleave: splits a <vscale x 64 x i8> into four
; <vscale x 16 x i8> results via llvm.vector.deinterleave4. The CHECK lines
; expect eight UZP1/UZP2 instructions on .b elements and nothing else before
; the return.
define {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} @vector_deinterleave_nxv16i8_nxv64i8(<vscale x 64 x i8> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv16i8_nxv64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp1 z4.b, z2.b, z3.b
; CHECK-NEXT: uzp1 z5.b, z0.b, z1.b
; CHECK-NEXT: uzp2 z3.b, z2.b, z3.b
; CHECK-NEXT: uzp2 z6.b, z0.b, z1.b
; CHECK-NEXT: uzp1 z0.b, z5.b, z4.b
; CHECK-NEXT: uzp2 z2.b, z5.b, z4.b
; CHECK-NEXT: uzp1 z1.b, z6.b, z3.b
; CHECK-NEXT: uzp2 z3.b, z6.b, z3.b
; CHECK-NEXT: ret
%retval = call {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave4.nxv64i8(<vscale x 64 x i8> %vec)
ret {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} %retval
}

; Factor-4 deinterleave: splits a <vscale x 32 x i16> into four
; <vscale x 8 x i16> results via llvm.vector.deinterleave4. The CHECK lines
; expect eight UZP1/UZP2 instructions on .h elements and nothing else before
; the return.
define {<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>} @vector_deinterleave_nxv8i16_nxv32i16(<vscale x 32 x i16> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv8i16_nxv32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp1 z4.h, z2.h, z3.h
; CHECK-NEXT: uzp1 z5.h, z0.h, z1.h
; CHECK-NEXT: uzp2 z3.h, z2.h, z3.h
; CHECK-NEXT: uzp2 z6.h, z0.h, z1.h
; CHECK-NEXT: uzp1 z0.h, z5.h, z4.h
; CHECK-NEXT: uzp2 z2.h, z5.h, z4.h
; CHECK-NEXT: uzp1 z1.h, z6.h, z3.h
; CHECK-NEXT: uzp2 z3.h, z6.h, z3.h
; CHECK-NEXT: ret
%retval = call {<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.vector.deinterleave4.nxv32i16(<vscale x 32 x i16> %vec)
ret {<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>} %retval
}

; Factor-4 deinterleave: splits a <vscale x 16 x i32> into four
; <vscale x 4 x i32> results via llvm.vector.deinterleave4. The CHECK lines
; expect eight UZP1/UZP2 instructions on .s elements and nothing else before
; the return.
define {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} @vector_deinterleave_nxv4i32_nxv16i32(<vscale x 16 x i32> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv4i32_nxv16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp1 z4.s, z2.s, z3.s
; CHECK-NEXT: uzp1 z5.s, z0.s, z1.s
; CHECK-NEXT: uzp2 z3.s, z2.s, z3.s
; CHECK-NEXT: uzp2 z6.s, z0.s, z1.s
; CHECK-NEXT: uzp1 z0.s, z5.s, z4.s
; CHECK-NEXT: uzp2 z2.s, z5.s, z4.s
; CHECK-NEXT: uzp1 z1.s, z6.s, z3.s
; CHECK-NEXT: uzp2 z3.s, z6.s, z3.s
; CHECK-NEXT: ret
%retval = call {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.vector.deinterleave4.nxv16i32(<vscale x 16 x i32> %vec)
ret {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} %retval
}

; Factor-4 deinterleave: splits a <vscale x 8 x i64> into four
; <vscale x 2 x i64> results via llvm.vector.deinterleave4. The CHECK lines
; expect eight UZP1/UZP2 instructions on .d elements and nothing else before
; the return.
define {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv8i64(<vscale x 8 x i64> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv2i64_nxv8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp1 z4.d, z2.d, z3.d
; CHECK-NEXT: uzp1 z5.d, z0.d, z1.d
; CHECK-NEXT: uzp2 z3.d, z2.d, z3.d
; CHECK-NEXT: uzp2 z6.d, z0.d, z1.d
; CHECK-NEXT: uzp1 z0.d, z5.d, z4.d
; CHECK-NEXT: uzp2 z2.d, z5.d, z4.d
; CHECK-NEXT: uzp1 z1.d, z6.d, z3.d
; CHECK-NEXT: uzp2 z3.d, z6.d, z3.d
; CHECK-NEXT: ret
%retval = call {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.vector.deinterleave4.nxv8i64(<vscale x 8 x i64> %vec)
ret {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} %retval
}

; Factor-8 deinterleave: splits a <vscale x 16 x i64> into eight
; <vscale x 2 x i64> results via llvm.vector.deinterleave8. The CHECK lines
; expect 24 UZP1/UZP2 instructions on .d elements (twelve of each) and nothing
; else before the return.
define {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv16i64(<vscale x 16 x i64> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv2i64_nxv16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp1 z24.d, z6.d, z7.d
; CHECK-NEXT: uzp1 z25.d, z4.d, z5.d
; CHECK-NEXT: uzp1 z26.d, z2.d, z3.d
; CHECK-NEXT: uzp1 z27.d, z0.d, z1.d
; CHECK-NEXT: uzp2 z6.d, z6.d, z7.d
; CHECK-NEXT: uzp2 z4.d, z4.d, z5.d
; CHECK-NEXT: uzp2 z2.d, z2.d, z3.d
; CHECK-NEXT: uzp2 z0.d, z0.d, z1.d
; CHECK-NEXT: uzp1 z5.d, z25.d, z24.d
; CHECK-NEXT: uzp2 z24.d, z25.d, z24.d
; CHECK-NEXT: uzp1 z7.d, z27.d, z26.d
; CHECK-NEXT: uzp1 z28.d, z4.d, z6.d
; CHECK-NEXT: uzp2 z25.d, z27.d, z26.d
; CHECK-NEXT: uzp1 z29.d, z0.d, z2.d
; CHECK-NEXT: uzp2 z26.d, z4.d, z6.d
; CHECK-NEXT: uzp2 z27.d, z0.d, z2.d
; CHECK-NEXT: uzp1 z0.d, z7.d, z5.d
; CHECK-NEXT: uzp1 z2.d, z25.d, z24.d
; CHECK-NEXT: uzp2 z4.d, z7.d, z5.d
; CHECK-NEXT: uzp1 z1.d, z29.d, z28.d
; CHECK-NEXT: uzp1 z3.d, z27.d, z26.d
; CHECK-NEXT: uzp2 z5.d, z29.d, z28.d
; CHECK-NEXT: uzp2 z6.d, z25.d, z24.d
; CHECK-NEXT: uzp2 z7.d, z27.d, z26.d
; CHECK-NEXT: ret
%retval = call {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.vector.deinterleave8.nxv16i64(<vscale x 16 x i64> %vec)
ret {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} %retval
}

; Predicated
define {<vscale x 16 x i1>, <vscale x 16 x i1>} @vector_deinterleave_nxv16i1_nxv32i1(<vscale x 32 x i1> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv16i1_nxv32i1:
Expand Down Expand Up @@ -279,7 +375,6 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>} @vector_deinterleave_nxv2i32_nxv
ret {<vscale x 2 x i32>, <vscale x 2 x i32>} %retval
}


; Floating declarations
declare {<vscale x 2 x half>,<vscale x 2 x half>} @llvm.vector.deinterleave2.nxv4f16(<vscale x 4 x half>)
declare {<vscale x 4 x half>, <vscale x 4 x half>} @llvm.vector.deinterleave2.nxv8f16(<vscale x 8 x half>)
Expand Down
96 changes: 96 additions & 0 deletions llvm/test/CodeGen/AArch64/sve-vector-interleave.ll
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,102 @@ define <vscale x 4 x i64> @interleave2_nxv4i64(<vscale x 2 x i64> %vec0, <vscale
ret <vscale x 4 x i64> %retval
}

; Factor-4 interleave: combines four <vscale x 16 x i8> operands into one
; <vscale x 64 x i8> via llvm.vector.interleave4. The CHECK lines expect eight
; ZIP1/ZIP2 instructions on .b elements and nothing else before the return.
define <vscale x 64 x i8> @interleave4_nxv16i8(<vscale x 16 x i8> %vec0, <vscale x 16 x i8> %vec1, <vscale x 16 x i8> %vec2, <vscale x 16 x i8> %vec3) {
; CHECK-LABEL: interleave4_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: zip1 z4.b, z1.b, z3.b
; CHECK-NEXT: zip1 z5.b, z0.b, z2.b
; CHECK-NEXT: zip2 z3.b, z1.b, z3.b
; CHECK-NEXT: zip2 z6.b, z0.b, z2.b
; CHECK-NEXT: zip1 z0.b, z5.b, z4.b
; CHECK-NEXT: zip2 z1.b, z5.b, z4.b
; CHECK-NEXT: zip1 z2.b, z6.b, z3.b
; CHECK-NEXT: zip2 z3.b, z6.b, z3.b
; CHECK-NEXT: ret
%retval = call <vscale x 64 x i8> @llvm.vector.interleave4.nxv16i8(<vscale x 16 x i8> %vec0, <vscale x 16 x i8> %vec1, <vscale x 16 x i8> %vec2, <vscale x 16 x i8> %vec3)
ret <vscale x 64 x i8> %retval
}

; Factor-4 interleave: combines four <vscale x 8 x i16> operands into one
; <vscale x 32 x i16> via llvm.vector.interleave4. The CHECK lines expect eight
; ZIP1/ZIP2 instructions on .h elements and nothing else before the return.
define <vscale x 32 x i16> @interleave4_nxv8i16(<vscale x 8 x i16> %vec0, <vscale x 8 x i16> %vec1, <vscale x 8 x i16> %vec2, <vscale x 8 x i16> %vec3) {
; CHECK-LABEL: interleave4_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: zip1 z4.h, z1.h, z3.h
; CHECK-NEXT: zip1 z5.h, z0.h, z2.h
; CHECK-NEXT: zip2 z3.h, z1.h, z3.h
; CHECK-NEXT: zip2 z6.h, z0.h, z2.h
; CHECK-NEXT: zip1 z0.h, z5.h, z4.h
; CHECK-NEXT: zip2 z1.h, z5.h, z4.h
; CHECK-NEXT: zip1 z2.h, z6.h, z3.h
; CHECK-NEXT: zip2 z3.h, z6.h, z3.h
; CHECK-NEXT: ret
%retval = call <vscale x 32 x i16> @llvm.vector.interleave4.nxv8i16(<vscale x 8 x i16> %vec0, <vscale x 8 x i16> %vec1, <vscale x 8 x i16> %vec2, <vscale x 8 x i16> %vec3)
ret <vscale x 32 x i16> %retval
}

; Factor-4 interleave: combines four <vscale x 4 x i32> operands into one
; <vscale x 16 x i32> via llvm.vector.interleave4. The CHECK lines expect eight
; ZIP1/ZIP2 instructions on .s elements and nothing else before the return.
define <vscale x 16 x i32> @interleave4_nxv4i32(<vscale x 4 x i32> %vec0, <vscale x 4 x i32> %vec1, <vscale x 4 x i32> %vec2, <vscale x 4 x i32> %vec3) {
; CHECK-LABEL: interleave4_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: zip1 z4.s, z1.s, z3.s
; CHECK-NEXT: zip1 z5.s, z0.s, z2.s
; CHECK-NEXT: zip2 z3.s, z1.s, z3.s
; CHECK-NEXT: zip2 z6.s, z0.s, z2.s
; CHECK-NEXT: zip1 z0.s, z5.s, z4.s
; CHECK-NEXT: zip2 z1.s, z5.s, z4.s
; CHECK-NEXT: zip1 z2.s, z6.s, z3.s
; CHECK-NEXT: zip2 z3.s, z6.s, z3.s
; CHECK-NEXT: ret
%retval = call <vscale x 16 x i32> @llvm.vector.interleave4.nxv4i32(<vscale x 4 x i32> %vec0, <vscale x 4 x i32> %vec1, <vscale x 4 x i32> %vec2, <vscale x 4 x i32> %vec3)
ret <vscale x 16 x i32> %retval
}

; Factor-4 interleave: combines four <vscale x 2 x i64> operands into one
; <vscale x 8 x i64> via llvm.vector.interleave4. The CHECK lines expect eight
; ZIP1/ZIP2 instructions on .d elements and nothing else before the return.
define <vscale x 8 x i64> @interleave4_nxv8i64(<vscale x 2 x i64> %vec0, <vscale x 2 x i64> %vec1, <vscale x 2 x i64> %vec2, <vscale x 2 x i64> %vec3) {
; CHECK-LABEL: interleave4_nxv8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: zip1 z4.d, z1.d, z3.d
; CHECK-NEXT: zip1 z5.d, z0.d, z2.d
; CHECK-NEXT: zip2 z3.d, z1.d, z3.d
; CHECK-NEXT: zip2 z6.d, z0.d, z2.d
; CHECK-NEXT: zip1 z0.d, z5.d, z4.d
; CHECK-NEXT: zip2 z1.d, z5.d, z4.d
; CHECK-NEXT: zip1 z2.d, z6.d, z3.d
; CHECK-NEXT: zip2 z3.d, z6.d, z3.d
; CHECK-NEXT: ret
%retval = call <vscale x 8 x i64> @llvm.vector.interleave4.nxv8i64(<vscale x 2 x i64> %vec0, <vscale x 2 x i64> %vec1, <vscale x 2 x i64> %vec2, <vscale x 2 x i64> %vec3)
ret <vscale x 8 x i64> %retval
}

; Factor-8 interleave: combines eight <vscale x 2 x i64> operands into one
; <vscale x 16 x i64> via llvm.vector.interleave8. The CHECK lines expect 24
; ZIP1/ZIP2 instructions on .d elements (twelve of each) and nothing else
; before the return.
define <vscale x 16 x i64> @interleave8_nxv16i64(<vscale x 2 x i64> %vec0, <vscale x 2 x i64> %vec1, <vscale x 2 x i64> %vec2, <vscale x 2 x i64> %vec3, <vscale x 2 x i64> %vec4, <vscale x 2 x i64> %vec5, <vscale x 2 x i64> %vec6, <vscale x 2 x i64> %vec7) {
; CHECK-LABEL: interleave8_nxv16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: zip1 z24.d, z3.d, z7.d
; CHECK-NEXT: zip1 z25.d, z1.d, z5.d
; CHECK-NEXT: zip1 z26.d, z2.d, z6.d
; CHECK-NEXT: zip1 z27.d, z0.d, z4.d
; CHECK-NEXT: zip2 z3.d, z3.d, z7.d
; CHECK-NEXT: zip2 z1.d, z1.d, z5.d
; CHECK-NEXT: zip2 z2.d, z2.d, z6.d
; CHECK-NEXT: zip2 z0.d, z0.d, z4.d
; CHECK-NEXT: zip1 z4.d, z25.d, z24.d
; CHECK-NEXT: zip2 z6.d, z25.d, z24.d
; CHECK-NEXT: zip1 z5.d, z27.d, z26.d
; CHECK-NEXT: zip2 z7.d, z27.d, z26.d
; CHECK-NEXT: zip1 z24.d, z1.d, z3.d
; CHECK-NEXT: zip1 z25.d, z0.d, z2.d
; CHECK-NEXT: zip2 z26.d, z1.d, z3.d
; CHECK-NEXT: zip2 z27.d, z0.d, z2.d
; CHECK-NEXT: zip1 z0.d, z5.d, z4.d
; CHECK-NEXT: zip2 z1.d, z5.d, z4.d
; CHECK-NEXT: zip1 z2.d, z7.d, z6.d
; CHECK-NEXT: zip2 z3.d, z7.d, z6.d
; CHECK-NEXT: zip1 z4.d, z25.d, z24.d
; CHECK-NEXT: zip2 z5.d, z25.d, z24.d
; CHECK-NEXT: zip1 z6.d, z27.d, z26.d
; CHECK-NEXT: zip2 z7.d, z27.d, z26.d
; CHECK-NEXT: ret
%retval = call <vscale x 16 x i64> @llvm.vector.interleave8.nxv16i64(<vscale x 2 x i64> %vec0, <vscale x 2 x i64> %vec1, <vscale x 2 x i64> %vec2, <vscale x 2 x i64> %vec3, <vscale x 2 x i64> %vec4, <vscale x 2 x i64> %vec5, <vscale x 2 x i64> %vec6, <vscale x 2 x i64> %vec7)
ret <vscale x 16 x i64> %retval
}

; Predicated

define <vscale x 32 x i1> @interleave2_nxv32i1(<vscale x 16 x i1> %vec0, <vscale x 16 x i1> %vec1) {
Expand Down
Loading