Skip to content

[IR] Add llvm.vector.[de]interleave{4,6,8} #139893

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
May 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions llvm/docs/LangRef.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20209,7 +20209,7 @@ Arguments:

The argument to this intrinsic must be a vector.

'``llvm.vector.deinterleave2/3/5/7``' Intrinsic
'``llvm.vector.deinterleave2/3/4/5/6/7/8``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Syntax:
Expand All @@ -20227,8 +20227,8 @@ This is an overloaded intrinsic.
Overview:
"""""""""

The '``llvm.vector.deinterleave2/3/5/7``' intrinsics deinterleave adjacent lanes
into 2, 3, 5, and 7 separate vectors, respectively, and return them as the
The '``llvm.vector.deinterleave2/3/4/5/6/7/8``' intrinsics deinterleave adjacent lanes
into 2 through 8 separate vectors, respectively, and return them as the
result.

This intrinsic works for both fixed and scalable vectors. While this intrinsic
Expand All @@ -20250,7 +20250,7 @@ Arguments:
The argument is a vector whose type corresponds to the logical concatenation of
the aggregated result types.

'``llvm.vector.interleave2/3/5/7``' Intrinsic
'``llvm.vector.interleave2/3/4/5/6/7/8``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Syntax:
Expand All @@ -20268,7 +20268,7 @@ This is an overloaded intrinsic.
Overview:
"""""""""

The '``llvm.vector.interleave2/3/5/7``' intrinsic constructs a vector
The '``llvm.vector.interleave2/3/4/5/6/7/8``' intrinsics construct a vector
by interleaving all the input vectors.

This intrinsic works for both fixed and scalable vectors. While this intrinsic
Expand Down
28 changes: 17 additions & 11 deletions llvm/include/llvm/IR/Intrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,11 @@ namespace Intrinsic {
TruncArgument,
HalfVecArgument,
OneThirdVecArgument,
OneFourthVecArgument,
OneFifthVecArgument,
OneSixthVecArgument,
OneSeventhVecArgument,
OneEighthVecArgument,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we instead parameterize a single IIT descriptor with the divisor?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like it. I've opened a follow-up PR to do this: #141492

SameVecWidthArgument,
VecOfAnyPtrsToElt,
VecElementArgument,
Expand All @@ -166,9 +169,12 @@ namespace Intrinsic {
AArch64Svcount,
} Kind;

// These three have to be contiguous.
static_assert(OneFifthVecArgument == OneThirdVecArgument + 1 &&
OneSeventhVecArgument == OneFifthVecArgument + 1);
// These six have to be contiguous.
static_assert(OneFourthVecArgument == OneThirdVecArgument + 1 &&
OneFifthVecArgument == OneFourthVecArgument + 1 &&
OneSixthVecArgument == OneFifthVecArgument + 1 &&
OneSeventhVecArgument == OneSixthVecArgument + 1 &&
OneEighthVecArgument == OneSeventhVecArgument + 1);
union {
unsigned Integer_Width;
unsigned Float_Width;
Expand All @@ -188,19 +194,19 @@ namespace Intrinsic {
unsigned getArgumentNumber() const {
assert(Kind == Argument || Kind == ExtendArgument ||
Kind == TruncArgument || Kind == HalfVecArgument ||
Kind == OneThirdVecArgument || Kind == OneFifthVecArgument ||
Kind == OneSeventhVecArgument || Kind == SameVecWidthArgument ||
Kind == VecElementArgument || Kind == Subdivide2Argument ||
Kind == Subdivide4Argument || Kind == VecOfBitcastsToInt);
(Kind >= OneThirdVecArgument && Kind <= OneEighthVecArgument) ||
Kind == SameVecWidthArgument || Kind == VecElementArgument ||
Kind == Subdivide2Argument || Kind == Subdivide4Argument ||
Kind == VecOfBitcastsToInt);
return Argument_Info >> 3;
}
ArgKind getArgumentKind() const {
assert(Kind == Argument || Kind == ExtendArgument ||
Kind == TruncArgument || Kind == HalfVecArgument ||
Kind == OneThirdVecArgument || Kind == OneFifthVecArgument ||
Kind == OneSeventhVecArgument || Kind == SameVecWidthArgument ||
Kind == VecElementArgument || Kind == Subdivide2Argument ||
Kind == Subdivide4Argument || Kind == VecOfBitcastsToInt);
(Kind >= OneThirdVecArgument && Kind <= OneEighthVecArgument) ||
Kind == SameVecWidthArgument || Kind == VecElementArgument ||
Kind == Subdivide2Argument || Kind == Subdivide4Argument ||
Kind == VecOfBitcastsToInt);
return (ArgKind)(Argument_Info & 7);
}

Expand Down
66 changes: 66 additions & 0 deletions llvm/include/llvm/IR/Intrinsics.td
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,9 @@ def IIT_ONE_FIFTH_VEC_ARG : IIT_Base<63>;
def IIT_ONE_SEVENTH_VEC_ARG : IIT_Base<64>;
def IIT_V2048: IIT_Vec<2048, 65>;
def IIT_V4096: IIT_Vec<4096, 66>;
def IIT_ONE_FOURTH_VEC_ARG : IIT_Base<67>;
def IIT_ONE_SIXTH_VEC_ARG : IIT_Base<68>;
def IIT_ONE_EIGHTH_VEC_ARG : IIT_Base<69>;
}

defvar IIT_all_FixedTypes = !filter(iit, IIT_all,
Expand Down Expand Up @@ -483,12 +486,21 @@ class LLVMHalfElementsVectorType<int num>
// Match the type of another intrinsic parameter (overload index `num`) that is
// expected to be a vector type, but with one third as many elements.
class LLVMOneThirdElementsVectorType<int num>
: LLVMMatchType<num, IIT_ONE_THIRD_VEC_ARG>;

// Match the type of another intrinsic parameter (overload index `num`) that is
// expected to be a vector type, but with one fourth as many elements.
class LLVMOneFourthElementsVectorType<int num>
: LLVMMatchType<num, IIT_ONE_FOURTH_VEC_ARG>;

// Match the type of another intrinsic parameter (overload index `num`) that is
// expected to be a vector type, but with one fifth as many elements.
class LLVMOneFifthElementsVectorType<int num>
: LLVMMatchType<num, IIT_ONE_FIFTH_VEC_ARG>;

// Match the type of another intrinsic parameter (overload index `num`) that is
// expected to be a vector type, but with one sixth as many elements.
class LLVMOneSixthElementsVectorType<int num>
: LLVMMatchType<num, IIT_ONE_SIXTH_VEC_ARG>;

// Match the type of another intrinsic parameter (overload index `num`) that is
// expected to be a vector type, but with one seventh as many elements.
class LLVMOneSeventhElementsVectorType<int num>
: LLVMMatchType<num, IIT_ONE_SEVENTH_VEC_ARG>;

// Match the type of another intrinsic parameter (overload index `num`) that is
// expected to be a vector type, but with one eighth as many elements.
class LLVMOneEighthElementsVectorType<int num>
: LLVMMatchType<num, IIT_ONE_EIGHTH_VEC_ARG>;

// Match the type of another intrinsic parameter that is expected to be a
// vector type (i.e. <N x iM>) but with each element subdivided to
// form a vector with more elements that are smaller than the original.
Expand Down Expand Up @@ -2781,6 +2793,20 @@ def int_vector_deinterleave3 : DefaultAttrsIntrinsic<[LLVMOneThirdElementsVector
[llvm_anyvector_ty],
[IntrNoMem]>;

// llvm.vector.interleave4: builds one vector by interleaving four input
// vectors. The result is the overloaded vector type (index 0); each of the four
// operands must be a vector with one fourth as many elements as the result.
def int_vector_interleave4 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMOneFourthElementsVectorType<0>,
LLVMOneFourthElementsVectorType<0>,
LLVMOneFourthElementsVectorType<0>,
LLVMOneFourthElementsVectorType<0>],
[IntrNoMem]>;

// llvm.vector.deinterleave4: splits the adjacent lanes of one input vector into
// four result vectors. The operand is the overloaded vector type (index 0);
// each of the four results has one fourth as many elements as the operand.
def int_vector_deinterleave4 : DefaultAttrsIntrinsic<[LLVMOneFourthElementsVectorType<0>,
LLVMOneFourthElementsVectorType<0>,
LLVMOneFourthElementsVectorType<0>,
LLVMOneFourthElementsVectorType<0>],
[llvm_anyvector_ty],
[IntrNoMem]>;

def int_vector_interleave5 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMOneFifthElementsVectorType<0>,
LLVMOneFifthElementsVectorType<0>,
Expand All @@ -2797,6 +2823,24 @@ def int_vector_deinterleave5 : DefaultAttrsIntrinsic<[LLVMOneFifthElementsVector
[llvm_anyvector_ty],
[IntrNoMem]>;

// llvm.vector.interleave6: builds one vector by interleaving six input
// vectors. The result is the overloaded vector type (index 0); each of the six
// operands must be a vector with one sixth as many elements as the result.
def int_vector_interleave6 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMOneSixthElementsVectorType<0>,
LLVMOneSixthElementsVectorType<0>,
LLVMOneSixthElementsVectorType<0>,
LLVMOneSixthElementsVectorType<0>,
LLVMOneSixthElementsVectorType<0>,
LLVMOneSixthElementsVectorType<0>],
[IntrNoMem]>;

// llvm.vector.deinterleave6: splits the adjacent lanes of one input vector into
// six result vectors. The operand is the overloaded vector type (index 0);
// each of the six results has one sixth as many elements as the operand.
def int_vector_deinterleave6 : DefaultAttrsIntrinsic<[LLVMOneSixthElementsVectorType<0>,
LLVMOneSixthElementsVectorType<0>,
LLVMOneSixthElementsVectorType<0>,
LLVMOneSixthElementsVectorType<0>,
LLVMOneSixthElementsVectorType<0>,
LLVMOneSixthElementsVectorType<0>],
[llvm_anyvector_ty],
[IntrNoMem]>;

def int_vector_interleave7 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMOneSeventhElementsVectorType<0>,
LLVMOneSeventhElementsVectorType<0>,
Expand All @@ -2817,6 +2861,28 @@ def int_vector_deinterleave7 : DefaultAttrsIntrinsic<[LLVMOneSeventhElementsVect
[llvm_anyvector_ty],
[IntrNoMem]>;

// llvm.vector.interleave8: builds one vector by interleaving eight input
// vectors. The result is the overloaded vector type (index 0); each of the
// eight operands must be a vector with one eighth as many elements as the
// result.
def int_vector_interleave8 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMOneEighthElementsVectorType<0>,
LLVMOneEighthElementsVectorType<0>,
LLVMOneEighthElementsVectorType<0>,
LLVMOneEighthElementsVectorType<0>,
LLVMOneEighthElementsVectorType<0>,
LLVMOneEighthElementsVectorType<0>,
LLVMOneEighthElementsVectorType<0>,
LLVMOneEighthElementsVectorType<0>],
[IntrNoMem]>;

// llvm.vector.deinterleave8: splits the adjacent lanes of one input vector into
// eight result vectors. The operand is the overloaded vector type (index 0);
// each of the eight results has one eighth as many elements as the operand.
def int_vector_deinterleave8 : DefaultAttrsIntrinsic<[LLVMOneEighthElementsVectorType<0>,
LLVMOneEighthElementsVectorType<0>,
LLVMOneEighthElementsVectorType<0>,
LLVMOneEighthElementsVectorType<0>,
LLVMOneEighthElementsVectorType<0>,
LLVMOneEighthElementsVectorType<0>,
LLVMOneEighthElementsVectorType<0>,
LLVMOneEighthElementsVectorType<0>],
[llvm_anyvector_ty],
[IntrNoMem]>;

//===-------------- Intrinsics to perform partial reduction ---------------===//

def int_experimental_vector_partial_reduce_add : DefaultAttrsIntrinsic<[LLVMMatchType<0>],
Expand Down
18 changes: 18 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8198,24 +8198,42 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::vector_interleave3:
visitVectorInterleave(I, 3);
return;
case Intrinsic::vector_interleave4:
visitVectorInterleave(I, 4);
return;
case Intrinsic::vector_interleave5:
visitVectorInterleave(I, 5);
return;
case Intrinsic::vector_interleave6:
visitVectorInterleave(I, 6);
return;
case Intrinsic::vector_interleave7:
visitVectorInterleave(I, 7);
return;
case Intrinsic::vector_interleave8:
visitVectorInterleave(I, 8);
return;
case Intrinsic::vector_deinterleave2:
visitVectorDeinterleave(I, 2);
return;
case Intrinsic::vector_deinterleave3:
visitVectorDeinterleave(I, 3);
return;
case Intrinsic::vector_deinterleave4:
visitVectorDeinterleave(I, 4);
return;
case Intrinsic::vector_deinterleave5:
visitVectorDeinterleave(I, 5);
return;
case Intrinsic::vector_deinterleave6:
visitVectorDeinterleave(I, 6);
return;
case Intrinsic::vector_deinterleave7:
visitVectorDeinterleave(I, 7);
return;
case Intrinsic::vector_deinterleave8:
visitVectorDeinterleave(I, 8);
return;
case Intrinsic::experimental_vector_compress:
setValue(&I, DAG.getNode(ISD::VECTOR_COMPRESS, sdl,
getValue(I.getArgOperand(0)).getValueType(),
Expand Down
28 changes: 26 additions & 2 deletions llvm/lib/IR/Intrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -378,18 +378,36 @@ DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
IITDescriptor::get(IITDescriptor::OneThirdVecArgument, ArgInfo));
return;
}
case IIT_ONE_FOURTH_VEC_ARG: {
unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
OutputTable.push_back(
IITDescriptor::get(IITDescriptor::OneFourthVecArgument, ArgInfo));
return;
}
case IIT_ONE_FIFTH_VEC_ARG: {
unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
OutputTable.push_back(
IITDescriptor::get(IITDescriptor::OneFifthVecArgument, ArgInfo));
return;
}
case IIT_ONE_SIXTH_VEC_ARG: {
unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
OutputTable.push_back(
IITDescriptor::get(IITDescriptor::OneSixthVecArgument, ArgInfo));
return;
}
case IIT_ONE_SEVENTH_VEC_ARG: {
unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
OutputTable.push_back(
IITDescriptor::get(IITDescriptor::OneSeventhVecArgument, ArgInfo));
return;
}
case IIT_ONE_EIGHTH_VEC_ARG: {
unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
OutputTable.push_back(
IITDescriptor::get(IITDescriptor::OneEighthVecArgument, ArgInfo));
return;
}
case IIT_SAME_VEC_WIDTH_ARG: {
unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
OutputTable.push_back(
Expand Down Expand Up @@ -584,11 +602,14 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
return VectorType::getHalfElementsVectorType(
cast<VectorType>(Tys[D.getArgumentNumber()]));
case IITDescriptor::OneThirdVecArgument:
case IITDescriptor::OneFourthVecArgument:
case IITDescriptor::OneFifthVecArgument:
case IITDescriptor::OneSixthVecArgument:
case IITDescriptor::OneSeventhVecArgument:
case IITDescriptor::OneEighthVecArgument:
return VectorType::getOneNthElementsVectorType(
cast<VectorType>(Tys[D.getArgumentNumber()]),
3 + (D.Kind - IITDescriptor::OneThirdVecArgument) * 2);
3 + (D.Kind - IITDescriptor::OneThirdVecArgument));
case IITDescriptor::SameVecWidthArgument: {
Type *EltTy = DecodeFixedType(Infos, Tys, Context);
Type *Ty = Tys[D.getArgumentNumber()];
Expand Down Expand Up @@ -974,15 +995,18 @@ matchIntrinsicType(Type *Ty, ArrayRef<Intrinsic::IITDescriptor> &Infos,
VectorType::getHalfElementsVectorType(
cast<VectorType>(ArgTys[D.getArgumentNumber()])) != Ty;
case IITDescriptor::OneThirdVecArgument:
case IITDescriptor::OneFourthVecArgument:
case IITDescriptor::OneFifthVecArgument:
case IITDescriptor::OneSixthVecArgument:
case IITDescriptor::OneSeventhVecArgument:
case IITDescriptor::OneEighthVecArgument:
// If this is a forward reference, defer the check for later.
if (D.getArgumentNumber() >= ArgTys.size())
return IsDeferredCheck || DeferCheck(Ty);
return !isa<VectorType>(ArgTys[D.getArgumentNumber()]) ||
VectorType::getOneNthElementsVectorType(
cast<VectorType>(ArgTys[D.getArgumentNumber()]),
3 + (D.Kind - IITDescriptor::OneThirdVecArgument) * 2) != Ty;
3 + (D.Kind - IITDescriptor::OneThirdVecArgument)) != Ty;
case IITDescriptor::SameVecWidthArgument: {
if (D.getArgumentNumber() >= ArgTys.size()) {
// Defer check and subsequent check for the vector element type.
Expand Down
Loading