Skip to content

Commit 992a64a

Browse files
[Clang][SVE] Change LLVM representation of ACLE tuple types to be struct based. (llvm#108008)
This implements our original design now that LLVM is comfortable with structs and arrays of scalable vector types. All SVE ACLE intrinsics already use struct types so the effect of this change is purely the types used for alloca and function parameters. There should be no C/C++ user visible change with this patch.
1 parent 7574e1d commit 992a64a

File tree

108 files changed

+27788
-22510
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

108 files changed

+27788
-22510
lines changed

clang/include/clang/Basic/AArch64SVEACLETypes.def

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,8 @@
4646
//
4747
// - ElBits is the size of one element in bits.
4848
//
49-
// - NF enumerates the number of sub-vectors.
50-
// TODO: Tuple types are represented as a concatenation of "NumEls x ElBits"
51-
// vectors. This will be changed to become a struct containing NF vectors.
49+
// - NF enumerates the number of vectors whereby 1 implies a single vector,
50+
// with other values implying a struct of NF "NumEls x ElBits" vectors.
5251
//
5352
// - IsSigned is true for vectors of signed integer elements and
5453
// for vectors of floating-point elements.

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 28 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -9990,31 +9990,6 @@ Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
99909990
SmallVectorImpl<Value*> &Ops,
99919991
unsigned IntID) {
99929992
llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
9993-
9994-
unsigned N;
9995-
switch (IntID) {
9996-
case Intrinsic::aarch64_sve_ld2_sret:
9997-
case Intrinsic::aarch64_sve_ld1_pn_x2:
9998-
case Intrinsic::aarch64_sve_ldnt1_pn_x2:
9999-
case Intrinsic::aarch64_sve_ld2q_sret:
10000-
N = 2;
10001-
break;
10002-
case Intrinsic::aarch64_sve_ld3_sret:
10003-
case Intrinsic::aarch64_sve_ld3q_sret:
10004-
N = 3;
10005-
break;
10006-
case Intrinsic::aarch64_sve_ld4_sret:
10007-
case Intrinsic::aarch64_sve_ld1_pn_x4:
10008-
case Intrinsic::aarch64_sve_ldnt1_pn_x4:
10009-
case Intrinsic::aarch64_sve_ld4q_sret:
10010-
N = 4;
10011-
break;
10012-
default:
10013-
llvm_unreachable("unknown intrinsic!");
10014-
}
10015-
auto RetTy = llvm::VectorType::get(VTy->getElementType(),
10016-
VTy->getElementCount() * N);
10017-
100189993
Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
100199994
Value *BasePtr = Ops[1];
100209995

@@ -10023,15 +9998,7 @@ Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
100239998
BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
100249999

1002510000
Function *F = CGM.getIntrinsic(IntID, {VTy});
10026-
Value *Call = Builder.CreateCall(F, {Predicate, BasePtr});
10027-
unsigned MinElts = VTy->getMinNumElements();
10028-
Value *Ret = llvm::PoisonValue::get(RetTy);
10029-
for (unsigned I = 0; I < N; I++) {
10030-
Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
10031-
Value *SRet = Builder.CreateExtractValue(Call, I);
10032-
Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
10033-
}
10034-
return Ret;
10001+
return Builder.CreateCall(F, {Predicate, BasePtr});
1003510002
}
1003610003

1003710004
Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
@@ -10304,6 +10271,19 @@ Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
1030410271
// view (when storing/reloading), whereas the svreinterpret builtin
1030510272
// implements bitwise equivalent cast from register point of view.
1030610273
// LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
10274+
10275+
if (auto *StructTy = dyn_cast<StructType>(Ty)) {
10276+
Value *Tuple = llvm::PoisonValue::get(Ty);
10277+
10278+
for (unsigned I = 0; I < StructTy->getNumElements(); ++I) {
10279+
Value *In = Builder.CreateExtractValue(Val, I);
10280+
Value *Out = Builder.CreateBitCast(In, StructTy->getTypeAtIndex(I));
10281+
Tuple = Builder.CreateInsertValue(Tuple, Out, I);
10282+
}
10283+
10284+
return Tuple;
10285+
}
10286+
1030710287
return Builder.CreateBitCast(Val, Ty);
1030810288
}
1030910289

@@ -10346,44 +10326,26 @@ CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
1034610326
}
1034710327

1034810328
Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
10349-
llvm::Type *Ty,
1035010329
ArrayRef<Value *> Ops) {
1035110330
assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
1035210331
"Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()");
10353-
10354-
unsigned I = cast<ConstantInt>(Ops[1])->getSExtValue();
10355-
auto *SingleVecTy = dyn_cast<llvm::ScalableVectorType>(
10356-
TypeFlags.isTupleSet() ? Ops[2]->getType() : Ty);
10357-
10358-
if (!SingleVecTy)
10359-
return nullptr;
10360-
10361-
Value *Idx = ConstantInt::get(CGM.Int64Ty,
10362-
I * SingleVecTy->getMinNumElements());
10332+
unsigned Idx = cast<ConstantInt>(Ops[1])->getZExtValue();
1036310333

1036410334
if (TypeFlags.isTupleSet())
10365-
return Builder.CreateInsertVector(Ty, Ops[0], Ops[2], Idx);
10366-
return Builder.CreateExtractVector(Ty, Ops[0], Idx);
10335+
return Builder.CreateInsertValue(Ops[0], Ops[2], Idx);
10336+
return Builder.CreateExtractValue(Ops[0], Idx);
1036710337
}
1036810338

1036910339
Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
10370-
llvm::Type *Ty,
10371-
ArrayRef<Value *> Ops) {
10340+
llvm::Type *Ty,
10341+
ArrayRef<Value *> Ops) {
1037210342
assert(TypeFlags.isTupleCreate() && "Expects TypleFlag isTupleCreate");
1037310343

10374-
auto *SrcTy = dyn_cast<llvm::ScalableVectorType>(Ops[0]->getType());
10375-
10376-
if (!SrcTy)
10377-
return nullptr;
10344+
Value *Tuple = llvm::PoisonValue::get(Ty);
10345+
for (unsigned Idx = 0; Idx < Ops.size(); Idx++)
10346+
Tuple = Builder.CreateInsertValue(Tuple, Ops[Idx], Idx);
1037810347

10379-
unsigned MinElts = SrcTy->getMinNumElements();
10380-
Value *Call = llvm::PoisonValue::get(Ty);
10381-
for (unsigned I = 0; I < Ops.size(); I++) {
10382-
Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
10383-
Call = Builder.CreateInsertVector(Ty, Call, Ops[I], Idx);
10384-
}
10385-
10386-
return Call;
10348+
return Tuple;
1038710349
}
1038810350

1038910351
Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) {
@@ -10453,27 +10415,14 @@ void CodeGenFunction::GetAArch64SVEProcessedOperands(
1045310415
continue;
1045410416
}
1045510417

10456-
if (IsTupleGetOrSet || !isa<ScalableVectorType>(Arg->getType())) {
10457-
Ops.push_back(Arg);
10458-
continue;
10459-
}
10418+
if (isa<StructType>(Arg->getType()) && !IsTupleGetOrSet) {
10419+
for (unsigned I = 0; I < Arg->getType()->getStructNumElements(); ++I)
10420+
Ops.push_back(Builder.CreateExtractValue(Arg, I));
1046010421

10461-
auto *VTy = cast<ScalableVectorType>(Arg->getType());
10462-
unsigned MinElts = VTy->getMinNumElements();
10463-
bool IsPred = VTy->getElementType()->isIntegerTy(1);
10464-
unsigned N = (MinElts * VTy->getScalarSizeInBits()) / (IsPred ? 16 : 128);
10465-
10466-
if (N == 1) {
10467-
Ops.push_back(Arg);
1046810422
continue;
1046910423
}
1047010424

10471-
for (unsigned I = 0; I < N; ++I) {
10472-
Value *Idx = ConstantInt::get(CGM.Int64Ty, (I * MinElts) / N);
10473-
auto *NewVTy =
10474-
ScalableVectorType::get(VTy->getElementType(), MinElts / N);
10475-
Ops.push_back(Builder.CreateExtractVector(NewVTy, Arg, Idx));
10476-
}
10425+
Ops.push_back(Arg);
1047710426
}
1047810427
}
1047910428

@@ -10511,7 +10460,7 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
1051110460
else if (TypeFlags.isStructStore())
1051210461
return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
1051310462
else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
10514-
return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops);
10463+
return EmitSVETupleSetOrGet(TypeFlags, Ops);
1051510464
else if (TypeFlags.isTupleCreate())
1051610465
return EmitSVETupleCreate(TypeFlags, Ty, Ops);
1051710466
else if (TypeFlags.isUndef())

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4628,7 +4628,6 @@ class CodeGenFunction : public CodeGenTypeCache {
46284628
llvm::ScalableVectorType *getSVEType(const SVETypeFlags &TypeFlags);
46294629
llvm::ScalableVectorType *getSVEPredType(const SVETypeFlags &TypeFlags);
46304630
llvm::Value *EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
4631-
llvm::Type *ReturnType,
46324631
ArrayRef<llvm::Value *> Ops);
46334632
llvm::Value *EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
46344633
llvm::Type *ReturnType,

clang/lib/CodeGen/CodeGenTypes.cpp

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -509,9 +509,20 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
509509
{
510510
ASTContext::BuiltinVectorTypeInfo Info =
511511
Context.getBuiltinVectorTypeInfo(cast<BuiltinType>(Ty));
512-
return llvm::ScalableVectorType::get(ConvertType(Info.ElementType),
513-
Info.EC.getKnownMinValue() *
514-
Info.NumVectors);
512+
auto VTy =
513+
llvm::VectorType::get(ConvertType(Info.ElementType), Info.EC);
514+
switch (Info.NumVectors) {
515+
default:
516+
llvm_unreachable("Expected 1, 2, 3 or 4 vectors!");
517+
case 1:
518+
return VTy;
519+
case 2:
520+
return llvm::StructType::get(VTy, VTy);
521+
case 3:
522+
return llvm::StructType::get(VTy, VTy, VTy);
523+
case 4:
524+
return llvm::StructType::get(VTy, VTy, VTy, VTy);
525+
}
515526
}
516527
case BuiltinType::SveCount:
517528
return llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");

0 commit comments

Comments
 (0)