@@ -9990,31 +9990,6 @@ Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
9990
9990
SmallVectorImpl<Value*> &Ops,
9991
9991
unsigned IntID) {
9992
9992
llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
9993
-
9994
- unsigned N;
9995
- switch (IntID) {
9996
- case Intrinsic::aarch64_sve_ld2_sret:
9997
- case Intrinsic::aarch64_sve_ld1_pn_x2:
9998
- case Intrinsic::aarch64_sve_ldnt1_pn_x2:
9999
- case Intrinsic::aarch64_sve_ld2q_sret:
10000
- N = 2;
10001
- break;
10002
- case Intrinsic::aarch64_sve_ld3_sret:
10003
- case Intrinsic::aarch64_sve_ld3q_sret:
10004
- N = 3;
10005
- break;
10006
- case Intrinsic::aarch64_sve_ld4_sret:
10007
- case Intrinsic::aarch64_sve_ld1_pn_x4:
10008
- case Intrinsic::aarch64_sve_ldnt1_pn_x4:
10009
- case Intrinsic::aarch64_sve_ld4q_sret:
10010
- N = 4;
10011
- break;
10012
- default:
10013
- llvm_unreachable("unknown intrinsic!");
10014
- }
10015
- auto RetTy = llvm::VectorType::get(VTy->getElementType(),
10016
- VTy->getElementCount() * N);
10017
-
10018
9993
Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
10019
9994
Value *BasePtr = Ops[1];
10020
9995
@@ -10023,15 +9998,7 @@ Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
10023
9998
BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
10024
9999
10025
10000
Function *F = CGM.getIntrinsic(IntID, {VTy});
10026
- Value *Call = Builder.CreateCall(F, {Predicate, BasePtr});
10027
- unsigned MinElts = VTy->getMinNumElements();
10028
- Value *Ret = llvm::PoisonValue::get(RetTy);
10029
- for (unsigned I = 0; I < N; I++) {
10030
- Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
10031
- Value *SRet = Builder.CreateExtractValue(Call, I);
10032
- Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
10033
- }
10034
- return Ret;
10001
+ return Builder.CreateCall(F, {Predicate, BasePtr});
10035
10002
}
10036
10003
10037
10004
Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
@@ -10304,6 +10271,19 @@ Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
10304
10271
// view (when storing/reloading), whereas the svreinterpret builtin
10305
10272
// implements bitwise equivalent cast from register point of view.
10306
10273
// LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
10274
+
10275
+ if (auto *StructTy = dyn_cast<StructType>(Ty)) {
10276
+ Value *Tuple = llvm::PoisonValue::get(Ty);
10277
+
10278
+ for (unsigned I = 0; I < StructTy->getNumElements(); ++I) {
10279
+ Value *In = Builder.CreateExtractValue(Val, I);
10280
+ Value *Out = Builder.CreateBitCast(In, StructTy->getTypeAtIndex(I));
10281
+ Tuple = Builder.CreateInsertValue(Tuple, Out, I);
10282
+ }
10283
+
10284
+ return Tuple;
10285
+ }
10286
+
10307
10287
return Builder.CreateBitCast(Val, Ty);
10308
10288
}
10309
10289
@@ -10346,44 +10326,26 @@ CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
10346
10326
}
10347
10327
10348
10328
Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
10349
- llvm::Type *Ty,
10350
10329
ArrayRef<Value *> Ops) {
10351
10330
assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
10352
10331
"Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()");
10353
-
10354
- unsigned I = cast<ConstantInt>(Ops[1])->getSExtValue();
10355
- auto *SingleVecTy = dyn_cast<llvm::ScalableVectorType>(
10356
- TypeFlags.isTupleSet() ? Ops[2]->getType() : Ty);
10357
-
10358
- if (!SingleVecTy)
10359
- return nullptr;
10360
-
10361
- Value *Idx = ConstantInt::get(CGM.Int64Ty,
10362
- I * SingleVecTy->getMinNumElements());
10332
+ unsigned Idx = cast<ConstantInt>(Ops[1])->getZExtValue();
10363
10333
10364
10334
if (TypeFlags.isTupleSet())
10365
- return Builder.CreateInsertVector(Ty, Ops[0], Ops[2], Idx);
10366
- return Builder.CreateExtractVector(Ty, Ops[0], Idx);
10335
+ return Builder.CreateInsertValue( Ops[0], Ops[2], Idx);
10336
+ return Builder.CreateExtractValue( Ops[0], Idx);
10367
10337
}
10368
10338
10369
10339
Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
10370
- llvm::Type *Ty,
10371
- ArrayRef<Value *> Ops) {
10340
+ llvm::Type *Ty,
10341
+ ArrayRef<Value *> Ops) {
10372
10342
assert(TypeFlags.isTupleCreate() && "Expects TypleFlag isTupleCreate");
10373
10343
10374
- auto *SrcTy = dyn_cast<llvm::ScalableVectorType>(Ops[0]->getType());
10375
-
10376
- if (!SrcTy)
10377
- return nullptr;
10344
+ Value *Tuple = llvm::PoisonValue::get(Ty);
10345
+ for (unsigned Idx = 0; Idx < Ops.size(); Idx++)
10346
+ Tuple = Builder.CreateInsertValue(Tuple, Ops[Idx], Idx);
10378
10347
10379
- unsigned MinElts = SrcTy->getMinNumElements();
10380
- Value *Call = llvm::PoisonValue::get(Ty);
10381
- for (unsigned I = 0; I < Ops.size(); I++) {
10382
- Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
10383
- Call = Builder.CreateInsertVector(Ty, Call, Ops[I], Idx);
10384
- }
10385
-
10386
- return Call;
10348
+ return Tuple;
10387
10349
}
10388
10350
10389
10351
Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) {
@@ -10453,27 +10415,14 @@ void CodeGenFunction::GetAArch64SVEProcessedOperands(
10453
10415
continue;
10454
10416
}
10455
10417
10456
- if (IsTupleGetOrSet || !isa<ScalableVectorType>(Arg->getType())) {
10457
- Ops.push_back(Arg);
10458
- continue;
10459
- }
10418
+ if (isa<StructType>(Arg->getType()) && !IsTupleGetOrSet) {
10419
+ for (unsigned I = 0; I < Arg->getType()->getStructNumElements(); ++I)
10420
+ Ops.push_back(Builder.CreateExtractValue(Arg, I));
10460
10421
10461
- auto *VTy = cast<ScalableVectorType>(Arg->getType());
10462
- unsigned MinElts = VTy->getMinNumElements();
10463
- bool IsPred = VTy->getElementType()->isIntegerTy(1);
10464
- unsigned N = (MinElts * VTy->getScalarSizeInBits()) / (IsPred ? 16 : 128);
10465
-
10466
- if (N == 1) {
10467
- Ops.push_back(Arg);
10468
10422
continue;
10469
10423
}
10470
10424
10471
- for (unsigned I = 0; I < N; ++I) {
10472
- Value *Idx = ConstantInt::get(CGM.Int64Ty, (I * MinElts) / N);
10473
- auto *NewVTy =
10474
- ScalableVectorType::get(VTy->getElementType(), MinElts / N);
10475
- Ops.push_back(Builder.CreateExtractVector(NewVTy, Arg, Idx));
10476
- }
10425
+ Ops.push_back(Arg);
10477
10426
}
10478
10427
}
10479
10428
@@ -10511,7 +10460,7 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
10511
10460
else if (TypeFlags.isStructStore())
10512
10461
return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10513
10462
else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
10514
- return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops);
10463
+ return EmitSVETupleSetOrGet(TypeFlags, Ops);
10515
10464
else if (TypeFlags.isTupleCreate())
10516
10465
return EmitSVETupleCreate(TypeFlags, Ty, Ops);
10517
10466
else if (TypeFlags.isUndef())
0 commit comments