@@ -376,36 +376,6 @@ static bool matchFPExtFromF16(Value *Arg, Value *&FPExtSrc) {
   return false;
 }
 
-// Trim all zero components from the end of the vector \p UseV and return
-// an appropriate bitset with known elements.
-static APInt trimTrailingZerosInVector(InstCombiner &IC, Value *UseV,
-                                       Instruction *I) {
-  auto *VTy = cast<FixedVectorType>(UseV->getType());
-  unsigned VWidth = VTy->getNumElements();
-  APInt DemandedElts = APInt::getAllOnes(VWidth);
-
-  for (int i = VWidth - 1; i >= 0; --i) {
-    APInt DemandOneElt = APInt::getOneBitSet(VWidth, i);
-    KnownFPClass KnownFPClass =
-        computeKnownFPClass(UseV, DemandOneElt, IC.getDataLayout(),
-                            /*InterestedClasses=*/fcAllFlags,
-                            /*Depth=*/0, &IC.getTargetLibraryInfo(),
-                            &IC.getAssumptionCache(), I,
-                            &IC.getDominatorTree(),
-                            &IC.getOptimizationRemarkEmitter());
-    if (KnownFPClass.KnownFPClasses != fcPosZero)
-      break;
-    DemandedElts.clearBit(i);
-  }
-  return DemandedElts;
-}
-
-static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
-                                                    IntrinsicInst &II,
-                                                    APInt DemandedElts,
-                                                    int DMaskIdx = -1,
-                                                    bool IsLoad = true);
-
 std::optional<Instruction *>
 GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
   Intrinsic::ID IID = II.getIntrinsicID();
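
For reference, the helper deleted above walked \p UseV backwards from its last element, clearing the demanded bit for each trailing element that computeKnownFPClass proved to be +0.0, and stopping at the first element not known to be +0.0. A minimal standalone sketch of that idea, using a plain constant vector and a direct comparison in place of LLVM's APInt and known-FP-class machinery (the names here are illustrative, not LLVM API):

#include <bitset>
#include <cmath>
#include <cstddef>
#include <vector>

// Sketch only: Elts plays the role of a vector whose per-element FP class is
// fully known; bit i of the result means "element i is still demanded".
static std::bitset<64> trimTrailingZeros(const std::vector<double> &Elts) {
  std::bitset<64> DemandedElts;
  for (std::size_t I = 0; I < Elts.size(); ++I)
    DemandedElts.set(I);
  // Walk from the last element toward the first.
  for (std::size_t I = Elts.size(); I-- > 0;) {
    // Stand-in for the KnownFPClasses == fcPosZero test: +0.0 exactly,
    // excluding -0.0 (which compares equal to 0.0 but has the sign bit set).
    const bool KnownPosZero = Elts[I] == 0.0 && !std::signbit(Elts[I]);
    if (!KnownPosZero)
      break;
    DemandedElts.reset(I); // Trailing zero: the store need not write it.
  }
  return DemandedElts;
}
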
@@ -1120,65 +1090,26 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
     return IC.replaceInstUsesWith(II, ConstantInt::getFalse(II.getType()));
     break;
   }
-  case Intrinsic::amdgcn_buffer_store:
-  case Intrinsic::amdgcn_buffer_store_format:
-  case Intrinsic::amdgcn_raw_buffer_store:
-  case Intrinsic::amdgcn_raw_buffer_store_format:
-  case Intrinsic::amdgcn_raw_tbuffer_store:
-  case Intrinsic::amdgcn_struct_buffer_store:
-  case Intrinsic::amdgcn_struct_buffer_store_format:
-  case Intrinsic::amdgcn_struct_tbuffer_store:
-  case Intrinsic::amdgcn_tbuffer_store:
-  case Intrinsic::amdgcn_image_store_1d:
-  case Intrinsic::amdgcn_image_store_1darray:
-  case Intrinsic::amdgcn_image_store_2d:
-  case Intrinsic::amdgcn_image_store_2darray:
-  case Intrinsic::amdgcn_image_store_2darraymsaa:
-  case Intrinsic::amdgcn_image_store_2dmsaa:
-  case Intrinsic::amdgcn_image_store_3d:
-  case Intrinsic::amdgcn_image_store_cube:
-  case Intrinsic::amdgcn_image_store_mip_1d:
-  case Intrinsic::amdgcn_image_store_mip_1darray:
-  case Intrinsic::amdgcn_image_store_mip_2d:
-  case Intrinsic::amdgcn_image_store_mip_2darray:
-  case Intrinsic::amdgcn_image_store_mip_3d:
-  case Intrinsic::amdgcn_image_store_mip_cube: {
-    if (!isa<FixedVectorType>(II.getArgOperand(0)->getType()))
-      break;
-
-    APInt DemandedElts =
-        trimTrailingZerosInVector(IC, II.getArgOperand(0), &II);
-
-    int DMaskIdx = getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID()) ? 1 : -1;
-    if (simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts, DMaskIdx,
-                                              false)) {
-      return IC.eraseInstFromFunction(II);
+  default: {
+    if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
+            AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
+      return simplifyAMDGCNImageIntrinsic(ST, ImageDimIntr, II, IC);
     }
-
-    break;
-  }
   }
-  if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
-          AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
-    return simplifyAMDGCNImageIntrinsic(ST, ImageDimIntr, II, IC);
   }
   return std::nullopt;
 }
 
 /// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
 ///
-/// The result of simplifying amdgcn image and buffer store intrinsics is updating
-/// definitions of the intrinsics vector argument, not Uses of the result like
-/// image and buffer loads.
 /// Note: This only supports non-TFE/LWE image intrinsic calls; those have
 /// struct returns.
 static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
                                                     IntrinsicInst &II,
                                                     APInt DemandedElts,
-                                                    int DMaskIdx, bool IsLoad) {
+                                                    int DMaskIdx = -1) {
 
-  auto *IIVTy = cast<FixedVectorType>(IsLoad ? II.getType()
-                                             : II.getOperand(0)->getType());
+  auto *IIVTy = cast<FixedVectorType>(II.getType());
   unsigned VWidth = IIVTy->getNumElements();
   if (VWidth == 1)
     return nullptr;
@@ -1249,13 +1180,13 @@ static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
   DemandedElts &= (1 << llvm::popcount(DMaskVal)) - 1;
 
   unsigned NewDMaskVal = 0;
-  unsigned OrigLdStIdx = 0;
+  unsigned OrigLoadIdx = 0;
   for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
     const unsigned Bit = 1 << SrcIdx;
     if (!!(DMaskVal & Bit)) {
-      if (!!DemandedElts[OrigLdStIdx])
+      if (!!DemandedElts[OrigLoadIdx])
         NewDMaskVal |= Bit;
-      OrigLdStIdx++;
+      OrigLoadIdx++;
     }
   }
 
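
The loop in this hunk is the heart of the dmask narrowing: each set bit of DMaskVal enables one texel channel, and the i-th enabled channel feeds the i-th element of the result vector, so a channel's bit survives only if that element is still demanded. A standalone sketch with plain integers in place of APInt (narrowDMask is a hypothetical name, not an LLVM function):

#include <cstdint>

// Sketch only: DMaskVal is the intrinsic's dmask immediate, DemandedElts a
// bitmask over the result vector's lanes. Keep a channel's dmask bit only if
// the vector element it feeds is still demanded.
static unsigned narrowDMask(unsigned DMaskVal, std::uint64_t DemandedElts) {
  unsigned NewDMaskVal = 0;
  unsigned OrigLoadIdx = 0;
  for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
    const unsigned Bit = 1u << SrcIdx;
    if (DMaskVal & Bit) {
      if (DemandedElts & (std::uint64_t{1} << OrigLoadIdx))
        NewDMaskVal |= Bit;
      ++OrigLoadIdx;
    }
  }
  return NewDMaskVal;
}

// Example: DMaskVal = 0b1011 loads channels x, y and w into lanes 0, 1 and 2.
// If only lane 2 is demanded (DemandedElts = 0b100), the result is 0b1000:
// the load shrinks to channel w alone.
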
@@ -1283,45 +1214,29 @@ static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
       (NewNumElts == 1) ? EltTy : FixedVectorType::get(EltTy, NewNumElts);
   OverloadTys[0] = NewTy;
 
-  if (!IsLoad) {
-    SmallVector<int, 8> EltMask;
-    for (unsigned OrigStoreIdx = 0; OrigStoreIdx < VWidth; ++OrigStoreIdx)
-      if (DemandedElts[OrigStoreIdx])
-        EltMask.push_back(OrigStoreIdx);
-
-    if (NewNumElts == 1)
-      Args[0] = IC.Builder.CreateExtractElement(II.getOperand(0), EltMask[0]);
-    else
-      Args[0] = IC.Builder.CreateShuffleVector(II.getOperand(0), EltMask);
-  }
-
   Function *NewIntrin = Intrinsic::getDeclaration(
       II.getModule(), II.getIntrinsicID(), OverloadTys);
   CallInst *NewCall = IC.Builder.CreateCall(NewIntrin, Args);
   NewCall->takeName(&II);
   NewCall->copyMetadata(II);
 
-  if (IsLoad) {
-    if (NewNumElts == 1) {
-      return IC.Builder.CreateInsertElement(UndefValue::get(IIVTy), NewCall,
-                                            DemandedElts.countr_zero());
-    }
-
-    SmallVector<int, 8> EltMask;
-    unsigned NewLoadIdx = 0;
-    for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
-      if (!!DemandedElts[OrigLoadIdx])
-        EltMask.push_back(NewLoadIdx++);
-      else
-        EltMask.push_back(NewNumElts);
-    }
-
-    auto *Shuffle = IC.Builder.CreateShuffleVector(NewCall, EltMask);
+  if (NewNumElts == 1) {
+    return IC.Builder.CreateInsertElement(UndefValue::get(IIVTy), NewCall,
+                                          DemandedElts.countr_zero());
+  }
 
-    return Shuffle;
+  SmallVector<int, 8> EltMask;
+  unsigned NewLoadIdx = 0;
+  for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
+    if (!!DemandedElts[OrigLoadIdx])
+      EltMask.push_back(NewLoadIdx++);
+    else
+      EltMask.push_back(NewNumElts);
   }
 
-  return NewCall;
+  Value *Shuffle = IC.Builder.CreateShuffleVector(NewCall, EltMask);
+
+  return Shuffle;
 }
 
 std::optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
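
To make the reconstruction step in the last hunk concrete: after the call is rebuilt with NewNumElts lanes, a single shuffle widens it back to the original VWidth, pulling demanded lanes from the narrow result in order and pointing dropped lanes one past its end, where the one-operand shufflevector reads an undef lane. A standalone sketch of the mask computation with a plain bitmask in place of APInt (buildReconstructionMask is a hypothetical name):

#include <cstdint>
#include <vector>

// Sketch only: build the shuffle mask that widens the narrowed load result
// back to VWidth lanes. Demanded lanes take successive elements of the new
// result; dropped lanes index one past its end, i.e. an undef lane.
static std::vector<int> buildReconstructionMask(unsigned VWidth,
                                                std::uint64_t DemandedElts) {
  int NewNumElts = 0;
  for (unsigned I = 0; I < VWidth; ++I)
    if (DemandedElts & (std::uint64_t{1} << I))
      ++NewNumElts;

  std::vector<int> EltMask;
  int NewLoadIdx = 0;
  for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
    if (DemandedElts & (std::uint64_t{1} << OrigLoadIdx))
      EltMask.push_back(NewLoadIdx++); // read the next narrow-result lane
    else
      EltMask.push_back(NewNumElts);   // out of range: reads an undef lane
  }
  return EltMask;
}

// Example: VWidth = 4 with DemandedElts = 0b0101 gives NewNumElts = 2 and
// EltMask = {0, 2, 1, 2}; lanes 0 and 2 come from the two loaded values,
// lanes 1 and 3 are undef.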
0 commit comments