Skip to content

Commit 9c8c31e

Browse files
author
Mateja Marjanovic
committed
Revert "[AMDGPU] Trim zero components from buffer and image stores"
This reverts commit 3181a6e.
1 parent: 38d3c6c; commit: 9c8c31e

File tree

4 files changed: 41 additions and 232 deletions.

4 files changed: 41 additions and 232 deletions.

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -872,12 +872,10 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
872872

873873
defm int_amdgcn_image_store : AMDGPUImageDimIntrinsicsAll<
874874
"STORE", [], [AMDGPUArg<llvm_anyfloat_ty, "vdata">],
875-
[IntrWriteMem, IntrWillReturn], [SDNPMemOperand]>,
876-
AMDGPUImageDMaskIntrinsic;
875+
[IntrWriteMem, IntrWillReturn], [SDNPMemOperand]>;
877876
defm int_amdgcn_image_store_mip : AMDGPUImageDimIntrinsicsNoMsaa<
878877
"STORE_MIP", [], [AMDGPUArg<llvm_anyfloat_ty, "vdata">],
879-
[IntrWriteMem, IntrWillReturn], [SDNPMemOperand], 1>,
880-
AMDGPUImageDMaskIntrinsic;
878+
[IntrWriteMem, IntrWillReturn], [SDNPMemOperand], 1>;
881879

882880
//////////////////////////////////////////////////////////////////////////
883881
// MSAA intrinsics

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp

Lines changed: 23 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -376,36 +376,6 @@ static bool matchFPExtFromF16(Value *Arg, Value *&FPExtSrc) {
376376
return false;
377377
}
378378

379-
// Trim all zero components from the end of the vector \p UseV and return
380-
// an appropriate bitset with known elements.
381-
static APInt trimTrailingZerosInVector(InstCombiner &IC, Value *UseV,
382-
Instruction *I) {
383-
auto *VTy = cast<FixedVectorType>(UseV->getType());
384-
unsigned VWidth = VTy->getNumElements();
385-
APInt DemandedElts = APInt::getAllOnes(VWidth);
386-
387-
for (int i = VWidth - 1; i >= 0; --i) {
388-
APInt DemandOneElt = APInt::getOneBitSet(VWidth, i);
389-
KnownFPClass KnownFPClass =
390-
computeKnownFPClass(UseV, DemandOneElt, IC.getDataLayout(),
391-
/*InterestedClasses=*/fcAllFlags,
392-
/*Depth=*/0, &IC.getTargetLibraryInfo(),
393-
&IC.getAssumptionCache(), I,
394-
&IC.getDominatorTree(),
395-
&IC.getOptimizationRemarkEmitter());
396-
if (KnownFPClass.KnownFPClasses != fcPosZero)
397-
break;
398-
DemandedElts.clearBit(i);
399-
}
400-
return DemandedElts;
401-
}
402-
403-
static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
404-
IntrinsicInst &II,
405-
APInt DemandedElts,
406-
int DMaskIdx = -1,
407-
bool IsLoad = true);
408-
409379
std::optional<Instruction *>
410380
GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
411381
Intrinsic::ID IID = II.getIntrinsicID();
@@ -1120,65 +1090,26 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
11201090
return IC.replaceInstUsesWith(II, ConstantInt::getFalse(II.getType()));
11211091
break;
11221092
}
1123-
case Intrinsic::amdgcn_buffer_store:
1124-
case Intrinsic::amdgcn_buffer_store_format:
1125-
case Intrinsic::amdgcn_raw_buffer_store:
1126-
case Intrinsic::amdgcn_raw_buffer_store_format:
1127-
case Intrinsic::amdgcn_raw_tbuffer_store:
1128-
case Intrinsic::amdgcn_struct_buffer_store:
1129-
case Intrinsic::amdgcn_struct_buffer_store_format:
1130-
case Intrinsic::amdgcn_struct_tbuffer_store:
1131-
case Intrinsic::amdgcn_tbuffer_store:
1132-
case Intrinsic::amdgcn_image_store_1d:
1133-
case Intrinsic::amdgcn_image_store_1darray:
1134-
case Intrinsic::amdgcn_image_store_2d:
1135-
case Intrinsic::amdgcn_image_store_2darray:
1136-
case Intrinsic::amdgcn_image_store_2darraymsaa:
1137-
case Intrinsic::amdgcn_image_store_2dmsaa:
1138-
case Intrinsic::amdgcn_image_store_3d:
1139-
case Intrinsic::amdgcn_image_store_cube:
1140-
case Intrinsic::amdgcn_image_store_mip_1d:
1141-
case Intrinsic::amdgcn_image_store_mip_1darray:
1142-
case Intrinsic::amdgcn_image_store_mip_2d:
1143-
case Intrinsic::amdgcn_image_store_mip_2darray:
1144-
case Intrinsic::amdgcn_image_store_mip_3d:
1145-
case Intrinsic::amdgcn_image_store_mip_cube: {
1146-
if (!isa<FixedVectorType>(II.getArgOperand(0)->getType()))
1147-
break;
1148-
1149-
APInt DemandedElts =
1150-
trimTrailingZerosInVector(IC, II.getArgOperand(0), &II);
1151-
1152-
int DMaskIdx = getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID()) ? 1 : -1;
1153-
if (simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts, DMaskIdx,
1154-
false)) {
1155-
return IC.eraseInstFromFunction(II);
1093+
default: {
1094+
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
1095+
AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
1096+
return simplifyAMDGCNImageIntrinsic(ST, ImageDimIntr, II, IC);
11561097
}
1157-
1158-
break;
1159-
}
11601098
}
1161-
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
1162-
AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
1163-
return simplifyAMDGCNImageIntrinsic(ST, ImageDimIntr, II, IC);
11641099
}
11651100
return std::nullopt;
11661101
}
11671102

11681103
/// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
11691104
///
1170-
/// The result of simplifying amdgcn image and buffer store intrinsics is updating
1171-
/// definitions of the intrinsics vector argument, not Uses of the result like
1172-
/// image and buffer loads.
11731105
/// Note: This only supports non-TFE/LWE image intrinsic calls; those have
11741106
/// struct returns.
11751107
static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
11761108
IntrinsicInst &II,
11771109
APInt DemandedElts,
1178-
int DMaskIdx, bool IsLoad) {
1110+
int DMaskIdx = -1) {
11791111

1180-
auto *IIVTy = cast<FixedVectorType>(IsLoad ? II.getType()
1181-
: II.getOperand(0)->getType());
1112+
auto *IIVTy = cast<FixedVectorType>(II.getType());
11821113
unsigned VWidth = IIVTy->getNumElements();
11831114
if (VWidth == 1)
11841115
return nullptr;
@@ -1249,13 +1180,13 @@ static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
12491180
DemandedElts &= (1 << llvm::popcount(DMaskVal)) - 1;
12501181

12511182
unsigned NewDMaskVal = 0;
1252-
unsigned OrigLdStIdx = 0;
1183+
unsigned OrigLoadIdx = 0;
12531184
for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
12541185
const unsigned Bit = 1 << SrcIdx;
12551186
if (!!(DMaskVal & Bit)) {
1256-
if (!!DemandedElts[OrigLdStIdx])
1187+
if (!!DemandedElts[OrigLoadIdx])
12571188
NewDMaskVal |= Bit;
1258-
OrigLdStIdx++;
1189+
OrigLoadIdx++;
12591190
}
12601191
}
12611192

@@ -1283,45 +1214,29 @@ static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
12831214
(NewNumElts == 1) ? EltTy : FixedVectorType::get(EltTy, NewNumElts);
12841215
OverloadTys[0] = NewTy;
12851216

1286-
if (!IsLoad) {
1287-
SmallVector<int, 8> EltMask;
1288-
for (unsigned OrigStoreIdx = 0; OrigStoreIdx < VWidth; ++OrigStoreIdx)
1289-
if (DemandedElts[OrigStoreIdx])
1290-
EltMask.push_back(OrigStoreIdx);
1291-
1292-
if (NewNumElts == 1)
1293-
Args[0] = IC.Builder.CreateExtractElement(II.getOperand(0), EltMask[0]);
1294-
else
1295-
Args[0] = IC.Builder.CreateShuffleVector(II.getOperand(0), EltMask);
1296-
}
1297-
12981217
Function *NewIntrin = Intrinsic::getDeclaration(
12991218
II.getModule(), II.getIntrinsicID(), OverloadTys);
13001219
CallInst *NewCall = IC.Builder.CreateCall(NewIntrin, Args);
13011220
NewCall->takeName(&II);
13021221
NewCall->copyMetadata(II);
13031222

1304-
if (IsLoad) {
1305-
if (NewNumElts == 1) {
1306-
return IC.Builder.CreateInsertElement(UndefValue::get(IIVTy), NewCall,
1307-
DemandedElts.countr_zero());
1308-
}
1309-
1310-
SmallVector<int, 8> EltMask;
1311-
unsigned NewLoadIdx = 0;
1312-
for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
1313-
if (!!DemandedElts[OrigLoadIdx])
1314-
EltMask.push_back(NewLoadIdx++);
1315-
else
1316-
EltMask.push_back(NewNumElts);
1317-
}
1318-
1319-
auto *Shuffle = IC.Builder.CreateShuffleVector(NewCall, EltMask);
1223+
if (NewNumElts == 1) {
1224+
return IC.Builder.CreateInsertElement(UndefValue::get(IIVTy), NewCall,
1225+
DemandedElts.countr_zero());
1226+
}
13201227

1321-
return Shuffle;
1228+
SmallVector<int, 8> EltMask;
1229+
unsigned NewLoadIdx = 0;
1230+
for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
1231+
if (!!DemandedElts[OrigLoadIdx])
1232+
EltMask.push_back(NewLoadIdx++);
1233+
else
1234+
EltMask.push_back(NewNumElts);
13221235
}
13231236

1324-
return NewCall;
1237+
Value *Shuffle = IC.Builder.CreateShuffleVector(NewCall, EltMask);
1238+
1239+
return Shuffle;
13251240
}
13261241

13271242
std::optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(

llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ define double @test_constant_fold_rcp_f64_43() nounwind {
6666

6767
define float @test_constant_fold_rcp_f32_43_strictfp() nounwind strictfp {
6868
; CHECK-LABEL: @test_constant_fold_rcp_f32_43_strictfp(
69-
; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) #[[ATTR13:[0-9]+]]
69+
; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) #[[ATTR14:[0-9]+]]
7070
; CHECK-NEXT: ret float [[VAL]]
7171
;
7272
%val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) strictfp nounwind readnone
@@ -107,7 +107,7 @@ define double @test_constant_fold_sqrt_f64_undef() nounwind {
107107

108108
define half @test_constant_fold_sqrt_f16_0() nounwind {
109109
; CHECK-LABEL: @test_constant_fold_sqrt_f16_0(
110-
; CHECK-NEXT: [[VAL:%.*]] = call half @llvm.amdgcn.sqrt.f16(half 0xH0000) #[[ATTR14:[0-9]+]]
110+
; CHECK-NEXT: [[VAL:%.*]] = call half @llvm.amdgcn.sqrt.f16(half 0xH0000) #[[ATTR15:[0-9]+]]
111111
; CHECK-NEXT: ret half [[VAL]]
112112
;
113113
%val = call half @llvm.amdgcn.sqrt.f16(half 0.0) nounwind readnone
@@ -116,7 +116,7 @@ define half @test_constant_fold_sqrt_f16_0() nounwind {
116116

117117
define float @test_constant_fold_sqrt_f32_0() nounwind {
118118
; CHECK-LABEL: @test_constant_fold_sqrt_f32_0(
119-
; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 0.000000e+00) #[[ATTR14]]
119+
; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 0.000000e+00) #[[ATTR15]]
120120
; CHECK-NEXT: ret float [[VAL]]
121121
;
122122
%val = call float @llvm.amdgcn.sqrt.f32(float 0.0) nounwind readnone
@@ -125,7 +125,7 @@ define float @test_constant_fold_sqrt_f32_0() nounwind {
125125

126126
define double @test_constant_fold_sqrt_f64_0() nounwind {
127127
; CHECK-LABEL: @test_constant_fold_sqrt_f64_0(
128-
; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double 0.000000e+00) #[[ATTR14]]
128+
; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double 0.000000e+00) #[[ATTR15]]
129129
; CHECK-NEXT: ret double [[VAL]]
130130
;
131131
%val = call double @llvm.amdgcn.sqrt.f64(double 0.0) nounwind readnone
@@ -134,7 +134,7 @@ define double @test_constant_fold_sqrt_f64_0() nounwind {
134134

135135
define half @test_constant_fold_sqrt_f16_neg0() nounwind {
136136
; CHECK-LABEL: @test_constant_fold_sqrt_f16_neg0(
137-
; CHECK-NEXT: [[VAL:%.*]] = call half @llvm.amdgcn.sqrt.f16(half 0xH8000) #[[ATTR14]]
137+
; CHECK-NEXT: [[VAL:%.*]] = call half @llvm.amdgcn.sqrt.f16(half 0xH8000) #[[ATTR15]]
138138
; CHECK-NEXT: ret half [[VAL]]
139139
;
140140
%val = call half @llvm.amdgcn.sqrt.f16(half -0.0) nounwind readnone
@@ -143,7 +143,7 @@ define half @test_constant_fold_sqrt_f16_neg0() nounwind {
143143

144144
define float @test_constant_fold_sqrt_f32_neg0() nounwind {
145145
; CHECK-LABEL: @test_constant_fold_sqrt_f32_neg0(
146-
; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float -0.000000e+00) #[[ATTR14]]
146+
; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float -0.000000e+00) #[[ATTR15]]
147147
; CHECK-NEXT: ret float [[VAL]]
148148
;
149149
%val = call float @llvm.amdgcn.sqrt.f32(float -0.0) nounwind readnone
@@ -152,7 +152,7 @@ define float @test_constant_fold_sqrt_f32_neg0() nounwind {
152152

153153
define double @test_constant_fold_sqrt_f64_neg0() nounwind {
154154
; CHECK-LABEL: @test_constant_fold_sqrt_f64_neg0(
155-
; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double -0.000000e+00) #[[ATTR14]]
155+
; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double -0.000000e+00) #[[ATTR15]]
156156
; CHECK-NEXT: ret double [[VAL]]
157157
;
158158
%val = call double @llvm.amdgcn.sqrt.f64(double -0.0) nounwind readnone
@@ -644,7 +644,7 @@ define i1 @test_class_isnan_f32(float %x) nounwind {
644644

645645
define i1 @test_class_isnan_f32_strict(float %x) nounwind {
646646
; CHECK-LABEL: @test_class_isnan_f32_strict(
647-
; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.amdgcn.class.f32(float [[X:%.*]], i32 3) #[[ATTR15:[0-9]+]]
647+
; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.amdgcn.class.f32(float [[X:%.*]], i32 3) #[[ATTR16:[0-9]+]]
648648
; CHECK-NEXT: ret i1 [[VAL]]
649649
;
650650
%val = call i1 @llvm.amdgcn.class.f32(float %x, i32 3) strictfp
@@ -662,7 +662,7 @@ define i1 @test_class_is_p0_n0_f32(float %x) nounwind {
662662

663663
define i1 @test_class_is_p0_n0_f32_strict(float %x) nounwind {
664664
; CHECK-LABEL: @test_class_is_p0_n0_f32_strict(
665-
; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.amdgcn.class.f32(float [[X:%.*]], i32 96) #[[ATTR15]]
665+
; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.amdgcn.class.f32(float [[X:%.*]], i32 96) #[[ATTR16]]
666666
; CHECK-NEXT: ret i1 [[VAL]]
667667
;
668668
%val = call i1 @llvm.amdgcn.class.f32(float %x, i32 96) strictfp
@@ -1275,8 +1275,8 @@ define i32 @ubfe_offset_0_width_0(i32 %src) {
12751275

12761276
define i32 @ubfe_offset_0_width_3(i32 %src) {
12771277
; CHECK-LABEL: @ubfe_offset_0_width_3(
1278-
; CHECK-NEXT: [[BFE:%.*]] = and i32 [[SRC:%.*]], 7
1279-
; CHECK-NEXT: ret i32 [[BFE]]
1278+
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[SRC:%.*]], 7
1279+
; CHECK-NEXT: ret i32 [[TMP1]]
12801280
;
12811281
%bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 3)
12821282
ret i32 %bfe
@@ -1793,7 +1793,7 @@ define i64 @icmp_constant_inputs_false() {
17931793

17941794
define i64 @icmp_constant_inputs_true() {
17951795
; CHECK-LABEL: @icmp_constant_inputs_true(
1796-
; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0:![0-9]+]]) #[[ATTR16:[0-9]+]]
1796+
; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0:![0-9]+]]) #[[ATTR17:[0-9]+]]
17971797
; CHECK-NEXT: ret i64 [[RESULT]]
17981798
;
17991799
%result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 8, i32 34)
@@ -2500,7 +2500,7 @@ define i64 @fcmp_constant_inputs_false() {
25002500

25012501
define i64 @fcmp_constant_inputs_true() {
25022502
; CHECK-LABEL: @fcmp_constant_inputs_true(
2503-
; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR16]]
2503+
; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR17]]
25042504
; CHECK-NEXT: ret i64 [[RESULT]]
25052505
;
25062506
%result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 2.0, float 4.0, i32 4)
@@ -2542,7 +2542,7 @@ define i64 @ballot_zero_64() {
25422542

25432543
define i64 @ballot_one_64() {
25442544
; CHECK-LABEL: @ballot_one_64(
2545-
; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR16]]
2545+
; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR17]]
25462546
; CHECK-NEXT: ret i64 [[B]]
25472547
;
25482548
%b = call i64 @llvm.amdgcn.ballot.i64(i1 1)
@@ -2568,7 +2568,7 @@ define i32 @ballot_zero_32() {
25682568

25692569
define i32 @ballot_one_32() {
25702570
; CHECK-LABEL: @ballot_one_32(
2571-
; CHECK-NEXT: [[B:%.*]] = call i32 @llvm.read_register.i32(metadata [[META1:![0-9]+]]) #[[ATTR16]]
2571+
; CHECK-NEXT: [[B:%.*]] = call i32 @llvm.read_register.i32(metadata [[META1:![0-9]+]]) #[[ATTR17]]
25722572
; CHECK-NEXT: ret i32 [[B]]
25732573
;
25742574
%b = call i32 @llvm.amdgcn.ballot.i32(i1 1)
@@ -5586,7 +5586,7 @@ define double @trig_preop_constfold() {
55865586

55875587
define double @trig_preop_constfold_strictfp() {
55885588
; CHECK-LABEL: @trig_preop_constfold_strictfp(
5589-
; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 5) #[[ATTR15]]
5589+
; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 5) #[[ATTR16]]
55905590
; CHECK-NEXT: ret double [[VAL]]
55915591
;
55925592
%val = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 5) strictfp

0 commit comments

Comments
 (0)