-
Notifications
You must be signed in to change notification settings - Fork 13.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] Fix typing error in multi dimensional promote alloca #131763
Merged
Conversation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@llvm/pr-subscribers-backend-amdgpu Author: Carl Ritson (perlfu) Changes: Fix type error when GEP uses i64 index introduced in #127973. Full diff: https://github.com/llvm/llvm-project/pull/131763.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 2e3fcdc3d3d7f..361bbf355632e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -444,29 +444,30 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
if (VarOffsets.size() > 1)
return nullptr;
- APInt Quot;
+ APInt IndexQuot;
uint64_t Rem;
- APInt::udivrem(ConstOffset, VecElemSize, Quot, Rem);
+ APInt::udivrem(ConstOffset, VecElemSize, IndexQuot, Rem);
if (Rem != 0)
return nullptr;
-
- ConstantInt *ConstIndex = ConstantInt::get(GEP->getContext(), Quot);
if (VarOffsets.size() == 0)
- return ConstIndex;
+ return ConstantInt::get(GEP->getContext(), IndexQuot);
IRBuilder<> Builder(GEP);
const auto &VarOffset = VarOffsets.front();
- APInt::udivrem(VarOffset.second, VecElemSize, Quot, Rem);
- if (Rem != 0 || Quot.isZero())
+ APInt OffsetQuot;
+ APInt::udivrem(VarOffset.second, VecElemSize, OffsetQuot, Rem);
+ if (Rem != 0 || OffsetQuot.isZero())
return nullptr;
Value *Offset = VarOffset.first;
- if (!Quot.isOne()) {
- auto *OffsetType = dyn_cast<IntegerType>(Offset->getType());
- if (!OffsetType)
- return nullptr;
- ConstantInt *ConstMul = ConstantInt::get(OffsetType, Quot.getZExtValue());
+ auto *OffsetType = dyn_cast<IntegerType>(Offset->getType());
+ if (!OffsetType)
+ return nullptr;
+
+ if (!OffsetQuot.isOne()) {
+ ConstantInt *ConstMul =
+ ConstantInt::get(OffsetType, OffsetQuot.getZExtValue());
Offset = Builder.CreateMul(Offset, ConstMul);
if (Instruction *NewInst = dyn_cast<Instruction>(Offset))
NewInsts.push_back(NewInst);
@@ -474,6 +475,8 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
if (ConstOffset.isZero())
return Offset;
+ ConstantInt *ConstIndex =
+ ConstantInt::get(OffsetType, IndexQuot.getZExtValue());
Value *IndexAdd = Builder.CreateAdd(ConstIndex, Offset);
if (Instruction *NewInst = dyn_cast<Instruction>(IndexAdd))
NewInsts.push_back(NewInst);
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll
index c5b9bff8b663a..55b0c39aaee39 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll
@@ -240,6 +240,49 @@ define amdgpu_kernel void @i64_2d_load_store_subvec_3_i64_offset(ptr %out) {
ret void
}
+define amdgpu_kernel void @i64_2d_load_store_subvec_3_i64_offset_index(ptr %out) {
+; CHECK-LABEL: define amdgpu_kernel void @i64_2d_load_store_subvec_3_i64_offset_index(
+; CHECK-SAME: ptr [[OUT:%.*]]) {
+; CHECK-NEXT: [[X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[Y:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT: [[C1:%.*]] = icmp uge i32 [[X]], 3
+; CHECK-NEXT: [[C2:%.*]] = icmp uge i32 [[Y]], 3
+; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[C1]], i32 1, i32 2
+; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[C2]], i32 0, i32 [[SEL1]]
+; CHECK-NEXT: [[SEL3:%.*]] = zext i32 [[SEL2]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[SEL3]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 6, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <6 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5>, i64 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <3 x i64> poison, i64 [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 1
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <6 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5>, i64 [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <3 x i64> [[TMP4]], i64 [[TMP6]], i64 1
+; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP2]], 2
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <6 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5>, i64 [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <3 x i64> [[TMP7]], i64 [[TMP9]], i64 2
+; CHECK-NEXT: [[ELEM:%.*]] = extractelement <3 x i64> [[TMP10]], i32 2
+; CHECK-NEXT: store i64 [[ELEM]], ptr [[OUT]], align 8
+; CHECK-NEXT: ret void
+;
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+ %c1 = icmp uge i32 %x, 3
+ %c2 = icmp uge i32 %y, 3
+ %sel1 = select i1 %c1, i32 1, i32 2
+ %sel2 = select i1 %c2, i32 0, i32 %sel1
+ %sel3 = zext i32 %sel2 to i64
+ %alloca = alloca [2 x [3 x i64]], align 16, addrspace(5)
+ %gep.00 = getelementptr inbounds [2 x [3 x i64]], ptr addrspace(5) %alloca, i32 0
+ %gep.01 = getelementptr inbounds [2 x [3 x i64]], ptr addrspace(5) %alloca, i32 0, i32 1, i32 0
+ store <3 x i64> <i64 0, i64 1, i64 2>, ptr addrspace(5) %gep.00
+ store <3 x i64> <i64 3, i64 4, i64 5>, ptr addrspace(5) %gep.01
+ %gep = getelementptr inbounds [2 x [3 x i64]], ptr addrspace(5) %alloca, i64 1, i64 %sel3
+ %load = load <3 x i64>, ptr addrspace(5) %gep
+ %elem = extractelement <3 x i64> %load, i32 2
+ store i64 %elem, ptr %out
+ ret void
+}
+
define amdgpu_kernel void @i64_2d_load_store_subvec_4(ptr %out) {
; CHECK-LABEL: define amdgpu_kernel void @i64_2d_load_store_subvec_4(
; CHECK-SAME: ptr [[OUT:%.*]]) {
|
arsenm
approved these changes
Mar 18, 2025
0d6d749
to
4657185
Compare
|
Fix type error when GEP uses i64 index introduced in llvm#127973.
4657185
to
4a784b3
Compare
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Fix the type error, introduced in #127973, that occurs when a GEP uses an i64 index.