Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AMDGPU] Fix typing error in multi dimensional promote alloca #131763

Merged
merged 1 commit into from
Mar 18, 2025

Conversation

perlfu
Copy link
Contributor

@perlfu perlfu commented Mar 18, 2025

Fix type error when GEP uses i64 index introduced in #127973.

@llvmbot
Copy link
Member

llvmbot commented Mar 18, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Carl Ritson (perlfu)

Changes

Fix type error when GEP uses i64 index introduced in #127973.


Full diff: https://github.com/llvm/llvm-project/pull/131763.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp (+15-12)
  • (modified) llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll (+43)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 2e3fcdc3d3d7f..361bbf355632e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -444,29 +444,30 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
   if (VarOffsets.size() > 1)
     return nullptr;
 
-  APInt Quot;
+  APInt IndexQuot;
   uint64_t Rem;
-  APInt::udivrem(ConstOffset, VecElemSize, Quot, Rem);
+  APInt::udivrem(ConstOffset, VecElemSize, IndexQuot, Rem);
   if (Rem != 0)
     return nullptr;
-
-  ConstantInt *ConstIndex = ConstantInt::get(GEP->getContext(), Quot);
   if (VarOffsets.size() == 0)
-    return ConstIndex;
+    return ConstantInt::get(GEP->getContext(), IndexQuot);
 
   IRBuilder<> Builder(GEP);
 
   const auto &VarOffset = VarOffsets.front();
-  APInt::udivrem(VarOffset.second, VecElemSize, Quot, Rem);
-  if (Rem != 0 || Quot.isZero())
+  APInt OffsetQuot;
+  APInt::udivrem(VarOffset.second, VecElemSize, OffsetQuot, Rem);
+  if (Rem != 0 || OffsetQuot.isZero())
     return nullptr;
 
   Value *Offset = VarOffset.first;
-  if (!Quot.isOne()) {
-    auto *OffsetType = dyn_cast<IntegerType>(Offset->getType());
-    if (!OffsetType)
-      return nullptr;
-    ConstantInt *ConstMul = ConstantInt::get(OffsetType, Quot.getZExtValue());
+  auto *OffsetType = dyn_cast<IntegerType>(Offset->getType());
+  if (!OffsetType)
+    return nullptr;
+
+  if (!OffsetQuot.isOne()) {
+    ConstantInt *ConstMul =
+        ConstantInt::get(OffsetType, OffsetQuot.getZExtValue());
     Offset = Builder.CreateMul(Offset, ConstMul);
     if (Instruction *NewInst = dyn_cast<Instruction>(Offset))
       NewInsts.push_back(NewInst);
@@ -474,6 +475,8 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
   if (ConstOffset.isZero())
     return Offset;
 
+  ConstantInt *ConstIndex =
+      ConstantInt::get(OffsetType, IndexQuot.getZExtValue());
   Value *IndexAdd = Builder.CreateAdd(ConstIndex, Offset);
   if (Instruction *NewInst = dyn_cast<Instruction>(IndexAdd))
     NewInsts.push_back(NewInst);
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll
index c5b9bff8b663a..55b0c39aaee39 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll
@@ -240,6 +240,49 @@ define amdgpu_kernel void @i64_2d_load_store_subvec_3_i64_offset(ptr %out) {
   ret void
 }
 
+define amdgpu_kernel void @i64_2d_load_store_subvec_3_i64_offset_index(ptr %out) {
+; CHECK-LABEL: define amdgpu_kernel void @i64_2d_load_store_subvec_3_i64_offset_index(
+; CHECK-SAME: ptr [[OUT:%.*]]) {
+; CHECK-NEXT:    [[X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT:    [[Y:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT:    [[C1:%.*]] = icmp uge i32 [[X]], 3
+; CHECK-NEXT:    [[C2:%.*]] = icmp uge i32 [[Y]], 3
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[C1]], i32 1, i32 2
+; CHECK-NEXT:    [[SEL2:%.*]] = select i1 [[C2]], i32 0, i32 [[SEL1]]
+; CHECK-NEXT:    [[SEL3:%.*]] = zext i32 [[SEL2]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[SEL3]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 6, [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <6 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5>, i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <3 x i64> poison, i64 [[TMP3]], i64 0
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP2]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <6 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5>, i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <3 x i64> [[TMP4]], i64 [[TMP6]], i64 1
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[TMP2]], 2
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <6 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5>, i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <3 x i64> [[TMP7]], i64 [[TMP9]], i64 2
+; CHECK-NEXT:    [[ELEM:%.*]] = extractelement <3 x i64> [[TMP10]], i32 2
+; CHECK-NEXT:    store i64 [[ELEM]], ptr [[OUT]], align 8
+; CHECK-NEXT:    ret void
+;
+  %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+  %c1 = icmp uge i32 %x, 3
+  %c2 = icmp uge i32 %y, 3
+  %sel1 = select i1 %c1, i32 1, i32 2
+  %sel2 = select i1 %c2, i32 0, i32 %sel1
+  %sel3 = zext i32 %sel2 to i64
+  %alloca = alloca [2 x [3 x i64]], align 16, addrspace(5)
+  %gep.00 = getelementptr inbounds [2 x [3 x i64]], ptr addrspace(5) %alloca, i32 0
+  %gep.01 = getelementptr inbounds [2 x [3 x i64]], ptr addrspace(5) %alloca, i32 0, i32 1, i32 0
+  store <3 x i64> <i64 0, i64 1, i64 2>, ptr addrspace(5) %gep.00
+  store <3 x i64> <i64 3, i64 4, i64 5>, ptr addrspace(5) %gep.01
+  %gep = getelementptr inbounds [2 x [3 x i64]], ptr addrspace(5) %alloca, i64 1, i64 %sel3
+  %load = load <3 x i64>, ptr addrspace(5) %gep
+  %elem = extractelement <3 x i64> %load, i32 2
+  store i64 %elem, ptr %out
+  ret void
+}
+
 define amdgpu_kernel void @i64_2d_load_store_subvec_4(ptr %out) {
 ; CHECK-LABEL: define amdgpu_kernel void @i64_2d_load_store_subvec_4(
 ; CHECK-SAME: ptr [[OUT:%.*]]) {

@perlfu perlfu force-pushed the amdgpu-fix-127973-2 branch from 0d6d749 to 4657185 Compare March 18, 2025 11:58
@perlfu
Copy link
Contributor Author

perlfu commented Mar 18, 2025

  • Rebased

Fix type error when GEP uses i64 index introduced in llvm#127973.
@perlfu perlfu force-pushed the amdgpu-fix-127973-2 branch from 4657185 to 4a784b3 Compare March 18, 2025 12:30
@perlfu perlfu merged commit 0e4116a into llvm:main Mar 18, 2025
11 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants