From 34fa14584e637484aa02b66394cdbdd1369d23ff Mon Sep 17 00:00:00 2001
From: Jerry Mannil <65309407+jerrymannil@users.noreply.github.com>
Date: Tue, 14 Oct 2025 16:33:08 -0700
Subject: [PATCH] [ROCm] Adjust grid size for non-unit stride backwards indexing (#2710)

cherry-pick of https://github.com/pytorch/pytorch/commit/01a2812f48e719f86f6da1c5cfd7a56df7619ac9
---
 aten/src/ATen/native/cuda/Indexing.cu | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/aten/src/ATen/native/cuda/Indexing.cu b/aten/src/ATen/native/cuda/Indexing.cu
index df43bfac16f7..dacef18c79b6 100644
--- a/aten/src/ATen/native/cuda/Indexing.cu
+++ b/aten/src/ATen/native/cuda/Indexing.cu
@@ -710,6 +710,9 @@ void index_put_with_sort_kernel(Tensor & self, const c10::List(at::cuda::getCurrentDeviceProperties()->maxGridSize[1], ceil_div(sliceSize, (int64_t) (warp_size))) : grid.y,
+ grid.z);
 dim3 new_grid(ceil_div(num_indices, (int64_t) (indices_per_block * warp_size)), grid.y, grid.z);
 size_t smem_dups_size = indices_per_block * warp_size * sizeof(int64_t);
 #define KERNEL_GRID new_grid
@@ -788,7 +791,7 @@ void index_put_with_sort_kernel(Tensor & self, const c10::List<<>>(
+ indexing_backward_kernel_many_indices<<>>(
 sorted_indices.const_data_ptr(),
 orig_indices.const_data_ptr(),
 expandedValue.const_data_ptr(),