From abb772f574a65c7467b96bb69743b8efbf9fe645 Mon Sep 17 00:00:00 2001
From: Jerry Mannil <65309407+jerrymannil@users.noreply.github.com>
Date: Wed, 15 Oct 2025 11:24:09 -0700
Subject: [PATCH] [ROCm] Adjust grid size for non-unit stride backwards indexing

cherry-pick of https://github.com/pytorch/pytorch/commit/01a2812f48e719f86f6da1c5cfd7a56df7619ac9
---
 aten/src/ATen/native/cuda/Indexing.cu | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/aten/src/ATen/native/cuda/Indexing.cu b/aten/src/ATen/native/cuda/Indexing.cu
index 39ffc1abcddb5..20b268b733adc 100644
--- a/aten/src/ATen/native/cuda/Indexing.cu
+++ b/aten/src/ATen/native/cuda/Indexing.cu
@@ -716,6 +716,9 @@ void index_put_with_sort_kernel(Tensor & self, const c10::List(at::cuda::getCurrentDeviceProperties()->maxGridSize[1], ceil_div(sliceSize, (int64_t) (warp_size))) : grid.y, + grid.z); dim3 new_grid(ceil_div(num_indices, (int64_t) (indices_per_block * warp_size)), grid.y, grid.z); size_t smem_dups_size = indices_per_block * warp_size * sizeof(int64_t); #define KERNEL_GRID new_grid
@@ -794,7 +797,7 @@ void index_put_with_sort_kernel(Tensor & self, const c10::List<<>>( + indexing_backward_kernel_many_indices<<>>( sorted_indices.const_data_ptr(), orig_indices.const_data_ptr(), expandedValue.const_data_ptr(),