Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions hipamd/include/hip/amd_detail/hip_api_trace.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -671,12 +671,14 @@ typedef hipError_t (*t_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor)(
// Function-pointer type whose parameter list mirrors the
// hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags declaration
// (numBlocks out-pointer, module function handle, block size, dynamic shared
// memory per block, flags).
typedef hipError_t (*t_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags)(
int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags);
// Function-pointer type for hipModuleOccupancyMaxPotentialBlockSize.
//
// blkSizeToDynSMemSize is placed between f and dynSharedMemPerBlk. In the
// hip_platform.cpp implementation a non-null callback is invoked with a block
// size and its result is used instead of dynSharedMemPerBlk when estimating
// LDS usage; a null callback leaves dynSharedMemPerBlk in effect.
typedef hipError_t (*t_hipModuleOccupancyMaxPotentialBlockSize)(
    int* gridSize, int* blockSize, hipFunction_t f, hipOccupancyB2DSize_t blkSizeToDynSMemSize,
    size_t dynSharedMemPerBlk, int blockSizeLimit);
// Function-pointer type for hipModuleOccupancyMaxPotentialBlockSizeWithFlags.
//
// Same parameter order as t_hipModuleOccupancyMaxPotentialBlockSize (the
// blkSizeToDynSMemSize callback follows f) with a trailing flags argument.
typedef hipError_t (*t_hipModuleOccupancyMaxPotentialBlockSizeWithFlags)(
    int* gridSize, int* blockSize, hipFunction_t f, hipOccupancyB2DSize_t blkSizeToDynSMemSize,
    size_t dynSharedMemPerBlk, int blockSizeLimit, unsigned int flags);
// Function-pointer type with the same signature as hipModuleUnload.
typedef hipError_t (*t_hipModuleUnload)(hipModule_t module);
typedef hipError_t (*t_hipOccupancyMaxActiveBlocksPerMultiprocessor)(int* numBlocks, const void* f,
int blockSize,
Expand Down
8 changes: 5 additions & 3 deletions hipamd/src/hip_api_trace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -553,11 +553,13 @@ hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, hi
// Declaration only. The parameter list matches the
// t_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags function-pointer type.
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags);
// Declaration only. The definition in hip_platform.cpp names the callback
// parameter blockSizeToDynamicSMemSize and passes it through to
// hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor.
hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, hipFunction_t f,
hipOccupancyB2DSize_t blkSizeToDynSMemSize,
size_t dynSharedMemPerBlk, int blockSizeLimit);
// Declaration only. Parameter order matches
// t_hipModuleOccupancyMaxPotentialBlockSizeWithFlags: the blkSizeToDynSMemSize
// callback follows f, and flags is last.
hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(
    int* gridSize, int* blockSize, hipFunction_t f, hipOccupancyB2DSize_t blkSizeToDynSMemSize,
    size_t dynSharedMemPerBlk, int blockSizeLimit, unsigned int flags);
// Declaration only. Same signature as the t_hipModuleUnload function-pointer type.
hipError_t hipModuleUnload(hipModule_t module);
// Declaration only. Unlike the hipModule* variants above, the kernel is
// identified by a const void* rather than a hipFunction_t.
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, const void* f,
int blockSize, size_t dynSharedMemPerBlk);
Expand Down
2 changes: 1 addition & 1 deletion hipamd/src/hip_module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ hipError_t ihipLaunchKernel_validate(hipFunction_t f, const amd::LaunchParams& l
int block_size = launch_params.local_.product();
hipError_t err = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
&num_blocks, &max_blocks_per_grid, &best_block_size, *device, f, block_size,
launch_params.sharedMemBytes_, true);
launch_params.sharedMemBytes_, true, nullptr);
if (err != hipSuccess) {
return err;
}
Expand Down
27 changes: 17 additions & 10 deletions hipamd/src/hip_platform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,8 @@ hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memor
namespace hip_impl {
hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(
int* maxBlocksPerCU, int* numBlocksPerGrid, int* bestBlockSize, const amd::Device& device,
hipFunction_t func, int inputBlockSize, size_t dynamicSMemSize, bool bCalcPotentialBlkSz) {
hipFunction_t func, int inputBlockSize, size_t dynamicSMemSize, bool bCalcPotentialBlkSz,
hipOccupancyB2DSize_t blockSizeToDynamicSMemSize) {
hip::DeviceFunc* function = hip::DeviceFunc::asFunction(func);
const amd::Kernel& kernel = *function->kernel();

Expand Down Expand Up @@ -420,7 +421,11 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(
const int alu_limited_threads = alu_occupancy * wrkGrpInfo->wavefrontSize_;

int lds_occupancy_wgs = INT_MAX;
const size_t total_used_lds = wrkGrpInfo->usedLDSSize_ + dynamicSMemSize;
size_t dynamicSMemSizeFinal = dynamicSMemSize;
if (blockSizeToDynamicSMemSize != nullptr) {
dynamicSMemSizeFinal = (*blockSizeToDynamicSMemSize)(inputBlockSize);
}
const size_t total_used_lds = wrkGrpInfo->usedLDSSize_ + dynamicSMemSizeFinal;
if (total_used_lds != 0) {
lds_occupancy_wgs = static_cast<int>(device.info().localMemSize_ / total_used_lds);
}
Expand Down Expand Up @@ -475,7 +480,7 @@ hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, cons
int best_block_size = 0;
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
&num_blocks, &max_blocks_per_grid, &best_block_size, device, func, blockSizeLimit,
dynSharedMemPerBlk, true);
dynSharedMemPerBlk, true, nullptr);
if (ret == hipSuccess) {
*blockSize = best_block_size;
*gridSize = max_blocks_per_grid;
Expand All @@ -484,6 +489,7 @@ hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, cons
}

hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, hipFunction_t f,
hipOccupancyB2DSize_t blockSizeToDynamicSMemSize,
size_t dynSharedMemPerBlk, int blockSizeLimit) {
HIP_INIT_API(hipModuleOccupancyMaxPotentialBlockSize, f, dynSharedMemPerBlk, blockSizeLimit);
if ((gridSize == nullptr) || (blockSize == nullptr) || (f == nullptr)) {
Expand All @@ -495,7 +501,7 @@ hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize
int best_block_size = 0;
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
&num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSizeLimit,
dynSharedMemPerBlk, true);
dynSharedMemPerBlk, true, blockSizeToDynamicSMemSize);
if (ret == hipSuccess) {
*blockSize = best_block_size;
*gridSize = max_blocks_per_grid;
Expand All @@ -504,7 +510,8 @@ hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize
}

hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize,
hipFunction_t f,
hipFunction_t f, hipOccupancyB2DSize_t
blockSizeToDynamicSMemSize,
size_t dynSharedMemPerBlk,
int blockSizeLimit,
unsigned int flags) {
Expand All @@ -522,7 +529,7 @@ hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int*
int best_block_size = 0;
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
&num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSizeLimit,
dynSharedMemPerBlk, true);
dynSharedMemPerBlk, true, blockSizeToDynamicSMemSize);
if (ret == hipSuccess) {
*blockSize = best_block_size;
*gridSize = max_blocks_per_grid;
Expand All @@ -545,7 +552,7 @@ hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, hi
int best_block_size = 0;
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
&num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSize, dynSharedMemPerBlk,
false);
false, nullptr);
*numBlocks = num_blocks;
HIP_RETURN(ret);
}
Expand All @@ -567,7 +574,7 @@ hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
int best_block_size = 0;
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
&num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSize, dynSharedMemPerBlk,
false);
false, nullptr);
*numBlocks = num_blocks;
HIP_RETURN(ret);
}
Expand All @@ -592,7 +599,7 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, const vo
int best_block_size = 0;
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
&num_blocks, &max_blocks_per_grid, &best_block_size, device, func, blockSize, dynamicSMemSize,
false);
false, nullptr);
*numBlocks = num_blocks;
HIP_RETURN(ret);
}
Expand Down Expand Up @@ -622,7 +629,7 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks,
int best_block_size = 0;
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
&num_blocks, &max_blocks_per_grid, &best_block_size, device, func, blockSize, dynamicSMemSize,
false);
false, nullptr);
*numBlocks = num_blocks;
HIP_RETURN(ret);
}
Expand Down
3 changes: 2 additions & 1 deletion hipamd/src/hip_platform.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ namespace hip_impl {

// Internal occupancy helper shared by the hipOccupancy* / hipModuleOccupancy*
// entry points in hip_platform.cpp.
//
// Outputs are written through maxBlocksPerCU, numBlocksPerGrid and
// bestBlockSize. Callers that only need the active-block count pass
// bCalcPotentialBlkSz = false; the *MaxPotentialBlockSize callers pass true.
//
// blockSizeToDynamicSMemSize may be nullptr. When it is non-null the
// implementation calls it with inputBlockSize and uses the returned value in
// place of dynamicSMemSize for the LDS usage estimate.
hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(
    int* maxBlocksPerCU, int* numBlocksPerGrid, int* bestBlockSize, const amd::Device& device,
    hipFunction_t func, int inputBlockSize, size_t dynamicSMemSize, bool bCalcPotentialBlkSz,
    hipOccupancyB2DSize_t blockSizeToDynamicSMemSize);
} // namespace hip_impl

// Unique file descriptor class
Expand Down
11 changes: 6 additions & 5 deletions hipamd/src/hip_table_interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1298,17 +1298,18 @@ extern "C" hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlag
numBlocks, f, blockSize, dynSharedMemPerBlk, flags);
}
// Public C entry point. Forwards every argument, unchanged and in declaration
// order, to the hipModuleOccupancyMaxPotentialBlockSize_fn slot of the HIP
// dispatch table and returns that call's result.
extern "C" hipError_t hipModuleOccupancyMaxPotentialBlockSize(
    int* gridSize, int* blockSize, hipFunction_t f, hipOccupancyB2DSize_t blkSizeToDynSMemSize,
    size_t dynSharedMemPerBlk, int blockSizeLimit) {
  return hip::GetHipDispatchTable()->hipModuleOccupancyMaxPotentialBlockSize_fn(
      gridSize, blockSize, f, blkSizeToDynSMemSize, dynSharedMemPerBlk, blockSizeLimit);
}
// Public C entry point. Forwards every argument, unchanged and in declaration
// order, to the hipModuleOccupancyMaxPotentialBlockSizeWithFlags_fn slot of the
// HIP dispatch table and returns that call's result.
extern "C" hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(
    int* gridSize, int* blockSize, hipFunction_t f, hipOccupancyB2DSize_t blkSizeToDynSMemSize,
    size_t dynSharedMemPerBlk, int blockSizeLimit, unsigned int flags) {
  return hip::GetHipDispatchTable()->hipModuleOccupancyMaxPotentialBlockSizeWithFlags_fn(
      gridSize, blockSize, f, blkSizeToDynSMemSize, dynSharedMemPerBlk, blockSizeLimit, flags);
}
hipError_t hipModuleUnload(hipModule_t module) {
return hip::GetHipDispatchTable()->hipModuleUnload_fn(module);
Expand Down
Loading