Skip to content
This repository has been archived by the owner on Jan 26, 2024. It is now read-only.

Commit

Permalink
SWDEV-301947 - Use new enum for CU count
Browse files Browse the repository at this point in the history
Use HSA_AMD_AGENT_INFO_COOPERATIVE_COMPUTE_UNIT_COUNT to get compute
units. This is needed to work around an asymmetric CU harvesting bug on
gfx90a. Add a new device property to get the max available CUs on the
device.

Change-Id: I878f38f14f16c1af01fc0a77157aea1e816a63b8
  • Loading branch information
saleelk committed Feb 1, 2022
1 parent 100dd37 commit 319ab3e
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 3 deletions.
5 changes: 4 additions & 1 deletion device/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,9 +234,12 @@ struct Info : public amd::EmbeddedObject {
//! A unique device vendor identifier.
uint32_t vendorId_;

//! The number of parallel compute cores on the compute device.
//! The available number of parallel compute cores on the compute device.
uint32_t maxComputeUnits_;

//! The max number of parallel compute cores on the compute device.
uint32_t maxBoostComputeUnits_;

//! Maximum dimensions that specify the global and local work-item IDs
// used by the data-parallel execution model.
uint32_t maxWorkItemDimensions_;
Expand Down
1 change: 1 addition & 0 deletions device/gpu/gpudevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,7 @@ void NullDevice::fillDeviceInfo(const CALdeviceattribs& calAttr, const gslMemInf
info_.type_ = CL_DEVICE_TYPE_GPU;
info_.vendorId_ = 0x1002;
info_.maxComputeUnits_ = calAttr.numberOfSIMD;
info_.maxBoostComputeUnits_ = calAttr.numberOfSIMD;
info_.maxWorkItemDimensions_ = 3;
info_.numberOfShaderEngines = calAttr.numberOfShaderEngines;

Expand Down
2 changes: 1 addition & 1 deletion device/pal/paldevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
info_.maxComputeUnits_ = settings().enableWgpMode_
? palProp.gfxipProperties.shaderCore.numAvailableCus / 2
: palProp.gfxipProperties.shaderCore.numAvailableCus;

info_.maxBoostComputeUnits_ = info_.maxComputeUnits_;
info_.numberOfShaderEngines = palProp.gfxipProperties.shaderCore.numShaderEngines;

// SI parts are scalar. Also, reads don't need to be 128-bits to get peak rates.
Expand Down
14 changes: 13 additions & 1 deletion device/rocm/rocdevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1075,7 +1075,8 @@ bool Device::populateOCLDeviceConstants() {
}

if (HSA_STATUS_SUCCESS !=
hsa_agent_get_info(_bkendDevice, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT,
hsa_agent_get_info(_bkendDevice,
(hsa_agent_info_t)HSA_AMD_AGENT_INFO_COOPERATIVE_COMPUTE_UNIT_COUNT,
&info_.maxComputeUnits_)) {
return false;
}
Expand All @@ -1085,6 +1086,17 @@ bool Device::populateOCLDeviceConstants() {
? info_.maxComputeUnits_ / 2
: info_.maxComputeUnits_;

if (HSA_STATUS_SUCCESS !=
hsa_agent_get_info(_bkendDevice, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT,
&info_.maxBoostComputeUnits_)) {
return false;
}
assert(info_.maxBoostComputeUnits_ > 0);

info_.maxBoostComputeUnits_ = settings().enableWgpMode_
? info_.maxBoostComputeUnits_ / 2
: info_.maxBoostComputeUnits_;

if (HSA_STATUS_SUCCESS != hsa_agent_get_info(_bkendDevice,
(hsa_agent_info_t)HSA_AMD_AGENT_INFO_CACHELINE_SIZE,
&info_.globalMemCacheLineSize_)) {
Expand Down

0 comments on commit 319ab3e

Please sign in to comment.