Skip to content

Commit

Permalink
bdfid fix for partition & xgmi nodes
Browse files Browse the repository at this point in the history
* Updates:
    - [API] After discovering all AMD GPUs, we now properly
      map the correct bdf (xgmi nodes). This is especially important
      for partition changes - aka secondary nodes.
    - [API] While adding new secondary nodes we now have
      better grouping, due to re-sorting based on the
      kfd properties list & matching to the primary unique_id
    - [API] All secondary nodes are now AddToDeviceList
      with correct bdf (location id), provided by kfd
    - [API] Modified AddToDeviceList(..., uint64_t bdfid):
      providing an optional field - bdfid. This allows working
      around primary pcie cards with xgmi nodes
    - [API] Utils - cpplint minor fixes
    - [Example] Replaced all endl references w/ newline, fixed
      spacing, and fixed some values incorrectly displaying as hex
      (needed dec representation)
    - [API] kfd node functions - now print full path of file
      for trace logs
    - [Tests] power_read.cc: Added a generic power test to
      confirm that specific return values are guaranteed

Change-Id: I143474e8d64c4915a966e789be6bcea4fa7f4472
Signed-off-by: Charis Poag <Charis.Poag@amd.com>
  • Loading branch information
charis-poag-amd authored and dmitrii-galantsev committed Oct 14, 2023
1 parent 2a7589a commit 6f1afd2
Show file tree
Hide file tree
Showing 6 changed files with 137 additions and 33 deletions.
2 changes: 1 addition & 1 deletion include/rocm_smi/rocm_smi_main.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ class RocmSMI {
std::map<std::pair<uint32_t, uint32_t>, std::shared_ptr<IOLink>>
io_link_map_;
std::map<uint32_t, uint32_t> dev_ind_to_node_ind_map_;
void AddToDeviceList(std::string dev_name);
void AddToDeviceList(std::string dev_name, uint64_t bdfid = 0);
void GetEnvVariables(void);
std::shared_ptr<Monitor> FindMonitor(std::string monitor_path);

Expand Down
23 changes: 12 additions & 11 deletions rocm_smi/example/rocm_smi_example.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@
#define PRINT_RSMI_ERR(RET) { \
if (RET != RSMI_STATUS_SUCCESS) { \
std::cout << "[ERROR] RSMI call returned " << (RET) \
<< " at line " << __LINE__ << std::endl; \
std::cout << amd::smi::getRSMIStatusString(RET) << std::endl; \
<< " at line " << __LINE__ << "\n"; \
std::cout << amd::smi::getRSMIStatusString(RET) << "\n"; \
} \
}

Expand Down Expand Up @@ -718,7 +718,7 @@ int main() {

rsmi_num_monitor_devices(&num_monitor_devs);
for (uint32_t i = 0; i < num_monitor_devs; ++i) {
std::cout << "\t**Device #: " << std::dec << i << std::endl;
std::cout << "\t**Device #: " << std::dec << i << "\n";
ret = rsmi_dev_id_get(i, &val_ui16);
CHK_RSMI_RET_I(ret)
std::cout << "\t**Device ID: 0x" << std::hex << val_ui16 << "\n";
Expand Down Expand Up @@ -765,8 +765,9 @@ int main() {
uint64_t max_bandwidth = 0;
ret = rsmi_minmax_bandwidth_get(0, i, &min_bandwidth, &max_bandwidth);
CHK_RSMI_NOT_SUPPORTED_OR_UNEXPECTED_DATA_RET(ret)
std::cout << "\nMinimum Bandwidth: " << min_bandwidth
<< "\nMaximum Bandwidth: " << max_bandwidth;
std::cout << "\n\t**\tMinimum Bandwidth: " << std::dec << min_bandwidth
<< "\n\t**\tMaximum Bandwidth: " << std::dec
<< max_bandwidth << "\n";
} else {
std::cout << "Not Supported\n";
}
Expand Down Expand Up @@ -813,15 +814,15 @@ int main() {
ret = rsmi_dev_temp_metric_get(i, RSMI_TEMP_TYPE_EDGE,
rsmi_temperature_metric_t::RSMI_TEMP_CURRENT, &val_i64);
if (ret == RSMI_STATUS_SUCCESS) {
std::cout << val_i64/1000 << "C" << "\n";
std::cout << std::dec << val_i64/1000 << " C" << "\n";
}
CHK_RSMI_NOT_SUPPORTED_RET(ret)

std::cout << "\t**Temperature (junction): ";
ret = rsmi_dev_temp_metric_get(i, RSMI_TEMP_TYPE_JUNCTION,
rsmi_temperature_metric_t::RSMI_TEMP_CURRENT, &val_i64);
if (ret == RSMI_STATUS_SUCCESS) {
std::cout << (val_i64 / 1000) << "C" << std::endl;
std::cout << std::dec << (val_i64 / 1000) << " C" << "\n";
}
CHK_RSMI_NOT_SUPPORTED_RET(ret)

Expand Down Expand Up @@ -869,22 +870,22 @@ int main() {
std::cout << "\t**Average Power Usage: ";
ret = rsmi_dev_power_ave_get(i, 0, &val_ui64);
if (ret == RSMI_STATUS_SUCCESS) {
std::cout << convert_mw_to_w(val_ui64) << " W" << std::endl;
std::cout << convert_mw_to_w(val_ui64) << " W" << "\n";
}
CHK_RSMI_NOT_SUPPORTED_RET(ret)

std::cout << "\t**Current Socket Power Usage: ";
ret = rsmi_dev_current_socket_power_get(i, &val_ui64);
if (ret == RSMI_STATUS_SUCCESS) {
std::cout << convert_mw_to_w(val_ui64) << " W" << std::endl;
std::cout << convert_mw_to_w(val_ui64) << " W" << "\n";
}
CHK_RSMI_NOT_SUPPORTED_RET(ret)

std::cout << "\t**Generic Power Usage: ";
ret = rsmi_dev_power_get(i, &val_ui64, &power_type);
if (ret == RSMI_STATUS_SUCCESS) {
std::cout << "[" << amd::smi::power_type_string(power_type) << "] "
<< convert_mw_to_w(val_ui64) << " W" << std::endl;
<< convert_mw_to_w(val_ui64) << " W" << "\n";
}
CHK_RSMI_NOT_SUPPORTED_RET(ret)
std::cout << "\t=======" << "\n";
Expand All @@ -897,7 +898,7 @@ int main() {
return 0;
}

for (uint32_t i = 0; i< num_monitor_devs; ++i) {
for (uint32_t i = 0; i < num_monitor_devs; ++i) {
ret = test_set_overdrive(i);
CHK_AND_PRINT_RSMI_ERR_RET(ret)

Expand Down
10 changes: 10 additions & 0 deletions src/rocm_smi_kfd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -890,9 +890,12 @@ int KFDNode::get_used_memory(uint64_t* used) {
int read_node_properties(uint32_t node, std::string property_name,
uint64_t *val) {
std::ostringstream ss;
std::string propertiesFullPath = "/sys/class/kfd/kfd/topology/nodes/"
+ std::to_string(node) + "/properties";
int retVal = EINVAL;
if (property_name.empty() || val == nullptr) {
ss << __PRETTY_FUNCTION__
<< " | File: " << propertiesFullPath
<< " | Issue: Could not read node #" << std::to_string(node)
<< ", property_name is empty or *val is nullptr "
<< " | return = " << std::to_string(retVal)
Expand All @@ -905,6 +908,7 @@ int read_node_properties(uint32_t node, std::string property_name,
if (KFDNodeSupported(node)) {
retVal = myNode->get_property_value(property_name, val);
ss << __PRETTY_FUNCTION__
<< " | File: " << propertiesFullPath
<< " | Successfully read node #" << std::to_string(node)
<< " for property_name = " << property_name
<< " | Data (" << property_name << ") * val = "
Expand All @@ -915,6 +919,7 @@ int read_node_properties(uint32_t node, std::string property_name,
} else {
retVal = 1;
ss << __PRETTY_FUNCTION__
<< " | File: " << propertiesFullPath
<< " | Issue: Could not read node #" << std::to_string(node)
<< ", KFD node was an unsupported node."
<< " | return = " << std::to_string(retVal)
Expand All @@ -927,9 +932,12 @@ int read_node_properties(uint32_t node, std::string property_name,
// /sys/class/kfd/kfd/topology/nodes/*/gpu_id
int get_gpu_id(uint32_t node, uint64_t *gpu_id) {
std::ostringstream ss;
std::string gpu_id_FullPath = "/sys/class/kfd/kfd/topology/nodes/"
+ std::to_string(node) + "/gpu_id";
int retVal = EINVAL;
if (gpu_id == nullptr) {
ss << __PRETTY_FUNCTION__
<< " | File: " << gpu_id_FullPath
<< " | Issue: Could not read node #" << std::to_string(node)
<< ", gpu_id is a nullptr "
<< " | return = " << std::to_string(retVal)
Expand All @@ -942,6 +950,7 @@ int get_gpu_id(uint32_t node, uint64_t *gpu_id) {
if (KFDNodeSupported(node)) {
retVal = ReadKFDGpuId(node, gpu_id);
ss << __PRETTY_FUNCTION__
<< " | File: " << gpu_id_FullPath
<< " | Successfully read node #" << std::to_string(node)
<< " for gpu_id"
<< " | Data (gpu_id) *gpu_id = "
Expand All @@ -952,6 +961,7 @@ int get_gpu_id(uint32_t node, uint64_t *gpu_id) {
} else {
retVal = 1;
ss << __PRETTY_FUNCTION__
<< " | File: " << gpu_id_FullPath
<< " | Issue: Could not read node #" << std::to_string(node)
<< ", KFD node was an unsupported node."
<< " | return = " << std::to_string(retVal)
Expand Down
127 changes: 109 additions & 18 deletions src/rocm_smi_main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,7 @@ RocmSMI::Initialize(uint64_t flags) {
auto i = 0;
uint32_t ret;
int i_ret;
std::ostringstream ss;

LOG_ALWAYS("=============== ROCM SMI initialize ================");
ROCmLogging::Logger::getInstance()->enableAllLogLevels();
Expand Down Expand Up @@ -355,9 +356,32 @@ RocmSMI::Initialize(uint64_t flags) {
if (ConstructBDFID(device->path(), &bdfid) != 0) {
std::cerr << "Failed to construct BDFID." << std::endl;
ret = 1;
} else if (device->bdfid() != UINT64_MAX && device->bdfid() != bdfid) {
// handles secondary partitions - compute partition feature nodes
ss << __PRETTY_FUNCTION__
<< " | [before] device->path() = " << device->path()
<< "\n | bdfid = " << bdfid
<< "\n | device->bdfid() = " << device->bdfid()
<< "\n | (xgmi node) setting to setting "
<< "device->set_bdfid(device->bdfid())";
LOG_TRACE(ss);
device->set_bdfid(device->bdfid());
} else {
// legacy & pcie card updates
ss << __PRETTY_FUNCTION__
<< " | [before] device->path() = " << device->path()
<< "\n | bdfid = " << bdfid
<< "\n | device->bdfid() = " << device->bdfid()
<< "\n | (legacy/pcie card) setting device->set_bdfid(bdfid)";
LOG_TRACE(ss);
device->set_bdfid(bdfid);
}
ss << __PRETTY_FUNCTION__
<< " | [after] device->path() = " << device->path()
<< "\n | bdfid = " << bdfid
<< "\n | device->bdfid() = " << device->bdfid()
<< "\n | final update: device->bdfid() holds correct device bdf";
LOG_TRACE(ss);
}
if (ret != 0) {
throw amd::smi::rsmi_exception(RSMI_INITIALIZATION_ERROR,
Expand Down Expand Up @@ -386,7 +410,6 @@ RocmSMI::Initialize(uint64_t flags) {

// Remove any drm nodes that don't have a corresponding readable kfd node.
// kfd nodes will not be added if their properties file is not readable.
std::ostringstream ss;
auto dev_iter = devices_.begin();
while (dev_iter != devices_.end()) {
uint64_t bdfid = (*dev_iter)->bdfid();
Expand Down Expand Up @@ -665,8 +688,8 @@ RocmSMI::FindMonitor(std::string monitor_path) {

return m;
}
void
RocmSMI::AddToDeviceList(std::string dev_name) {

void RocmSMI::AddToDeviceList(std::string dev_name, uint64_t bdfid) {
std::ostringstream ss;
ss << __PRETTY_FUNCTION__ << " | ======= start =======";
LOG_TRACE(ss);
Expand All @@ -684,10 +707,15 @@ RocmSMI::AddToDeviceList(std::string dev_name) {
dev->set_drm_render_minor(GetDrmRenderMinor(dev_path));
dev->set_card_index(card_indx);
GetSupportedEventGroups(card_indx, dev->supported_event_groups());
if (bdfid != 0) {
dev->set_bdfid(bdfid);
}

devices_.push_back(dev);
ss << __PRETTY_FUNCTION__ << " | Adding to device list dev_name = "
<< dev_name << " | path = " << dev_path
ss << __PRETTY_FUNCTION__
<< " | Adding to device list dev_name = " << dev_name
<< " | path = " << dev_path
<< " | bdfid = " << bdfid
<< " | card index = " << std::to_string(card_indx) << " | ";
LOG_DEBUG(ss);
}
Expand Down Expand Up @@ -768,19 +796,24 @@ uint32_t RocmSMI::DiscoverAmdgpuDevices(void) {
uint32_t s_node_id = 0;
uint64_t s_gpu_id = 0;
uint64_t s_unique_id = 0;
uint64_t s_location_id = 0;
};
// allSystemNodes[key = unique_id] => {node_id, gpu_id, unique_id}
// allSystemNodes[key = unique_id] => {node_id, gpu_id, unique_id,
// location_id}
std::multimap<uint64_t, systemNode> allSystemNodes;
uint32_t node_id = 0;
while (true) {
uint64_t gpu_id = 0, unique_id = 0;
uint64_t gpu_id = 0, unique_id = 0, location_id = 0;
int ret_gpu_id = get_gpu_id(node_id, &gpu_id);
int ret_unique_id = read_node_properties(node_id, "unique_id", &unique_id);
if (ret_gpu_id == 0 || ret_unique_id == 0) {
int ret_loc_id =
read_node_properties(node_id, "location_id", &location_id);
if (ret_gpu_id == 0 || ret_unique_id == 0 || ret_loc_id == 0) {
systemNode myNode;
myNode.s_node_id = node_id;
myNode.s_gpu_id = gpu_id;
myNode.s_unique_id = unique_id;
myNode.s_location_id = location_id;
if (gpu_id != 0) { // only add gpu nodes, 0 = CPU
allSystemNodes.emplace(unique_id, myNode);
}
Expand All @@ -795,6 +828,7 @@ uint32_t RocmSMI::DiscoverAmdgpuDevices(void) {
ss << "\n[node_id = " << std::to_string(i.second.s_node_id)
<< "; gpu_id = " << std::to_string(i.second.s_gpu_id)
<< "; unique_id = " << std::to_string(i.second.s_unique_id)
<< "; location_id = " << std::to_string(i.second.s_location_id)
<< "], ";
}
ss << "}";
Expand All @@ -807,27 +841,55 @@ uint32_t RocmSMI::DiscoverAmdgpuDevices(void) {
path += "/card";
path += std::to_string(cardId);
uint64_t primary_unique_id = 0;
uint64_t device_uuid = 0;
bool doesDeviceSupportPartitions = false;
// get current partition
int kSize = 256;
char computePartition[kSize];
std::string strCompPartition = "UNKNOWN";
uint32_t numMonDevices = 0;
rsmi_num_monitor_devices(&numMonDevices);

// each identified gpu card node is a primary node for
// potential matching unique ids
if (isAMDGPU(path) ||
(init_options_ & RSMI_INIT_FLAG_ALL_GPUS)) {
std::string d_name = "card";
d_name += std::to_string(cardId);
AddToDeviceList(d_name);
uint32_t numMonDevices = 0;
rsmi_num_monitor_devices(&numMonDevices);
if (rsmi_dev_compute_partition_get(cardAdded, computePartition, kSize)
== RSMI_STATUS_SUCCESS) {
strCompPartition = computePartition;
doesDeviceSupportPartitions = true;
}
rsmi_status_t ret_unique_id =
rsmi_dev_unique_id_get(cardAdded, &device_uuid);
auto temp_numb_nodes = allSystemNodes.count(device_uuid);
auto primaryBdfId =
allSystemNodes.lower_bound(device_uuid)->second.s_location_id;
if (doesDeviceSupportPartitions && temp_numb_nodes > 1
&& ret_unique_id == RSMI_STATUS_SUCCESS) {
// helps identify xgmi nodes (secondary nodes) easier
AddToDeviceList(d_name, primaryBdfId);
} else {
AddToDeviceList(d_name, UINT64_MAX);
}

ss << __PRETTY_FUNCTION__
<< " | Ordered system nodes seen in lookup = {";
for (auto i : allSystemNodes) {
ss << "\n[node_id = " << std::to_string(i.second.s_node_id)
<< "; gpu_id = " << std::to_string(i.second.s_gpu_id)
<< "; unique_id = " << std::to_string(i.second.s_unique_id)
<< "; location_id = " << std::to_string(i.second.s_location_id)
<< "], ";
}
ss << "}";
LOG_DEBUG(ss);

uint64_t temp_primary_unique_id = 0;
uint64_t primary_location_id = 0;
if (allSystemNodes.empty()) {
cardAdded++;
ss << __PRETTY_FUNCTION__
Expand All @@ -837,16 +899,11 @@ uint32_t RocmSMI::DiscoverAmdgpuDevices(void) {
}

// get current partition
const int kSize = 256;
char computePartition[kSize];
std::string strCompPartition = "UNKNOWN";
uint32_t numMonDevices = 0;
rsmi_num_monitor_devices(&numMonDevices);
if (rsmi_dev_compute_partition_get(cardAdded, computePartition, kSize)
== RSMI_STATUS_SUCCESS) {
strCompPartition = computePartition;
}
uint64_t device_uuid = 0;
if (rsmi_dev_unique_id_get(cardAdded, &device_uuid)
!= RSMI_STATUS_SUCCESS) {
cardAdded++;
Expand All @@ -860,7 +917,7 @@ uint32_t RocmSMI::DiscoverAmdgpuDevices(void) {

temp_primary_unique_id =
allSystemNodes.find(device_uuid)->second.s_unique_id;
auto temp_numb_nodes = allSystemNodes.count(temp_primary_unique_id);
temp_numb_nodes = allSystemNodes.count(temp_primary_unique_id);

ss << __PRETTY_FUNCTION__
<< " | device/node id (cardId) = " << std::to_string(cardId)
Expand Down Expand Up @@ -892,12 +949,46 @@ uint32_t RocmSMI::DiscoverAmdgpuDevices(void) {
LOG_DEBUG(ss);
while (numb_nodes > 1) {
std::string secNode = "card";
secNode += std::to_string(cardId); // add the primary node id
AddToDeviceList(secNode);
secNode += std::to_string(cardId); // maps the primary node card to
// secondary - allows get/sets
auto it = allSystemNodes.lower_bound(device_uuid);
auto it_end = allSystemNodes.upper_bound(device_uuid);
if (numb_nodes == temp_numb_nodes) {
auto removalNodeId = it->second.s_node_id;
auto removalGpuId = it->second.s_gpu_id;
auto removalUniqueId = it->second.s_unique_id;
auto removalLocId = it->second.s_location_id;
auto nodesErased = 1;
primary_location_id = removalLocId;
allSystemNodes.erase(it++);
ss << __PRETTY_FUNCTION__
<< "\nPRIMARY --> num_nodes == temp_numb_nodes; ERASING "
<< std::to_string(nodesErased) << " node -> [node_id = "
<< std::to_string(removalNodeId)
<< "; gpu_id = " << std::to_string(removalGpuId)
<< "; unique_id = " << std::to_string(removalUniqueId)
<< "; location_id = " << std::to_string(removalLocId)
<< "]";
LOG_DEBUG(ss);
}
if (it == it_end) {
break;
}
auto myBdfId = it->second.s_location_id;
AddToDeviceList(secNode, myBdfId);
ss << __PRETTY_FUNCTION__
<< "\nSECONDARY --> After adding new node; ERASING -> [node_id = "
<< std::to_string(it->second.s_node_id)
<< "; gpu_id = " << std::to_string(it->second.s_gpu_id)
<< "; unique_id = " << std::to_string(it->second.s_unique_id)
<< "; location_id = " << std::to_string(it->second.s_location_id)
<< "]";
LOG_DEBUG(ss);
allSystemNodes.erase(it++);
numb_nodes--;
cardAdded++;
}
// remove already added nodes associated with current card
// remove any remaining nodes associated with current card
auto erasedNodes = allSystemNodes.erase(primary_unique_id);
ss << __PRETTY_FUNCTION__ << " | After finding primary_unique_id = "
<< std::to_string(primary_unique_id) << " erased "
Expand Down
Loading

0 comments on commit 6f1afd2

Please sign in to comment.