Skip to content

Commit

Permalink
[PROTON] Roctracer: convert agent id to gpu id for gpu ops (triton-la…
Browse files Browse the repository at this point in the history
…ng#4090)

Roctracer reports (global) agent ids for the location of async ops, e.g.
kernels and copies.
The profiler is better served by zero-based GPU indexes.

Created a mapping function to apply to values stored in
KernelMetric::DeviceId.

Caveat: if devices are hidden using HIP_VISIBLE_DEVICES then the hip
device id, e.g. via hipGetDevice()/hipSetDevice(), will not match the
reported unfiltered id. Additional support in hip will be needed to map
through the filtering correctly.

---------

Co-authored-by: Keren Zhou <robinho364@gmail.com>
  • Loading branch information
mwootton and Jokeren committed Jun 7, 2024
1 parent 107fed4 commit 60613fb
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 1 deletion.
2 changes: 2 additions & 0 deletions third_party/proton/csrc/include/Driver/GPU/HipApi.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ template <bool CheckSuccess>
hipError_t deviceGetAttribute(int *value, hipDeviceAttribute_t attribute,
int deviceId);

// Wrapper around hipGetDeviceCount (bound via DEFINE_DISPATCH in HipApi.cpp);
// the device count is written to *count.
// NOTE(review): CheckSuccess presumably selects whether a non-success
// hipError_t is treated as a hard error by the dispatch layer — confirm in
// Driver/Dispatch.h.
template <bool CheckSuccess> hipError_t getDeviceCount(int *count);

Device getDevice(uint64_t index);

const char *getKernelNameRef(const hipFunction_t f);
Expand Down
23 changes: 23 additions & 0 deletions third_party/proton/csrc/include/Driver/GPU/HsaApi.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#ifndef PROTON_DRIVER_GPU_HSA_H_
#define PROTON_DRIVER_GPU_HSA_H_

#include "Driver/Device.h"
#include "hsa/hsa_ext_amd.h"

namespace proton {

// Thin wrappers over the HSA runtime entry points used by the profiler.
namespace hsa {

// Queries a single attribute of an HSA agent; forwards to hsa_agent_get_info.
//   agent     - agent handle to query.
//   attribute - attribute selector.
//   value     - caller-provided storage that receives the attribute value.
// NOTE(review): CheckSuccess presumably selects whether a non-success status
// is treated as a hard error by the dispatch layer — confirm in
// Driver/Dispatch.h.
template <bool CheckSuccess>
hsa_status_t agentGetInfo(hsa_agent_t agent, hsa_agent_info_t attribute,
void *value);

// Calls the HSA runtime's hsa_iterate_agents with `callback` and `data`.
// `data` is passed through to the callback unchanged.
// Returns HSA_STATUS_ERROR_FATAL when the hsa_iterate_agents symbol cannot be
// resolved; otherwise returns whatever the runtime call returns.
hsa_status_t iterateAgents(hsa_status_t (*callback)(hsa_agent_t agent,
void *data),
void *data);

} // namespace hsa

} // namespace proton

#endif // PROTON_DRIVER_GPU_HSA_H_
2 changes: 2 additions & 0 deletions third_party/proton/csrc/lib/Driver/GPU/HipApi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ DEFINE_DISPATCH(ExternLibHip, deviceSynchronize, hipDeviceSynchronize)
DEFINE_DISPATCH(ExternLibHip, deviceGetAttribute, hipDeviceGetAttribute, int *,
hipDeviceAttribute_t, int);

// Binds hip::getDeviceCount to hipGetDeviceCount(int *) from the HIP library.
DEFINE_DISPATCH(ExternLibHip, getDeviceCount, hipGetDeviceCount, int *);

Device getDevice(uint64_t index) {
int clockRate;
(void)hip::deviceGetAttribute<true>(&clockRate, hipDeviceAttributeClockRate,
Expand Down
35 changes: 35 additions & 0 deletions third_party/proton/csrc/lib/Driver/GPU/HsaApi.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#include "Driver/GPU/HsaApi.h"
#include "Driver/Dispatch.h"

namespace proton {

namespace hsa {

// Describes the HSA runtime shared library for the Dispatch helpers: which
// file to load, the status type its functions return, the status value that
// means success, and where the loaded library handle is cached.
struct ExternLibHsa : ExternLibBase {
  using RetType = hsa_status_t;
  static constexpr RetType success = HSA_STATUS_SUCCESS;
  static constexpr const char *name = "libhsa-runtime64.so";
  // Library handle; stays null until Dispatch<ExternLibHsa>::init fills it.
  static void *lib;
};

// Out-of-class definition of the cached library handle.
void *ExternLibHsa::lib = nullptr;

// Binds hsa::agentGetInfo to hsa_agent_get_info(hsa_agent_t,
// hsa_agent_info_t, void *) from the HSA runtime library.
DEFINE_DISPATCH(ExternLibHsa, agentGetInfo, hsa_agent_get_info, hsa_agent_t,
hsa_agent_info_t, void *);

// Forwards to the HSA runtime's hsa_iterate_agents.
//
// The runtime library handle is set up through Dispatch on every call, and
// the entry point is resolved with dlsym and cached in a function-local
// static. While the cached pointer is still null the lookup is attempted
// again on the next call.
//
// Returns HSA_STATUS_ERROR_FATAL if the symbol cannot be resolved, otherwise
// the status returned by hsa_iterate_agents.
hsa_status_t iterateAgents(hsa_status_t (*callback)(hsa_agent_t agent,
                                                    void *data),
                           void *data) {
  using IterateAgentsFn =
      hsa_status_t (*)(hsa_status_t (*)(hsa_agent_t, void *), void *);
  static IterateAgentsFn iterateFn = nullptr;

  Dispatch<ExternLibHsa>::init(ExternLibHsa::name, &ExternLibHsa::lib);

  if (!iterateFn) {
    void *symbol = dlsym(ExternLibHsa::lib, "hsa_iterate_agents");
    iterateFn = reinterpret_cast<IterateAgentsFn>(symbol);
  }

  if (!iterateFn)
    return HSA_STATUS_ERROR_FATAL;
  return iterateFn(callback, data);
}

} // namespace hsa

} // namespace proton
32 changes: 31 additions & 1 deletion third_party/proton/csrc/lib/Profiler/RoctracerProfiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include "Context/Context.h"
#include "Data/Metric.h"
#include "Driver/GPU/HipApi.h"
#include "Driver/GPU/HsaApi.h"
#include "Driver/GPU/RoctracerApi.h"

#include "hip/amd_detail/hip_runtime_prof.h"
Expand All @@ -26,6 +27,34 @@ thread_local GPUProfiler<RoctracerProfiler>::ProfilerState

namespace {

// Node to device id mapping
int deviceOffset = 0x7fffffff;

void createDeviceMap() {
int dc = 0;
auto ret = hip::getDeviceCount<true>(&dc);
hsa::iterateAgents(
[](hsa_agent_t agent, void *data) {
auto &deviceOffset = *static_cast<int *>(data);
int nodeId;
hsa::agentGetInfo<true>(
agent,
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_DRIVER_NODE_ID),
&nodeId);
int deviceType;
hsa::agentGetInfo<true>(
agent, static_cast<hsa_agent_info_t>(HSA_AGENT_INFO_DEVICE),
&deviceType);
if ((nodeId < deviceOffset) && (deviceType == HSA_DEVICE_TYPE_GPU))
deviceOffset = nodeId;

return HSA_STATUS_SUCCESS;
},
&deviceOffset);
};

int mapDeviceId(int id) { return id - deviceOffset; };

std::shared_ptr<Metric>
convertActivityToMetric(const roctracer_record_t *activity) {
std::shared_ptr<Metric> metric;
Expand All @@ -34,7 +63,7 @@ convertActivityToMetric(const roctracer_record_t *activity) {
metric = std::make_shared<KernelMetric>(
static_cast<uint64_t>(activity->begin_ns),
static_cast<uint64_t>(activity->end_ns), 1,
static_cast<uint64_t>(activity->device_id),
static_cast<uint64_t>(mapDeviceId(activity->device_id)),
static_cast<uint64_t>(DeviceType::HIP));
break;
}
Expand Down Expand Up @@ -306,6 +335,7 @@ void RoctracerProfiler::RoctracerProfilerPimpl::doStop() {

// Creates the profiler's implementation object and then builds the
// agent-id -> GPU-index mapping used when converting activity records.
RoctracerProfiler::RoctracerProfiler() {
pImpl = std::make_unique<RoctracerProfilerPimpl>(*this);
// Populates the file-local deviceOffset consumed by mapDeviceId().
createDeviceMap();
}

RoctracerProfiler::~RoctracerProfiler() = default;
Expand Down

0 comments on commit 60613fb

Please sign in to comment.