diff --git a/src/api/rocprofiler_singleton.h b/src/api/rocprofiler_singleton.h index d992515b..f0f09628 100644 --- a/src/api/rocprofiler_singleton.h +++ b/src/api/rocprofiler_singleton.h @@ -85,6 +85,9 @@ class ROCProfiler_Singleton { uint64_t GetUniqueRecordId(); uint64_t GetUniqueKernelDispatchId(); + std::mutex signals_timestamps_map_lock; + std::map signals_timestamps; + private: rocprofiler_session_id_t current_session_id_{0}; std::mutex session_map_lock_; diff --git a/src/core/hsa/hsa_support.cpp b/src/core/hsa/hsa_support.cpp index 6cb5d21a..471a2943 100644 --- a/src/core/hsa/hsa_support.cpp +++ b/src/core/hsa/hsa_support.cpp @@ -496,6 +496,25 @@ hsa_status_t ExecutableDestroyIntercept(hsa_executable_t executable) { return rocprofiler::hsa_support::GetCoreApiTable().hsa_executable_destroy_fn(executable); } +hsa_status_t GetDispatchTimestamps(hsa_agent_t agent, hsa_signal_t signal, + hsa_amd_profiling_dispatch_time_t* time) { + hsa_status_t status = HSA_STATUS_SUCCESS; + { + std::lock_guard lock(rocprofiler::GetROCProfilerSingleton()->signals_timestamps_map_lock); + if (rocprofiler::GetROCProfilerSingleton()->signals_timestamps.find(signal.handle) != + rocprofiler::GetROCProfilerSingleton()->signals_timestamps.end()) { + hsa_signal_t new_signal = rocprofiler::GetROCProfilerSingleton()->signals_timestamps[signal.handle]; + rocprofiler::GetROCProfilerSingleton()->signals_timestamps.erase(signal.handle); + status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_profiling_get_dispatch_time_fn( + agent, new_signal, time); + } else { + status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_profiling_get_dispatch_time_fn( + agent, signal, time); + } + return status; + } +} + std::atomic profiling_async_copy_enable{false}; hsa_status_t ProfilingAsyncCopyEnableIntercept(bool enable) { @@ -953,6 +972,9 @@ void Initialize(HsaApiTable* table) { table->core_->hsa_executable_freeze_fn = roctracer::hsa_support::ExecutableFreezeIntercept; table->core_->hsa_executable_destroy_fn = roctracer::hsa_support::ExecutableDestroyIntercept; + table->amd_ext_->hsa_amd_profiling_get_dispatch_time_fn = + roctracer::hsa_support::GetDispatchTimestamps; + // Install the HSA_API wrappers roctracer::hsa_support::detail::InstallCoreApiWrappers(table->core_); roctracer::hsa_support::detail::InstallAmdExtWrappers(table->amd_ext_); diff --git a/src/core/hsa/hsa_support.h b/src/core/hsa/hsa_support.h index b209c04a..79773b9b 100644 --- a/src/core/hsa/hsa_support.h +++ b/src/core/hsa/hsa_support.h @@ -28,6 +28,7 @@ #include #include +#include #include #include "hsa_common.h" diff --git a/src/core/hsa/queues/queue.cpp b/src/core/hsa/queues/queue.cpp index c173dcab..946d9872 100644 --- a/src/core/hsa/queues/queue.cpp +++ b/src/core/hsa/queues/queue.cpp @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -410,7 +411,7 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) { return true; hsa_amd_profiling_dispatch_time_t time; hsa_support::GetAmdExtTable().hsa_amd_profiling_get_dispatch_time_fn( - queue_info_session->agent, pending->original_signal, &time); + queue_info_session->agent, pending->new_signal, &time); uint32_t record_count = 1; bool is_individual_xcc_mode = false; uint32_t xcc_count = queue_info_session->xcc_count; @@ -481,8 +482,8 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) { } delete pending->context; } - if (pending->new_signal.handle) - hsa_support::GetCoreApiTable().hsa_signal_destroy_fn(pending->new_signal); + // if (pending->new_signal.handle) + // hsasupport_singleton.GetCoreApiTable().hsa_signal_destroy_fn(pending->new_signal); if (queue_info_session->interrupt_signal.handle) hsa_support::GetCoreApiTable().hsa_signal_destroy_fn(queue_info_session->interrupt_signal); } @@ -553,8 +554,11 @@ bool AsyncSignalHandlerATT(hsa_signal_value_t /* signal */, void* data) { void CreateBarrierPacket(const hsa_signal_t& packet_completion_signal, std::vector* transformed_packets) { - hsa_barrier_and_packet_t barrier{0}; - barrier.header = HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE; + hsa_barrier_and_packet_t barrier{}; + barrier.header = (HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE) | + (1 << HSA_PACKET_HEADER_BARRIER) | + (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) | + (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE); barrier.dep_signal[0] = packet_completion_signal; void* barrier_ptr = &barrier; transformed_packets->emplace_back(*reinterpret_cast(barrier_ptr)); @@ -855,6 +859,7 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt uint64_t correlation_id = dispatch_packet.reserved2; CreateSignal(HSA_AMD_SIGNAL_AMD_GPU_ONLY, &packet.completion_signal); + // Adding the dispatch packet newly created signal to the pending signals // list to be processed by the signal interrupt rocprofiler_kernel_properties_t kernel_properties = @@ -865,28 +870,36 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt record_id); if (session_data_count > 0 && profile.second) { session->GetProfiler()->AddPendingSignals( - writer_id, record_id, original_packet.completion_signal, - dispatch_packet.completion_signal, session_id, buffer_id, profile.first, - session_data_count, profile.second, kernel_properties, (uint32_t)syscall(__NR_gettid), - user_pkt_index, correlation_id); + writer_id, record_id, original_packet.completion_signal, packet.completion_signal, + session_id, buffer_id, profile.first, session_data_count, profile.second, + kernel_properties, (uint32_t)syscall(__NR_gettid), user_pkt_index, correlation_id); } else { session->GetProfiler()->AddPendingSignals( - writer_id, record_id, original_packet.completion_signal, - dispatch_packet.completion_signal, session_id, buffer_id, nullptr, session_data_count, - nullptr, kernel_properties, (uint32_t)syscall(__NR_gettid), user_pkt_index, - correlation_id); + writer_id, record_id, original_packet.completion_signal, packet.completion_signal, + session_id, buffer_id, nullptr, session_data_count, nullptr, kernel_properties, + (uint32_t)syscall(__NR_gettid), user_pkt_index, correlation_id); } } // Make a copy of the original packet, adding its signal to a barrier // packet and create a new signal for it to get timestamps if (original_packet.completion_signal.handle) { - hsa_barrier_and_packet_t barrier{0}; - barrier.header = HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE; + hsa_barrier_and_packet_t barrier{}; + barrier.header = (HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE) | + (1 << HSA_PACKET_HEADER_BARRIER) | + (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) | + (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE); Packet::packet_t* __attribute__((__may_alias__)) pkt = (reinterpret_cast(&barrier)); transformed_packets.emplace_back(*pkt).completion_signal = original_packet.completion_signal; + + { + std::lock_guard lock( + rocprofiler::GetROCProfilerSingleton()->signals_timestamps_map_lock); + rocprofiler::GetROCProfilerSingleton()->signals_timestamps[original_packet.completion_signal.handle] = + packet.completion_signal; + } } hsa_signal_t interrupt_signal{}; @@ -907,8 +920,11 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt // Added Interrupt Signal with barrier and provided handler for it CreateBarrierPacket(interrupt_signal, &transformed_packets); } else { - hsa_barrier_and_packet_t barrier{0}; - barrier.header = HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE; + hsa_barrier_and_packet_t barrier{}; + barrier.header = (HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE) | + (1 << HSA_PACKET_HEADER_BARRIER) | + (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) | + (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE); barrier.completion_signal = interrupt_signal; Packet::packet_t* __attribute__((__may_alias__)) pkt = (reinterpret_cast(&barrier)); @@ -989,6 +1005,7 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt auto& dispatch_packet = reinterpret_cast(packet); CreateSignal(HSA_AMD_SIGNAL_AMD_GPU_ONLY, &packet.completion_signal); + // Adding the dispatch packet newly created signal to the pending signals // list to be processed by the signal interrupt rocprofiler_kernel_properties_t kernel_properties = @@ -999,7 +1016,7 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt if (session && profile) { session->GetAttTracer()->AddPendingSignals( writer_id, record_id, original_packet.completion_signal, - dispatch_packet.completion_signal, session_id_snapshot, buffer_id, profile, + packet.completion_signal, session_id_snapshot, buffer_id, profile, kernel_properties, (uint32_t)syscall(__NR_gettid), user_pkt_index); } else { session->GetAttTracer()->AddPendingSignals( @@ -1012,6 +1029,7 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt if (original_packet.completion_signal.handle) { hsa_barrier_and_packet_t barrier{0}; barrier.header = HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE; + barrier.dep_signal[0] = packet.completion_signal; Packet::packet_t* __attribute__((__may_alias__)) pkt = (reinterpret_cast(&barrier)); transformed_packets.emplace_back(*pkt).completion_signal = diff --git a/tests-v2/featuretests/profiler/profiler_gtest.cpp b/tests-v2/featuretests/profiler/profiler_gtest.cpp index 3b9bf01e..80f83af1 100644 --- a/tests-v2/featuretests/profiler/profiler_gtest.cpp +++ b/tests-v2/featuretests/profiler/profiler_gtest.cpp @@ -274,7 +274,7 @@ TEST_F(HelloWorldTest, WhenRunningProfilerWithAppThenEndTimeIsGreaterThenStartTi for (auto& itr : current_kernel_info) { if (!(itr.begin_time).empty() && !(itr.end_time).empty()) { - EXPECT_GT(itr.end_time, itr.begin_time); + EXPECT_GT(get_timestamp_value(itr.end_time), get_timestamp_value(itr.begin_time)); } } } @@ -340,7 +340,7 @@ TEST_F(VectorAddTest, WhenRunningProfilerWithAppThenEndTimeIsGreaterThenStartTim for (auto& itr : current_kernel_info) { if (!(itr.begin_time).empty() && !(itr.end_time).empty()) { - EXPECT_GT(itr.end_time, itr.begin_time); + EXPECT_GT(get_timestamp_value(itr.end_time), get_timestamp_value(itr.begin_time)); } } } diff --git a/tests-v2/featuretests/tracer/tracer_gtest.cpp b/tests-v2/featuretests/tracer/tracer_gtest.cpp index d8c4855f..d0b121b0 100644 --- a/tests-v2/featuretests/tracer/tracer_gtest.cpp +++ b/tests-v2/featuretests/tracer/tracer_gtest.cpp @@ -245,6 +245,22 @@ TEST_F(HelloWorldTest, WhenRunningTracerWithAppThenKernelDurationShouldBePositiv EXPECT_GT(current_kernel_info.size(), 0); } +// Test:4 Compares end-time is greater than start-time in current +// tracer output +TEST_F(HelloWorldTest, WhenRunningTracerWithAppThenEndTimeIsGreaterThenStartTime) { + // kernel info in current profiler run + std::vector current_kernel_info; + + GetKernelInfoForRunningApplication(¤t_kernel_info); + ASSERT_TRUE(current_kernel_info.size()); + + for (auto& itr : current_kernel_info) { + if (!(itr.begin_time).empty() && !(itr.end_time).empty()) { + EXPECT_GT(get_timestamp_value(itr.end_time), get_timestamp_value(itr.begin_time)); + } + } +} + /* * ################################################### diff --git a/tests-v2/featuretests/utils/test_utils.cpp b/tests-v2/featuretests/utils/test_utils.cpp index bcbcf002..4e5813ed 100644 --- a/tests-v2/featuretests/utils/test_utils.cpp +++ b/tests-v2/featuretests/utils/test_utils.cpp @@ -20,6 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "test_utils.h" +#include namespace rocprofiler { namespace tests { @@ -130,10 +131,6 @@ void tokenize_tracer_output(std::string line, tracer_kernel_info_t& kinfo) { std::getline(tokenStream, token, ','); kinfo.function = token; std::getline(tokenStream, token, ','); - kinfo.operation = token; - std::getline(tokenStream, token, ','); - kinfo.kernel_name = token; - std::getline(tokenStream, token, ','); kinfo.begin_time = token; std::getline(tokenStream, token, ','); kinfo.end_time = token; @@ -145,6 +142,19 @@ void tokenize_tracer_output(std::string line, tracer_kernel_info_t& kinfo) { kinfo.roxtx_msg = token; } +// get numeric value of timestamp token +uint64_t get_timestamp_value(const std::string& str) { + std::regex pattern("(\\d+)"); + std::smatch match; + + if (regex_search(str, match, pattern)) { + return stoul(match[1]); + } else { + return -1; + } +} + + } // namespace utility } // namespace tests } // namespace rocprofiler diff --git a/tests-v2/featuretests/utils/test_utils.h b/tests-v2/featuretests/utils/test_utils.h index 0ca85cb8..80543ab9 100644 --- a/tests-v2/featuretests/utils/test_utils.h +++ b/tests-v2/featuretests/utils/test_utils.h @@ -27,6 +27,7 @@ THE SOFTWARE. #include // for backtrace #include +#include #include #include #include @@ -86,6 +87,9 @@ void tokenize_profiler_output(std::string line, profiler_kernel_info_t& kinfo); // tokenize tracer output void tokenize_tracer_output(std::string line, tracer_kernel_info_t& kinfo); +// get numeric value of timestamp token +uint64_t get_timestamp_value(const std::string& str); + } // namespace utility } // namespace tests } // namespace rocprofiler @@ -94,6 +98,7 @@ void tokenize_tracer_output(std::string line, tracer_kernel_info_t& kinfo); // path for executable int main(int argc, char** argv); +using rocprofiler::tests::utility::get_timestamp_value; using rocprofiler::tests::utility::GetNumberOfCores; using rocprofiler::tests::utility::GetRunningPath; using rocprofiler::tests::utility::is_installed_path;