Skip to content

Commit

Permalink
SWDEV-380521 : Fixing wrong end Timestamp
Browse files Browse the repository at this point in the history
Change-Id: Ifc781f4c6b831a441a0d80a7fb2a00e02387ba20
  • Loading branch information
bgopesh committed Sep 7, 2023
1 parent 1f9ba32 commit 11afaae
Show file tree
Hide file tree
Showing 8 changed files with 99 additions and 24 deletions.
3 changes: 3 additions & 0 deletions src/api/rocprofiler_singleton.h
Expand Up @@ -85,6 +85,9 @@ class ROCProfiler_Singleton {
uint64_t GetUniqueRecordId();
uint64_t GetUniqueKernelDispatchId();

std::mutex signals_timestamps_map_lock;
std::map<uint64_t, hsa_signal_t> signals_timestamps;

private:
rocprofiler_session_id_t current_session_id_{0};
std::mutex session_map_lock_;
Expand Down
22 changes: 22 additions & 0 deletions src/core/hsa/hsa_support.cpp
Expand Up @@ -496,6 +496,25 @@ hsa_status_t ExecutableDestroyIntercept(hsa_executable_t executable) {
return rocprofiler::hsa_support::GetCoreApiTable().hsa_executable_destroy_fn(executable);
}

hsa_status_t GetDispatchTimestamps(hsa_agent_t agent, hsa_signal_t signal,
hsa_amd_profiling_dispatch_time_t* time) {
hsa_status_t status = HSA_STATUS_SUCCESS;
{
std::lock_guard<std::mutex> lock(rocprofiler::GetROCProfilerSingleton()->signals_timestamps_map_lock);
if (rocprofiler::GetROCProfilerSingleton()->signals_timestamps.find(signal.handle) !=
rocprofiler::GetROCProfilerSingleton()->signals_timestamps.end()) {
hsa_signal_t new_signal = rocprofiler::GetROCProfilerSingleton()->signals_timestamps[signal.handle];
rocprofiler::GetROCProfilerSingleton()->signals_timestamps.erase(signal.handle);
status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_profiling_get_dispatch_time_fn(
agent, new_signal, time);
} else {
status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_profiling_get_dispatch_time_fn(
agent, signal, time);
}
return status;
}
}

std::atomic<bool> profiling_async_copy_enable{false};

hsa_status_t ProfilingAsyncCopyEnableIntercept(bool enable) {
Expand Down Expand Up @@ -953,6 +972,9 @@ void Initialize(HsaApiTable* table) {
table->core_->hsa_executable_freeze_fn = roctracer::hsa_support::ExecutableFreezeIntercept;
table->core_->hsa_executable_destroy_fn = roctracer::hsa_support::ExecutableDestroyIntercept;

table->amd_ext_->hsa_amd_profiling_get_dispatch_time_fn =
roctracer::hsa_support::GetDispatchTimestamps;

// Install the HSA_API wrappers
roctracer::hsa_support::detail::InstallCoreApiWrappers(table->core_);
roctracer::hsa_support::detail::InstallAmdExtWrappers(table->amd_ext_);
Expand Down
1 change: 1 addition & 0 deletions src/core/hsa/hsa_support.h
Expand Up @@ -28,6 +28,7 @@
#include <hsa/hsa_ven_amd_loader.h>

#include <atomic>
#include <cstdint>
#include <string>

#include "hsa_common.h"
Expand Down
54 changes: 36 additions & 18 deletions src/core/hsa/queues/queue.cpp
Expand Up @@ -22,6 +22,7 @@

#include <atomic>
#include <map>
#include <mutex>
#include <string>
#include <vector>
#include <utility>
Expand Down Expand Up @@ -410,7 +411,7 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) {
return true;
hsa_amd_profiling_dispatch_time_t time;
hsa_support::GetAmdExtTable().hsa_amd_profiling_get_dispatch_time_fn(
queue_info_session->agent, pending->original_signal, &time);
queue_info_session->agent, pending->new_signal, &time);
uint32_t record_count = 1;
bool is_individual_xcc_mode = false;
uint32_t xcc_count = queue_info_session->xcc_count;
Expand Down Expand Up @@ -481,8 +482,8 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) {
}
delete pending->context;
}
if (pending->new_signal.handle)
hsa_support::GetCoreApiTable().hsa_signal_destroy_fn(pending->new_signal);
// if (pending->new_signal.handle)
// hsasupport_singleton.GetCoreApiTable().hsa_signal_destroy_fn(pending->new_signal);
if (queue_info_session->interrupt_signal.handle)
hsa_support::GetCoreApiTable().hsa_signal_destroy_fn(queue_info_session->interrupt_signal);
}
Expand Down Expand Up @@ -553,8 +554,11 @@ bool AsyncSignalHandlerATT(hsa_signal_value_t /* signal */, void* data) {

void CreateBarrierPacket(const hsa_signal_t& packet_completion_signal,
std::vector<Packet::packet_t>* transformed_packets) {
hsa_barrier_and_packet_t barrier{0};
barrier.header = HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE;
hsa_barrier_and_packet_t barrier{};
barrier.header = (HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE) |
(1 << HSA_PACKET_HEADER_BARRIER) |
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);
barrier.dep_signal[0] = packet_completion_signal;
void* barrier_ptr = &barrier;
transformed_packets->emplace_back(*reinterpret_cast<Packet::packet_t*>(barrier_ptr));
Expand Down Expand Up @@ -855,6 +859,7 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt
uint64_t correlation_id = dispatch_packet.reserved2;

CreateSignal(HSA_AMD_SIGNAL_AMD_GPU_ONLY, &packet.completion_signal);

// Adding the dispatch packet newly created signal to the pending signals
// list to be processed by the signal interrupt
rocprofiler_kernel_properties_t kernel_properties =
Expand All @@ -865,28 +870,36 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt
record_id);
if (session_data_count > 0 && profile.second) {
session->GetProfiler()->AddPendingSignals(
writer_id, record_id, original_packet.completion_signal,
dispatch_packet.completion_signal, session_id, buffer_id, profile.first,
session_data_count, profile.second, kernel_properties, (uint32_t)syscall(__NR_gettid),
user_pkt_index, correlation_id);
writer_id, record_id, original_packet.completion_signal, packet.completion_signal,
session_id, buffer_id, profile.first, session_data_count, profile.second,
kernel_properties, (uint32_t)syscall(__NR_gettid), user_pkt_index, correlation_id);
} else {
session->GetProfiler()->AddPendingSignals(
writer_id, record_id, original_packet.completion_signal,
dispatch_packet.completion_signal, session_id, buffer_id, nullptr, session_data_count,
nullptr, kernel_properties, (uint32_t)syscall(__NR_gettid), user_pkt_index,
correlation_id);
writer_id, record_id, original_packet.completion_signal, packet.completion_signal,
session_id, buffer_id, nullptr, session_data_count, nullptr, kernel_properties,
(uint32_t)syscall(__NR_gettid), user_pkt_index, correlation_id);
}
}

// Make a copy of the original packet, adding its signal to a barrier
// packet and create a new signal for it to get timestamps
if (original_packet.completion_signal.handle) {
hsa_barrier_and_packet_t barrier{0};
barrier.header = HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE;
hsa_barrier_and_packet_t barrier{};
barrier.header = (HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE) |
(1 << HSA_PACKET_HEADER_BARRIER) |
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);
Packet::packet_t* __attribute__((__may_alias__)) pkt =
(reinterpret_cast<Packet::packet_t*>(&barrier));
transformed_packets.emplace_back(*pkt).completion_signal =
original_packet.completion_signal;

{
std::lock_guard<std::mutex> lock(
rocprofiler::GetROCProfilerSingleton()->signals_timestamps_map_lock);
rocprofiler::GetROCProfilerSingleton()->signals_timestamps[original_packet.completion_signal.handle] =
packet.completion_signal;
}
}

hsa_signal_t interrupt_signal{};
Expand All @@ -907,8 +920,11 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt
// Added Interrupt Signal with barrier and provided handler for it
CreateBarrierPacket(interrupt_signal, &transformed_packets);
} else {
hsa_barrier_and_packet_t barrier{0};
barrier.header = HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE;
hsa_barrier_and_packet_t barrier{};
barrier.header = (HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE) |
(1 << HSA_PACKET_HEADER_BARRIER) |
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);
barrier.completion_signal = interrupt_signal;
Packet::packet_t* __attribute__((__may_alias__)) pkt =
(reinterpret_cast<Packet::packet_t*>(&barrier));
Expand Down Expand Up @@ -989,6 +1005,7 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt
auto& dispatch_packet = reinterpret_cast<hsa_kernel_dispatch_packet_t&>(packet);

CreateSignal(HSA_AMD_SIGNAL_AMD_GPU_ONLY, &packet.completion_signal);

// Adding the dispatch packet newly created signal to the pending signals
// list to be processed by the signal interrupt
rocprofiler_kernel_properties_t kernel_properties =
Expand All @@ -999,7 +1016,7 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt
if (session && profile) {
session->GetAttTracer()->AddPendingSignals(
writer_id, record_id, original_packet.completion_signal,
dispatch_packet.completion_signal, session_id_snapshot, buffer_id, profile,
packet.completion_signal, session_id_snapshot, buffer_id, profile,
kernel_properties, (uint32_t)syscall(__NR_gettid), user_pkt_index);
} else {
session->GetAttTracer()->AddPendingSignals(
Expand All @@ -1012,6 +1029,7 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt
if (original_packet.completion_signal.handle) {
hsa_barrier_and_packet_t barrier{0};
barrier.header = HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE;
barrier.dep_signal[0] = packet.completion_signal;
Packet::packet_t* __attribute__((__may_alias__)) pkt =
(reinterpret_cast<Packet::packet_t*>(&barrier));
transformed_packets.emplace_back(*pkt).completion_signal =
Expand Down
4 changes: 2 additions & 2 deletions tests-v2/featuretests/profiler/profiler_gtest.cpp
Expand Up @@ -274,7 +274,7 @@ TEST_F(HelloWorldTest, WhenRunningProfilerWithAppThenEndTimeIsGreaterThenStartTi

for (auto& itr : current_kernel_info) {
if (!(itr.begin_time).empty() && !(itr.end_time).empty()) {
EXPECT_GT(itr.end_time, itr.begin_time);
EXPECT_GT(get_timestamp_value(itr.end_time), get_timestamp_value(itr.begin_time));
}
}
}
Expand Down Expand Up @@ -340,7 +340,7 @@ TEST_F(VectorAddTest, WhenRunningProfilerWithAppThenEndTimeIsGreaterThenStartTim

for (auto& itr : current_kernel_info) {
if (!(itr.begin_time).empty() && !(itr.end_time).empty()) {
EXPECT_GT(itr.end_time, itr.begin_time);
EXPECT_GT(get_timestamp_value(itr.end_time), get_timestamp_value(itr.begin_time));
}
}
}
Expand Down
16 changes: 16 additions & 0 deletions tests-v2/featuretests/tracer/tracer_gtest.cpp
Expand Up @@ -245,6 +245,22 @@ TEST_F(HelloWorldTest, WhenRunningTracerWithAppThenKernelDurationShouldBePositiv
EXPECT_GT(current_kernel_info.size(), 0);
}

// Test:4 Compares end-time is greater than start-time in current
// tracer output
TEST_F(HelloWorldTest, WhenRunningTracerWithAppThenEndTimeIsGreaterThenStartTime) {
// kernel info in current profiler run
std::vector<tracer_kernel_info_t> current_kernel_info;

GetKernelInfoForRunningApplication(&current_kernel_info);
ASSERT_TRUE(current_kernel_info.size());

for (auto& itr : current_kernel_info) {
if (!(itr.begin_time).empty() && !(itr.end_time).empty()) {
EXPECT_GT(get_timestamp_value(itr.end_time), get_timestamp_value(itr.begin_time));
}
}
}


/*
* ###################################################
Expand Down
18 changes: 14 additions & 4 deletions tests-v2/featuretests/utils/test_utils.cpp
Expand Up @@ -20,6 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "test_utils.h"
#include <regex>

namespace rocprofiler {
namespace tests {
Expand Down Expand Up @@ -130,10 +131,6 @@ void tokenize_tracer_output(std::string line, tracer_kernel_info_t& kinfo) {
std::getline(tokenStream, token, ',');
kinfo.function = token;
std::getline(tokenStream, token, ',');
kinfo.operation = token;
std::getline(tokenStream, token, ',');
kinfo.kernel_name = token;
std::getline(tokenStream, token, ',');
kinfo.begin_time = token;
std::getline(tokenStream, token, ',');
kinfo.end_time = token;
Expand All @@ -145,6 +142,19 @@ void tokenize_tracer_output(std::string line, tracer_kernel_info_t& kinfo) {
kinfo.roxtx_msg = token;
}

// get numeric value of timestamp token
uint64_t get_timestamp_value(const std::string& str) {
std::regex pattern("(\\d+)");
std::smatch match;

if (regex_search(str, match, pattern)) {
return stoul(match[1]);
} else {
return -1;
}
}


} // namespace utility
} // namespace tests
} // namespace rocprofiler
5 changes: 5 additions & 0 deletions tests-v2/featuretests/utils/test_utils.h
Expand Up @@ -27,6 +27,7 @@ THE SOFTWARE.
#include <execinfo.h> // for backtrace

#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <sstream>
Expand Down Expand Up @@ -86,6 +87,9 @@ void tokenize_profiler_output(std::string line, profiler_kernel_info_t& kinfo);
// tokenize tracer output
void tokenize_tracer_output(std::string line, tracer_kernel_info_t& kinfo);

// get numeric value of timestamp token
uint64_t get_timestamp_value(const std::string& str);

} // namespace utility
} // namespace tests
} // namespace rocprofiler
Expand All @@ -94,6 +98,7 @@ void tokenize_tracer_output(std::string line, tracer_kernel_info_t& kinfo);
// path for executable
int main(int argc, char** argv);

using rocprofiler::tests::utility::get_timestamp_value;
using rocprofiler::tests::utility::GetNumberOfCores;
using rocprofiler::tests::utility::GetRunningPath;
using rocprofiler::tests::utility::is_installed_path;
Expand Down

0 comments on commit 11afaae

Please sign in to comment.