From 8d2c8b841c5f302e3766129010d62121a1f4e203 Mon Sep 17 00:00:00 2001 From: Aleksey Date: Thu, 14 Oct 2021 16:15:32 +0300 Subject: [PATCH] SWDEV-294351 Fixed zero bandwith for hsa-api and hip-api traces Hsa ops traces correlation id is number of hsa async-copy called. Hsa api does not log any correlation id, but can be calculated implicitly as number of current hsa_amd_memory_async_copy called. Hip ops correlation id index in rec_vals was 7 instead of 8. Hip api and hip ops correlation id were consistent. String manipulation in copy_line calculation in register_copy and register_activity is increasing copy_line length. Replaced string manipulation with class instance and fields modification. Alexey.Akimov@amd.com --- bin/mem_manager.py | 188 +++++++++++++++++++++++++++------------------ bin/tblextr.py | 13 +++- 2 files changed, 121 insertions(+), 80 deletions(-) mode change 100755 => 100644 bin/mem_manager.py diff --git a/bin/mem_manager.py b/bin/mem_manager.py old mode 100755 new mode 100644 index d466c375..344d54cc --- a/bin/mem_manager.py +++ b/bin/mem_manager.py @@ -24,7 +24,7 @@ from sqlitedb import SQLiteDB pinned = ['hipMallocHost', 'hipHostMalloc', 'hipHostAlloc'] -ondevice = ['hipMalloc', 'hipMallocPitch', 'hipMallocArray', 'hipMalloc3DArray'] +ondevice = ['hipMalloc', 'hipMallocPitch', 'hipMallocArray', 'hipMalloc3DArray', 'hsa_amd_memory_pool_allocate'] mm_table_descr = [ ['BeginNs', 'EndNs', 'pid', 'tid', 'Name', 'Direction', 'SrcType', 'DstType', 'Size', 'BW', 'Async'], @@ -37,6 +37,33 @@ def fatal(msg): DELIM = ',' +class MemManagerRecord: + def __init__(self): + self.start_time = 0 + self.end_time = 0 + self.pid = 0 + self.tid = 0 + self.event = '' + self.direction = '' + self.srcptr_type = '' + self.dstptr_type = '' + self.size = 0 + self.bandwidth = 0 + self.is_async = False + + def str(self): + return str(self.start_time) + DELIM \ + + str(self.end_time) + DELIM \ + + str(self.pid) + DELIM \ + + str(self.tid) + DELIM \ + + self.event + DELIM \ + + 'Direction=' + self.direction + DELIM \ + + 'SrcType=' + self.srcptr_type + DELIM \ + + 'DstType=' + self.dstptr_type + DELIM \ + + 'Size=' + str(self.size) + DELIM \ + + 'BW=' + str(self.bandwidth) + DELIM \ + + 'Async=' + str(self.is_async) + # Mem copy manager class class MemManager: @@ -69,9 +96,12 @@ def parse_hsa_handles(self, infile): # register alloc and memcpy API calls # ['BeginNs', 'EndNs', 'pid', 'tid', 'Name', 'args', 'Index', 'Data'], - def register_api(self, rec_vals): + def register_api(self, rec_vals, copy_index): res = '' record_name = rec_vals[4] # 'Name' + if record_name == "hipMemcpyToArray": + return res + record_args = rec_vals[5] # 'args' malloc_ptrn = re.compile(r'hip.*Malloc|hsa_amd_memory_pool_allocate') mcopy_ptrn = re.compile(r'hipMemcpy|hsa_amd_memory_async_copy') @@ -79,84 +109,69 @@ def register_api(self, rec_vals): if malloc_ptrn.match(record_name): self.add_allocation(record_name, record_args) elif mcopy_ptrn.match(record_name): - res = self.add_memcpy(rec_vals) + res = self.add_memcpy(rec_vals, copy_index) return res + # register memcpy asynchronous activity + # rec_vals: ['BeginNs', 'EndNs', 'dev-id', 'queue-id', 'Name', 'pid', 'tid', 'Index', 'Data', ... + def register_activity(self, rec_vals): + procid = rec_vals[5] # 'pid' + recordid = rec_vals[8] # 'Index' + tid = rec_vals[6] + async_copy_start_time = rec_vals[0] + async_copy_end_time = rec_vals[1] + + return self.register_copy_impl(async_copy_start_time, async_copy_end_time, procid, tid, recordid) + # register memcpy asynchronous copy # ['BeginNs', 'EndNs', 'Name', 'pid', 'tid', 'Index', ... def register_copy(self, rec_vals): - data = '' - event = rec_vals[2] # 'Name' procid = rec_vals[3] # 'pid' recordid = rec_vals[5] # 'Index' - size_ptrn = re.compile(DELIM + 'Size=(\d+)' + DELIM) - - # query syncronous memcopy API record - key = (recordid, procid, 0) - if key in self.memcopies: - data = self.memcopies[key] - - # query asyncronous memcopy API record - key = (recordid, procid, 1) - if key in self.memcopies: - if data != '': fatal('register_copy: corrupted record sync/async') - - async_copy_start_time = rec_vals[0] - async_copy_end_time = rec_vals[1] - - duration = int(async_copy_end_time) - int(async_copy_start_time) - size = 0 - m = size_ptrn.search(self.memcopies[key]) - if m: - size = m.group(1) - bandwidth = round(float(size) * 1000 / duration, 2) - - tid = rec_vals[4] - copy_line_header = str(async_copy_start_time) + DELIM + str(async_copy_end_time) + DELIM + str(procid) + DELIM + str(tid) - copy_line_footer = 'BW=' + str(bandwidth) + DELIM + 'Async=' + str(1) - data = copy_line_header + self.memcopies[key] + copy_line_footer - self.memcopies[key] = data + async_copy_start_time = rec_vals[0] + async_copy_end_time = rec_vals[1] + tid = rec_vals[4] - return data - - # register memcpy asynchronous activity - # rec_vals: ['BeginNs', 'EndNs', 'dev-id', 'queue-id', 'Name', 'pid', 'tid', 'Index', 'Data', ... - def register_activity(self, rec_vals): - data = '' - event = rec_vals[4] # 'Name' - procid = rec_vals[5] # 'pid' - recordid = rec_vals[7] # 'Index' - size_ptrn = re.compile(DELIM + 'Size=(\d+)' + DELIM) + return self.register_copy_impl(async_copy_start_time, async_copy_end_time, procid, tid, recordid) + def register_copy_impl(self, start_time, end_time, procid, tid, recordid): + data = MemManagerRecord() # query syncronous memcopy API record - key = (recordid, procid, 0) - if key in self.memcopies: - data = self.memcopies[key] + sync_key = (recordid, procid, 0) + async_key = (recordid, procid, 1) - # query asyncronous memcopy API record - key = (recordid, procid, 1) - if key in self.memcopies: - if data != '': fatal('register_activity: corrupted record sync/async') - - async_copy_start_time = rec_vals[0] - async_copy_end_time = rec_vals[1] + if sync_key in self.memcopies and async_key in self.memcopies: + fatal('register_copy_impl: corrupted record sync/async') - duration = int(async_copy_end_time) - int(async_copy_start_time) - size = 0 - m = size_ptrn.search(self.memcopies[key]) - if m: - size = m.group(1) - bandwidth = round(float(size) * 1000 / duration, 2) + if sync_key in self.memcopies: + return self.memcopies[sync_key].str() - tid = rec_vals[6] - copy_line_header = str(async_copy_start_time) + DELIM + str(async_copy_end_time) + DELIM + str(procid) + DELIM + str(tid) - copy_line_footer = 'BW=' + str(bandwidth) + DELIM + 'Async=' + str(1) - data = copy_line_header + self.memcopies[key] + copy_line_footer - self.memcopies[key] = data - - return data + # query asyncronous memcopy API record + if not async_key in self.memcopies: + # is this a valid case? mb fatal? + # writeImage is not added through hipMemcpyArray + # fatal("async_key not found") + return '' + + new_data = self.memcopies[async_key] + new_data.start_time = start_time + new_data.end_time = end_time + + duration = int(end_time) - int(start_time) + # hsa api method "record" can have duration equal to 0 + # if duration == 0: + # fatal('register_copy_impl: zero duration') + + new_data.bandwidth = round(float(new_data.size) * 1000 / duration, 2) if duration != 0 else 0 + # if (new_data.bandwidth == 0): + # fatal("register_copy_impl: zero bandwith") + new_data.tid = tid + new_data.is_async = True + + self.memcopies[async_key] = new_data + return new_data.str() # add allocation to map def add_allocation(self, event, args): @@ -221,8 +236,8 @@ def get_ptr_type(self, ptr): return addr_type # add memcpy to map - def add_memcpy(self, recvals): - recordid = recvals[6] #same as corrid + def add_memcpy(self, recvals, copy_index): + recordid = copy_index #recvals[6] #same as corrid event = recvals[4] start_time = recvals[0] # sync time stamp end_time = recvals[1] # sync time stamp @@ -239,6 +254,9 @@ def add_memcpy(self, recvals): # hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, # size_t count, hipMemcpyKind kind); hip_memcpy_ptrn3 = re.compile(r'\(\s*dst\((.*)\) .* src\((.*)\) count\((\d+)\).*\)') + # hipMemcpyToSymbol(const void* symbolName, const void* src, size_t sizeBytes, + # size_t offset = 0, hipMemcpyKind kind) + hip_memcpy_ptrn4 = re.compile(r'\(\s*symbol\((.*)\) src\((.*)\) sizeBytes\((\d+)\).*\)') # memcopy with kind argument hip_memcpy_ptrn_kind = re.compile(r'.* kind\((\d+)\)\s*.*') #hsa_amd_memory_async_copy(void* dst, hsa_agent_t dst_agent, const void* src, @@ -257,13 +275,14 @@ def add_memcpy(self, recvals): # aysnc memcopy async_event_ptrn = re.compile(r'Async|async') m_basic_hip = hip_memcpy_ptrn.match(args) + m_basic_hsa3 = hip_memcpy_ptrn4.match(args) m_basic_hsa_prev = hsa_memcpy_ptrn_prev.match(args) m_basic_hsa = hsa_memcpy_ptrn.match(args) m_basic_hsa2 = hsa_memcpy_ptrn2.match(args) is_hip = True if not (m_basic_hsa_prev or m_basic_hsa or m_basic_hsa2) else False m_2d = hip_memcpy_ptrn2.match(args) m_array = hip_memcpy_ptrn3.match(args) - is_async = 1 if async_event_ptrn.search(event) else 0 + is_async = True if async_event_ptrn.search(event) else False async_copy_start_time = -1 async_copy_end_time = -1 copy_line = '' @@ -299,6 +318,7 @@ def add_memcpy(self, recvals): srcptr_type = self.get_ptr_type(src_agent_ptr) size = int(m_basic_hsa_prev.group(5)) condition_matched = True + if m_basic_hsa: dstptr = m_basic_hsa.group(1) dst_agent_ptr = m_basic_hsa.group(2) @@ -322,6 +342,14 @@ def add_memcpy(self, recvals): size = x*y*z condition_matched = True + if m_basic_hsa3: + dstptr = m_basic_hsa3.group(1) + dstptr_type = self.get_ptr_type(dstptr) + srcptr = m_basic_hsa3.group(2) + srcptr_type = self.get_ptr_type(srcptr) + size = int(m_basic_hsa3.group(3)) + condition_matched = True + if m_array: dstptr = m_array.group(1) dstptr_type = self.get_ptr_type(dstptr) @@ -370,15 +398,22 @@ def add_memcpy(self, recvals): if self.hsa_agent_types[dst_agent_ptr] == 1: direction += 'D' elif self.hsa_agent_types[dst_agent_ptr] == 0: direction += 'H' - copy_line_header = '' - copy_line_footer = '' - copy_line_header = str(start_time) + DELIM + str(end_time) + DELIM + str(pid) + DELIM + str(tid) - copy_line_footer = "BW=" + str(bandwidth) + DELIM + 'Async=' + str(is_async) + mem_entry = MemManagerRecord() - copy_line = copy_line_header + DELIM + event + DELIM + 'Direction=' + direction + DELIM + 'SrcType=' + srcptr_type + DELIM + 'DstType=' + dstptr_type + DELIM + "Size=" + str(size) + DELIM + copy_line_footer + mem_entry.start_time = start_time + mem_entry.end_time = end_time + mem_entry.pid = pid + mem_entry.tid = tid + mem_entry.event = event + mem_entry.direction = direction + mem_entry.srcptr_type = srcptr_type + mem_entry.dstptr_type = dstptr_type + mem_entry.size = size + mem_entry.bandwidth = bandwidth + mem_entry.is_async = is_async - self.memcopies[(recordid, procid, is_async)] = copy_line - return copy_line; + self.memcopies[(recordid, procid, is_async)] = mem_entry + return mem_entry.str(); def dump_data(self, table_name, file_name): # To create memcopy info table in DB @@ -388,7 +423,7 @@ def dump_data(self, table_name, file_name): fld_ptrn = re.compile(r'(.*)=(.*)') for (key, record) in self.memcopies.items(): rec_vals_array = [] - for rec in record.split(DELIM): + for rec in record.str().split(DELIM): fld_ptrnm = fld_ptrn.match(rec) if fld_ptrnm: rec_vals_array.append(fld_ptrnm.group(2)) @@ -398,3 +433,4 @@ def dump_data(self, table_name, file_name): # To dump the memcopy info table as CSV self.db.dump_csv(table_name, file_name) + diff --git a/bin/tblextr.py b/bin/tblextr.py index deafb199..418e665e 100755 --- a/bin/tblextr.py +++ b/bin/tblextr.py @@ -391,7 +391,6 @@ def fill_api_db(table_name, db, indir, api_name, api_pid, dep_pid, dep_list, dep line_index += 1 record = line[:-1] - corr_id = 0 m = ptrn_corr_id.search(record) if m: @@ -478,6 +477,7 @@ def fill_api_db(table_name, db, indir, api_name, api_pid, dep_pid, dep_list, dep # asyncronous opeartion API found op_found = 0 mcopy_found = 0 + hsa_mcopy_found = 0 # extract kernel name string (kernel_str, kernel_found) = get_field(record_args, 'kernel') @@ -496,10 +496,10 @@ def fill_api_db(table_name, db, indir, api_name, api_pid, dep_pid, dep_list, dep if hsa_mcopy_ptrn.match(record_name): mcopy_found = 1 op_found = 1 + hsa_mcopy_found = 1 stream_id = thread_id hsa_patch_data[(copy_index, proc_id)] = thread_id - copy_index += 1 if op_found: ops_patch_data[(corr_id, proc_id)] = (thread_id, stream_id, kernel_str) @@ -521,7 +521,12 @@ def fill_api_db(table_name, db, indir, api_name, api_pid, dep_pid, dep_list, dep if expl_id: dep_str['id'].append(corr_id) # memcopy registering - api_data = memory_manager.register_api(rec_vals) if mcopy_data_enabled else '' + if hsa_mcopy_found: + api_data = memory_manager.register_api(rec_vals, copy_index) if mcopy_data_enabled else '' + copy_index += 1 + else: + api_data = memory_manager.register_api(rec_vals, corr_id) if mcopy_data_enabled else '' + rec_vals.append(api_data) # setting section and lane @@ -647,7 +652,7 @@ def fill_ops_db(kernel_table_name, mcopy_table_name, db, indir): name = m.group(1) corr_id = int(m.group(2)) proc_id = int(m.group(3)) - + # checking name for memcopy pattern is_barrier = 0 if ptrn_mcopy.search(name):