In [1]:
import json
import pandas as pd

def read_data(filename):
  """Load a trace JSON file and index its events by external id.

  Only entries of ``data['traceEvents']`` that carry an ``'External id'``
  inside their ``'args'`` mapping are kept; everything else is ignored.

  Args:
    filename: Path of the JSON file to read.

  Returns:
    A dict mapping each external id to a dict keyed by event name, whose
    values are ``{'type': <event 'cat'>, 'dur': <event 'dur'>}``. When the
    same name occurs more than once under one external id, only the first
    occurrence is recorded.

  Raises:
    KeyError: If the file has no ``'traceEvents'`` key, or a kept event
      lacks ``'name'``, ``'cat'`` or ``'dur'``.
  """
  with open(filename, 'r') as f:
    trace = json.load(f)

  events_by_ext_id = {}
  for event in trace['traceEvents']:
    if 'args' not in event:
      continue
    event_args = event['args']
    if 'External id' not in event_args:
      continue

    per_name = events_by_ext_id.setdefault(event_args['External id'], {})
    event_name = event['name']
    # First occurrence wins: later events with the same name are ignored.
    if event_name not in per_name:
      per_name[event_name] = {'type': event['cat'], 'dur': event['dur']}
  return events_by_ext_id

def calculation(external_id_map, filename):
  """Flatten the per-external-id event map into a table and export two CSVs.

  One row is built per external id. For each known event category the row
  holds the event name in the ``<category>`` column and its duration in the
  ``<category>_dur`` column; columns with no matching event keep the
  placeholder ``0``.

  Two filtered views of the table are written next to ``filename``:
    * ``{filename}.io_time.csv``  -- rows whose ``cpu_op`` is ``aten::copy_``
    * ``{filename}.mul_time.csv`` -- rows whose ``cpu_op`` is ``aten::mul``

  Args:
    external_id_map: Mapping ``ext_id -> {event_name: {'type': ..., 'dur': ...}}``
      as produced by ``read_data``.
    filename: Prefix used for the output CSV paths.

  Returns:
    None. The results are written to disk.

  Notes:
    * Events whose ``'type'`` is not one of the known categories are
      skipped instead of aborting the whole export with a ``KeyError``.
    * If several events of the same category share an external id, the one
      iterated last overwrites the earlier ones in that row.
  """
  categories = ['cpu_op', 'kernel', 'cuda_runtime', 'gpu_memcpy', 'gpu_memset']
  # Layout: ext_id, one name column per category, then one *_dur column per
  # category in the same order.
  headers = ['ext_id'] + categories + [f'{cat}_dur' for cat in categories]
  name_col = {cat: idx for idx, cat in enumerate(categories, start=1)}
  dur_offset = len(categories)

  rows = []
  for ext_id, events in external_id_map.items():
    row = [ext_id] + [0] * (len(headers) - 1)
    for name, info in events.items():
      col = name_col.get(info['type'])
      if col is None:
        # Category has no column in the table (e.g. an annotation event);
        # ignore it rather than failing the whole export.
        continue
      row[col] = name
      row[col + dur_offset] = info['dur']
    rows.append(row)

  df = pd.DataFrame(rows, columns=headers)
  # 1. output the I/O time
  df[df['cpu_op'] == 'aten::copy_'].to_csv(f'{filename}.io_time.csv', index=False)
  # 2. output the calculation time, take mul as an example
  df[df['cpu_op'] == 'aten::mul'].to_csv(f'{filename}.mul_time.csv', index=False)

In [2]:
# Index the events of the "alter.join" trace by external id, then export the
# filtered tables to "<filename>.io_time.csv" and "<filename>.mul_time.csv".
filename = "/fs/resource/ywp/policy_logs/alter.join"
data = read_data(filename)
# Progress marker: parsing finished, the CSV export comes next.
print('finish reading data')

calculation(data, filename)

finish reading data


In [3]:
# Index the events of the "default.join" trace by external id, then export the
# filtered tables to "<filename>.io_time.csv" and "<filename>.mul_time.csv".
filename = "/fs/resource/ywp/policy_logs/default.join"
data = read_data(filename)
# Progress marker: parsing finished, the CSV export comes next.
print("finish reading data")

calculation(data, filename)

finish reading data


In [None]:
# Index the events of the '8B_cpu.log' trace by external id, then export the
# filtered tables to "<filename>.io_time.csv" and "<filename>.mul_time.csv".
filename = '8B_cpu.log'
# Reuse `filename` so the input path and the output prefix cannot diverge.
data = read_data(filename)
print('finish reading data')
calculation(data, filename)

In [None]:
# Index the events of the 'opt_8B.log' trace by external id, then export the
# filtered tables to "<filename>.io_time.csv" and "<filename>.mul_time.csv".
filename = 'opt_8B.log'
data = read_data(filename)
# Progress marker: parsing finished, the CSV export comes next.
print('finish reading data')
calculation(data, filename)

In [None]:
# Index the events of the 'opt_8B_cpu.log' trace by external id, then export
# the filtered tables to "<filename>.io_time.csv" and "<filename>.mul_time.csv".
filename = 'opt_8B_cpu.log'
data = read_data(filename)
# Progress marker: parsing finished, the CSV export comes next.
print('finish reading data')
calculation(data, filename)