In [2]:
import os
import json


In [4]:
def convert_size(size_in_bytes):
    """Convert a byte count to a human-readable ``(value, unit)`` pair.

    The value is repeatedly divided by 1024 until it drops below 1024 or the
    largest supported unit (TB) is reached.

    Args:
        size_in_bytes: Size in bytes (int or float).

    Returns:
        tuple: ``(value, unit)`` where ``value`` is rounded to 2 decimals and
        ``unit`` is one of ``'B'``, ``'KB'``, ``'MB'``, ``'GB'``, ``'TB'``.
        Sizes of 1024 TB or more are reported in TB (value >= 1024) instead
        of falling off the end of the loop and returning ``None``.
    """
    units = ['B', 'KB', 'MB', 'GB', 'TB']
    size = size_in_bytes  # work on a local copy; the argument is left as passed
    # Every unit except the last may be "outgrown"; the last one is the clamp.
    for unit in units[:-1]:
        if size < 1024:
            return round(size, 2), unit
        size /= 1024
    return round(size, 2), units[-1]


In [5]:
# Location of the trace file inspected by the rest of the notebook.
file_path = "/home/ajassani/feb12_2025/iteration_6/rank1_trace.json"

# On-disk size in bytes, taken from the file's stat record.
file_size = os.stat(file_path).st_size

# Split into a rounded number and a unit label for display.
num, unit = convert_size(file_size)
print(f"File size: {num} {unit}")


File size: 350.5 MB


In [8]:
# Load the trace. json.load reads the whole file object before parsing, so
# nothing here is streamed: the full document ends up in memory.
with open(file_path, "rb") as f:
    data = json.load(f)

# Check structure
events = data["traceEvents"]
print(f"Total Entries: {len(events)}")

# Pick something printable for the sample line without indexing into an
# empty list (events[0] would raise IndexError when there are no events).
if not isinstance(events, list):
    sample_entry = "Not a list"
elif events:
    sample_entry = events[0]
else:
    sample_entry = "Empty list"
print("Sample Entry:", sample_entry)


Total Entries: 1310398
Sample Entry: {'ph': 'X', 'cat': 'cpu_op', 'name': 'autograd::engine::evaluate_function: NllLossBackward0', 'pid': 153, 'tid': 930, 'ts': 1425746284084.65, 'dur': 410.825, 'args': {'External id': 8193, 'Record function id': 0, 'Sequence number': 1663, 'Fwd thread id': 1, 'Ev Idx': 0}}


In [9]:
from collections import defaultdict
import sys

def _deep_getsizeof(obj, seen):
    """Return sys.getsizeof(obj) plus the sizes of everything nested inside it.

    ``seen`` is a set of object ids already counted; it prevents counting the
    same object twice and guards against reference cycles.
    """
    if id(obj) in seen:
        return 0
    seen.add(id(obj))

    total = sys.getsizeof(obj)
    if isinstance(obj, dict):
        for k, v in obj.items():
            total += _deep_getsizeof(k, seen) + _deep_getsizeof(v, seen)
    elif isinstance(obj, (list, tuple, set, frozenset)):
        for item in obj:
            total += _deep_getsizeof(item, seen)
    return total


def analyze_memory(data, deep=False):
    """Estimate how much memory each field (key) uses across all entries.

    Args:
        data: Iterable of dict-like entries (anything with ``.items()``).
        deep: When False (default), each value is measured with
            ``sys.getsizeof`` only. That is a shallow size: for containers
            such as a nested ``args`` dict it excludes the keys and values
            they hold. When True, nested dicts, lists, tuples and sets are
            walked recursively and their contents are included.

    Returns:
        list[tuple]: ``(key, total_bytes)`` pairs sorted by ``total_bytes``,
        largest first. Empty list when ``data`` is empty.
    """
    field_sizes = defaultdict(int)

    for entry in data:
        for key, value in entry.items():
            if deep:
                # Fresh "seen" set per value: de-duplicate within one value,
                # but keep per-entry accounting like the shallow mode does.
                field_sizes[key] += _deep_getsizeof(value, set())
            else:
                field_sizes[key] += sys.getsizeof(value)

    # Sort by memory consumption, largest first.
    return sorted(field_sizes.items(), key=lambda x: x[1], reverse=True)

# Analyze the largest fields: analyze_memory returns (key, bytes) pairs,
# already sorted largest first.
largest_fields = analyze_memory(events)

# The values printed below are converted from bytes to MB, so the header
# says MB (it previously claimed bytes).
print("Top 10 Largest Fields (Key -> Approx Size in MB):")
for key, size in largest_fields[:10]:
    print(f"{key}: {size / (1024 * 1024):.2f} MB")


Top 10 Largest Fields (Key -> Approx Size in Bytes):
args: 291.27 MB
name: 119.88 MB
cat: 78.66 MB
ph: 62.48 MB
pid: 34.99 MB
tid: 34.89 MB
ts: 29.99 MB
dur: 27.71 MB
bp: 2.26 MB
id: 1.74 MB


In [10]:
from collections import defaultdict

# Sum a per-event size estimate for each 'cat' value. sys.getsizeof(event)
# measures only the event dict object itself, not the objects it refers to.
category_memory = defaultdict(int)

for event in events:
    # Events without a 'cat' key are grouped under "Unknown".
    category = event["cat"] if "cat" in event else "Unknown"
    category_memory[category] += sys.getsizeof(event)

# Order the (category, bytes) pairs from largest to smallest total.
sorted_categories = list(category_memory.items())
sorted_categories.sort(key=lambda pair: pair[1], reverse=True)

# Report the ten largest categories, converted from bytes to MB.
print("Top Memory-Consuming Categories:")
top_categories = sorted_categories[:10]
for cat, size in top_categories:
    print(f"{cat}: {size / (1024 * 1024):.2f} MB")


Top Memory-Consuming Categories:
python_function: 375.46 MB
ac2g: 22.11 MB
cpu_op: 16.82 MB
cuda_runtime: 16.13 MB
cpu_instant_event: 11.84 MB
kernel: 5.81 MB
user_annotation: 0.84 MB
gpu_user_annotation: 0.44 MB
fwdbwd: 0.23 MB
gpu_memcpy: 0.17 MB


In [12]:
# Distinct 'cat' values across all events; events lacking the key contribute None.
unique_cats = {event.get('cat') for event in events}
unique_cats

{None,
 'Trace',
 'ac2g',
 'cpu_instant_event',
 'cpu_op',
 'cuda_runtime',
 'fwdbwd',
 'gpu_memcpy',
 'gpu_memset',
 'gpu_user_annotation',
 'kernel',
 'python_function',
 'user_annotation'}

In [17]:
# Show the events that have no 'cat' value (key missing or set to None).
# Note: `count` is assigned but no limit is applied, so every match is printed.
count = 5
for event in events:
    if event.get('cat') is not None:
        continue
    print(event)

{'name': 'process_name', 'ph': 'M', 'ts': 1425740863831.302, 'pid': 153, 'tid': 0, 'args': {'name': 'python'}}
{'name': 'process_labels', 'ph': 'M', 'ts': 1425740863831.302, 'pid': 153, 'tid': 0, 'args': {'labels': 'CPU'}}
{'name': 'process_sort_index', 'ph': 'M', 'ts': 1425740863831.302, 'pid': 153, 'tid': 0, 'args': {'sort_index': 153}}
{'name': 'process_name', 'ph': 'M', 'ts': 1425740863831.302, 'pid': 0, 'tid': 0, 'args': {'name': 'python'}}
{'name': 'process_labels', 'ph': 'M', 'ts': 1425740863831.302, 'pid': 0, 'tid': 0, 'args': {'labels': 'GPU 0'}}
{'name': 'process_sort_index', 'ph': 'M', 'ts': 1425740863831.302, 'pid': 0, 'tid': 0, 'args': {'sort_index': 5000000}}
{'name': 'process_name', 'ph': 'M', 'ts': 1425740863831.302, 'pid': 1, 'tid': 0, 'args': {'name': 'python'}}
{'name': 'process_labels', 'ph': 'M', 'ts': 1425740863831.302, 'pid': 1, 'tid': 0, 'args': {'labels': 'GPU 1'}}
{'name': 'process_sort_index', 'ph': 'M', 'ts': 1425740863831.302, 'pid': 1, 'tid': 0, 'args': {'

In [None]:
# Show a few example events that have no 'cat' value (key missing or None).
# `count` caps how many are printed, so the cell does not dump every
# matching event in the trace.
count = 5
shown = 0
for event in events:
    if shown >= count:
        break
    if event.get('cat') is None:
        print(event)
        shown += 1