In [None]:
from tqdm import tqdm
from pprint import pprint
import json
import pandas as pd
import sys
sys.path.append('..')
from Trace2Tree.trace_to_tree import TraceToTree
from tree_perf import TreePerfAnalyzer

In [None]:
def get_merged_comparison_with_mapping(name1, df_agg1, name2, df_agg2, mapping, include_counts=False):
    """
    Inner-join two aggregated frames on op name, pairing differently named ops via ``mapping``.

    Rows are matched through an artificial merge key. A name from ``df_agg1`` that is a
    key of ``mapping`` and a name from ``df_agg2`` that is the corresponding value both
    get the key ``"<key>/<value>"``; every other name is used as its own key, so ops
    with identical names in both frames match directly. Names present in only one
    frame are dropped by the inner join.

    Neither input frame is modified; the merge key is added to temporary copies.

    Args:
        name1: Label for the first frame; used as column suffix and in the ratio column title.
        df_agg1: DataFrame with at least the columns ``name`` and
            ``total_direct_kernel_time_sum`` (plus ``Count`` when ``include_counts`` is set).
        name2: Label for the second frame. Should differ from ``name1`` so that the
            suffixed column names stay distinct.
        df_agg2: DataFrame with the same columns as ``df_agg1``.
        mapping: Dict from op names as they appear in ``df_agg1`` to the equivalent
            op names in ``df_agg2``. If several keys share one value, only the last
            such key is paired for the second frame.
        include_counts: When True, also keep ``Count_<name1>`` and ``Count_<name2>``.

    Returns:
        DataFrame with ``name_<name1>``, ``name_<name2>``, the two suffixed
        ``total_direct_kernel_time_sum`` columns, optionally the two count columns,
        and ``Kernel Duration (ms) Ratio (<name1>/<name2>)``.
    """
    # Inverse lookup for the second frame (value -> key).
    reverse_mapping = {v: k for k, v in mapping.items()}

    # Each side consults only its own direction of the mapping. Merging both
    # directions into one dict would let a reverse entry overwrite a forward one
    # whenever a name is both a key and a value, producing keys that never match.
    def _key_for_first(op_name):
        return f"{op_name}/{mapping[op_name]}" if op_name in mapping else op_name

    def _key_for_second(op_name):
        return f"{reverse_mapping[op_name]}/{op_name}" if op_name in reverse_mapping else op_name

    # assign() returns new frames, so the caller's DataFrames are left untouched.
    keyed_1 = df_agg1.assign(merge_key=df_agg1['name'].map(_key_for_first))
    keyed_2 = df_agg2.assign(merge_key=df_agg2['name'].map(_key_for_second))

    # Columns shared by both frames (other than merge_key) receive the name suffixes.
    merged_df = pd.merge(
        keyed_1,
        keyed_2,
        on='merge_key',
        how='inner',
        suffixes=(f'_{name1}', f'_{name2}')
    )

    # Ratio of summed kernel time for each matched pair.
    ratio_column = f'Kernel Duration (ms) Ratio ({name1}/{name2})'
    merged_df[ratio_column] = (
        merged_df[f'total_direct_kernel_time_sum_{name1}'] /
        merged_df[f'total_direct_kernel_time_sum_{name2}']
    )

    # Output layout: original names, per-frame metrics, then computed columns.
    hardware_specific_columns = [
        f'total_direct_kernel_time_sum_{name1}', f'total_direct_kernel_time_sum_{name2}',
    ]
    if include_counts:
        hardware_specific_columns.extend([f'Count_{name1}', f'Count_{name2}'])
    columns_to_keep = (
        [f'name_{name1}', f'name_{name2}'] + hardware_specific_columns + [ratio_column]
    )

    return merged_df[columns_to_keep]


In [None]:
# Load profile A and derive its kernel-launcher tables.
path = '/path/to/A_pytorch_profile.json'
# Decode explicitly as UTF-8 so parsing does not depend on the platform's default encoding.
with open(path, 'r', encoding='utf-8') as f:
    data = json.load(f)
# Build the event tree from the trace events (Python function events are not added).
tree_A = TraceToTree(data['traceEvents'])
tree_A.build_tree(add_python_func=False)
perf_analyzer_A = TreePerfAnalyzer(tree_A)
# Per-launcher table, then its summary; the summary is what the comparison cell consumes.
df_A_kernel_launchers = perf_analyzer_A.get_df_kernel_launchers()
df_agg_A_kernel_launchers = perf_analyzer_A.get_df_kernel_launchers_summary(df_A_kernel_launchers)


In [None]:
# Load profile B and derive its kernel-launcher tables.
path = '/path/to/B_pytorch_profile.json'

# Decode explicitly as UTF-8 so parsing does not depend on the platform's default encoding.
with open(path, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Build the event tree from the trace events (Python function events are not added).
events = data['traceEvents']
tree_B = TraceToTree(events)
tree_B.build_tree(add_python_func=False)
perf_analyzer_B = TreePerfAnalyzer(tree_B)
# Per-launcher table, then its summary; the summary is what the comparison cell consumes.
df_B_kernel_launchers = perf_analyzer_B.get_df_kernel_launchers()
df_agg_B_kernel_launchers = perf_analyzer_B.get_df_kernel_launchers_summary(df_B_kernel_launchers)


In [None]:
# Op names that differ between the two profiles:
# key = name as it appears in profile A, value = equivalent name in profile B.
name_mapping = {
    "aten::cudnn_convolution": "aten::miopen_convolution",
    "aten::native_batch_norm_backward": "aten::miopen_batch_norm_backward",
    "aten::native_batch_norm": "aten::miopen_batch_norm",
    "FlashAttnFuncBackward": "flash_attn::_flash_attn_backward",
    "FlashAttnFunc": "flash_attn::_flash_attn_forward",
}

# Join the two kernel-launcher summaries op by op and compute the A/B duration ratio.
df_comparison_agg_kernel_launchers = get_merged_comparison_with_mapping(
    name1='A',
    df_agg1=df_agg_A_kernel_launchers,
    name2='B',
    df_agg2=df_agg_B_kernel_launchers,
    mapping=name_mapping,
)
# Last expression of the cell: rendered as the cell output.
df_comparison_agg_kernel_launchers