<!--
Copyright (c) 2024 - 2025 Advanced Micro Devices, Inc. All rights reserved.

See LICENSE for license information.
-->

## Using TraceDiff to find the differences between two PyTorch Kineto traces

This notebook provides a step-by-step guide for comparing two PyTorch Kineto traces using TraceLens's TraceDiff tool. You will:

- Load and parse trace files into event trees
- Identify differences and points of difference (PODs) between traces
- Merge the event trees and generate detailed and summary reports
- Use the UID mapping feature to cross-reference events between traces

**Requirements:**
- Two Kineto trace files (JSON format)
- TraceLens installed and available in your Python environment

**Outputs:**
- Merged tree visualization
- CSV files with kernel and op statistics
- UID mapping for cross-referencing events

> **Tip:** You can customize output folder paths and use the UID map to link events between traces for deeper analysis.


In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported modules
# (e.g. a local TraceLens checkout) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2


In [None]:
# Import TraceLens and force a reload so the kernel uses the current on-disk sources.
# NOTE(review): likely redundant with `%autoreload 2` enabled above; kept so the cell
# still works when autoreload is not loaded.
import importlib
import TraceLens

# The trailing semicolon suppresses the cell's output. Without it the module repr is
# echoed, which embeds the absolute install path (and username) in the saved notebook.
importlib.reload(TraceLens);

<module 'TraceLens' from '/home/khoffmey/dev/TraceLens/TraceLens/__init__.py'>

In [2]:
# Step 1: build a TreePerfAnalyzer (and its event tree) for each trace
#
# Two PyTorch Kineto trace files are loaded, one TreePerfAnalyzer per file.
# TreePerfAnalyzer internally builds a call stack tree using TraceLens's TraceToTree.
# Once this cell has run, `tree1` and `tree2` are ready for comparison and analysis.

import json
from TraceLens import TraceToTree, TreePerfAnalyzer
import pandas as pd

trace_file1, trace_file2 = (
    "../tests/traces/mi300_test_trace/mi300_test_trace_iteration_0.json",
    "../tests/traces/h100_test_trace/h100_test_trace_iteration_0.json",
)

# The generator is consumed left to right, so trace 1 is parsed before trace 2.
perf_analyzer1, perf_analyzer2 = (
    TreePerfAnalyzer.from_file(trace_path, add_python_func=True)
    for trace_path in (trace_file1, trace_file2)
)
tree1, tree2 = perf_analyzer1.tree, perf_analyzer2.tree

Building tree with add_python_func=True
Building CPU op tree with add_python_func=True
Caching nn.Module stack

Time taken to cache stack: 0.012619256973266602 seconds
Building tree with add_python_func=True
Building CPU op tree with add_python_func=True
Caching nn.Module stack

Time taken to cache stack: 0.011942625045776367 seconds


In [17]:
from TraceLens import TraceDiff

# --- Step 2: Merge and analyze the trace trees ---
#
# This step merges the two event trees (`tree1`, `tree2`) and generates the data
# structures that store the important diff information. Those data structures are
# then used to generate diff metrics and reports.
#
# After running this cell, you can:
#   - Use the TraceDiff object `td` to access the DataFrames directly for further
#     analysis (see next cells).
#   - Write the reports to files using td.print_tracediff_report_files(output_folder)
#     (see later cell).

# Merge and generate DataFrames (does NOT write files)
td = TraceDiff(tree1, tree2)
td.generate_tracediff_report()

In [13]:
# The `diff_stats_df` DataFrame contains a detailed, row-by-row comparison of the two traces.
# This is the most granular report, useful for deep dives.
df_diff_stats = td.diff_stats_df
# `diff_stats_paired_df` is the companion "paired" view. Judging by the output further
# down, it adds `index_trace1` / `index_trace2`, `is_merged` and `trace` columns.
# NOTE(review): presumably it matches each trace-1 row with its trace-2 counterpart;
# confirm against the TraceDiff documentation.
df_diff_stats_paired = td.diff_stats_paired_df

In [14]:
# Display the granular diff table: one row per op, with the trace1/trace2 input shapes,
# strides, types, kernel times and kernel names in side-by-side columns.
df_diff_stats

Unnamed: 0,name,prev_combined,nn_module_stack,input_shape_trace1,input_shape_trace2,concrete_inputs_trace1,concrete_inputs_trace2,input_strides_trace1,input_strides_trace2,input_type_trace1,input_type_trace2,kernel_time_trace1,kernel_time_trace2,kernel_names_trace1,kernel_names_trace2
0,_vllm_fa3_C::get_scheduler_metadata,,[],,"[[], [], [], [], [], [], [], [], [1], [2], [],...",,"['1', '3609', '3609', '28', '4', '128', '128',...",,"[[], [], [], [], [], [], [], [], [1], [1], [],...",,"['Scalar', 'Scalar', 'Scalar', 'Scalar', 'Scal...",0.000000,2.337036,,"[flash::prepare_varlen_num_blocks_kernel(int, ..."
1,aten::copy_,,[],,"[[2], [2], []]",,"['', '', 'False']",,"[[1], [1], []]",,"['int', 'int', 'Scalar']",0.000000,1.567993,,[Memcpy DtoD (Device -> Device)]
2,vllm::rocm_unquantized_gemm,,"[nn.Module: Qwen2VisionTransformer_0, nn.Modul...","[[14308, 1, 1280], [3840, 1280], [3840]]",,"['', '', '']",,"[[1280, 1280, 1], [1280, 1], [1]]",,"['c10::Half', 'c10::Half', 'c10::Half']",,276.732056,0.000000,[Cijk_Alik_Bljk_HHS_BH_Bias_HA_S_SAV_UserArgs_...,
3,aten::linear,,"[nn.Module: Qwen2VisionTransformer_0, nn.Modul...",,"[[14308, 1, 1280], [3840, 1280], [3840]]",,"['', '', '']",,"[[1280, 1280, 1], [1280, 1], [1]]",,"['c10::Half', 'c10::Half', 'c10::Half']",0.000000,189.918945,,"[Memset (Device), nvjet_hsh_256x152_64x4_1x2_h..."
4,ApplyRotaryEmb,,"[nn.Module: Qwen2VisionTransformer_0, nn.Modul...",,"[[2, 14308, 16, 80], [14308, 40], [14308, 40],...",,"['', '', '', 'False', 'False', '0', '', '']",,"[[18314240, 1280, 80, 1], [40, 1], [40, 1], []...",,"['c10::Half', 'c10::Half', 'c10::Half', 'Scala...",0.000000,58.719971,,[rotary_kernel]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
614,aten::mm,,"[nn.Module: Qwen2VLForConditionalGeneration_0,...",,"[[3609, 3584], [3584, 37888], [3609, 37888]]",,"['', '', '']",,"[[3584, 1], [1, 3584], [37888, 1]]",,"['c10::Half', 'c10::Half', 'c10::Half']",0.000000,1395.708008,,"[Memset (Device), nvjet_hsh_256x152_64x4_1x2_h..."
615,triton_poi_fused_mul_silu_slice_1,,"[nn.Module: Qwen2VLForConditionalGeneration_0,...",,"[[3609, 37888], [3609, 18944], []]",,"['', '', '68368896']",,"[[37888, 1], [18944, 1], []]",,"['c10::Half', 'c10::Half', 'Scalar']",0.000000,130.942993,,[triton_poi_fused_mul_silu_slice_1]
616,aten::mm,,"[nn.Module: Qwen2VLForConditionalGeneration_0,...",,"[[3609, 18944], [18944, 3584], [3609, 3584]]",,"['', '', '']",,"[[18944, 1], [1, 18944], [3584, 1]]",,"['c10::Half', 'c10::Half', 'c10::Half']",0.000000,714.237061,,"[Memset (Device), nvjet_hsh_256x136_64x4_2x1_v..."
617,triton_red_fused__to_copy_add_mean_mul_pow_rsq...,,"[nn.Module: Qwen2VLForConditionalGeneration_0,...",,"[[3609, 3584], [3609, 3584], [3609, 3584], [35...",,"['', '', '', '', '3609', '3584']",,"[[3584, 1], [3584, 1], [3584, 1], [1], [], []]",,"['c10::Half', 'c10::Half', 'c10::Half', 'c10::...",0.000000,38.656006,,[triton_red_fused__to_copy_add_mean_mul_pow_rs...


In [15]:
# Display the paired diff table. In the output, rows with `is_merged == True` carry a
# "name1 | name2" label and values for both traces, while the remaining rows are tagged
# with the trace they came from in the `trace` column.
# NOTE(review): interpretation inferred from the rendered output; confirm.
df_diff_stats_paired

Unnamed: 0,name,prev_combined,index_trace1,index_trace2,nn_module_stack,input_shape_trace1,input_shape_trace2,concrete_inputs_trace1,concrete_inputs_trace2,input_strides_trace1,input_strides_trace2,input_type_trace1,input_type_trace2,kernel_time_trace1,kernel_time_trace2,kernel_names_trace1,kernel_names_trace2,is_merged,trace
0,vllm::rocm_unquantized_gemm | aten::linear,,2.0,3.0,"['nn.Module: Qwen2VisionTransformer_0', 'nn.Mo...","[[14308, 1, 1280], [3840, 1280], [3840]]","[[14308, 1, 1280], [3840, 1280], [3840]]","['', '', '']","['', '', '']","[[1280, 1280, 1], [1280, 1], [1]]","[[1280, 1280, 1], [1280, 1], [1]]","['c10::Half', 'c10::Half', 'c10::Half']","['c10::Half', 'c10::Half', 'c10::Half']",276.732056,189.918945,[Cijk_Alik_Bljk_HHS_BH_Bias_HA_S_SAV_UserArgs_...,"[Memset (Device), nvjet_hsh_256x152_64x4_1x2_h...",True,
1,FlashAttnVarlenFunc | ApplyRotaryEmb,,5.0,4.0,"['nn.Module: Qwen2VisionTransformer_0', 'nn.Mo...","[[14308, 16, 80], [14308, 16, 80], [14308, 16,...","[[2, 14308, 16, 80], [14308, 40], [14308, 40],...","['', '', '', '', '', '14308', '14308', '0.', '...","['', '', '', 'False', 'False', '0', '', '']","[[1280, 80, 1], [1280, 80, 1], [3840, 80, 1], ...","[[18314240, 1280, 80, 1], [40, 1], [40, 1], []...","['c10::Half', 'c10::Half', 'c10::Half', 'int',...","['c10::Half', 'c10::Half', 'c10::Half', 'Scala...",4489.543945,58.719971,[_ZN7ck_tile6kentryILi256ELi2ENS_13FmhaFwdKern...,[rotary_kernel],True,
2,FlashAttnVarlenFunc | _vllm_fa2_C::varlen_fwd,,5.0,6.0,"['nn.Module: Qwen2VisionTransformer_0', 'nn.Mo...","[[14308, 16, 80], [14308, 16, 80], [14308, 16,...","[[14308, 16, 80], [14308, 16, 80], [14308, 16,...","['', '', '', '', '', '14308', '14308', '0.', '...","['', '', '', '', '', '', '', '', '', '', '1430...","[[1280, 80, 1], [1280, 80, 1], [3840, 80, 1], ...","[[1280, 80, 1], [1280, 80, 1], [3840, 80, 1], ...","['c10::Half', 'c10::Half', 'c10::Half', 'int',...","['c10::Half', 'c10::Half', 'c10::Half', '', 'i...",4489.543945,3610.996948,[_ZN7ck_tile6kentryILi256ELi2ENS_13FmhaFwdKern...,[void flash::flash_fwd_kernel<Flash_fwd_kernel...,True,
3,vllm::rocm_unquantized_gemm | aten::linear,,7.0,8.0,"['nn.Module: Qwen2VisionTransformer_0', 'nn.Mo...","[[14308, 1, 1280], [1280, 1280], [1280]]","[[14308, 1, 1280], [1280, 1280], [1280]]","['', '', '']","['', '', '']","[[1280, 1280, 1], [1280, 1], [1]]","[[1280, 1280, 1], [1280, 1], [1]]","['c10::Half', 'c10::Half', 'c10::Half']","['c10::Half', 'c10::Half', 'c10::Half']",108.520996,67.166992,[Cijk_Alik_Bljk_HHS_BH_Bias_HA_S_SAV_UserArgs_...,"[Memset (Device), nvjet_hsh_256x144_64x4_1x2_h...",True,
4,vllm::rocm_unquantized_gemm | aten::linear,,9.0,10.0,"['nn.Module: Qwen2VisionTransformer_0', 'nn.Mo...","[[14308, 1, 1280], [5120, 1280], [5120]]","[[14308, 1, 1280], [5120, 1280], [5120]]","['', '', '']","['', '', '']","[[1280, 1280, 1], [1280, 1], [1]]","[[1280, 1280, 1], [1280, 1], [1]]","['c10::Half', 'c10::Half', 'c10::Half']","['c10::Half', 'c10::Half', 'c10::Half']",347.678955,262.206909,[Cijk_Alik_Bljk_HHS_BH_Bias_HA_S_SAV_UserArgs_...,"[Memset (Device), nvjet_hsh_128x256_64x4_2x1_v...",True,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
528,aten::mm,,,,"[nn.Module: Qwen2VLForConditionalGeneration_0,...",,"[[3609, 3584], [3584, 37888], [3609, 37888]]",,"['', '', '']",,"[[3584, 1], [1, 3584], [37888, 1]]",,"['c10::Half', 'c10::Half', 'c10::Half']",0.000000,1395.708008,,"[Memset (Device), nvjet_hsh_256x152_64x4_1x2_h...",False,2
529,triton_poi_fused_mul_silu_slice_1,,,,"[nn.Module: Qwen2VLForConditionalGeneration_0,...",,"[[3609, 37888], [3609, 18944], []]",,"['', '', '68368896']",,"[[37888, 1], [18944, 1], []]",,"['c10::Half', 'c10::Half', 'Scalar']",0.000000,130.942993,,[triton_poi_fused_mul_silu_slice_1],False,2
530,aten::mm,,,,"[nn.Module: Qwen2VLForConditionalGeneration_0,...",,"[[3609, 18944], [18944, 3584], [3609, 3584]]",,"['', '', '']",,"[[18944, 1], [1, 18944], [3584, 1]]",,"['c10::Half', 'c10::Half', 'c10::Half']",0.000000,714.237061,,"[Memset (Device), nvjet_hsh_256x136_64x4_2x1_v...",False,2
531,triton_red_fused__to_copy_add_mean_mul_pow_rsq...,,,,"[nn.Module: Qwen2VLForConditionalGeneration_0,...",,"[[3609, 3584], [3609, 3584], [3609, 3584], [35...",,"['', '', '', '', '3609', '3584']",,"[[3584, 1], [3584, 1], [3584, 1], [1], [], []]",,"['c10::Half', 'c10::Half', 'c10::Half', 'c10::...",0.000000,38.656006,,[triton_red_fused__to_copy_add_mean_mul_pow_rs...,False,2


In [28]:
# Show the rows of every op name that appears with more than one distinct `prev_combined` value.
# nunique() skips NaN by default, so a missing `prev_combined` is not counted as a value.
name_counts = df_diff_stats['prev_combined'].groupby(df_diff_stats['name']).nunique()
multi_prev_combined_names = name_counts.loc[name_counts.gt(1)].index
df_diff_stats.loc[df_diff_stats['name'].isin(multi_prev_combined_names)]

Unnamed: 0,name,prev_combined,nn_module_stack,input_shape_trace1,input_shape_trace2,concrete_inputs_trace1,concrete_inputs_trace2,input_strides_trace1,input_strides_trace2,input_type_trace1,input_type_trace2,kernel_time_trace1,kernel_time_trace2,kernel_names_trace1,kernel_names_trace2,trace
14,cudaLaunchKernel,aten::sub,[],,,,,,,,,0.000000,1.728027,,[void at::native::vectorized_elementwise_kerne...,
21,vllm::rocm_unquantized_gemm,aten::native_layer_norm,"[nn.Module: Qwen2VisionTransformer_0, nn.Modul...","[[14308, 1, 1280], [3840, 1280], [3840]]",,"['', '', '']",,"[[1280, 1280, 1], [1280, 1], [1]]",,"['c10::Half', 'c10::Half', 'c10::Half']",,276.732056,0.000000,[Cijk_Alik_Bljk_HHS_BH_Bias_HA_S_SAV_UserArgs_...,,1
22,aten::linear,aten::native_layer_norm,"[nn.Module: Qwen2VisionTransformer_0, nn.Modul...",,"[[14308, 1, 1280], [3840, 1280], [3840]]",,"['', '', '']",,"[[1280, 1280, 1], [1280, 1], [1]]",,"['c10::Half', 'c10::Half', 'c10::Half']",0.000000,189.918945,,"[Memset (Device), nvjet_hsh_256x152_64x4_1x2_h...",2
27,vllm::rocm_unquantized_gemm,aten::cat,"[nn.Module: Qwen2VisionTransformer_0, nn.Modul...","[[14308, 1, 1280], [1280, 1280], [1280]]",,"['', '', '']",,"[[1280, 1280, 1], [1280, 1], [1]]",,"['c10::Half', 'c10::Half', 'c10::Half']",,108.520996,0.000000,[Cijk_Alik_Bljk_HHS_BH_Bias_HA_S_SAV_UserArgs_...,,1
28,aten::linear,aten::cat,"[nn.Module: Qwen2VisionTransformer_0, nn.Modul...",,"[[14308, 1, 1280], [1280, 1280], [1280]]",,"['', '', '']",,"[[1280, 1280, 1], [1280, 1], [1]]",,"['c10::Half', 'c10::Half', 'c10::Half']",0.000000,67.166992,,"[Memset (Device), nvjet_hsh_256x144_64x4_1x2_h...",2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
565,cudaLaunchKernel,aten::cat,[],,,,,,,,,0.000000,1.728027,,[at::native::(anonymous namespace)::masked_sca...,
566,cudaLaunchKernel,aten::cat,[],,,,,,,,,0.000000,69.918945,,[void at::native::vectorized_elementwise_kerne...,
877,cudaLaunchKernel,aten::cat,[],,,,,,,,,0.000000,2.624023,,[void at::native::unrolled_elementwise_kernel<...,
878,cudaLaunchKernel,aten::cat,[],,,,,,,,,0.000000,4.959961,,[void at::native::index_elementwise_kernel<128...,


In [None]:
# The `diff_stats_unique_args_summary_df` DataFrame summarizes the `df_diff_stats` DataFrame above
# across unique argument combinations (name, input shapes/strides/types, kernel names),
# reporting mean/sum kernel time per trace plus signed and absolute differences.
# NOTE(review): kernel times here appear to be in microseconds (the name-level summary
# shows the same totals divided by 1000 in its `_ms` columns), and the rows appear to be
# sorted by `kernel_time_trace1_sum` descending. Confirm both.
df_unique_args = td.diff_stats_unique_args_summary_df
df_unique_args.head(10)

Unnamed: 0,name,prev_combined,input_shape_trace1,input_shape_trace2,concrete_inputs_trace1,concrete_inputs_trace2,input_strides_trace1,input_strides_trace2,input_type_trace1,input_type_trace2,kernel_names_trace1,kernel_names_trace2,kernel_time_trace1_mean,kernel_time_trace1_sum,kernel_time_trace2_mean,kernel_time_trace2_sum,diff_mean,diff_sum,abs_diff_mean,abs_diff_sum
0,FlashAttnVarlenFunc,,"[[14308, 16, 80], [14308, 16, 80], [14308, 16,...",,"['', '', '', '', '', '14308', '14308', '0.', '...",,"[[1280, 80, 1], [1280, 80, 1], [3840, 80, 1], ...",,"['c10::Half', 'c10::Half', 'c10::Half', 'int',...",,[_ZN7ck_tile6kentryILi256ELi2ENS_13FmhaFwdKern...,,4356.606941,74062.317993,0.0,0.0,-4356.606941,-74062.317993,4356.606941,74062.317993
1,vllm::rocm_unquantized_gemm,aten::sigmoid,"[[14308, 1, 5120], [1280, 5120], [1280]]",,"['', '', '']",,"[[5120, 5120, 1], [5120, 1], [1]]",,"['c10::Half', 'c10::Half', 'c10::Half']",,[Cijk_Alik_Bljk_HHS_BH_Bias_HA_S_SAV_UserArgs_...,,329.399285,5599.787842,0.0,0.0,-329.399285,-5599.787842,329.399285,5599.787842
2,vllm::rocm_unquantized_gemm,,"[[14308, 1, 1280], [5120, 1280], [5120]]",,"['', '', '']",,"[[1280, 1280, 1], [1280, 1], [1]]",,"['c10::Half', 'c10::Half', 'c10::Half']",,[Cijk_Alik_Bljk_HHS_BH_Bias_HA_S_SAV_UserArgs_...,,327.94058,5574.989868,0.0,0.0,-327.94058,-5574.989868,327.94058,5574.989868
3,vllm::rocm_unquantized_gemm,,"[[14308, 1, 1280], [3840, 1280], [3840]]",,"['', '', '']",,"[[1280, 1280, 1], [1280, 1], [1]]",,"['c10::Half', 'c10::Half', 'c10::Half']",,[Cijk_Alik_Bljk_HHS_BH_Bias_HA_S_SAV_UserArgs_...,,274.546672,4941.840088,0.0,0.0,-274.546672,-4941.840088,274.546672,4941.840088
4,aten::mul,,"[[14308, 1, 5120], [14308, 1, 5120]]","[[14308, 1, 5120], [14308, 1, 5120]]","['', '']","['', '']","[[5120, 5120, 1], [5120, 5120, 1]]","[[5120, 5120, 1], [5120, 5120, 1]]","['c10::Half', 'c10::Half']","['c10::Half', 'c10::Half']",[void at::native::vectorized_elementwise_kerne...,[void at::native::vectorized_elementwise_kerne...,117.404764,1995.880981,144.697933,2459.864868,27.29317,463.983887,27.29317,463.983887
5,aten::cat,,"[[[1, 14308, 16, 80], [1, 14308, 16, 80]], []]","[[[1, 14308, 16, 80], [1, 14308, 16, 80]], []]","['', '0']","['', '0']","[[[1280, 3840, 80, 1], [1280, 3840, 80, 1]], []]","[[[1280, 3840, 80, 1], [1280, 3840, 80, 1]], []]","['TensorList', 'Scalar']","['TensorList', 'Scalar']",[void at::native::(anonymous namespace)::CatAr...,[void at::native::(anonymous namespace)::CatAr...,101.134623,1820.423218,154.975552,2789.559937,53.840929,969.136719,53.840929,969.136719
6,aten::copy_,,"[[14308, 1176], [14308, 1176], []]","[[14308, 1176], [14308, 1176], []]","['', '', 'True']","['', '', 'True']","[[1176, 1], [1176, 1], []]","[[1176, 1], [1176, 1], []]","['c10::Half', 'c10::Half', 'Scalar']","['c10::Half', 'c10::Half', 'Scalar']",[Memcpy HtoD (Host -> Device)],[Memcpy HtoD (Pageable -> Device)],1817.562988,1817.562988,3648.500977,3648.500977,1830.937988,1830.937988,1830.937988,1830.937988
7,vllm::rocm_unquantized_gemm,,"[[14308, 1, 1280], [1280, 1280], [1280]]",,"['', '', '']",,"[[1280, 1280, 1], [1280, 1], [1]]",,"['c10::Half', 'c10::Half', 'c10::Half']",,[Cijk_Alik_Bljk_HHS_BH_Bias_HA_S_SAV_UserArgs_...,,105.787526,1798.387939,0.0,0.0,-105.787526,-1798.387939,105.787526,1798.387939
8,aten::native_layer_norm,,"[[14308, 1, 1280], [], [1280], [1280], []]","[[14308, 1, 1280], [], [1280], [1280], []]","['', '[1280]', '', '', '9.9999999999999995e-07']","['', '[1280]', '', '', '9.9999999999999995e-07']","[[1280, 1280, 1], [], [1], [1], []]","[[1280, 1280, 1], [], [1], [1], []]","['c10::Half', 'ScalarList', 'c10::Half', 'c10:...","['c10::Half', 'ScalarList', 'c10::Half', 'c10:...",[void at::native::(anonymous namespace)::vecto...,[void at::native::(anonymous namespace)::vecto...,46.660781,1633.127319,42.130089,1474.553101,-4.530692,-158.574219,4.61995,161.698242
9,aten::sigmoid,,"[[14308, 1, 5120]]","[[14308, 1, 5120]]",[''],[''],"[[5120, 5120, 1]]","[[5120, 5120, 1]]",['c10::Half'],['c10::Half'],[void at::native::vectorized_elementwise_kerne...,[void at::native::vectorized_elementwise_kerne...,93.411477,1587.995117,98.48077,1674.173096,5.069293,86.177979,5.128001,87.176025


In [None]:
# The `diff_stats_names_summary_df` DataFrame provides the highest-level summary,
# aggregating by operation name: a row count plus summed kernel time per trace and the
# signed/absolute difference, in the `*_ms` columns.
# NOTE(review): rows appear to be ordered by `kernel_time_trace1_sum_ms` descending; confirm.
df_name_summary = td.diff_stats_names_summary_df
df_name_summary

Unnamed: 0,name,row_count,kernel_time_trace1_sum_ms,kernel_time_trace2_sum_ms,diff_sum_ms,abs_diff_sum_ms
0,FlashAttnVarlenFunc,17,74.062318,0.0,-74.062318,74.062318
1,vllm::rocm_unquantized_gemm,69,17.915006,0.0,-17.915006,17.915006
2,aten::mul,34,3.420192,4.088341,0.668149,0.687341
3,aten::copy_,14,1.91002,3.669718,1.759698,1.905314
4,aten::cat,18,1.820423,2.78956,0.969137,0.969137
5,aten::native_layer_norm,35,1.633127,1.474553,-0.158574,0.161698
6,aten::sigmoid,17,1.587995,1.674173,0.086178,0.087176
7,aten::add,34,1.092235,1.211707,0.119472,0.122216
8,aten::mm,85,0.095502,59.952586,59.857084,59.91516
9,aten::index,2,0.071183,0.024864,-0.046319,0.046319


In [18]:
# Write TraceDiff reports to files
#
# This section demonstrates how to export TraceDiff reports to disk.
# The first call writes all reports (merged tree, detailed stats, summary stats) to REPORT_DIR.
# The second call writes a pruned version (GPU-only events) to PRUNED_REPORT_DIR.
#
# Each output folder is defined once so that customizing a path cannot leave the
# confirmation message pointing at a different folder than the one actually written.
REPORT_DIR = "rprt_diff"
PRUNED_REPORT_DIR = "rprt_diff_pruned"

td.print_tracediff_report_files(REPORT_DIR)
print(f"TraceDiff reports written to {REPORT_DIR}/")

td.print_tracediff_report_files(PRUNED_REPORT_DIR, prune_non_gpu=True)
print(f"Pruned TraceDiff reports (GPU only) written to {PRUNED_REPORT_DIR}/")

TraceDiff reports written to rprt_diff/
Pruned TraceDiff reports (GPU only) written to rprt_diff_pruned/
