# MOE Kernel Performance Analysis

In [None]:
import importlib
import subprocess
import sys

# Make sure every third-party package this notebook uses can be imported,
# installing any missing one with pip into the interpreter running the kernel.
for pkg in ['numpy', 'pandas', 'plotly', 'ipywidgets', 'nbformat']:
    try:
        importlib.import_module(pkg)
    except ImportError:
        # Only a failed import triggers an install. Anything else (for example
        # a keyboard interrupt) propagates instead of silently invoking pip.
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', pkg])
        # Let the import system notice the freshly installed package.
        importlib.invalidate_caches()
        print(f"✓ {pkg} (installed)")
    else:
        print(f"✓ {pkg}")

In [None]:
from pathlib import Path
import pandas as pd
import ipywidgets as widgets
from IPython.display import display
import sys
sys.path.insert(0, '.')
import moe_utils
import importlib.util
# Execute analyze_profiling.py from its file path as a module named "analyze".
spec = importlib.util.spec_from_file_location("analyze", "analyze_profiling.py")
m = importlib.util.module_from_spec(spec)
spec.loader.exec_module(m)

# Bind the helpers used by the cells below as notebook-level names, in the
# same order they are looked up on the loaded module.
for _export in (
    "GPU_SPECS",
    "get_gpu_specs",
    "generate_data_summary",
    "prepare_roofline_data",
    "aggregate_2stage_kernels",
    "build_roofline_plot",
    "build_roofline_2stage_plot",
    "calculate_efficiency_gap",
    "build_efficiency_gap_plot",
    "generate_roofline_analysis",
):
    globals()[_export] = getattr(m, _export)

print("✓ Loaded")

In [None]:
# CSV read by this cell and reused by the export cell at the end.
INPUT_FILE = "results/profiling/kernels_with_counters.csv"

# GPU specification entry looked up under the name "MI300X".
gpu_specs = get_gpu_specs("MI300X")

# Read the CSV and pass it straight through prepare_roofline_data.
df = prepare_roofline_data(pd.read_csv(INPUT_FILE))
print(f"Loaded {len(df)} kernels")

## Data Summary

In [None]:
# Summarize `df` with generate_data_summary and render the result as this
# cell's output; `summary` stays available to later cells.
summary = generate_data_summary(df)
display(summary)

## Interactive Plot - Individual Kernels

In [None]:
# Column names offered by the "Color" and "Filter" dropdowns.
opts = [
    "kernel_type", "stage", "dtype", "q_dtype_a", "q_dtype_w",
    "q_type", "act_type", "token", "model_dim", "inter_dim",
    "expert", "topk", "block_m", "use_g1u1", "doweight_stage1",
]

# Controls for the individual-kernel plot. "All" in either the filter or the
# value dropdown means no filtering is applied.
color_dd = widgets.Dropdown(
    options=opts,
    value="kernel_type",
    description="Color:",
)
filter_dd = widgets.Dropdown(
    options=["All", *opts],
    value="All",
    description="Filter:",
)
value_dd = widgets.Dropdown(
    options=["All"],
    value="All",
    description="Value:",
)
log_check = widgets.Checkbox(value=True, description="Log scale")

# Output area the figure is drawn into.
out = widgets.Output()

def upd_vals(c):
    """Repopulate the "Value" dropdown to match the column picked in "Filter".

    Intended as an observer callback; ``c`` is the change notification and is
    not inspected.

    The dropdown offers only "All" when no filter column is chosen or when the
    chosen column is absent from ``df``. Otherwise it lists the column's
    distinct values as strings: numeric labels first, in numeric order (so
    "128" precedes "1024"), then the remaining labels alphabetically. The
    selection is always reset to "All".
    """
    def _label_order(label):
        # Numeric labels sort by magnitude; everything else (including "nan")
        # falls back to plain string order after the numbers.
        try:
            number = float(label)
        except ValueError:
            return (1, 0.0, label)
        if number != number:  # NaN never compares equal to itself
            return (1, 0.0, label)
        return (0, number, label)

    column = filter_dd.value
    if column == "All" or column not in df.columns:
        value_dd.options = ["All"]
    else:
        labels = [str(v) for v in df[column].unique()]
        value_dd.options = ["All"] + sorted(labels, key=_label_order)
    value_dd.value = "All"

def plot(c=None):
    """Redraw the individual-kernel roofline figure inside ``out``.

    ``c`` is the optional change notification from an observed widget and is
    not inspected, so the function can also be called directly.

    All of ``df`` is plotted unless both the filter and value dropdowns hold
    something other than "All"; in that case only rows whose filter column,
    compared as a string, equals the selected value are kept. When the
    "Log scale" box is unchecked both axes are switched to linear.
    """
    no_filter = filter_dd.value == "All" or value_dd.value == "All"
    if no_filter:
        selected = df
    else:
        matches = df[filter_dd.value].astype(str) == value_dd.value
        selected = df[matches]

    with out:
        # wait=True defers clearing until the replacement output arrives.
        out.clear_output(wait=True)
        fig = build_roofline_plot(selected, gpu_specs, color_by=color_dd.value)
        if not log_check.value:
            fig.update_xaxes(type="linear")
            fig.update_yaxes(type="linear")
        fig.show()

# Changing the filter column refreshes the value choices; every other control
# triggers a redraw directly.
filter_dd.observe(upd_vals, names="value")
for _control in (color_dd, value_dd, log_check):
    _control.observe(plot, names="value")

# Show the control row, then the output area, then draw the initial figure.
display(widgets.HBox([color_dd, filter_dd, value_dd, log_check]))
display(out)
plot()

## Interactive Plot - 2-Stage Combined

In [None]:
# Combine the per-kernel rows with aggregate_2stage_kernels and report how
# many combined rows came back.
df_comb = aggregate_2stage_kernels(df)
print(f"{len(df_comb)} pairs")

# The controls and figure are only built when there is something to show.
if len(df_comb) > 0:
    fdd = widgets.Dropdown(options=["All"] + opts, value="All", description="Filter:")
    vdd = widgets.Dropdown(options=["All"], value="All", description="Value:")
    log_check2 = widgets.Checkbox(value=True, description="Log scale")
    out2 = widgets.Output()

    def upd2(c):
        """Repopulate the "Value" dropdown for the column picked in ``fdd``.

        ``c`` is the widget change notification and is not inspected. The
        choices are the union of the column's distinct values in ``df`` and
        ``df_comb`` (whichever of the two has the column), shown as strings:
        numeric labels first in numeric order, then the rest alphabetically.
        The selection is always reset to "All".
        """
        def _label_order(label):
            # Numeric labels sort by magnitude; everything else (including
            # "nan") falls back to string order after the numbers.
            try:
                number = float(label)
            except ValueError:
                return (1, 0.0, label)
            if number != number:  # NaN never compares equal to itself
                return (1, 0.0, label)
            return (0, number, label)

        column = fdd.value
        if column == "All":
            vdd.options = ["All"]
        else:
            vals = set()
            for frame in (df, df_comb):
                if column in frame.columns:
                    vals.update(frame[column].unique())
            vdd.options = ["All"] + sorted((str(v) for v in vals), key=_label_order)
        vdd.value = "All"

    def plot2(c=None):
        """Redraw the 2-stage roofline figure inside ``out2``.

        ``c`` is the optional widget change notification and is not
        inspected. When a filter column and value are both selected, each of
        ``df`` and ``df_comb`` is filtered only if it has that column; a frame
        without the column is passed through unfiltered. When the "Log scale"
        box is unchecked both axes are switched to linear.
        """
        if fdd.value == "All" or vdd.value == "All":
            dp, dc = df, df_comb
        else:
            column, wanted = fdd.value, vdd.value
            dp = df[df[column].astype(str) == wanted] if column in df.columns else df
            dc = (
                df_comb[df_comb[column].astype(str) == wanted]
                if column in df_comb.columns
                else df_comb
            )
        with out2:
            # wait=True defers clearing until the replacement output arrives.
            out2.clear_output(wait=True)
            fig = build_roofline_2stage_plot(dp, dc, gpu_specs)
            if not log_check2.value:
                fig.update_xaxes(type="linear")
                fig.update_yaxes(type="linear")
            fig.show()

    # Changing the filter column refreshes the value choices; the value and
    # log-scale controls trigger a redraw directly.
    fdd.observe(upd2, names="value")
    vdd.observe(plot2, names="value")
    log_check2.observe(plot2, names="value")
    display(widgets.HBox([fdd, vdd, log_check2]))
    display(out2)
    plot2()

## Interactive Plot - Roofline Efficiency

In [None]:
# Build the efficiency table with calculate_efficiency_gap, then report the
# row count and how many rows carry each 'bound_type' label.
df_gap = calculate_efficiency_gap(df, gpu_specs)
print(f"Calculated efficiency for {len(df_gap)} kernels")
for _title, _kind in (("Memory-bound", "memory"), ("Compute-bound", "compute")):
    print(f"{_title}: {(df_gap['bound_type'] == _kind).sum()}")

In [None]:
# Controls for the interactive efficiency plot, including optional grouping
# of kernels by configuration columns.
x_opts = [
    "token", "model_dim", "inter_dim", "expert", "topk", "block_m",
    "dtype", "q_dtype_a", "q_dtype_w", "q_type", "act_type",
    "use_g1u1", "doweight_stage1",
]
color_opts = [
    "kernel_type", "stage", "dtype", "q_dtype_a", "q_dtype_w",
    "q_type", "act_type",
]
# The grouping choices are the same columns as the x-axis choices, kept as a
# separate list object.
group_opts = list(x_opts)

x_axis_dd = widgets.Dropdown(
    options=x_opts,
    value="token",
    description="X-axis:",
)
color_dd3 = widgets.Dropdown(
    options=color_opts,
    value="kernel_type",
    description="Color:",
)
filter_dd3 = widgets.Dropdown(
    options=["All", *opts],
    value="All",
    description="Filter:",
)
value_dd3 = widgets.Dropdown(
    options=["All"],
    value="All",
    description="Value:",
)
config_mode = widgets.Checkbox(value=False, description="Group by config")
group_by_select = widgets.SelectMultiple(
    options=group_opts,
    value=['token', 'model_dim'],
    description="Group by:",
    rows=10,
)

# Output area the efficiency figure is drawn into.
out3 = widgets.Output()

def upd3(c):
    """Repopulate the efficiency plot's "Value" dropdown for the chosen filter.

    Intended as an observer callback; ``c`` is the change notification and is
    not inspected.

    The dropdown offers only "All" when no filter column is chosen or when the
    chosen column is absent from ``df_gap``. Otherwise it lists the column's
    distinct values as strings: numeric labels first, in numeric order (so
    "128" precedes "1024"), then the remaining labels alphabetically. The
    selection is always reset to "All".
    """
    def _label_order(label):
        # Numeric labels sort by magnitude; everything else (including "nan")
        # falls back to plain string order after the numbers.
        try:
            number = float(label)
        except ValueError:
            return (1, 0.0, label)
        if number != number:  # NaN never compares equal to itself
            return (1, 0.0, label)
        return (0, number, label)

    column = filter_dd3.value
    if column == "All" or column not in df_gap.columns:
        value_dd3.options = ["All"]
    else:
        labels = [str(v) for v in df_gap[column].unique()]
        value_dd3.options = ["All"] + sorted(labels, key=_label_order)
    value_dd3.value = "All"

def plot3(c=None):
    """Redraw the efficiency figure inside ``out3``.

    ``c`` is the optional change notification from an observed widget and is
    not inspected, so the function can also be called directly.

    All of ``df_gap`` is plotted unless both the filter and value dropdowns
    hold something other than "All"; in that case only rows whose filter
    column, compared as a string, equals the selected value are kept. The
    selected grouping columns are forwarded only while "Group by config" is
    checked; otherwise ``None`` is passed.
    """
    unfiltered = filter_dd3.value == "All" or value_dd3.value == "All"
    if unfiltered:
        rows = df_gap
    else:
        keep = df_gap[filter_dd3.value].astype(str) == value_dd3.value
        rows = df_gap[keep]

    with out3:
        # wait=True defers clearing until the replacement output arrives.
        out3.clear_output(wait=True)
        grouping_on = config_mode.value
        group_cols = list(group_by_select.value) if grouping_on else None
        fig = build_efficiency_gap_plot(
            rows,
            gpu_specs,
            x_axis=x_axis_dd.value,
            color_by=color_dd3.value,
            use_config_grouping=grouping_on,
            group_by_cols=group_cols,
        )
        fig.show()

# Changing the filter column refreshes the value choices; every other control
# triggers a redraw directly.
filter_dd3.observe(upd3, names="value")
for _control in (x_axis_dd, color_dd3, value_dd3, config_mode, group_by_select):
    _control.observe(plot3, names="value")

# Layout: the main controls on the first row, the grouping selector with its
# explanatory label on the second.
_main_row = widgets.HBox([x_axis_dd, color_dd3, filter_dd3, value_dd3, config_mode])
_group_row = widgets.HBox([
    widgets.Label("Select columns to group by (only used when 'Group by config' is checked):"),
    group_by_select,
])
display(widgets.VBox([_main_row, _group_row]))
display(out3)
plot3()

## Export Plots to HTML

In [None]:
# Export roofline plots to HTML files by running generate_roofline_analysis on
# the same INPUT_FILE loaded above. "results/" is passed as the second
# argument (presumably the output directory; confirm in analyze_profiling.py).
generate_roofline_analysis(INPUT_FILE, "results/")