# Latency Lab Analysis

This notebook reads `results/index.csv` and shows basic comparisons.


In [None]:
import sys
from pathlib import Path

def find_repo_root(start: Path) -> Path:
    """Return the closest directory at or above *start* holding the repo marker.

    The marker is the file ``scripts/results_lib.py``. The search begins at
    *start* itself and then walks through its ancestors, nearest first.

    Args:
        start: Directory from which the upward search begins.

    Returns:
        The first directory containing the marker file, or *start* unchanged
        when neither it nor any ancestor contains the marker.
    """
    marker = Path("scripts", "results_lib.py")
    lineage = (start, *start.parents)
    # next() with a default gives the "fall back to start" behaviour in one step.
    return next(
        (directory for directory in lineage if (directory / marker).exists()),
        start,
    )

# Locate the repository root starting from the resolved current directory, then
# make its scripts/ directory importable for the project imports that follow.
repo_root = find_repo_root(Path().resolve())
scripts_dir = repo_root / "scripts"
# Guard the append so re-running this cell does not pile up duplicate
# sys.path entries.
if str(scripts_dir) not in sys.path:
    sys.path.append(str(scripts_dir))

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from results_lib import load_index
from analysis_utils import (
    ensure_dataframe,
    resolve_index_paths,
    prepare_summary,
    prepare_case,
    build_profile,
    plot_summary,
    plot_case_distribution,
    plot_profile,
)

# Select seaborn's "whitegrid" style through set_theme for this notebook.
sns.set_theme(style="whitegrid")


## Run benchmarks from the notebook


In [None]:
import sys
from pathlib import Path

from IPython.display import display

def find_repo_root(start: Path) -> Path:
    """Walk upward from *start* to the directory that contains the runner script.

    A directory qualifies when ``scripts/notebook_runner.py`` exists beneath
    it. *start* is checked first, followed by each successive parent.

    Args:
        start: Directory from which the upward search begins.

    Returns:
        The first qualifying directory, or *start* itself if the top of the
        path is reached without a match.
    """
    current = start
    while True:
        if current.joinpath("scripts", "notebook_runner.py").exists():
            return current
        parent = current.parent
        # A path whose parent is itself is the top of the chain: nothing matched.
        if parent == current:
            return start
        current = parent

# Locate the repository root starting from the resolved current directory.
repo_root = find_repo_root(Path().resolve())
scripts_dir = repo_root / "scripts"
# Append scripts/ to sys.path only if it is not already listed, so re-running
# this cell does not add duplicate entries.
if str(scripts_dir) not in sys.path:
    sys.path.append(str(scripts_dir))

# Imported after the sys.path update above; notebook_ui is presumably resolved
# from scripts/ — confirm. The reload re-executes the module so the freshest
# version is used even if it was imported earlier in this session.
import importlib
import notebook_ui
importlib.reload(notebook_ui)

# Paths passed to notebook_ui.display_runner_ui in the next cell.
bench_path = repo_root / "build" / "bench"
results_dir = repo_root / "results"


In [None]:
# Call notebook_ui.display_runner_ui with the `bench_path` and `results_dir`
# defined in the setup cell, and keep the three returned values.
# NOTE(review): the types of `ui`, `output`, `runner` and the exact effect of
# auto_load=False are defined in notebook_ui — confirm there.
ui, output, runner = notebook_ui.display_runner_ui(
    bench_path=bench_path,
    results_dir=results_dir,
    auto_load=False,
)


In [None]:
# Load the results index from results/index.csv under the repository root.
index = load_index(repo_root / "results" / "index.csv")
# Pass the index through resolve_index_paths together with the repo root
# (presumably so stored paths are usable from here — confirm in analysis_utils).
index = resolve_index_paths(index, repo_root)
# Final expression of the cell: the first 10 rows are shown as the cell output.
index.head(10)


## Summary across cases (interactive)

Use the controls below to select a metric, filter, and grouping. Labels are
shortened in the plots to keep the charts readable.


In [None]:
import ipywidgets as widgets
from IPython.display import display, clear_output

# Working frame for the summary controls: the result of ensure_dataframe(index).
_summary_base = ensure_dataframe(index)

# Metric choices: candidate columns that exist in the frame, else just "p50".
_metric_candidates = ["min", "p50", "p95", "p99", "p999", "max", "mean"]
metric_options = [
    name for name in _metric_candidates if name in _summary_base.columns
] or ["p50"]

# Hue choices: "(none)" first, followed by whichever candidate columns exist.
_summary_hue_candidates = ["pin_cpu", "tags", "bench_args"]
hue_options = ["(none)"]
hue_options.extend(
    column for column in _summary_hue_candidates if column in _summary_base.columns
)

# Lab choices: "(all)" first, then the sorted distinct non-null labs as strings.
lab_options = ["(all)"]
if "lab" in _summary_base.columns:
    labs = _summary_base["lab"].dropna().astype(str).unique().tolist()
    lab_options.extend(sorted(labs))

# Upper bound for the "Max cases" slider: the distinct case count clamped to
# the range 10..80 (10 is assumed when there is no "case" column).
if "case" in _summary_base.columns:
    case_count = int(_summary_base["case"].nunique())
else:
    case_count = 10
max_cases_cap = max(10, min(80, case_count))

summary_metric = widgets.Dropdown(
    description="Metric",
    options=metric_options,
    value=metric_options[0],
)
summary_hue = widgets.Dropdown(
    description="Hue",
    options=hue_options,
    value="(none)",
)
summary_lab = widgets.Dropdown(
    description="Lab",
    options=lab_options,
    value="(all)",
)
summary_tag = widgets.Text(description="Tag contains", value="")
summary_max_cases = widgets.IntSlider(
    description="Max cases",
    min=5,
    max=max_cases_cap,
    step=5,
    value=min(30, max_cases_cap),
)
summary_label_max = widgets.IntSlider(
    description="Label max",
    min=10,
    max=80,
    step=2,
    value=32,
)

# Output area the summary table and plot are rendered into.
summary_out = widgets.Output()

# Two rows of controls stacked vertically.
summary_controls = widgets.VBox(
    [
        widgets.HBox([summary_metric, summary_hue, summary_lab]),
        widgets.HBox([summary_tag, summary_max_cases, summary_label_max]),
    ]
)

display(summary_controls, summary_out)


In [None]:
def render_summary(*_):
    """Redraw the summary table and plot inside ``summary_out``.

    Any positional arguments are accepted and ignored, so the function can be
    called directly or registered as a widget observer. The current control
    values are read, ``prepare_summary`` is run on the index, and the resulting
    summary table and plot are displayed. A short message is printed instead
    when no metric is selected, when preparation raises, or when no rows are
    left after filtering.
    """
    with summary_out:
        clear_output(wait=True)

        chosen_metric = summary_metric.value
        if not chosen_metric:
            print("No metric selected.")
            return

        # Translate the "show everything" / blank control states into None.
        lab_choice = summary_lab.value
        lab_filter = lab_choice if lab_choice != "(all)" else None

        tag_text = summary_tag.value.strip()
        tag_filter = tag_text if tag_text else None

        case_limit = None
        if summary_max_cases.value:
            case_limit = int(summary_max_cases.value)

        try:
            result = prepare_summary(
                index,
                chosen_metric,
                filter_lab=lab_filter,
                filter_tag=tag_filter,
                max_cases=case_limit,
            )
        except Exception as exc:
            # Report the problem in the output area rather than raising from a callback.
            print(f"Summary error: {exc}")
            return

        if result["df"].empty:
            print("No rows to plot after filtering.")
            return

        display(result["summary_table"])

        hue_choice = summary_hue.value
        if hue_choice == "(none)":
            hue_choice = None

        plot_summary(
            result,
            metric=chosen_metric,
            hue_value=hue_choice,
            label_max=int(summary_label_max.value),
        )

# Re-render the summary whenever the `value` trait of any control changes.
for widget in (summary_metric, summary_hue, summary_lab, summary_tag, summary_max_cases, summary_label_max):
    widget.observe(render_summary, names="value")

# Render once up front so output appears before any control is touched.
render_summary()


## Case analysis: compare configurations (interactive)

Select a case and compare configurations. Labels are shortened in the plots;
full configuration strings appear in the table.


In [None]:
import ipywidgets as widgets
from IPython.display import display, clear_output

# Working frame for the case controls: the result of ensure_dataframe(index).
case_base = ensure_dataframe(index)

# Metric choices: candidate columns that exist in the frame, else just "p50".
_case_metric_candidates = ["min", "p50", "p95", "p99", "p999", "max", "mean"]
case_metric_options = [
    name for name in _case_metric_candidates if name in case_base.columns
] or ["p50"]

# Case choices: sorted distinct non-null case names as strings; a single empty
# string keeps the dropdown constructible when there are none.
if "case" in case_base.columns:
    case_options = sorted(case_base["case"].dropna().astype(str).unique().tolist())
else:
    case_options = []
case_options = case_options or [""]

# Config field choices: candidate columns that exist, else the full candidate list.
config_field_candidates = ["pin_cpu", "tags", "iters", "warmup", "bench_args"]
config_field_options = [
    column for column in config_field_candidates if column in case_base.columns
] or config_field_candidates

case_name_widget = widgets.Dropdown(
    description="Case",
    options=case_options,
    value=case_options[0],
)
case_primary_metric = widgets.Dropdown(
    description="Metric",
    options=case_metric_options,
    value=case_metric_options[0],
)
# Preselect the p50/p95/p99/p999 metrics that are available; if none of them
# are, preselect the first available metric.
case_metrics_widget = widgets.SelectMultiple(
    description="Profile",
    options=case_metric_options,
    value=(
        tuple(
            name
            for name in ("p50", "p95", "p99", "p999")
            if name in case_metric_options
        )
        or (case_metric_options[0],)
    ),
)
case_config_fields = widgets.SelectMultiple(
    description="Config fields",
    options=config_field_options,
    value=tuple(config_field_options),
)
case_max_configs = widgets.IntSlider(
    description="Max configs",
    min=2,
    max=20,
    step=1,
    value=8,
)
case_label_max = widgets.IntSlider(
    description="Label max",
    min=10,
    max=80,
    step=2,
    value=40,
)

# Output area the case plots and table are rendered into.
case_out = widgets.Output()

# Two rows of controls stacked vertically.
case_controls = widgets.VBox(
    [
        widgets.HBox([case_name_widget, case_primary_metric, case_max_configs]),
        widgets.HBox([case_metrics_widget, case_config_fields, case_label_max]),
    ]
)

display(case_controls, case_out)


In [None]:
def render_case(*_):
    """Redraw the per-case comparison inside ``case_out``.

    Any positional arguments are accepted and ignored, so the function can be
    called directly or registered as a widget observer. The current control
    values are read and ``prepare_case`` is run on the index. The output is
    then, in order: the distribution plot for the primary metric, the summary
    table (when one is provided) with a display-label column added, and a
    profile plot for the selected metrics over the first "Max configs"
    configurations. A short message is printed instead when preparation raises
    or when the case has no runs.
    """
    with case_out:
        clear_output(wait=True)

        selected_case = case_name_widget.value
        primary_metric = case_primary_metric.value
        requested_metrics = list(case_metrics_widget.value)
        config_fields = list(case_config_fields.value)
        label_limit = int(case_label_max.value)
        config_limit = int(case_max_configs.value)

        try:
            result = prepare_case(
                index,
                selected_case,
                config_columns=config_fields,
                primary_metric=primary_metric,
                metrics=requested_metrics,
            )
        except Exception as exc:
            # Report the problem in the output area rather than raising from a callback.
            print(f"Case analysis error: {exc}")
            return

        frame = result["df"]
        # From here on use the case name reported back by prepare_case.
        selected_case = result["case_name"]
        if frame.empty:
            print(f"No runs found for case: {selected_case!r}")
            return

        ordering = result["config_order"]
        unit_label = result["unit_label"]
        # Only the label mapping is needed; the second returned value is unused.
        label_map, _unused_order = plot_case_distribution(
            frame,
            config_order=ordering,
            primary_metric=primary_metric,
            unit_label=unit_label,
            label_max=label_limit,
            title=f"{selected_case} by configuration ({primary_metric})",
        )

        stats = result["summary_table"]
        if stats is not None:
            shown_columns = ["config_display", "config", "run_count", "median", "min", "max"]
            flat = stats.reset_index()
            flat["config_display"] = flat["config"].map(label_map)
            display(flat[shown_columns])

        # Keep only the requested metrics that exist as columns in the frame.
        plottable = [name for name in requested_metrics if name in frame.columns]
        if not plottable:
            return

        if config_limit:
            top_configs = ordering[:config_limit]
        else:
            top_configs = ordering

        profile = build_profile(frame, configs=top_configs, metrics=plottable)
        if len(profile) == 0:
            return

        profile["config_display"] = profile["config"].map(label_map)
        plot_profile(
            profile,
            unit_label=unit_label,
            title=f"{selected_case} quantile profile (top {len(top_configs)})",
        )

# Re-render the case view whenever the `value` trait of any control changes.
for widget in (
    case_name_widget,
    case_primary_metric,
    case_metrics_widget,
    case_config_fields,
    case_max_configs,
    case_label_max,
):
    widget.observe(render_case, names="value")

# Render once up front so output appears before any control is touched.
render_case()
