# Transmission benchmark

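This notebook defines a single-pass benchmark and runs it on randomly drawn samples: each sample picks an implementation uniformly at random from `src/impls` (falling back to `impls`), then a payload size uniformly at random from that implementation's supported range, using the configured packet size and deletion probability. Results are collected in `results_table` with the columns `impl`, `payload_bitsize`, and `packets_until_reconstructed`.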

### Collect samples

In [18]:
import importlib
from random import randint
from joblib import Parallel, delayed
import math
import pandas as pd
from pathlib import Path
from tqdm.auto import tqdm
import numpy as np

from impls._interface import Config, Deletion, Protocol


# -----------------------------
# Config
# -----------------------------
# Packet width in bits: passed to each implementation's max_payload_bitsize()
# and used as Config.packet_bitsize when protocols are created.
PACKET_BITSIZE = 7
# Per-packet probability that the benchmark treats a packet as deleted.
DELETION_PROB = 0.1
# Multiplier applied to the number of (implementation, payload size)
# combinations to obtain the total number of samples to draw.
SAMPLING_DENSITY = 10.0
CHUNK_SIZE = 100  # "emit an event every 100 samples" -> one task per 100
N_WORKERS = -1  # -1 = use all cores; or set int like 8


# -----------------------------
# Benchmark primitive
# -----------------------------
def benchmark_single_pass(
    payload_bitsize: int,
    protocol: Protocol,
    max_iters: int = 10000,
    deletion_prob: float = 0.1,
):
    """Run one randomized transmission and count packets until the estimator stops.

    A random boolean payload of ``payload_bitsize`` bits is generated and fed
    to ``protocol.make_sampler``; packets pulled from that sampler are sent
    into the generator returned by ``protocol.make_estimator`` until the
    estimator finishes (raises ``StopIteration``).

    Channel model implemented here:
      * between 1 and 100 packets are discarded first (unknown start offset);
      * each following packet is dropped with probability ``deletion_prob``;
      * a run of consecutive drops is reported to the estimator as a single
        ``Deletion``; drops at the very beginning are not reported at all,
        because the "previous packet was a deletion" flag starts out true.

    Args:
        payload_bitsize: number of random payload bits to generate.
        protocol: object providing ``make_sampler(data)`` and
            ``make_estimator()``.
        max_iters: abort once more than this many packets have been counted.
        deletion_prob: per-packet drop probability.

    Returns:
        The number of packets (delivered or dropped) processed before the
        estimator stopped. The packet whose ``send()`` made the estimator
        stop is not included in the count.

    Raises:
        RuntimeError: if the sampler runs out of packets or ``max_iters`` is
            exceeded.
    """
    payload = np.random.randint(0, 2, size=payload_bitsize, dtype=np.bool_)

    packet_source = protocol.make_sampler(payload)
    estimator = protocol.make_estimator()

    def pull_packet():
        # A finite sampler is an error here; translate it so it cannot be
        # mistaken for the estimator finishing.
        try:
            return next(packet_source)
        except StopIteration:
            raise RuntimeError("Sampler exhausted")

    sent = 0

    try:
        # Simulate unknown start: throw away a random-length prefix.
        skip = randint(1, 100)
        while skip > 0:
            pull_packet()
            skip -= 1

        # Simulate transmission with deletions.
        last_was_deletion = True
        progress = next(estimator)  # prime the generator
        while True:
            packet = pull_packet()
            deleted = np.random.rand() < deletion_prob

            if not deleted:
                progress = estimator.send(packet)
            elif not last_was_deletion:
                # Only the first drop of a run is signalled.
                progress = estimator.send(Deletion)
            last_was_deletion = deleted

            sent += 1
            if sent > max_iters:
                raise RuntimeError(f"Exceeded max iters at {progress}")

    except StopIteration:
        # The estimator returned: reconstruction is complete.
        return sent

    raise RuntimeError("Unreachable")


# -----------------------------
# Discover implementations (PASS ONLY STRINGS/RANGES)
# -----------------------------
# Prefer the repository layout (src/impls); otherwise fall back to ./impls.
impl_dir = Path("src/impls") if Path("src/impls").exists() else Path("impls")

# Every non-underscore-prefixed .py file in the directory is an implementation.
impl_names = sorted(
    path.stem for path in impl_dir.glob("*.py") if path.name[:1] != "_"
)

# Payload sizes start one bit above the packet size and run up to whatever
# each implementation reports as its maximum for this packet size.
start = PACKET_BITSIZE + 1
impl_specs = []  # list[(impl_name: str, (lo, hi))]
for name in impl_names:
    module = importlib.import_module(f"impls.{name}")
    max_payload = int(module.max_payload_bitsize(PACKET_BITSIZE))
    if max_payload >= start:
        impl_specs.append((name, (start, max_payload)))

# Total number of (implementation, payload size) combinations.
base_samples = sum(hi - lo + 1 for _, (lo, hi) in impl_specs)

if not impl_specs:
    raise RuntimeError("No eligible implementations found for this packet size.")

samples = int(base_samples * SAMPLING_DENSITY)
n_tasks = math.ceil(samples / CHUNK_SIZE)


# -----------------------------
# One task = one chunk of samples
# (imports modules inside; the protocol cache lives for one chunk, not for the whole worker process)
# -----------------------------
def run_chunk(n_in_chunk: int, seed: int, impl_specs_local):
    """Draw and benchmark ``n_in_chunk`` random samples, one result row each.

    Both ``random`` and ``numpy.random`` are seeded with ``seed`` before any
    sample is drawn. For every sample an implementation is chosen at random
    from ``impl_specs_local``, then a payload size is chosen at random from
    that implementation's inclusive ``(lo, hi)`` range, and
    ``benchmark_single_pass`` is run with the module-level ``DELETION_PROB``.

    Imported modules and created protocols are memoised in dictionaries that
    are local to this call, so they are rebuilt for every chunk.

    Args:
        n_in_chunk: number of samples to run.
        seed: seed applied to ``random`` and ``numpy.random``.
        impl_specs_local: sequence of ``(impl_name, (lo, hi))`` entries.

    Returns:
        A list of ``[impl_name, payload_bitsize, packets]`` rows.
    """
    import importlib
    import random
    import numpy as np

    random.seed(seed)
    np.random.seed(seed)

    loaded_modules = {}
    built_protocols = {}

    def load_module(impl_name: str):
        module = loaded_modules.get(impl_name)
        if module is not None:
            return module
        module = importlib.import_module(f"impls.{impl_name}")
        loaded_modules[impl_name] = module
        return module

    def protocol_for(impl_name: str, payload_bitsize: int):
        cache_key = (impl_name, payload_bitsize)
        cached = built_protocols.get(cache_key)
        if cached is not None:
            return cached
        module = load_module(impl_name)
        config = Config(
            packet_bitsize=PACKET_BITSIZE, payload_bitsize=payload_bitsize
        )
        built = module.create_protocol(config)
        built_protocols[cache_key] = built
        return built

    rows = []
    for _ in range(n_in_chunk):
        # Order of the random draws matters for reproducibility: first the
        # implementation, then the payload size.
        impl_name, (lo, hi) = random.choice(impl_specs_local)
        payload_bitsize = random.randint(lo, hi)

        packets = benchmark_single_pass(
            payload_bitsize,
            protocol_for(impl_name, payload_bitsize),
            deletion_prob=DELETION_PROB,
        )
        rows.append([impl_name, payload_bitsize, packets])

    return rows


# -----------------------------
# Launch: tqdm wraps the task generator, so the bar advances as chunks are
# handed to the workers (one tick per ~100 samples), not as they finish
# -----------------------------

# Task sizes: every task runs CHUNK_SIZE samples except the last one, which
# takes whatever is left over.
task_sizes = [CHUNK_SIZE] * (n_tasks - 1)
task_sizes.append(samples - CHUNK_SIZE * (n_tasks - 1))

# One child SeedSequence per task, all derived from a fixed root
# (reproducible-ish across runs).
seeds = np.random.SeedSequence(12345).spawn(n_tasks)


results_chunks = Parallel(n_jobs=N_WORKERS, prefer="processes")(
    delayed(run_chunk)(size, int(seed_seq.generate_state(1)[0]), impl_specs)
    for size, seed_seq in tqdm(
        zip(task_sizes, seeds), desc="Benchmarking", total=n_tasks
    )
)

# Flatten the per-chunk row lists into one list of rows.
all_rows = []
for chunk in results_chunks:
    all_rows.extend(chunk)

results_table = pd.DataFrame(
    all_rows,
    columns=["impl", "payload_bitsize", "packets_until_reconstructed"],
)

results_table

Benchmarking:   0%|          | 0/176 [00:00<?, ?it/s]

Unnamed: 0,impl,payload_bitsize,packets_until_reconstructed
0,chain2,151,70
1,chain2,86,35
2,chain2,301,148
3,chain2,117,58
4,chain2,751,807
...,...,...,...
17575,chain2,170,79
17576,chain3,882,4242
17577,chain2,185,104
17578,chain2,162,79


### Plot results

In [21]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# =========================
# Config
# =========================

# Band levels: each level p produces an upper_p / lower_p pair around the mean
BANDS = [0.5, 0.95]

# Gaussian kernel bandwidth, expressed in units of payload_bitsize.
# Larger values average over a wider neighbourhood (more smoothing).
KERNEL_BW = 3.0


# =========================
# Colors
# =========================

# One palette entry per implementation; the palette is reused cyclically when
# there are more implementations than colors.
palette = px.colors.qualitative.D3
impls = sorted(pd.unique(results_table["impl"]))
color_map = dict(
    zip(impls, (palette[idx % len(palette)] for idx in range(len(impls))))
)


def to_rgba(color, alpha):
    """Convert a CSS-style color string to ``"rgba(r,g,b,alpha)"``.

    Accepted inputs:
      * ``"rgb(r, g, b)"`` and ``"rgba(r, g, b, a)"`` with integer channels
        (an existing alpha component is discarded);
      * hex strings with or without a leading ``#``: ``#rrggbb``, and the
        shorthand ``#rgb`` / ``#rgba`` where every digit is doubled. Any
        characters after the first six hex digits are ignored.

    Args:
        color: the color string to convert.
        alpha: opacity to embed; formatted into the result as-is.

    Returns:
        A string of the form ``"rgba(r,g,b,alpha)"``.

    Raises:
        ValueError: if the channels cannot be parsed (for example a named
            color such as ``"red"``).
    """
    spec = color.strip()
    if spec.lower().startswith("rgb"):
        inner = spec[spec.find("(") + 1 : spec.rfind(")")]
        # Only the first three components are color channels; a fourth one
        # (alpha of an "rgba(...)" input) is replaced by the `alpha` argument.
        r, g, b = (int(part) for part in inner.split(",")[:3])
    else:
        digits = spec.lstrip("#")
        if len(digits) in (3, 4):
            # Expand CSS shorthand: "abc" -> "aabbcc".
            digits = "".join(ch * 2 for ch in digits)
        r, g, b = (int(digits[i : i + 2], 16) for i in (0, 2, 4))
    return f"rgba({r},{g},{b},{alpha})"


# =========================
# Weighted stats
# =========================


def gaussian_kernel(dx, bw):
    """Return unnormalised Gaussian weights ``exp(-0.5 * (dx / bw) ** 2)``.

    ``dx`` holds the offsets ``x - x0`` from the evaluation point and ``bw``
    is the bandwidth; an offset of zero gets weight 1.
    """
    scaled = dx / bw
    return np.exp(-0.5 * (scaled * scaled))


def weighted_mean(y, w):
    """Return the mean of ``y`` weighted by ``w``.

    Yields ``np.nan`` when the weights sum to zero or less.
    """
    total_weight = w.sum()
    return np.nan if total_weight <= 0 else np.sum(w * y) / total_weight


def weighted_quantile(y, w, q):
    """
    Return the weighted ``q``-quantile of ``y`` for ``q`` in [0, 1].

    Entries with a non-finite value, a non-finite weight, or a weight that is
    not strictly positive are dropped. The remaining values are sorted and
    the result is the first value whose cumulative weight reaches ``q`` times
    the total weight. Returns ``np.nan`` if nothing usable remains.
    """
    values = np.asarray(y, dtype=float)
    weights = np.asarray(w, dtype=float)

    usable = np.isfinite(values) & np.isfinite(weights) & (weights > 0)
    values, weights = values[usable], weights[usable]
    if values.size == 0:
        return np.nan

    by_value = np.argsort(values)
    values = values[by_value]
    cumulative = np.cumsum(weights[by_value])

    total = cumulative[-1]
    if total <= 0:
        return np.nan

    # First position whose cumulative weight is >= the target, clamped so it
    # always refers to an existing element.
    pos = np.searchsorted(cumulative, q * total, side="left")
    pos = min(max(pos, 0), len(values) - 1)
    return values[pos]


# =========================
# Kernel-smoothed asymmetric bands from neighborhood samples
# =========================


def compute_kernel_asymmetric_bands(raw_df, bands, bw, min_total_weight=1e-12):
    """Compute a kernel-weighted mean and asymmetric bands per implementation.

    Rows of ``raw_df`` are grouped on ``impl``. For every distinct
    ``payload_bitsize`` value x0 observed in a group, all samples of that
    group are weighted with ``gaussian_kernel(x - x0, bw)`` and:

      * ``mean`` is the weighted mean of ``packets_until_reconstructed``;
      * for each ``p`` in ``bands``, ``upper_{p}`` is the weighted
        ``p``-quantile of the samples at or above the mean and ``lower_{p}``
        is the weighted ``(1 - p)``-quantile of the samples at or below it.
        Both are clamped so that ``lower <= mean <= upper``.

    Args:
        raw_df: DataFrame with columns ``impl``, ``payload_bitsize`` and
            ``packets_until_reconstructed``.
        bands: iterable of quantile levels ``p``.
        bw: Gaussian kernel bandwidth in ``payload_bitsize`` units.
        min_total_weight: evaluation points whose total kernel weight is
            below this threshold are skipped.

    Returns:
        A DataFrame sorted by ``impl`` and ``payload_bitsize`` with the
        columns ``impl``, ``payload_bitsize``, ``mean`` and one
        ``upper_{p}`` / ``lower_{p}`` pair per band. When no row is produced
        (for example because ``raw_df`` is empty) an empty DataFrame with
        those columns is returned instead of failing while sorting.
    """
    rows = []

    for impl, g in raw_df.groupby("impl"):
        x_all = g["payload_bitsize"].to_numpy(dtype=float)
        y_all = g["packets_until_reconstructed"].to_numpy(dtype=float)

        # Evaluate summaries at observed payload values (unique x)
        x_grid = np.sort(g["payload_bitsize"].unique().astype(float))

        for x0 in x_grid:
            w = gaussian_kernel(x_all - x0, bw)

            if w.sum() < min_total_weight:
                continue

            m = weighted_mean(y_all, w)
            if not np.isfinite(m):
                continue

            # Asymmetric split around the *kernel-weighted* mean at x0.
            # Samples exactly equal to the mean belong to both halves.
            above_mask = y_all >= m
            below_mask = y_all <= m

            ya, wa = y_all[above_mask], w[above_mask]
            yb, wb = y_all[below_mask], w[below_mask]

            # The halves do not depend on the band level, so check once.
            has_above = wa.sum() > 0
            has_below = wb.sum() > 0

            row = {"impl": impl, "payload_bitsize": x0, "mean": m}

            for p in bands:
                # upper band: quantile among samples >= mean
                # lower band: quantile among samples <= mean
                up = weighted_quantile(ya, wa, p) if has_above else m
                lo = weighted_quantile(yb, wb, 1 - p) if has_below else m

                # Safety: fall back to the mean and keep the ordering sane
                if not np.isfinite(up):
                    up = m
                if not np.isfinite(lo):
                    lo = m

                row[f"upper_{p}"] = max(up, m)
                row[f"lower_{p}"] = min(lo, m)

            rows.append(row)

    if not rows:
        # pd.DataFrame([]) has no columns, so sorting by "impl" would raise
        # KeyError; return an empty frame with the expected schema instead.
        columns = ["impl", "payload_bitsize", "mean"]
        for p in bands:
            columns += [f"upper_{p}", f"lower_{p}"]
        return pd.DataFrame(columns=list(dict.fromkeys(columns)))

    return (
        pd.DataFrame(rows)
        .sort_values(["impl", "payload_bitsize"])
        .reset_index(drop=True)
    )


# Per-implementation kernel-weighted mean and bands, one row per observed
# payload size.
bands_df = compute_kernel_asymmetric_bands(results_table, BANDS, bw=KERNEL_BW)

# =========================
# Plot
# =========================

fig = go.Figure()

# For each implementation, traces are added in this order: raw samples, then
# an (upper, lower) pair per band, then the mean line. The order of the band
# pair matters, see below.
for impl in impls:
    color = color_map[impl]

    # ---------- raw scatter ----------
    # Hidden until enabled from the legend (visible="legendonly"). It uses its
    # own legend group, so it is toggled separately from the mean and bands.
    raw = results_table[results_table["impl"] == impl]
    fig.add_trace(
        go.Scatter(
            x=raw["payload_bitsize"],
            y=raw["packets_until_reconstructed"],
            mode="markers",
            name=f"{impl} (samples)",
            legendgroup=f"{impl}_scatter",
            marker=dict(color=to_rgba(color, 0.35), size=5),
            visible="legendonly",
        )
    )

    # ---------- kernel bands ----------
    df = bands_df[bands_df["impl"] == impl]
    x = df["payload_bitsize"].to_numpy()

    # Bands are drawn from the largest level to the smallest. The first one
    # gets alpha 0.2 and each following one is alpha_step more opaque, so the
    # alpha values span 0.2 .. 0.3 across all bands.
    alpha_step = 0.1 / max(1, len(BANDS) - 1)
    for i, p in enumerate(sorted(BANDS, reverse=True)):
        alpha = 0.2 + i * alpha_step
        fillcolor = to_rgba(color, alpha)

        # Invisible (zero-width) upper boundary. It must be added immediately
        # before the lower boundary because fill="tonexty" on the lower trace
        # fills towards the trace added just before it.
        fig.add_trace(
            go.Scatter(
                x=x,
                y=df[f"upper_{p}"],
                mode="lines",
                line=dict(width=0),
                legendgroup=impl,
                showlegend=False,
                hoverinfo="skip",
            )
        )
        # Invisible lower boundary; carries the fill between lower and upper.
        fig.add_trace(
            go.Scatter(
                x=x,
                y=df[f"lower_{p}"],
                mode="lines",
                line=dict(width=0),
                fill="tonexty",
                fillcolor=fillcolor,
                legendgroup=impl,
                showlegend=False,
                hoverinfo="skip",
            )
        )

    # ---------- kernel mean ----------
    # The only trace of the `impl` legend group with a legend entry; the band
    # traces share the group and have showlegend=False.
    fig.add_trace(
        go.Scatter(
            x=x,
            y=df["mean"],
            mode="lines+markers",
            name=impl,
            legendgroup=impl,
            line=dict(color=color),
            marker=dict(color=color, size=6),
        )
    )

fig.update_layout(
    title=f"Convergence (packet_bitsize={PACKET_BITSIZE}, deletion_prob={DELETION_PROB}, kernel_bw={KERNEL_BW})",
    xaxis_title="payload_bitsize",
    yaxis_title="packets_until_reconstructed",
    # Clicking a legend entry toggles every trace in its legend group.
    legend=dict(groupclick="togglegroup"),
)

fig.show()