# Benchmark — Naive vs Vectorised Layer Summaries
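This notebook times two approaches for computing per-layer streak and spatter pixel sums (plus the streak sums in the left and right edge strips of each layer) from the HDF5 segmentation results: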
- **Naive loop**: reads one layer at a time, sums in Python.
- **Vectorised batch**: reads 128 layers at once, sums in NumPy C-loops.

In [6]:

import contextlib
import pathlib
import time

import h5py
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

# HDF5 build file that both summary functions open read-only.
H5 = pathlib.Path("../data/2021-07-13 TCR Phase 1 Build 1.hdf5")

# Class ids; str(id) is the dataset key under "slices/segmentation_results".
CLS_STREAK = 3
CLS_SPATTER = 8

# Fraction of the layer width (nx) treated as the left / right edge strip.
EDGE_FRAC = 0.10


In [None]:
@contextlib.contextmanager
def timed(label: str):
    """Context manager that prints the wall-clock duration of its body.

    Parameters
    ----------
    label : str
        Text placed in front of the timing in the printed line
        (``"<label> took <seconds> s"``).

    The duration is measured with ``time.perf_counter`` and is reported even
    when the body raises; the exception itself is not swallowed and continues
    to propagate to the caller.
    """
    start = time.perf_counter()
    try:
        yield
    finally:
        # Runs on both normal exit and exception, so a failing (or
        # interrupted) cell still reports how long it ran before stopping.
        end = time.perf_counter()
        print(f"{label} took {end - start:.2f} s")

In [None]:
def summarise_layers_naive(max_layers=None):
    """Summarise the streak and spatter segmentation data one layer at a time.

    Opens ``H5`` read-only, then for every layer ``k`` reads the 2-D slice of
    the ``CLS_STREAK`` and ``CLS_SPATTER`` datasets found under
    ``slices/segmentation_results`` and sums it in a Python-level loop.

    Parameters
    ----------
    max_layers : int or None, optional
        Upper bound on the number of layers processed. Any falsy value
        (``None`` or ``0``) means "process every layer in the file".

    Returns
    -------
    pandas.DataFrame
        Indexed by ``layer`` with columns ``streak_px``, ``spatter_px``,
        ``streak_left`` and ``streak_right`` (same order as
        ``summarise_layers_vec``). ``streak_left`` / ``streak_right`` are the
        sums over the first / last ``int(nx * EDGE_FRAC)`` columns of the
        streak slice.

    Notes
    -----
    The sums equal pixel counts only if the datasets hold 0/1 masks --
    NOTE(review): confirm against the file's schema.
    The spatter dataset is indexed with the streak dataset's layer count;
    presumably both share one shape -- verify.
    """
    cols = ["streak_px", "spatter_px", "streak_left", "streak_right"]
    with h5py.File(H5, "r") as h5:
        seg = h5["slices/segmentation_results"]
        streak_ds = seg[str(CLS_STREAK)]
        spatt_ds = seg[str(CLS_SPATTER)]
        nL, _, nx = streak_ds.shape
        if max_layers:
            nL = min(nL, max_layers)
        edge = int(nx * EDGE_FRAC)
        # Explicit start column for the right strip: with edge == 0 the
        # slice ``-edge:`` is ``-0:`` and would select the *whole* layer
        # instead of an empty strip.
        right_from = max(nx - edge, 0)
        recs = []
        for k in tqdm(range(nL), desc="Naive"):
            # Integer indexing an h5py dataset already yields an in-memory
            # array, so no further ``[...]`` is needed.
            st = streak_ds[k]
            sp = spatt_ds[k]
            recs.append({
                "layer": k,
                "streak_px": int(st.sum()),
                "spatter_px": int(sp.sum()),
                "streak_left": int(st[:, :edge].sum()),
                "streak_right": int(st[:, right_from:].sum()),
            })
    # Passing ``columns`` pins the column order and keeps ``set_index`` from
    # raising KeyError when no layer was processed (empty ``recs``).
    return pd.DataFrame(recs, columns=["layer", *cols]).set_index("layer")

In [None]:
def summarise_layers_vec(batch=128, max_layers=None):
    """Summarise the streak and spatter segmentation data in batches of layers.

    Opens ``H5`` read-only and reads ``batch`` consecutive layers at a time
    from the ``CLS_STREAK`` and ``CLS_SPATTER`` datasets under
    ``slices/segmentation_results``, reducing each batch over its last two
    axes with NumPy.

    Parameters
    ----------
    batch : int, optional
        Number of layers read per HDF5 access (default 128). Each read
        materialises two ``(batch, ny, nx)`` arrays in memory.
    max_layers : int or None, optional
        Upper bound on the number of layers processed. Any falsy value
        (``None`` or ``0``) means "process every layer in the file".

    Returns
    -------
    pandas.DataFrame
        ``int64`` frame whose index is named ``layer``, with columns
        ``streak_px``, ``spatter_px``, ``streak_left`` and ``streak_right``.
        ``streak_left`` / ``streak_right`` are the sums over the first / last
        ``int(nx * EDGE_FRAC)`` columns of each streak layer.

    Notes
    -----
    The sums equal pixel counts only if the datasets hold 0/1 masks --
    NOTE(review): confirm against the file's schema.
    The spatter dataset is sliced with the streak dataset's layer count;
    presumably both share one shape -- verify.
    """
    cols = ["streak_px", "spatter_px", "streak_left", "streak_right"]
    with h5py.File(H5, "r") as h5:
        seg = h5["slices/segmentation_results"]
        streak_ds = seg[str(CLS_STREAK)]
        spatt_ds = seg[str(CLS_SPATTER)]
        nL, _, nx = streak_ds.shape
        if max_layers:
            nL = min(nL, max_layers)
        edge = int(nx * EDGE_FRAC)
        # Explicit start column for the right strip: with edge == 0 the
        # slice ``-edge:`` is ``-0:`` and would select *every* column
        # instead of an empty strip.
        right_from = max(nx - edge, 0)
        out = np.zeros((nL, len(cols)), dtype=np.int64)
        for start in tqdm(range(0, nL, batch), desc="Vectorised"):
            # The last batch may be shorter than ``batch``.
            slc = slice(start, min(start + batch, nL))
            st = streak_ds[slc]
            sp = spatt_ds[slc]
            # Reduce over (row, column) so one value per layer remains.
            out[slc, 0] = st.sum(axis=(1, 2))
            out[slc, 1] = sp.sum(axis=(1, 2))
            out[slc, 2] = st[:, :, :edge].sum(axis=(1, 2))
            out[slc, 3] = st[:, :, right_from:].sum(axis=(1, 2))
    return pd.DataFrame(out, columns=cols).rename_axis("layer")

In [None]:
# Cap on the number of layers each approach processes; set None for full run.
MAX_LAYERS = 1000

# Per-layer Python loop, timed first.
# NOTE(review): the vectorised run below reads the same layers afterwards and
# may benefit from data the OS has already cached -- confirm before quoting
# the speed-up.
with timed("Naive approach"):
    df_naive = summarise_layers_naive(max_layers=MAX_LAYERS)

# Batched NumPy reduction over the same layers, 128 layers per read.
with timed("Vectorised approach"):
    df_vec = summarise_layers_vec(128, MAX_LAYERS)

Naive:   0%|          | 0/10 [00:00<?, ?it/s]

Naive approach took 12.66 s


Vectorised:   0%|          | 0/1 [00:00<?, ?it/s]

Vectorised approach took 1.50 s
