# MboRawArray Benchmarks

Reproducible performance benchmarks for `MboRawArray` (ScanImage TIFF reader).

**Measures:**
- Initialization and metadata extraction
- Frame indexing (single, batch, z-plane selection)
- Phase correction variants (off, correlation, FFT)
- Writing to supported formats (.zarr, .tiff, .h5, .bin)

In [None]:
from pathlib import Path
from datetime import datetime

import mbo_utilities as mbo
from mbo_utilities.benchmarks import (
    BenchmarkConfig,
    BenchmarkResult,
    benchmark_mboraw,
    print_summary,
    plot_benchmark_results,
    plot_comparison,
)

## Configuration

Set your data path and choose a benchmark preset:
- `BenchmarkConfig.quick()` - Fast sanity check (~1-2 min)
- `BenchmarkConfig.full()` - Comprehensive suite (~5-10 min)
- `BenchmarkConfig.read_only()` - Skip write tests (~3-5 min)
- Custom configuration for specific needs

In [None]:
# label attached to this run's results (e.g., machine name, version)
LABEL = "RBO_W2"

# input: a scanimage tiff file, or a directory of them
DATA_PATH = Path(r"D:/demo/raw")

# destination for write benchmarks and saved results
OUTPUT_DIR = Path(r"D:/demo/bench_output")

# active configuration: keep exactly one of the lines below uncommented
config = BenchmarkConfig.quick()  # fast test
# config = BenchmarkConfig.full()  # comprehensive
# config = BenchmarkConfig.read_only()  # skip writes

# alternatively, build a custom configuration:
# config = BenchmarkConfig(
#     frame_counts=(10, 100, 500),
#     test_phase_fft=False,  # skip slow FFT tests
#     write_formats=('.zarr',),  # only test zarr
#     repeats=5,
# )

# echo the effective settings so they are captured in the cell output
print(f"Data: {DATA_PATH}")
print(f"Config: frame_counts={config.frame_counts}, repeats={config.repeats}")
print(f"Phase tests: no_phase={config.test_no_phase}, corr={config.test_phase_corr}, fft={config.test_phase_fft}")
print(f"Write formats: {config.write_formats}")

## Quick Data Check

Verify the data loads correctly before running benchmarks.

In [None]:
# open the dataset once and report its basic properties
arr = mbo.imread(DATA_PATH)
print(f"Type: {type(arr).__name__}")
print(f"Shape (T, Z, Y, X): {arr.shape}")
print(f"Dtype: {arr.dtype}")

# arrays that do not expose `filenames` are reported as a single file
if hasattr(arr, 'filenames'):
    file_count = len(arr.filenames)
else:
    file_count = 1
print(f"Files: {file_count}")

# index the first element to confirm that reads work at all
frame = arr[0]
print(f"\nSingle frame shape: {frame.shape}")

## Run Benchmarks

Execute the benchmark suite using the configuration selected above.

In [None]:
# run the suite with the settings chosen in the configuration cell
result = benchmark_mboraw(DATA_PATH, config=config, output_dir=OUTPUT_DIR, label=LABEL)

# text overview of the collected timings
print_summary(result)

## Results Visualization

In [None]:
# plot the results of this run (dark-mode visualization using the mbo theme)
plot_benchmark_results(result)

# optionally save the plot by passing an output path:
# plot_benchmark_results(result, output_path=OUTPUT_DIR / "benchmark_plot.png")

## Save Results

Save benchmark results to JSON for future comparison.

In [None]:
# timestamp the file name so repeated runs never overwrite each other
timestamp = format(datetime.now(), "%Y-%m-%d_%H%M%S")

# results are kept in a dedicated subfolder of the output directory
results_dir = OUTPUT_DIR.joinpath("results")
results_dir.mkdir(parents=True, exist_ok=True)

filename = results_dir.joinpath(f"benchmark_{LABEL}_{timestamp}.json")
result.save(filename)
print(f"Saved to: {filename}")

## Compare with Previous Runs

Load and compare results from previous benchmark runs.

In [None]:
# compare multiple runs visually
# results_to_compare = [
#     BenchmarkResult.load(results_dir / "benchmark_run1.json"),
#     BenchmarkResult.load(results_dir / "benchmark_run2.json"),
# ]
# plot_comparison(results_to_compare, metric="scaling")
# plot_comparison(results_to_compare, metric="throughput")

# text comparison with a previous run
def compare_results_text(current: BenchmarkResult, previous_path: Path):
    """Print a text comparison of ``current`` against a previously saved run.

    The earlier result is loaded from ``previous_path`` with
    ``BenchmarkResult.load``. For every category and benchmark name that
    exists in both runs, one line is printed with the current and previous
    ``mean_ms`` values and the relative change in percent. Entries present
    in only one of the two runs are skipped.

    A change of more than 5% in either direction is labelled "slower" or
    "faster"; anything in between is labelled "same". When the previous
    mean is 0 the relative change is undefined, so the line is printed
    without a percentage instead of raising ``ZeroDivisionError``.

    Parameters
    ----------
    current : BenchmarkResult
        Result of the run being evaluated.
    previous_path : Path
        Location of the saved result to compare against.
    """
    previous = BenchmarkResult.load(previous_path)

    print(f"\nComparison: {current.label} vs {previous.label}")
    print("=" * 60)

    for category in current.results:
        if category not in previous.results:
            continue

        curr_entries = current.results[category]
        prev_entries = previous.results[category]

        print(f"\n{category.upper()}")
        print("-" * 40)

        for name in curr_entries:
            if name not in prev_entries:
                continue

            curr_mean = curr_entries[name]["mean_ms"]
            prev_mean = prev_entries[name]["mean_ms"]

            if prev_mean == 0:
                # no baseline to divide by: report the raw values only
                print(f"  {name:25s}: {curr_mean:8.1f} vs {prev_mean:8.1f} ms  (n/a: previous mean is 0)")
                continue

            diff_pct = ((curr_mean - prev_mean) / prev_mean) * 100

            # the arrow shows the raw direction; the verdict applies a +/-5% tolerance
            arrow = "↑" if diff_pct > 0 else "↓" if diff_pct < 0 else "→"
            verdict = "slower" if diff_pct > 5 else "faster" if diff_pct < -5 else "same"

            print(f"  {name:25s}: {curr_mean:8.1f} vs {prev_mean:8.1f} ms  {arrow} {abs(diff_pct):5.1f}% ({verdict})")

# example: compare with a previous run
# previous_file = results_dir / "benchmark_workstation_v1_2025-12-20_120000.json"
# if previous_file.exists():
#     compare_results_text(result, previous_file)

## Individual Benchmark Functions

Run specific benchmarks independently for targeted testing.

In [None]:
from mbo_utilities.benchmarks import (
    benchmark_init,
    benchmark_indexing,
    benchmark_phase_variants,
    benchmark_writes,
)

# time array initialization in isolation
init_results = benchmark_init(DATA_PATH, repeats=5)

print("Initialization:")
for bench_name, timing in init_results.items():
    print(f"  {bench_name}: {timing.mean_ms:.1f} ± {timing.std_ms:.1f} ms")

In [None]:
# time frame indexing in isolation, on a freshly opened array
arr = mbo.imread(DATA_PATH)
idx_results = benchmark_indexing(arr, frame_counts=(1, 10, 100), repeats=3)

print("Indexing:")
for bench_name, timing in idx_results.items():
    print(f"  {bench_name}: {timing.mean_ms:.1f} ± {timing.std_ms:.1f} ms")

In [None]:
# time the phase correction variants in isolation
# (FFT is disabled here because it is slow; enable it for a complete picture)
phase_results = benchmark_phase_variants(DATA_PATH, frame_count=50, test_phase_fft=False)

print("Phase correction:")
for bench_name, timing in phase_results.items():
    print(f"  {bench_name}: {timing.mean_ms:.1f} ± {timing.std_ms:.1f} ms")

In [None]:
# time writing in isolation; reuses `arr` from the indexing cell
write_target = OUTPUT_DIR / "write_test"
write_results = benchmark_writes(arr, formats=(".zarr", ".tiff"), num_frames=50, output_dir=write_target)

print("Writing:")
for bench_name, timing in write_results.items():
    print(f"  {bench_name}: {timing.mean_ms:.1f} ± {timing.std_ms:.1f} ms")