In [None]:
import time
import numpy as np
import tensorflow as tf

def _wait_for_pending_work(last_outputs=None):
    """Block until device work queued so far has finished.

    Uses ``tf.experimental.sync_devices()`` when the installed TensorFlow
    provides it. Otherwise falls back to copying ``last_outputs`` to host
    memory, which cannot complete before those outputs have been computed.
    """
    sync_devices = getattr(tf.experimental, "sync_devices", None)
    if sync_devices is not None:
        sync_devices()
        return
    if last_outputs is None:
        return
    # Fallback for TensorFlow builds without sync_devices: a host copy of the
    # most recent outputs forces their computation to finish.
    for tensor in tf.nest.flatten(last_outputs):
        if hasattr(tensor, "numpy"):
            tensor.numpy()


def benchmark_inference_rgb(
    model,
    X,                      # (N,256,256,3) in [0,255] OR [0,1]
    batch_size=16,
    n_tiles=1024,           # how many tiles to time
    warmup_batches=10,
    repeats=3,
    pixel_size_m=30,
    tile_size=256,
):
    """Time forward passes of ``model`` over tiles and report throughput.

    The first ``n_tiles`` entries of ``X`` are converted to float32 (and
    divided by 255 when their maximum exceeds 1.5), batched through a
    ``tf.data`` pipeline, and passed through ``model(..., training=False)``.
    After ``warmup_batches`` untimed batches, the full pass is timed
    ``repeats`` times with ``time.perf_counter``.

    Args:
        model: Callable invoked as ``model(batch, training=False)``.
        X: Array-like of tiles indexed along the first axis; values either in
            [0, 255] or already in [0, 1].
        batch_size: Number of tiles per batch.
        n_tiles: Maximum number of tiles to time. If ``X`` holds fewer tiles,
            only the available ones are timed and all statistics are based on
            that actual count.
        warmup_batches: Number of untimed batches run before timing; values
            of 0 or less skip the warm-up.
        repeats: Number of timed passes over the tiles (must be >= 1).
        pixel_size_m: Ground size of one pixel in metres.
        tile_size: Tile side length in pixels.

    Returns:
        dict with keys ``n_tiles`` (tiles actually timed), ``batch_size``,
        ``time_total_mean_s``, ``time_total_std_s``, ``sec_per_tile``,
        ``tile_area_km2``, ``sec_per_km2`` and ``km2_per_hour``.

    Raises:
        ValueError: If ``repeats`` is less than 1 or no tiles are selected.
    """
    if repeats < 1:
        raise ValueError("repeats must be >= 1")

    # --- prepare data (ensure float32 in [0,1]) ---
    Xb = np.asarray(X[:n_tiles], dtype=np.float32)
    n_timed = int(Xb.shape[0])
    if n_timed == 0:
        raise ValueError("no tiles to benchmark (X is empty or n_tiles < 1)")
    if Xb.max() > 1.5:
        # Division allocates a new array, so the caller's X is never modified.
        Xb = Xb / 255.0

    # Make a tf.data pipeline for fair GPU timing
    ds = (
        tf.data.Dataset.from_tensor_slices(Xb)
        .batch(batch_size)
        .prefetch(tf.data.AUTOTUNE)
    )

    # Warmup (GPU kernels, cudnn autotune, etc.)
    warm_out = None
    for xb in ds.take(max(int(warmup_batches), 0)):
        warm_out = model(xb, training=False)
    # Drain warm-up work so it cannot leak into the first timed run.
    _wait_for_pending_work(warm_out)

    # Timing
    times = []
    for _ in range(repeats):
        last_out = None
        t0 = time.perf_counter()
        for xb in ds:
            last_out = model(xb, training=False)
        # Make sure all queued device work is done before stopping the timer.
        _wait_for_pending_work(last_out)
        times.append(time.perf_counter() - t0)

    mean_s = float(np.mean(times))
    std_s = float(np.std(times, ddof=1)) if len(times) > 1 else 0.0

    # Normalise by the number of tiles actually processed, which is smaller
    # than n_tiles when X has fewer rows than requested.
    sec_per_tile = mean_s / n_timed
    # area per tile (km^2)
    tile_side_km = (tile_size * pixel_size_m) / 1000.0
    area_km2 = tile_side_km * tile_side_km  # 58.9824 for 30m & 256
    sec_per_km2 = sec_per_tile / area_km2
    if sec_per_tile > 0:
        km2_per_hour = (3600.0 * area_km2) / sec_per_tile
    else:
        # Timer resolution exhausted; report unbounded throughput, not crash.
        km2_per_hour = float("inf")

    out = {
        "n_tiles": n_timed,
        "batch_size": batch_size,
        "time_total_mean_s": mean_s,
        "time_total_std_s": std_s,
        "sec_per_tile": sec_per_tile,
        "tile_area_km2": area_km2,
        "sec_per_km2": sec_per_km2,
        "km2_per_hour": km2_per_hour,
    }

    print("\nInference benchmark")
    print(f"- tiles timed: {n_timed}")
    print(f"- batch size: {batch_size}")
    print(f"- total time: {mean_s:.3f} s (±{std_s:.3f} over {repeats} runs)")
    print(f"- sec/tile: {sec_per_tile:.6f}")
    print(f"- tile area: {area_km2:.4f} km^2")
    print(f"- sec/km^2: {sec_per_km2:.6f}")
    print(f"- throughput: {km2_per_hour:.2f} km^2/hour")

    return out

# Example invocation: X_test may hold raw [0, 255] tiles or tiles already
# scaled to [0, 1]; the benchmark rescales inputs whose maximum exceeds 1.5.
bench = benchmark_inference_rgb(
    model,
    X_test,  # numpy test tiles
    batch_size=16,
    n_tiles=min(2048, X_test.shape[0]),  # cap the timed subset at 2048 tiles
    warmup_batches=10,
    repeats=3,
    pixel_size_m=30,
    tile_size=256,
)
