In [4]:
import tifffile
from pathlib import Path
import time
from datetime import timedelta
import json
import numpy as np

# ---- Helper functions ----
def compute_ndvi(nir, red):
    """Return the Normalized Difference Vegetation Index, clipped to [-1, 1].

    Computes ``(nir - red) / (nir + red + 1e-5)`` element-wise.

    Args:
        nir: Near-infrared band values (array-like or scalar).
        red: Red band values, broadcast-compatible with ``nir``.

    Returns:
        The NDVI values in a floating dtype of at least float32 precision;
        float64 inputs stay float64.
    """
    nir = np.asanyarray(nir)
    red = np.asanyarray(red)
    # Promote to floating point *before* subtracting: with unsigned integer
    # rasters (e.g. uint16) ``nir - red`` would wrap around instead of going
    # negative, silently producing a wrong index.
    dtype = np.result_type(nir.dtype, red.dtype, np.float32)
    nir = nir.astype(dtype, copy=False)
    red = red.astype(dtype, copy=False)
    # The small epsilon keeps the division defined where both bands are zero.
    return np.clip((nir - red) / (nir + red + 1e-5), -1, 1)

def compute_ndwi(green, nir):
    """Return the Normalized Difference Water Index, clipped to [-1, 1].

    Computes ``(green - nir) / (green + nir + 1e-5)`` element-wise.

    Args:
        green: Green band values (array-like or scalar).
        nir: Near-infrared band values, broadcast-compatible with ``green``.

    Returns:
        The NDWI values in a floating dtype of at least float32 precision;
        float64 inputs stay float64.
    """
    green = np.asanyarray(green)
    nir = np.asanyarray(nir)
    # Promote to floating point *before* subtracting: with unsigned integer
    # rasters (e.g. uint16) ``green - nir`` would wrap around instead of
    # going negative, silently producing a wrong index.
    dtype = np.result_type(green.dtype, nir.dtype, np.float32)
    green = green.astype(dtype, copy=False)
    nir = nir.astype(dtype, copy=False)
    # The small epsilon keeps the division defined where both bands are zero.
    return np.clip((green - nir) / (green + nir + 1e-5), -1, 1)

# ---- Input / output directories ----
# Processed inputs are read from this folder.
processed_dir = Path("../data/processed/sentinel2")

# Index rasters go into one sub-folder per index underneath the mask root.
mask_base_dir = Path("../data/masks")
ndvi_dir, ndwi_dir = (mask_base_dir / index_name for index_name in ("ndvi", "ndwi"))

# Create both output folders (and any missing parents) up front;
# exist_ok makes re-running this cell harmless.
ndwi_dir.mkdir(exist_ok=True, parents=True)
ndvi_dir.mkdir(exist_ok=True, parents=True)

# ---- Batch NDVI / NDWI mask generation ----
# For every metadata JSON in `processed_dir`: read the multi-band TIFF it
# references, derive NDVI and NDWI rasters from bands B04 (red), B08 (NIR)
# and B03 (green), and write each as a float32 TIFF into `ndvi_dir` /
# `ndwi_dir`. Patches whose two outputs already exist are skipped, so the
# cell can be re-run to resume an interrupted batch.
json_files = sorted(processed_dir.glob("*.json"))
N = len(json_files)
start_time = time.time()

# Single-pass mapping that turns a timestamp into a filename-safe token:
# ':' and '-' are dropped; 'T', '+' and '.' become '_'.
_TIMESTAMP_TABLE = str.maketrans(
    {":": None, "-": None, "T": "_", "+": "_", ".": "_"}
)

for idx, json_path in enumerate(json_files, start=1):
    # Explicit encoding: do not depend on the platform's default codec.
    with open(json_path, encoding="utf-8") as f:
        meta = json.load(f)

    patch_id = meta["patch_id"]
    timestamp = meta["timestamp"]
    timestamp_str = timestamp.translate(_TIMESTAMP_TABLE)
    base_name = f"{patch_id}_{timestamp_str}"

    tiff_file = processed_dir / meta["file_processed"]
    if not tiff_file.exists():
        print(f"[{idx}/{N}] Skipping (missing file): {tiff_file.name}")
        continue

    # Output paths
    out_ndvi = ndvi_dir / f"{base_name}_ndvi.tiff"
    out_ndwi = ndwi_dir / f"{base_name}_ndwi.tiff"

    # Check for existing outputs *before* the expensive read and index
    # computation, so resuming a finished batch costs almost nothing.
    if out_ndvi.exists() and out_ndwi.exists():
        print(f"[{idx}/{N}] Skipping (masks already exist): {base_name}")
        continue

    # Load image
    img = tifffile.imread(tiff_file)
    # NOTE(review): assumes the band axis comes first (bands, H, W) and that
    # a valid patch carries at least 9 bands -- confirm against the producer.
    if img.shape[0] < 9:
        print(f"[{idx}/{N}] Skipping (too few bands): {patch_id}")
        continue

    # Map band name -> position along the first axis, as listed in metadata.
    bands = meta["bands"]
    band_indices = {b: i for i, b in enumerate(bands)}

    try:
        red = img[band_indices["B04"]]
        nir = img[band_indices["B08"]]
        green = img[band_indices["B03"]]
    except (KeyError, IndexError) as e:
        # KeyError: band not listed in the metadata. IndexError: metadata
        # lists a position the raster does not have. Either way skip this
        # patch instead of aborting the whole batch.
        print(f"[{idx}/{N}] Skipping (missing band): {patch_id} | {e}")
        continue

    # Compute masks (per the original author's note, band data is expected
    # to be normalized float32 already, so no rescaling is applied here).
    ndvi = compute_ndvi(nir, red)
    ndwi = compute_ndwi(green, nir)

    tifffile.imwrite(out_ndvi, ndvi.astype("float32"))
    tifffile.imwrite(out_ndwi, ndwi.astype("float32"))

    # Progress tracking: the average is taken over all items visited so far
    # (skipped ones included), and the ETA extrapolates it to the remainder.
    elapsed = time.time() - start_time
    avg_time = elapsed / idx
    eta = avg_time * (N - idx)
    # Guard against a zero elapsed time on clocks with coarse resolution.
    rate = 1 / avg_time if avg_time > 0 else float("inf")
    print(f"[{idx}/{N}] {base_name} processed | "
          f"{rate:.2f} img/sec | "
          f"ETA: {str(timedelta(seconds=int(eta)))}")


[1/2836] patch_00032_5925311e_20151230_024015_355000_0000 processed | 36.27 img/sec | ETA: 0:01:18
[2/2836] patch_00032_5925311e_20160309_024010_788000_0000 processed | 22.33 img/sec | ETA: 0:02:06
[3/2836] patch_00032_5925311e_20160408_024013_300000_0000 processed | 20.09 img/sec | ETA: 0:02:21
[4/2836] patch_00032_5925311e_20160607_024019_908000_0000 processed | 18.86 img/sec | ETA: 0:02:30
[5/2836] patch_00032_5925311e_20160806_024019_419000_0000 processed | 17.93 img/sec | ETA: 0:02:37
[6/2836] patch_00032_5925311e_20170523_024018_455000_0000 processed | 17.56 img/sec | ETA: 0:02:41
[7/2836] patch_00032_5925311e_20170627_024015_499000_0000 processed | 17.27 img/sec | ETA: 0:02:43
[8/2836] patch_00032_5925311e_20170702_024014_452000_0000 processed | 17.20 img/sec | ETA: 0:02:44
[9/2836] patch_00032_5925311e_20170821_024016_656000_0000 processed | 17.20 img/sec | ETA: 0:02:44
[10/2836] patch_00032_5925311e_20170826_024014_648000_0000 processed | 17.07 img/sec | ETA: 0:02:45
[11/2836]