In [None]:
# === 02_preprocessing.ipynb ===
import os
import numpy as np
from src.data.loader import LungSegDataset
from src.data.transforms import basic_transform  # your resize+normalize+ToTensor

# Convert each raw dataset split into per-sample .npy arrays on disk.
for ds_name in ("shenzhen", "montgomery"):
    # Input folder and the two output folders for this split.
    raw_root = f"data/raw/{ds_name}"
    proc_img = f"data/processed/{ds_name}/images"
    proc_mask = f"data/processed/{ds_name}/masks"
    for out_dir in (proc_img, proc_mask):
        # exist_ok=True keeps re-runs of this cell from failing.
        os.makedirs(out_dir, exist_ok=True)

    ds = LungSegDataset(raw_root, transforms=basic_transform)
    print(f"→ Processing {ds_name}: {len(ds)} samples")

    for idx in range(len(ds)):
        # Each item is an (image, mask) pair; presumably torch tensors of
        # shape [1, H, W] after basic_transform — TODO confirm in the loader.
        image_t, mask_t = ds[idx]

        # Drop size-1 dimensions and move to host memory as NumPy arrays.
        image_arr = image_t.squeeze().cpu().numpy()
        mask_arr = mask_t.squeeze().cpu().numpy()

        # The sample id (e.g. "0001") becomes the file stem; masks get a
        # "_mask" suffix so image and mask files can be paired by name.
        sample_id = ds.ids[idx]
        image_path = os.path.join(proc_img, f"{sample_id}.npy")
        mask_path = os.path.join(proc_mask, f"{sample_id}_mask.npy")
        np.save(image_path, image_arr)
        np.save(mask_path, mask_arr)

    print(f"  ✔ Saved processed → {proc_img}, {proc_mask}\n")

# Quick sanity-check: load one saved sample back from disk and print its shapes.
#
# After the loop above, `ds` and `ds_name` both refer to the LAST split that was
# processed. The sample id and the directory must come from the same split:
# pairing the last dataset's ids with a hard-coded "shenzhen" path would look
# for another split's file name in the Shenzhen folder.
sample_id = ds.ids[0]
sample_img = np.load(
    os.path.join(f"data/processed/{ds_name}/images", f"{sample_id}.npy")
)
sample_mask = np.load(
    os.path.join(f"data/processed/{ds_name}/masks", f"{sample_id}_mask.npy")
)
print("Sample shapes:", sample_img.shape, sample_mask.shape)
