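Previously, the NPZ conversion used binary labels, i.e., it labelled both optimal and suboptimal frames as 1 and bad frames as 0. The conversion below instead keeps three per-frame classes (0 = no annotation, 1 = optimal, 2 = suboptimal).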

In [2]:
! pip install SimpleITK

Collecting SimpleITK
  Downloading simpleitk-2.5.2-cp311-abi3-win_amd64.whl.metadata (7.3 kB)
Downloading simpleitk-2.5.2-cp311-abi3-win_amd64.whl (18.8 MB)
   ---------------------------------------- 0.0/18.8 MB ? eta -:--:--
   - -------------------------------------- 0.8/18.8 MB 3.7 MB/s eta 0:00:05
   -- ------------------------------------- 1.3/18.8 MB 3.4 MB/s eta 0:00:06
   ---- ----------------------------------- 2.1/18.8 MB 3.2 MB/s eta 0:00:06
   ----- ---------------------------------- 2.6/18.8 MB 3.1 MB/s eta 0:00:06
   ------- -------------------------------- 3.4/18.8 MB 3.1 MB/s eta 0:00:05
   -------- ------------------------------- 3.9/18.8 MB 3.1 MB/s eta 0:00:05
   --------- ------------------------------ 4.5/18.8 MB 3.1 MB/s eta 0:00:05
   ---------- ----------------------------- 5.0/18.8 MB 3.1 MB/s eta 0:00:05
   ------------ --------------------------- 5.8/18.8 MB 3.1 MB/s eta 0:00:05
   ------------- -------------------------- 6.3/18.8 MB 3.1 MB/s eta 0:00:05
   


[notice] A new release of pip is available: 25.0.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import os
import numpy as np
import pandas as pd
import SimpleITK as sitk
from tqdm import tqdm
import traceback

def load_ac_csv(csv_path):
    """
    Read the abdominal-circumference CSV and key its rows by the ``uuid`` column.

    Args:
        csv_path: Path (or file-like object) accepted by ``pandas.read_csv``.

    Returns:
        DataFrame whose index is the ``uuid`` column of the CSV.
    """
    ac_table = pd.read_csv(csv_path)
    return ac_table.set_index("uuid")

def get_reference_ac(ac_df, uuid):
    """
    Return the first non-missing ``sweep_*`` value in the row for ``uuid``.

    Args:
        ac_df: DataFrame indexed by UUID (as produced by ``load_ac_csv``).
        uuid: Index label of the case to look up.

    Returns:
        The value as a Python ``float``, or ``None`` when every ``sweep_*``
        entry is missing or when the lookup fails for any reason (the error
        is printed, not raised).
    """
    try:
        # Only entries whose label contains 'sweep_' are considered, in their
        # existing order; NaN entries are dropped before picking the first one.
        sweep_values = ac_df.loc[uuid].filter(like='sweep_').dropna().values
        if len(sweep_values) > 0:
            return float(sweep_values[0])
        return None
    except Exception as e:
        print(f"⚠️ Error fetching AC for {uuid}: {e}")
        return None

def get_frame_labels_from_mask(mask_np):
    """
    Derive one class label per frame from the pixel values of its mask.

    Label meaning:
        0 -> frame contains neither pixel value 1 nor 2 (no annotation)
        1 -> frame contains at least one pixel equal to 1 (optimal);
             this takes precedence even if value 2 is also present
        2 -> frame contains pixel value 2 but no pixel value 1 (suboptimal)

    Args:
        mask_np: Array iterated along its first axis, one mask per frame.

    Returns:
        1-D ``np.uint8`` array with one label per frame.
    """
    def _frame_class(single_mask):
        # Check "optimal" first so it wins when both values occur in a frame.
        if (single_mask == 1).any():
            return 1
        if (single_mask == 2).any():
            return 2
        return 0

    return np.array([_frame_class(single_mask) for single_mask in mask_np], dtype=np.uint8)

def convert_acouslic_dataset(
    image_dir,
    mask_dir,
    csv_path,
    output_dir,
    limit=None,  # convert all if None
    pixel_spacing=0.28,
    add_channel_dim=False
):
    """
    Convert paired .mha image/mask volumes into one compressed .npz per case.

    For every ``*.mha`` file in ``image_dir`` the file with the same name is
    read from ``mask_dir``, the image is min-max normalised to [0, 1], a
    per-frame label is derived from the mask, and the result is written to
    ``output_dir/<case_id>.npz`` with the keys ``image``, ``mask``, ``label``,
    ``pixel_spacing``, ``ac_reference`` and ``uuid``.

    A case is skipped (with a printed message) when its output file already
    exists, when no AC value is found for it, or when image and mask shapes
    differ. Any other exception is printed with its traceback and the loop
    moves on to the next case.

    Args:
        image_dir: Directory containing the image ``.mha`` files.
        mask_dir: Directory containing same-named mask ``.mha`` files.
        csv_path: CSV with a ``uuid`` column and ``sweep_*`` columns.
        output_dir: Destination directory; created if missing.
        limit: Maximum number of cases written by this call (``None`` = all).
            Skipped cases, including already-existing outputs, do not count.
        pixel_spacing: Value stored unchanged in every file; it is not read
            from the image header.
        add_channel_dim: If True, ``image`` and ``mask`` are saved with an
            extra axis inserted at position 1.

    Returns:
        None.
    """
    os.makedirs(output_dir, exist_ok=True)
    ac_df = load_ac_csv(csv_path)

    mha_files = sorted(f for f in os.listdir(image_dir) if f.endswith(".mha"))
    converted = 0

    for f in tqdm(mha_files, desc="Converting ACOUSLIC cases"):
        if limit is not None and converted >= limit:
            break

        # Strip only the trailing extension; str.replace would also remove
        # ".mha" if it occurred elsewhere in the file name.
        case_id = f[:-len(".mha")]
        uuid = case_id  # assumes filenames are UUIDs

        image_path = os.path.join(image_dir, f)
        mask_path = os.path.join(mask_dir, f)
        output_path = os.path.join(output_dir, f"{case_id}.npz")
        # Must end in ".npz", otherwise numpy appends the extension itself.
        tmp_path = output_path + ".tmp.npz"

        if os.path.exists(output_path):
            print(f"ℹ️ Skipping {case_id}: already exists.")
            continue

        try:
            # Cheap lookup first: no point loading and normalising two full
            # volumes for a case that will be skipped for lack of a reference.
            ac_mm = get_reference_ac(ac_df, uuid)
            if ac_mm is None:
                print(f"⚠️ Missing AC value for {uuid}, skipping.")
                continue

            # Load MHA image and mask
            img = sitk.ReadImage(image_path)
            mask = sitk.ReadImage(mask_path)

            img_np = sitk.GetArrayFromImage(img).astype(np.float32)   # [T, H, W]
            mask_np = sitk.GetArrayFromImage(mask).astype(np.uint8)   # [T, H, W]

            if img_np.shape != mask_np.shape:
                print(f"⚠️ Shape mismatch for {case_id}: image={img_np.shape}, mask={mask_np.shape}")
                continue

            # Normalize image to [0, 1]; the epsilon avoids division by zero
            # for a constant-valued volume.
            img_np -= img_np.min()
            img_np /= (img_np.max() + 1e-8)

            # Compute the 3-class per-frame label while the mask is still
            # [T, H, W], i.e. before the optional channel axis is inserted.
            label = get_frame_labels_from_mask(mask_np)  # [T]

            # Optional: add channel dim
            if add_channel_dim:
                img_np = img_np[:, np.newaxis, :, :]
                mask_np = mask_np[:, np.newaxis, :, :]

            # Write to a temporary file and rename it into place, so that an
            # interrupted run never leaves a truncated archive at output_path
            # (which the "already exists" check above would then skip forever).
            # The arrays already have their final dtypes (float32 / uint8), so
            # they are passed without an extra copying astype().
            try:
                np.savez_compressed(
                    tmp_path,
                    image=img_np,
                    mask=mask_np,
                    label=label,
                    pixel_spacing=np.float32(pixel_spacing),
                    ac_reference=np.float32(ac_mm),
                    uuid=uuid
                )
                os.replace(tmp_path, output_path)
            finally:
                # After a successful rename the temp file no longer exists;
                # this only cleans up after a failed or interrupted write.
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)

            print(f"✅ Saved {case_id} | AC: {ac_mm:.2f} mm | Frames: {img_np.shape[0]} | Label classes: {np.unique(label)}")
            converted += 1

        except Exception as e:
            print(f"❌ Error in {case_id}: {e}")
            traceback.print_exc()


Run the conversion from .mha to .npz

In [None]:
# Trial run: with limit=5 the loop stops once five cases have been newly
# converted, so the output can be inspected before converting everything.
# pixel_spacing and add_channel_dim are left at their defaults (0.28, False).
convert_acouslic_dataset(
    image_dir="/kaggle/input/acouslic-ai-dataset/acouslic-ai-train-set/images/stacked_fetal_ultrasound",
    mask_dir="/kaggle/input/acouslic-ai-dataset/acouslic-ai-train-set/masks/stacked_fetal_abdomen",
    csv_path="/kaggle/input/acouslic-ai-dataset/acouslic-ai-train-set/circumferences/fetal_abdominal_circumferences_per_sweep.csv",
    output_dir="/kaggle/working/converted_npz",
    limit=5 # convert only 5 files first, as a check
)

Verifying the dimensions of the .npz files

In [None]:
# Inspect two converted cases: stored keys, array shapes, and the scalar metadata.
# `case` stays bound to the last loaded archive after the loop.
for npz_path in (
    "/kaggle/working/converted_npz/0199616b-bdeb-4119-97a3-a5a3571bd641.npz",
    "/kaggle/working/converted_npz/02d3a9bc-63e2-4deb-9dc1-dba17e7d54c1.npz",
):
    case = np.load(npz_path)
    print(case.files)
    print(case["image"].shape, case["mask"].shape, case["label"].shape)
    print("Pixel spacing:", case["pixel_spacing"])
    print("Ground-truth AC (mm):", case["ac_reference"])


Verifying by visualizing actual images and representations from the .npz files