In [5]:
# %% [markdown]
# # Notebook 24: D2 manifest and visual QC for T2FS + ovary masks
#
# This notebook:
# 1. Scans the D2_TCPW folder for patients with both T2FS and ovary mask.
# 2. Builds a candidate manifest CSV.
# 3. Provides an interactive QC viewer to inspect each patient's slices (by default only the slices that contain ovary mask).

# %% 
import os
from pathlib import Path

import numpy as np
import pandas as pd
import nibabel as nib
import matplotlib.pyplot as plt

from ipywidgets import interact, IntSlider

# Slightly larger default figure size for the QC plots
plt.rcParams.update({"figure.figsize": (6, 6)})

# Project root: the parent of the current working directory
# (i.e. one level above the notebook when it is run in place)
project_root = Path("..").resolve()

# Shared data directory, and the D2 dataset folder inside it
data_dir = project_root / "data"
d2_root = data_dir / "UT-EndoMRI" / "D2_TCPW"
print("D2 root:", d2_root)

# Stop early if the dataset is not where we expect it
assert d2_root.is_dir(), f"D2 root not found: {d2_root}"

# No-op when the data directory already exists
data_dir.mkdir(exist_ok=True)


D2 root: C:\Users\lytten\programming\dlvr-project\data\UT-EndoMRI\D2_TCPW


In [6]:
# %%
# Find patients that have BOTH T2FS and ovary mask

# Column schema of the manifest. Passing it to the DataFrame constructor keeps
# the columns present even when no patient qualifies, so later code that
# indexes manifest_df["patient_id"] does not fail with a KeyError.
MANIFEST_COLUMNS = ["patient_id", "t2fs_path", "ov_mask_path"]

patient_dirs = sorted(p for p in d2_root.iterdir() if p.is_dir())
print(f"Found {len(patient_dirs)} patient folders under D2_TCPW.")

rows = []

for patient_folder in patient_dirs:
    pid = patient_folder.name  # e.g., "D2-000"
    t2fs_path = patient_folder / f"{pid}_T2FS.nii.gz"
    ov_path = patient_folder / f"{pid}_ov.nii.gz"

    # Check each expected file once and remember which ones are absent
    missing = [
        label
        for label, path in (("T2FS", t2fs_path), ("ov", ov_path))
        if not path.exists()
    ]
    if missing:
        # Helpful debug output
        print(f"Skipping {pid}: missing {', '.join(missing)}")
        continue

    rows.append(
        {
            "patient_id": pid,
            "t2fs_path": str(t2fs_path),
            "ov_mask_path": str(ov_path),
        }
    )

manifest_df = pd.DataFrame(rows, columns=MANIFEST_COLUMNS)

if manifest_df.empty:
    print("\nNo candidates found – check filename patterns and paths.")
else:
    manifest_df = manifest_df.sort_values("patient_id").reset_index(drop=True)
    print(f"\nFound {len(manifest_df)} patients with BOTH T2FS and ov masks.\n")
    display(manifest_df.head())


Found 73 patient folders under D2_TCPW.
Skipping D2-002: missing ov
Skipping D2-004: missing ov
Skipping D2-006: missing ov
Skipping D2-041: missing ov
Skipping D2-049: missing ov
Skipping D2-053: missing ov
Skipping D2-062: missing ov
Skipping D2-063: missing T2FS
Skipping D2-064: missing ov
Skipping D2-066: missing ov
Skipping D2-067: missing ov
Skipping D2-068: missing T2FS
Skipping D2-069: missing T2FS, ov
Skipping D2-071: missing ov
Skipping D2-077: missing ov
Skipping D2-079: missing ov
Skipping D2-080: missing ov

Found 56 patients with BOTH T2FS and ov masks.



Unnamed: 0,patient_id,t2fs_path,ov_mask_path
0,D2-000,C:\Users\lytten\programming\dlvr-project\data\...,C:\Users\lytten\programming\dlvr-project\data\...
1,D2-001,C:\Users\lytten\programming\dlvr-project\data\...,C:\Users\lytten\programming\dlvr-project\data\...
2,D2-003,C:\Users\lytten\programming\dlvr-project\data\...,C:\Users\lytten\programming\dlvr-project\data\...
3,D2-005,C:\Users\lytten\programming\dlvr-project\data\...,C:\Users\lytten\programming\dlvr-project\data\...
4,D2-007,C:\Users\lytten\programming\dlvr-project\data\...,C:\Users\lytten\programming\dlvr-project\data\...


In [7]:
# %%
# Persist the candidate manifest to disk so it can be reused later

output_path = data_dir / "d2_manifest_t2fs_ov_candidates.csv"

if not manifest_df.empty:
    # index=False: the row index carries no information worth storing
    manifest_df.to_csv(output_path, index=False)
    print(f"Saved candidate manifest to: {output_path}")
else:
    print("Manifest is empty – not saving CSV.")


Saved candidate manifest to: C:\Users\lytten\programming\dlvr-project\data\d2_manifest_t2fs_ov_candidates.csv


In [8]:
# %%
def load_patient_volumes(patient_id, manifest=None):
    """
    Load the T2FS volume and the ovary mask volume for one patient.

    Parameters
    ----------
    patient_id : str
        Value to look up in the manifest's 'patient_id' column.
    manifest : pd.DataFrame, optional
        Manifest with columns ['patient_id', 't2fs_path', 'ov_mask_path'].
        When omitted, the notebook-level ``manifest_df`` is used as it exists
        at call time, so re-running the manifest cell is picked up without
        having to redefine this function.

    Returns
    -------
    tuple
        ``(img_data, msk_data)``: the arrays returned by nibabel's
        ``get_fdata()`` for the image and for the mask.

    Raises
    ------
    ValueError
        If ``patient_id`` has no row in the manifest (this includes an empty
        manifest that has no 'patient_id' column at all).
    """
    if manifest is None:
        # Late binding: a default of `manifest=manifest_df` would freeze
        # whatever frame existed when this cell was executed.
        manifest = manifest_df

    # An empty manifest built from zero rows may carry no columns; report it
    # as "not found" instead of leaking a KeyError from the column lookup.
    if "patient_id" not in manifest.columns:
        raise ValueError(f"Patient {patient_id} not found in manifest.")

    row = manifest.loc[manifest["patient_id"] == patient_id]
    if row.empty:
        raise ValueError(f"Patient {patient_id} not found in manifest.")

    # If the id occurs more than once, the first matching row wins
    record = row.iloc[0]
    t2fs_path = Path(record["t2fs_path"])
    ov_path = Path(record["ov_mask_path"])

    img = nib.load(str(t2fs_path))
    msk = nib.load(str(ov_path))

    img_data = img.get_fdata()
    msk_data = msk.get_fdata()

    # Sanity check: only warns, the (possibly mismatched) arrays are still returned
    if img_data.shape != msk_data.shape:
        print("WARNING: Image and mask shapes differ:",
              img_data.shape, msk_data.shape)

    return img_data, msk_data


In [23]:
# %% [markdown]
# QC viewer: by default, only show slices that actually contain ovary mask voxels (pass `only_mask_slices=False` to browse every slice)

# %%
def qc_patient(patient_id, manifest=None, only_mask_slices=True):
    """
    Interactive visual QC for one patient.

    Shows one slice at a time (image in grayscale, mask overlaid in colour)
    with a slider to step through the slices. By default, only slices that
    contain any ovary mask are shown.

    Parameters
    ----------
    patient_id : str
        e.g. 'D2-001'
    manifest : pd.DataFrame, optional
        Manifest with columns ['patient_id', 't2fs_path', 'ov_mask_path'].
        When omitted, the notebook-level ``manifest_df`` is used as it exists
        at call time.
    only_mask_slices : bool
        If True, restrict slider to slices where mask > 0.

    Returns
    -------
    None
        The viewer is displayed as a side effect. If ``only_mask_slices`` is
        True and no slice contains mask, a message is printed and no viewer
        is created.
    """
    if manifest is None:
        # Late binding: a default of `manifest=manifest_df` would freeze
        # whatever frame existed when this cell was executed.
        manifest = manifest_df

    img, msk = load_patient_volumes(patient_id, manifest)

    # Assume axial slices along last axis (img[:, :, k])
    n_slices = img.shape[2]

    # Flag, per image slice, whether the mask has any voxel > 0. The loader
    # only warns when image and mask shapes differ, so scan just the slices
    # present in both volumes; image slices beyond the mask stay False
    # instead of raising an IndexError.
    n_common = min(n_slices, msk.shape[2])
    slice_has_mask = np.zeros(n_slices, dtype=bool)
    slice_has_mask[:n_common] = (msk[:, :, :n_common] > 0).any(axis=(0, 1))
    mask_slice_indices = np.flatnonzero(slice_has_mask).tolist()

    if only_mask_slices:
        indices = mask_slice_indices
        if len(indices) == 0:
            print(f"{patient_id}: no slices with ovary mask > 0.")
            return
        print(f"{patient_id}: showing {len(indices)} slices with ov mask "
              f"out of {n_slices} total.")
    else:
        indices = list(range(n_slices))
        print(f"{patient_id}: showing all {n_slices} slices "
              f"({len(mask_slice_indices)} have ov mask).")

    # The slider position only counts mask slices when the list is filtered
    position_label = "mask slice" if only_mask_slices else "shown slice"

    # Normalize image per volume for display: map the 1st..99th percentile
    # range to [0, 1] and clip outliers; the epsilon avoids division by zero
    # for a constant volume.
    img_min, img_max = np.percentile(img, [1, 99])
    img_norm = np.clip((img - img_min) / (img_max - img_min + 1e-6), 0, 1)

    def show_slice(idx):
        k = indices[idx]  # actual slice index in the volume

        fig, ax = plt.subplots()
        ax.imshow(img_norm[:, :, k].T, cmap="gray", origin="lower")

        if slice_has_mask[k]:
            mask_slice = msk[:, :, k].T
            # Hide background voxels so only the mask region is tinted
            ax.imshow(
                np.ma.masked_where(mask_slice <= 0, mask_slice),
                cmap="autumn",
                alpha=0.5,
                origin="lower",
            )
        ax.axis("off")
        ax.set_title(
            f"{patient_id} – slice {k} "
            f"({position_label} {idx+1}/{len(indices)})"
        )
        plt.show()

    interact(
        show_slice,
        idx=IntSlider(min=0, max=len(indices) - 1, step=1, value=0),
    )


# Quick smoke test: pick the first manifest entry as an example patient
if manifest_df.empty:
    print("No patients in manifest – QC viewer not available.")
else:
    example_pid = manifest_df["patient_id"].iloc[0]
    print("Example patient for QC:", example_pid)
    # To open the viewer for this patient, call: qc_patient(example_pid)


Example patient for QC: D2-000


In [24]:
# Open the interactive QC viewer for patient "D2-001" (default: only slices containing ovary mask)
qc_patient("D2-001")


D2-001: showing 2 slices with ov mask out of 34 total.


interactive(children=(IntSlider(value=0, description='idx', max=1), Output()), _dom_classes=('widget-interact'…