In [None]:
import matplotlib.pyplot as plt
import monai
import torch
import numpy as np
import pandas as pd
from pathlib import Path
import os
import re
import glob
from collections import defaultdict
import pydicom
import nibabel as nib
from scipy.ndimage import zoom
from monai.bundle import load_bundle_config
from huggingface_hub import hf_hub_download
import cv2 #The import-call for cv2 is "pip install opencv-python" (not cv2)

Must know:
- ID 207: Add a folder named "sax" in cine and move all subfolders (with the weird names) in there for consistency

Good to know:
- All missing IDs have a "-" in column "Folders (y/n)". So when you find out folder-order and first and last for the missing ones, change that to "y" and run the code again. 
- We should ask about 187 (no sax folder), for now it also has a "-" and is ignored.
- Otherwise scroll down for the function calls. I structured it this way so it's easy to make into a .py file, but notebook is easier for debugging etc.
- Check out the TODO tag below about the affine to convert it into Nifti format and wether that is necessary for input into Giulia's code.

In [None]:
def parse_filename(filename):
    """
    Extracts sliceloc and triggertime values from a filename.

    Parameters:
        filename (str): Filename containing 'sliceloc_{val}_triggertime_{val}'.

    Returns:
        tuple[float | None, float | None]: Parsed sliceloc and triggertime as floats, or (None, None) if not found.
    """
    match = re.search(r"sliceloc_([-\d.]+)_triggertime_([-\d.]+)", filename)
    if match:
        return float(match.group(1)), float(match.group(2))
    return None, None

In [None]:
def get_relevant_files_n(df, base_path):
    """
    Selects one relevant file per slice location for each patient based on ED frame and apex–base range.

    For each patient, searches {base_path}/{ID}/cine/sax/ (recursively) for files named 
    like '...sliceloc_{val}_triggertime_{val}'. Keeps only slices within the apex–base 
    range and selects the earliest (ED Slice == 0) or latest frame per slice.

    Parameters:
        df (pd.DataFrame): df_y (see above); DataFrame of "ED_slices_and_timepoints.csv", without series-substructure
        base_path (str): Root path containing the patient folders.

    Returns:
        dict[str, list[str]]: Mapping from patient ID to selected file paths.
    """
    patient_files = {}

    for _, row in df.iterrows():
        pid = str(row["ID"]).strip()
        try:
            ed_slice = int(row["ED frame"])
            apex = float(row["apex"])
            base = float(row["base"])
        except (ValueError, TypeError):
            # Skip malformed rows
            continue

        folder = os.path.join(base_path, pid, "cine", "sax")
        if not os.path.isdir(folder):
            print(f"Warning: folder not found for patient {pid}")
            continue

        files = [p for p in Path(folder).rglob("*") if p.is_file()]
        parsed = []

        # Parse filenames
        for f in files:
            fname = f.name
            sliceloc, triggertime = parse_filename(fname)
            if sliceloc is not None and triggertime is not None:
                parsed.append((f, sliceloc, triggertime))

        if not parsed:
            print(f"Warning: no valid files for patient {pid}")
            continue

        # Group by sliceloc → list of triggertimes
        sliceloc_map = defaultdict(list)
        for f, sliceloc, triggertime in parsed:
            sliceloc_map[sliceloc].append((f, triggertime))

        lower, upper = sorted([apex, base])
        selected = []

        for sliceloc, items in sliceloc_map.items():
            if lower <= sliceloc <= upper:
                times = [tt for _, tt in items]
                if ed_slice == 0:
                    target_tt = min(times)
                else:
                    target_tt = max(times)

                # Add the file with this sliceloc + target triggertime
                for f, tt in items:
                    if tt == target_tt:
                        selected.append(str(f))
                        break  # Only one per sliceloc

        patient_files[pid] = selected

    return patient_files



In [None]:
def get_relevant_files_y(df, base_path):
    """
    Selects one relevant file per folder-defined slice for each patient using ED frame.

    For each patient, looks inside {base_path}/{ID}/cine/sax/series_{folder}/ for files.
    The slice locations are inferred from subfolder names (e.g. 'series_25').
    The column 'folder order' lists all available series (in order),
    while 'apex' and 'base' define the first and last folder to include.

    Parameters:
        df (pd.DataFrame): DataFrame with columns 'ID', 'ED Slice', 'apex', 'base', and 'folder order'.
        base_path (str): Root path containing the patient folders.

    Returns:
        dict[str, list[str]]: Mapping from patient ID to selected file paths.
    """

    patient_files = {}

    for _, row in df.iterrows():
        pid = str(row["ID"]).strip()
        try:
            ed_slice = int(row["ED frame"])
            apex = int(row["apex"])
            base = int(row["base"])
            folder_order = str(row["folder order"]).strip()
        except (ValueError, TypeError):
            print(f"Could not extract values for ID {pid}")
            continue

        if not folder_order or folder_order.lower() == "nan":
            continue

        sax_root = os.path.join(base_path, pid, "cine", "sax")
        if not os.path.isdir(sax_root):
            print(f"Warning: folder not found for patient {pid}")
            continue

        # Get ordered folder list (as ints)
        order = [int(x) for x in folder_order.split("-") if x.isdigit()]
        lower, upper = sorted([apex, base])
        lower_idx, upper_idx = order.index(lower), order.index(upper)
        selected_series = order[lower_idx:upper_idx+1]

        selected = []

        for sliceloc in selected_series:
            series_path = os.path.join(sax_root, f"series_{sliceloc}")
            if not os.path.isdir(series_path):
                print(f"Warning: missing folder series_{sliceloc} for patient {pid}")
                continue

            files = glob.glob(os.path.join(series_path, "*"))
            triggertimes = []

            for f in files:
                _, tt = parse_filename(os.path.basename(f))
                if tt is not None:
                    triggertimes.append((f, tt))

            if not triggertimes:
                continue

            if ed_slice == 0:
                chosen_file = min(triggertimes, key=lambda x: x[1])[0]
            else:
                chosen_file = max(triggertimes, key=lambda x: x[1])[0]

            selected.append(chosen_file)

        patient_files[pid] = selected

    return patient_files


In [None]:
def run_segmentation(files_dicts, output_root, save_as_stack: bool):
    """
    Runs MONAI ventricular segmentation on all DICOM files provided in files_dicts.

    Parameters:
        files_dicts (list[dict]): List of dicts (e.g., [files_n, files_y]) with {pid: [file_paths]}.
        output_root (str): Root folder where output NIfTI files will be saved.
    """
    # Load MONAI network config & weights
    parser = load_bundle_config("MONAI", "train.json")
    net = parser.get_parsed_content("network_def")

    model_path = hf_hub_download(
        repo_id="MONAI/ventricular_short_axis_3label",
        filename="models/model.pt"
    )
    net.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
    net.eval()

    target_shape = (256, 256) 

    for files_dict in files_dicts:
        for pid, paths in files_dict.items():
            num_slices = len(paths)
            img_stack = []
            seg_stack = []
            for idx, path in enumerate(paths):
                # Read and preprocess DICOM
                ds = pydicom.dcmread(path)
                img = ds.pixel_array.astype(np.float32)

                #print(f"Original image shape: {img.shape}, ndim: {img.ndim}")

                # Resample width and height to fixed size (256, 256) 
                im_resized = cv2.resize(img, (target_shape[1], target_shape[0]), interpolation=cv2.INTER_LINEAR)

                # Adjust contrast
                # im_resized = cv2.convertScaleAbs(im_resized, alpha=1.465, beta=0.0) # Used alpha-value provided by Giulia (this adjusts contrast)

                #print(f"Resized image shape: {im_resized.shape}")

                # Normalize and add batch & channel dims
                normed_im = im_resized / im_resized.max()
                input_tensor = torch.from_numpy(normed_im).float()[None, None, :, :]

                # Predict
                with torch.no_grad():
                    pred = net(input_tensor)
                    pred = torch.softmax(pred[0], dim=0)
                    seg = torch.argmax(pred, dim=0).numpy()

                if save_as_stack:
                    img_stack.append(normed_im)
                    seg_stack.append(seg)
                else:
                    # Save
                    pid_folder = os.path.join(output_root, str(pid))
                    os.makedirs(pid_folder, exist_ok=True)

                    affine = np.eye(4)  # identity affine 
                    #TODO: Look into this. This is an identity affine to map from numpy array to nifti file format, but we should probably
                    # use the one from the DICOM - or does this not matter for input into Giulia's code?

                    nib.save(nib.Nifti1Image(normed_im, affine), os.path.join(pid_folder, f"{idx}_img.nii.gz"))
                    nib.save(nib.Nifti1Image(seg.astype(np.uint8), affine), os.path.join(pid_folder, f"{idx}_seg.nii.gz"))

                    print(f"Saved {pid} slice {idx}")
        
            if save_as_stack and num_slices > 0: # NOTE: The order of the slices for patients with no folder structure is not necessarily correct.
                
                # Save the entire stack as a single NIfTI file
                pid_folder = os.path.join(output_root, str(pid))
                os.makedirs(pid_folder, exist_ok=True)

                img_stack = np.stack(img_stack, axis=-1)
                seg_stack = np.stack(seg_stack, axis=-1)

                affine = np.eye(4)  # identity affine for the stack

                nib.save(nib.Nifti1Image(img_stack, affine), os.path.join(pid_folder, "img_stack.nii.gz"))
                nib.save(nib.Nifti1Image(seg_stack.astype(np.uint8), affine), os.path.join(pid_folder, "seg_stack.nii.gz"))

                print(f"Saved {pid} image and segmentation stacks")
                

In [251]:
# This would be main in .py

# read in csv split on folders y/n
csv_file = "ED_slices_and_timepoints.csv" #For the future, once we structure our folders/files better we need to (probably) adjust this import
df = pd.read_csv(csv_file)
#display(df)

df.columns = df.columns.str.strip()
df["Folders (y/n)"] = df["Folders (y/n)"].str.strip().str.lower()

df_y = df[df["Folders (y/n)"] == 'y'].reset_index(drop=True)
df_n = df[df["Folders (y/n)"] == 'n'].reset_index(drop=True)

#display(df_n)
#display(df_y)

# Change this based on where you store the data
# base_path = "/Users/au698484/Documents/SSCP25_data/Data and scripts SSCP25 3/CMR_image_data/new data-dicom"
base_path = '/Users/inad001/Documents/SSCP25/Data and scripts SSCP25/CMR_image_data/new data-dicom'

files_n = get_relevant_files_n(df_n, base_path)
files_y = get_relevant_files_y(df_y, base_path)

# Call segmentation and save segmentations and images under specified output file (change to match your own destination)
# run_segmentation([files_n, files_y], output_root="/Users/au698484/Documents/SSCP25_data_segmented")
run_segmentation([files_n, files_y], output_root="/Users/inad001/Documents/SSCP25/segmented_data", save_as_stack=False)


Saved 15 slice 0
Saved 15 slice 1
Saved 15 slice 2
Saved 15 slice 3
Saved 15 slice 4
Saved 15 slice 5
Saved 15 slice 6
Saved 15 slice 7
Saved 114 slice 0
Saved 114 slice 1
Saved 114 slice 2
Saved 114 slice 3
Saved 114 slice 4
Saved 114 slice 5
Saved 114 slice 6
Saved 114 slice 7
Saved 114 slice 8
Saved 114 slice 9
Saved 114 slice 10
Saved 114 slice 11
Saved 126 slice 0
Saved 126 slice 1
Saved 126 slice 2
Saved 126 slice 3
Saved 126 slice 4
Saved 126 slice 5
Saved 126 slice 6
Saved 126 slice 7
Saved 126 slice 8
Saved 126 slice 9
Saved 130 slice 0
Saved 130 slice 1
Saved 130 slice 2
Saved 130 slice 3
Saved 130 slice 4
Saved 130 slice 5
Saved 130 slice 6
Saved 130 slice 7
Saved 130 slice 8
Saved 130 slice 9
Saved 130 slice 10
Saved 130 slice 11
Saved 138 slice 0
Saved 138 slice 1
Saved 138 slice 2
Saved 138 slice 3
Saved 138 slice 4
Saved 138 slice 5
Saved 138 slice 6
Saved 163 slice 0
Saved 163 slice 1
Saved 163 slice 2
Saved 163 slice 3
Saved 163 slice 4
Saved 163 slice 5
Saved 163 slic

### ED_segmentation_data

In [184]:
base_path = os.path.abspath("/Users/inad001/Documents/SSCP25/Data and scripts SSCP25/ED_segmentation_data/segmentation_stacks")

In [204]:
def get_cmr_nifti_files(base_path):

    patient_files = {}

    for patient in os.listdir(base_path):
        pid = patient.strip()

        patient_path = os.path.join(base_path, pid)

        if not os.path.isdir(patient_path):
            print(f"Warning: {patient_path} is not a directory")
            continue

        files = [os.path.join(patient_path, f) for f in os.listdir(patient_path) if f.startswith("cmr") and f.endswith(".nii")]

        if not files:
            print(f"Warning: no valid files found for patient {pid}")
            continue

        patient_files[pid] = files
    
    return patient_files

In [None]:
def run_segmentation_nifti(files_dicts, output_root):
    """
    Runs MONAI ventricular segmentation on all DICOM files provided in files_dicts.

    Parameters:
        files_dicts (list[dict]): List of dicts (e.g., [files_n, files_y]) with {pid: [file_paths]}.
        output_root (str): Root folder where output NIfTI files will be saved.
    """
    # Load MONAI network config & weights
    parser = load_bundle_config("MONAI", "train.json")
    net = parser.get_parsed_content("network_def")

    model_path = hf_hub_download(
        repo_id="MONAI/ventricular_short_axis_3label",
        filename="models/model.pt"
    )
    net.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
    net.eval()

    target_shape = (256, 256) 

    for files_dict in files_dicts:
        for pid, paths in files_dict.items():

            num_files = len(paths)

            if num_files == 0:
                print(f"No files found for patient {pid}. Skipping...")
                continue
            
            imgs = []
            if paths[0].endswith('.nii'): # TODO: Don't do this if the list is empty
                # Read and preprocess NIfTI file
                # nifti = nib.load(paths[0])
                img_stack = np.array(nib.load(paths[0]).get_fdata().astype(np.float32))
                # imgs.append(img_stack.unsqueeze(-1))
            else:
                # Create an empty stack for DICOM images
                img_stack = np.zeros((target_shape[0], target_shape[1], num_files), dtype=np.float32)
                for idx, path in enumerate(paths):
                    # Read and preprocess DICOM
                    ds = pydicom.dcmread(path)
                    img = ds.pixel_array.astype(np.float32)
                    im_resized = cv2.resize(img, (target_shape[1], target_shape[0]), interpolation=cv2.INTER_LINEAR)
                    img_stack[:, :, idx] = im_resized

            seg_stack = np.zeros((target_shape[1], target_shape[1], img_stack.shape[2]), dtype=np.int64)
            processed_imgs_stack = np.zeros((target_shape[1], target_shape[1], img_stack.shape[2]), dtype=np.float32)  # Use the same shape as img_stack
            # seg_stack = np.zeros_like(img_stack, dtype=np.int64)  # Use the same shape as img_stack
            segs = []
            for idx in range(img_stack.shape[2]):
                img = img_stack[:, :, idx]  # Get the current slice
                #print(f"Original image shape: {img.shape}, ndim: {img.ndim}")

                if img.shape[0] != target_shape[0] or img.shape[1] != target_shape[1]:
                    img = cv2.resize(img, (target_shape[1], target_shape[0]), interpolation=cv2.INTER_LINEAR)


                # Resample width and height to fixed size (256, 256) 
                # im_resized = cv2.resize(img, (target_shape[1], target_shape[0]), interpolation=cv2.INTER_LINEAR)

                # Adjust contrast
                # img = cv2.convertScaleAbs(img, alpha=1.465, beta=0.0) # Used alpha-value provided by Giulia (this adjusts contrast)
                # img = cv2.convertScaleAbs(img, alpha=1.1, beta=0.0) # Used alpha-value provided by Giulia (this adjusts contrast), alpha 

                #print(f"Resized image shape: {im_resized.shape}")

                # Normalize and add batch & channel dims
                # input_tensor = torch.from_numpy(im_resized / im_resized.max()).float()[None, None, :, :]
                input_tensor = torch.from_numpy(img / img.max()).float()[None, None, :, :]
                # input_tensor = torch.from_numpy(img).float()[None, None, :, :]

                # Predict
                with torch.no_grad():
                    pred = net(input_tensor)
                    pred = torch.softmax(pred[0], dim=0)
                    seg = torch.argmax(pred, dim=0).numpy()

                # print(f"Processed patient {pid}, slice {idx}: seg shape {seg.shape}, dtype {seg.dtype}")
                # segs.append(seg)

                processed_imgs_stack[:, :, idx] = img  # Store the processed image for this slice
                seg_stack[:, :, idx] = seg  # Store the segmentation for this slice

                # # Save
                # pid_folder = os.path.join(output_root, str(pid))
                # os.makedirs(pid_folder, exist_ok=True)

                # affine = np.eye(4)  # identity affine 
                # #TODO: Look into this. This is an identity affine to map from numpy array to nifti file format, but we should probably
                # # use the one from the DICOM - or does this not matter for input into Giulia's code?

                # nib.save(nib.Nifti1Image(img, affine), os.path.join(pid_folder, f"{idx}_img.nii.gz"))
                # nib.save(nib.Nifti1Image(seg.astype(np.uint8), affine), os.path.join(pid_folder, f"{idx}_seg.nii.gz"))

                # print(f"Saved {pid} slice {idx}")
            # seg_stack = np.stack(segs, axis=-1)  # Stack all segmentations along the last dimension
            
            # Save all slices for this patient
            pid_folder = os.path.join(output_root, str(pid))
            os.makedirs(pid_folder, exist_ok=True)

            affine = np.eye(4)  # identity affine
            # affine = np.eye(9)
            # affine = nifti.affine
            
            # Save the entire image stack and segmentation stack as NIfTI files
            nib.save(nib.Nifti1Image(processed_imgs_stack, affine), os.path.join(pid_folder, "img_stack.nii.gz"))
            nib.save(nib.Nifti1Image(seg_stack.astype(np.uint8), affine), os.path.join(pid_folder, "seg_stack.nii.gz"))

            print(f"Saved all slices for patient {pid} to {pid_folder}")


In [219]:
cmr_files = get_cmr_nifti_files(base_path)

# Run segmentation on the CMR files
run_segmentation([cmr_files], output_root="/Users/inad001/Documents/SSCP25/segmented_nifti")

Saved all slices for patient 95 to /Users/inad001/Documents/SSCP25/segmented_nifti/95
Saved all slices for patient 132 to /Users/inad001/Documents/SSCP25/segmented_nifti/132
Saved all slices for patient 92 to /Users/inad001/Documents/SSCP25/segmented_nifti/92
Saved all slices for patient 66 to /Users/inad001/Documents/SSCP25/segmented_nifti/66
Saved all slices for patient 103 to /Users/inad001/Documents/SSCP25/segmented_nifti/103
Saved all slices for patient 157 to /Users/inad001/Documents/SSCP25/segmented_nifti/157


  input_tensor = torch.from_numpy(img / img.max()).float()[None, None, :, :]


Saved all slices for patient 150 to /Users/inad001/Documents/SSCP25/segmented_nifti/150
Saved all slices for patient 166 to /Users/inad001/Documents/SSCP25/segmented_nifti/166
Saved all slices for patient 35 to /Users/inad001/Documents/SSCP25/segmented_nifti/35
Saved all slices for patient 161 to /Users/inad001/Documents/SSCP25/segmented_nifti/161
Saved all slices for patient 102 to /Users/inad001/Documents/SSCP25/segmented_nifti/102
Saved all slices for patient 105 to /Users/inad001/Documents/SSCP25/segmented_nifti/105
Saved all slices for patient 58 to /Users/inad001/Documents/SSCP25/segmented_nifti/58
Saved all slices for patient 67 to /Users/inad001/Documents/SSCP25/segmented_nifti/67
Saved all slices for patient 93 to /Users/inad001/Documents/SSCP25/segmented_nifti/93
Saved all slices for patient 94 to /Users/inad001/Documents/SSCP25/segmented_nifti/94
Saved all slices for patient 160 to /Users/inad001/Documents/SSCP25/segmented_nifti/160
Saved all slices for patient 158 to /Users