# Export slices containing metastases

This notebook walks through each step needed to export images of the metastases, based on their segmentation masks.

In [6]:
# import libraries

import os
import nibabel as nib
import numpy as np
import matplotlib.pyplot as plt
from skimage.measure import label, regionprops
from pathlib import Path
from tqdm import tqdm
import pandas as pd

import imageio as iio
from PIL import Image

In [7]:
def save_slice_as_image(slice_data, output_path):
    """Write a 2D array to ``output_path`` as an 8-bit grayscale image.

    The input is multiplied by 255 before the cast to ``uint8``, so
    intensities are expected to lie in ``[0, 1]``.

    Args:
        slice_data: 2D array-like of numeric intensities.
        output_path: Destination file; the image format is chosen by PIL
            from the file extension.
    """
    scaled = np.asarray(slice_data, dtype=np.float64) * 255
    # A bare float -> uint8 cast wraps around (or is undefined) for values
    # outside [0, 255] and for NaN, which shows up as speckle artifacts in
    # bright regions. Saturate instead and map NaN to black.
    scaled = np.nan_to_num(scaled, nan=0.0)
    data = np.clip(scaled, 0, 255).astype(np.uint8)
    image = Image.fromarray(data)
    image.save(output_path)

def turn_image_90(image):
    """Return ``image`` rotated by three counter-clockwise quarter turns."""
    # k=-1 (one clockwise quarter turn) is the same rotation as k=3
    rotated = np.rot90(image, k=-1)
    return rotated

def mirror_image(image):
    """Return ``image`` flipped left-to-right (column order reversed)."""
    flipped = np.flip(image, axis=1)
    return flipped

def _resolve_target_label(mask_data, dataset):
    """Return ``(mask_data, target_label)`` for the given dataset name.

    The returned mask may be a relabelled copy of the input.
    """
    if dataset == "regensburg":
        return mask_data, 1

    if dataset == "yale":
        # according to the paper
        # 1: tumor necrosis
        # 2: edema
        # 3: enhancement within metastasis
        unique_labels = np.unique(mask_data)
        if len(unique_labels) > 3:
            if 1 in unique_labels and 3 in unique_labels:
                # fold label 3 into label 1 so both are found as one component
                mask_data = np.where(mask_data == 3, 1, mask_data)
            return mask_data, 1
        return mask_data, 3

    raise ValueError("Invalid dataset specified.")


def _largest_slice(coords):
    """Return ``(slice_idx, area)`` of the slice (axis 2) holding most voxels.

    ``coords`` is an ``(N, 3)`` array of voxel indices. When several slices
    share the maximum voxel count, the slice that appears first in ``coords``
    wins.
    """
    slice_ids, first_seen, counts = np.unique(
        coords[:, 2], return_index=True, return_counts=True
    )
    largest_area = counts.max()
    tied = np.flatnonzero(counts == largest_area)
    winner = tied[np.argmin(first_seen[tied])]
    return int(slice_ids[winner]), int(largest_area)


def process_nifty_mask(mask_path, mri_path, output_dir, patient_id=None, mri_sequence=None, save_mask_image = False, save_cut_out = False, area_threshold = 0, dataset = "regensburg"):
    """Export one PNG per metastasis, taken at that metastasis' largest slice.

    The mask is split into 3D connected components of the target label. For
    each component, the slice along axis 2 containing the most component
    voxels is selected, and the matching MRI slice is rotated, mirrored and
    saved as ``{patient_id}_metastasis_{slice}_{region}_{mri_sequence}.png``.

    Args:
        mask_path: Path (``str`` or ``Path``) to the NIfTI segmentation.
        mri_path: Path to the NIfTI MRI volume the slices are taken from.
        output_dir: Directory the PNG files are written to.
        patient_id: Used in output file names and log messages.
        mri_sequence: Used in output file names and log messages.
        save_mask_image: Also save ``..._mask.png`` holding every
            target-label pixel of the selected slice (all components, not
            only the current one).
        save_cut_out: Also save ``..._cutout.png``, the MRI slice with
            everything outside the current component set to zero.
        area_threshold: Components are exported only if the voxel count in
            their largest slice is strictly greater than this value.
        dataset: ``"regensburg"`` or ``"yale"``; selects the target label.

    Raises:
        ValueError: If ``dataset`` is unknown or the mask is not 3D.
    """
    mask_data = nib.load(mask_path).get_fdata()
    mri_data = nib.load(mri_path).get_fdata()

    mask_data, target_label = _resolve_target_label(mask_data, dataset)

    # Ensure the mask data is 3D
    if mask_data.ndim != 3:
        raise ValueError("The NIFTI mask data is not 3D.")

    # Find all connected components (metastases) with the target label in 3D
    labeled_mask = label(mask_data == target_label, connectivity=3)  # 3D connectivity
    regions = regionprops(labeled_mask)

    if not regions:
        # Path(...) so that plain string paths work here as well
        print(f"No regions found for target_label {target_label} in {Path(mask_path).name} for patient {patient_id}, seq {mri_sequence}")
        return

    for region in regions:
        coords = region.coords
        slice_idx, area = _largest_slice(coords)
        if area <= area_threshold:
            continue

        full_slice_data = mri_data[:, :, slice_idx]
        file_stem = f'{patient_id}_metastasis_{slice_idx}_{region.label}_{mri_sequence}'

        output_path = os.path.join(output_dir, f'{file_stem}.png')
        save_slice_as_image(mirror_image(turn_image_90(full_slice_data)), output_path)

        if save_mask_image:
            mask_output_path = os.path.join(output_dir, f'{file_stem}_mask.png')

            # Keep only the target-label pixels of this slice; working on the
            # 2D slice avoids allocating a full 3D copy per metastasis.
            mask_slice = mask_data[:, :, slice_idx]
            mask_slice_data = np.zeros_like(mask_slice)
            mask_slice_data[mask_slice == target_label] = target_label

            save_slice_as_image(mirror_image(turn_image_90(mask_slice_data)), mask_output_path)

        if save_cut_out:
            # Footprint of this one metastasis inside the selected slice
            in_slice = coords[coords[:, 2] == slice_idx]
            single_metastasis_mask = np.zeros(mask_data.shape[:2], dtype=bool)
            single_metastasis_mask[in_slice[:, 0], in_slice[:, 1]] = True

            masked_mri_slice = full_slice_data * single_metastasis_mask

            cutout_output_path = os.path.join(output_dir, f'{file_stem}_cutout.png')
            save_slice_as_image(mirror_image(turn_image_90(masked_mri_slice)), cutout_output_path)


In [None]:
# Example usage: paths for running process_nifty_mask on a single case.
# The commented-out pair below is an earlier test case; the active pair points at
# one case of the Yale data (BraTS-MET-00100-000).
#mask_file_path = '/Users/LennartPhilipp/Desktop/testing_data/derivatives/segmented_AURORA_20240424-122816/sub-01009590/metastasis_unbinarized_floats.nii.gz'
#mri_path = "/Users/LennartPhilipp/Desktop/testing_data/derivatives/preprocessed_brainlesion_20240424-110551/sub-01009590/preprocessed/sub-01009590_fla_bet_normalized.nii.gz"
mask_file_path = "/Users/LennartPhilipp/Desktop/Uni/Prowiss/CIA metstobrain/Pretreat-MetsToBrain-Masks/BraTS-MET-00100-000/BraTS-MET-00100-000-seg.nii.gz"
mri_path = "/Users/LennartPhilipp/Desktop/Uni/Prowiss/CIA metstobrain/yale_dataset_normalized/BraTS-MET-00100-000/n4_normalized_BraTS-MET-00100-000-t1c.nii.gz"
output_directory = '/Users/LennartPhilipp/Desktop/testing_data/derivatives/met_slices'
# Uncomment the two lines below to run the single-case export.
# NOTE(review): the call does not pass `dataset`, so the "regensburg" default would be
# used with the Yale paths above — presumably dataset="yale" is needed; confirm.
#os.makedirs(output_directory, exist_ok=True)
#process_nifty_mask(mask_file_path, mri_path, output_directory, save_mask_image=True, save_cut_out=True)

## Create a script to go through all patients

### Regensburg Dataset

Old code (first version, run on the testing data)

In [None]:
# Legacy batch export.
# Output: one folder per patient, holding the exported slices.
# Inputs:
#   - MRI folder: <patient>/preprocessed/ with the patient's MRI volumes
#   - segmentation folder: <patient>/metastasis_unbinarized_floats.nii.gz

path_to_output = Path("/Users/LennartPhilipp/Desktop/testing_data/derivatives/met_slices")
path_to_mri_patients = Path("/Users/LennartPhilipp/Desktop/testing_data/derivatives/preprocessed_brainlesion_20240424-110551")
path_to_segmentation_patients = Path("/Users/LennartPhilipp/Desktop/testing_data/derivatives/segmented_AURORA_20240424-122816")

# every sub-directory of the segmentation folder counts as one patient
segmentation_patients = [
    entry
    for entry in os.listdir(path_to_segmentation_patients)
    if (path_to_segmentation_patients / entry).is_dir()
]

for patient in tqdm(segmentation_patients):
    print(f"currently working on {patient}")

    # per-patient output folder
    path_to_patient_output = path_to_output / patient
    path_to_patient_output.mkdir(parents=True, exist_ok=True)

    # IMPORTANT: adjust this path for the final run
    path_to_segmentation = path_to_segmentation_patients / patient / "metastasis_unbinarized_floats.nii.gz"

    path_to_mri_images = path_to_mri_patients / patient / "preprocessed"
    mri_images = [name for name in os.listdir(path_to_mri_images) if ".nii.gz" in name]

    # one export pass per MRI volume of the patient
    for mri_image in mri_images:
        # the sequence name is the second "_"-separated token of the file name
        mri_sequence = mri_image.split("_")[1]
        path_to_mri = path_to_mri_images / mri_image
        process_nifty_mask(path_to_segmentation, path_to_mri, path_to_patient_output, patient, mri_sequence)

    print(f"done with patient {patient}")
    

Working with the actual segmented files

# IMPORTANT
Some patients were processed multiple times; the cause still needs to be investigated.

In [None]:
# Export metastasis slices for the Regensburg dataset.
# Output: one folder per patient, holding the exported slices.
# Inputs:
#   - MRI folder: <patientID>/perc_normalized/ with the patient's MRI volumes
#   - segmentation folder: one "labels" file per patient, named <patientID>_...
#   - participants TSV: only IDs in its "participant_id" column are exported

path_to_output = Path("/Users/LennartPhilipp/Desktop/Uni/Prowiss/Datensatz_RGB/regensburg_slices")
path_to_mri_patients = Path("/Users/LennartPhilipp/Desktop/Uni/Prowiss/Datensatz_RGB/preprocessed_n4_brainlesion_percentile_20240612-083743")
path_to_segmentation_patients = Path("/Users/LennartPhilipp/Desktop/Uni/Prowiss/Datensatz_RGB/Segs_Lennart_Export")

path_to_tsv = Path("/Users/LennartPhilipp/Desktop/Uni/Prowiss/Dateien/participants_01_08_24.tsv")

segmentation_patients = [patient for patient in os.listdir(path_to_segmentation_patients) if "labels" in patient]

pats_to_preprocess = pd.read_csv(path_to_tsv, sep="\t")["participant_id"].tolist()

# The output root must exist before it can be listed (first run).
path_to_output.mkdir(parents=True, exist_ok=True)
already_processed_patients = [patient for patient in os.listdir(path_to_output) if os.path.isdir(os.path.join(path_to_output, patient))]

# Sets give O(1) membership tests. `handled_ids` is also updated inside the
# loop, so a patient with more than one "labels" file is exported only once
# per run instead of once per file.
ids_to_process = set(pats_to_preprocess)
handled_ids = set(already_processed_patients)

counter = 0

for patient in tqdm(segmentation_patients):
    # the patient ID is everything before the first "_" of the file name
    patientID = patient.split("_")[0]

    # only export patients listed in the participants file
    if patientID not in ids_to_process:
        continue

    if patientID in handled_ids:
        print(f"patient {patientID} already processed")
        continue

    print(f"currently working on {patientID}")

    # IMPORTANT: adjust this path for the final run
    path_to_segmentation = path_to_segmentation_patients / patient

    path_to_mri_images = path_to_mri_patients / patientID / "perc_normalized"
    # A missing folder is treated like an empty one instead of aborting the run.
    if path_to_mri_images.is_dir():
        mri_images = [file for file in os.listdir(path_to_mri_images) if file.endswith(".nii.gz")]
    else:
        mri_images = []

    if not mri_images:
        print("********************************")
        print(f"patient {patientID} not complete")
        continue

    # Create the output folder only once the inputs are known to exist;
    # otherwise an incomplete patient leaves an empty folder behind and is
    # reported as "already processed" on the next run.
    path_to_patient_output = path_to_output / patientID
    path_to_patient_output.mkdir(parents=True, exist_ok=True)
    handled_ids.add(patientID)

    for mri_image in mri_images:
        # the sequence name is the second "_"-separated token of the file name
        mri_sequence = mri_image.split("_")[1]

        path_to_mri = path_to_mri_images / mri_image
        process_nifty_mask(path_to_segmentation, path_to_mri, path_to_patient_output, patientID, mri_sequence,
                           save_mask_image = True,
                           save_cut_out = True,
                           area_threshold = 10)

    print(f"done with patient {patientID}")

    counter += 1

# number of patients exported in this run
print(counter)
    

In [23]:
# Sanity check: read one exported PNG back and print its array shape.
path_to_image = Path("/Users/LennartPhilipp/Desktop/Uni/Prowiss/Datensatz_RGB/regensburg_test_slices/sub-01134825/sub-01134825_metastasis_105_1_t1.png")

test_image = iio.v3.imread(path_to_image)
print(test_image.shape)

(240, 240)


### Yale Dataset

In [8]:
# Folder with one sub-folder per Yale patient; the "*-seg.nii.gz" segmentations are read from here.
path_to_yale_patients_raw = Path("/Users/LennartPhilipp/Desktop/Uni/Prowiss/CIA metstobrain/Pretreat-MetsToBrain-Masks")
# Folder with one sub-folder per Yale patient holding the "n4_normalized" MRI volumes.
path_to_yale_patients_processed = Path("/Users/LennartPhilipp/Desktop/Uni/Prowiss/CIA metstobrain/yale_dataset_normalized")
# Root folder the exported slices are written to (one sub-folder per patient ID).
path_to_output = Path("/Users/LennartPhilipp/Desktop/Uni/Prowiss/CIA metstobrain/yale_metastases")

In [11]:
# Export metastasis slices for the Yale dataset.
# Output: one folder per patient ID, holding the exported slices.
# Inputs:
#   - raw folder: <patient>/...-seg.nii.gz (segmentation)
#   - processed folder: <patient>/n4_normalized_*.nii.gz (exactly 4 MRI volumes expected)

yale_patients = [
    patient_folder for patient_folder in os.listdir(path_to_yale_patients_raw)
    if os.path.isdir(os.path.join(path_to_yale_patients_raw, patient_folder)) and "BraTS-MET" in patient_folder
]

counter = 0

for patient in tqdm(yale_patients, desc = "Processing Yale Patients"):

    pat_id_extracted = patient.split("-")[2] # e.g., "00086" from "BraTS-MET-00086-000"

    print(f"currently working on patient ID: {pat_id_extracted} (from folder. {patient})")

    # IMPORTANT: adjust this path for the final run
    path_to_raw_patient_files = path_to_yale_patients_raw / patient
    segmentation_files = [file for file in os.listdir(path_to_raw_patient_files) if file.endswith("-seg.nii.gz")]
    if not segmentation_files:
        print(f"Warning: No segmentation file found for {patient}. Skipping.")
        continue
    path_to_segmentation = path_to_raw_patient_files / segmentation_files[0]

    # Get Processed MRI Images
    path_to_mri_images = path_to_yale_patients_processed / patient
    if not path_to_mri_images.exists():
        print(f"Warning: Processed MRI folder not found for {patient} at {path_to_mri_images}. Skippin.")
        continue

    # Expecting n4_normalized_BraTS-MET-XXXXX-000-t1c.nii.gz etc.
    mri_images = [
        file for file in os.listdir(path_to_mri_images)
        if file.endswith(".nii.gz") and "n4_normalized" in file and "seg" not in file # ensure it's not a segmentation file
    ]

    if len(mri_images) != 4:
        print("********************************")
        print(f"Warning: Not enough normalized MRI image files found for {patient} in {path_to_mri_images}. Skipping.")
        continue

    # Create the output folder only after all inputs were found, so skipped
    # patients do not leave empty folders behind.
    path_to_patient_output = path_to_output / pat_id_extracted
    path_to_patient_output.mkdir(parents=True, exist_ok=True)

    for mri_image_filename in mri_images:
        # Extract sequence type, e.g., "t1c" from "n4_normalized_BraTS-MET-00100-000-t1c.nii.gz"
        parts = mri_image_filename.split("-")
        if len(parts) < 5:
            print(f"Warning: Could not determine MRI sequence from filename: {mri_image_filename}. Skipping this file.")
            continue
        mri_sequence_type = parts[4].split(".")[0]

        path_to_current_mri = path_to_mri_images / mri_image_filename

        # Best effort: a failure on one volume is reported and the run goes on.
        try:
            process_nifty_mask(
                mask_path = path_to_segmentation,
                mri_path = path_to_current_mri,
                output_dir = path_to_patient_output,
                patient_id = pat_id_extracted,

                mri_sequence = mri_sequence_type,
                save_mask_image = True,
                save_cut_out = False,

                area_threshold = 10,
                dataset = "yale"
            )
        except Exception as e:
            print(f"Error processing MRI file {mri_image_filename} for patient {patient}: {e}")

    counter += 1

    print(f"done with patient {patient}")
    

Processing Yale Patients:   0%|          | 0/200 [00:00<?, ?it/s]

currently working on patient ID: 00086 (from folder. BraTS-MET-00086-000)


Processing Yale Patients:   0%|          | 1/200 [00:01<05:33,  1.68s/it]

done with patient BraTS-MET-00086-000
currently working on patient ID: 00284 (from folder. BraTS-MET-00284-000)


Processing Yale Patients:   1%|          | 2/200 [00:02<04:39,  1.41s/it]

done with patient BraTS-MET-00284-000
currently working on patient ID: 00290 (from folder. BraTS-MET-00290-000)


Processing Yale Patients:   2%|▏         | 3/200 [00:04<04:17,  1.30s/it]

done with patient BraTS-MET-00290-000
currently working on patient ID: 00247 (from folder. BraTS-MET-00247-000)


Processing Yale Patients:   2%|▏         | 4/200 [00:05<03:58,  1.22s/it]

done with patient BraTS-MET-00247-000
currently working on patient ID: 00253 (from folder. BraTS-MET-00253-000)


Processing Yale Patients:   2%|▎         | 5/200 [00:06<04:09,  1.28s/it]

done with patient BraTS-MET-00253-000
currently working on patient ID: 00119 (from folder. BraTS-MET-00119-000)


Processing Yale Patients:   3%|▎         | 6/200 [00:27<25:55,  8.02s/it]

done with patient BraTS-MET-00119-000
currently working on patient ID: 00131 (from folder. BraTS-MET-00131-000)


Processing Yale Patients:   4%|▎         | 7/200 [00:56<47:45, 14.85s/it]

done with patient BraTS-MET-00131-000
currently working on patient ID: 00125 (from folder. BraTS-MET-00125-000)


Processing Yale Patients:   4%|▍         | 8/200 [01:20<56:32, 17.67s/it]

done with patient BraTS-MET-00125-000
currently working on patient ID: 00124 (from folder. BraTS-MET-00124-000)


Processing Yale Patients:   4%|▍         | 9/200 [01:54<1:12:52, 22.90s/it]

done with patient BraTS-MET-00124-000
currently working on patient ID: 00130 (from folder. BraTS-MET-00130-000)


Processing Yale Patients:   5%|▌         | 10/200 [02:19<1:14:40, 23.58s/it]

done with patient BraTS-MET-00130-000
currently working on patient ID: 00118 (from folder. BraTS-MET-00118-000)


Processing Yale Patients:   6%|▌         | 11/200 [03:12<1:42:50, 32.65s/it]

done with patient BraTS-MET-00118-000
currently working on patient ID: 00252 (from folder. BraTS-MET-00252-000)


Processing Yale Patients:   6%|▌         | 12/200 [03:22<1:20:18, 25.63s/it]

done with patient BraTS-MET-00252-000
currently working on patient ID: 00291 (from folder. BraTS-MET-00291-000)


Processing Yale Patients:   6%|▋         | 13/200 [03:31<1:04:19, 20.64s/it]

done with patient BraTS-MET-00291-000
currently working on patient ID: 00285 (from folder. BraTS-MET-00285-000)


Processing Yale Patients:   7%|▋         | 14/200 [03:40<52:36, 16.97s/it]  

done with patient BraTS-MET-00285-000
currently working on patient ID: 00293 (from folder. BraTS-MET-00293-000)


Processing Yale Patients:   8%|▊         | 15/200 [03:49<45:06, 14.63s/it]

done with patient BraTS-MET-00293-000
currently working on patient ID: 00278 (from folder. BraTS-MET-00278-000)


Processing Yale Patients:   8%|▊         | 16/200 [03:58<39:48, 12.98s/it]

done with patient BraTS-MET-00278-000
currently working on patient ID: 00250 (from folder. BraTS-MET-00250-000)


Processing Yale Patients:   8%|▊         | 17/200 [04:07<36:17, 11.90s/it]

done with patient BraTS-MET-00250-000
currently working on patient ID: 00244 (from folder. BraTS-MET-00244-000)


Processing Yale Patients:   9%|▉         | 18/200 [04:16<33:29, 11.04s/it]

done with patient BraTS-MET-00244-000
currently working on patient ID: 00126 (from folder. BraTS-MET-00126-000)


Processing Yale Patients:  10%|▉         | 19/200 [04:26<31:56, 10.59s/it]

done with patient BraTS-MET-00126-000
currently working on patient ID: 00132 (from folder. BraTS-MET-00132-000)


Processing Yale Patients:  10%|█         | 20/200 [04:36<30:48, 10.27s/it]

done with patient BraTS-MET-00132-000
currently working on patient ID: 00133 (from folder. BraTS-MET-00133-000)


Processing Yale Patients:  10%|█         | 20/200 [04:45<42:49, 14.27s/it]


KeyboardInterrupt: 