In [None]:
from os import listdir
from pathlib import Path

import numpy as np
import pandas as pd
import nibabel as nib
from PIL import Image


In [None]:
# Construct patient list and related tumor grade
data_dir = "../data"
nifti_suffix = "_nifti"

# listdir() returns entries in arbitrary, filesystem-dependent order. Sorting
# keeps the patient order stable, which matters because the slice-sampling
# cell draws from a seeded random generator while iterating over this list.
full_patient_list = [
    entry[: -len(nifti_suffix)]
    for entry in sorted(listdir(data_dir))
    # Keep only "<patient_id>_nifti" entries and skip anything containing "FU".
    # The suffix check also drops stray entries (e.g. hidden files) that would
    # otherwise fail the metadata lookup below.
    if entry.endswith(nifti_suffix) and "FU" not in entry
]


def pad_patient_number(raw_id):
    """Left-pad the part after the last "-" with zeros to at least 4 characters.

    Example: "A-B-12" -> "A-B-0012". Presumably this aligns the metadata IDs
    with the directory-derived patient IDs -- verify against the CSV.
    """
    prefix, _, number = raw_id.rpartition("-")
    return f"{prefix}-{number.rjust(4, '0')}"


meta_data = pd.read_csv("../processed-data/UCSF-PDGM-metadata_v5.csv")
meta_data["ID"] = meta_data["ID"].apply(pad_patient_number)
grade_key = "WHO CNS Grade"

# One grade per ID; when an ID appears more than once, the first row wins.
grade_by_id = meta_data.drop_duplicates("ID").set_index("ID")[grade_key]

# Fail early, naming every patient that has no metadata row, instead of an
# opaque IndexError on the first missing one.
missing_ids = [patient_id for patient_id in full_patient_list if patient_id not in grade_by_id.index]
if missing_ids:
    raise KeyError(f"No metadata row found for patient(s): {missing_ids}")

tumor_grade = [grade_by_id.loc[patient_id] for patient_id in full_patient_list]
print(len(full_patient_list), len(tumor_grade))

In [None]:
# Construct images from slices of MRIs.
data_dir = "../data"
tumor_save_dir = "../processed-data/images/tumor"
notumor_save_dir = "../processed-data/images/notumor"
image_name_template = "{patient_id}_mri-axis-{mri_axis}_slice-{slice}.png"

patient_id_list = full_patient_list
samples_per_patient_per_label = 4
min_relative_brain_area_per_sample = .3
modalities = ["T1", "T1c", "T2", "FLAIR"]
mri_axes = [0,1,2]
random_seed = 360
random_sampler = np.random.default_rng(seed=random_seed)


def scale_to_uint8(img_array):
    """Min-max scale an array to the range [0, 255] and return it as uint8.

    A constant array (max == min) has no contrast to stretch; it is returned
    as all zeros instead of dividing by zero.
    """
    min_value = np.min(img_array)
    value_range = np.max(img_array) - min_value
    if value_range == 0:
        return np.zeros(np.shape(img_array), dtype=np.uint8)
    # uint8 (not int8): an 8-bit grayscale image stores unsigned values, and
    # casting floats above 127 to int8 is out of range.
    return ((img_array - min_value) / value_range * 255).astype(np.uint8)


def save_slices_as_png(mri_data, mri_axis, slice_indices, patient_id, save_dir):
    """Save the given slices of a 3D volume as 8-bit grayscale PNG files.

    Parameters:
        mri_data: 3D array to slice.
        mri_axis: axis (0, 1 or 2) along which slice_indices are taken.
        slice_indices: iterable of integer positions along mri_axis.
        patient_id: patient identifier used in the file name.
        save_dir: output directory; created if it does not exist.
    """
    Path(save_dir).mkdir(parents=True, exist_ok=True)
    for slice_index in slice_indices:
        img_array = np.take(mri_data, slice_index, axis=mri_axis)
        # A 2D uint8 array is interpreted by Pillow as mode "L" (8-bit grayscale).
        img = Image.fromarray(scale_to_uint8(img_array))
        save_path = save_dir + "/" + image_name_template.format(patient_id=patient_id, mri_axis=mri_axis, slice=slice_index)
        img.save(save_path)


for patient_id in patient_id_list:
    patient_directory = f"{data_dir}/{patient_id}_nifti"
    brain_seg_path = f"{patient_directory}/{patient_id}_brain_segmentation.nii.gz"
    tumor_seg_path = f"{patient_directory}/{patient_id}_tumor_segmentation.nii.gz"
    brain_seg_mri = nib.load(brain_seg_path).get_fdata() # 3D array containing brain segmentation
    tumor_seg_mri = nib.load(tumor_seg_path).get_fdata() # 3D array containing tumor segmentation

    # The candidate slices depend only on the segmentations, not on the
    # modality, so compute them once per patient and axis.
    candidate_slices = {}
    for mri_axis in mri_axes:
        transverse_axes = tuple(i for i in mri_axes if i != mri_axis)
        # Segmented brain area of each slice, relative to the largest slice.
        brain_area_per_slice = brain_seg_mri.sum(axis=transverse_axes)
        normalized_brain_seg_area = brain_area_per_slice / np.max(brain_area_per_slice)
        above_min_relative_brain_area_mask = normalized_brain_seg_area > min_relative_brain_area_per_sample
        # A slice counts as "tumor" if any voxel in it is segmented as tumor.
        tumor_location_mask = tumor_seg_mri.any(axis=transverse_axes)
        min_relative_area_no_tumor_mask = np.logical_and(above_min_relative_brain_area_mask, ~tumor_location_mask)
        candidate_slices[mri_axis] = (
            np.flatnonzero(min_relative_area_no_tumor_mask),
            np.flatnonzero(tumor_location_mask),
        )

    for modality in modalities:
        modality_path = f"{patient_directory}/{patient_id}_{modality}.nii.gz"
        mri_data = nib.load(modality_path).get_fdata()
        for mri_axis in mri_axes:
            min_relative_area_no_tumor_indices, tumor_location_indices = candidate_slices[mri_axis]
            # Sample without replacement, capped at the number of available
            # slices. The draw order (no-tumor first, then tumor) is fixed so
            # the seeded generator yields the same selection on every run.
            n_samples_notumor = min(samples_per_patient_per_label, min_relative_area_no_tumor_indices.size)
            n_samples_tumor = min(samples_per_patient_per_label, tumor_location_indices.size)
            random_tumorless_brain_slices = random_sampler.choice(min_relative_area_no_tumor_indices, size=n_samples_notumor, replace=False)
            random_tumor_slices = random_sampler.choice(tumor_location_indices, size=n_samples_tumor, replace=False)
            save_slices_as_png(mri_data, mri_axis, random_tumorless_brain_slices, patient_id, notumor_save_dir + "/" + modality)
            save_slices_as_png(mri_data, mri_axis, random_tumor_slices, patient_id, tumor_save_dir + "/" + modality)
