# __Import & config__

In [1]:
%load_ext autoreload
%autoreload 2
import os
# Work from the project root so that the relative 'data/...' paths used in the
# "Prepare data" cells resolve.
# NOTE(review): this is a machine-specific absolute path -- adjust it (or make
# it configurable) before running the notebook on another machine.
os.chdir('C:\\Users\\Usuario\\TFG\\digipanca\\')

In [13]:
import nibabel as nib
import numpy as np
import torch
from tqdm.auto import tqdm

# __Functions__

In [10]:
def get_patient_data(patient_dir):
    """Load a patient's CT volume and merge its four masks into one label map.

    The patient folder is expected to contain ``SEQ/CTport-<id>.nii`` and
    ``SEG/{Pancreas,Tumor,Arterias,Venas}-<id>.nii``, where ``<id>`` is the
    name of the folder itself.

    Every mask is binarized (``> 0``) and painted into a single ``uint8``
    volume with the labels 1 = pancreas, 2 = tumor, 3 = arteries, 4 = veins.
    Masks are painted in that order, so where structures overlap the later
    one wins.

    Parameters
    ----------
    patient_dir : str
        Path to the patient folder (a trailing path separator is tolerated).

    Returns
    -------
    tuple
        ``(image_nii, masks_nii)``: the image returned by ``nib.load`` for the
        CT volume, and a ``nib.Nifti1Image`` holding the combined label map.
        The label image carries the affine and header of the last mask that
        was loaded (veins).

    Raises
    ------
    ValueError
        If a mask does not have the same shape as the CT volume.
    """
    # normpath drops a trailing separator, which would otherwise yield an empty id
    patient_id = os.path.basename(os.path.normpath(patient_dir))

    # Image and mask paths
    image_path = os.path.join(patient_dir, "SEQ", f"CTport-{patient_id}.nii")
    # Insertion order defines both the label value and the overlap priority
    mask_paths = {
        "pancreas": os.path.join(patient_dir, "SEG", f"Pancreas-{patient_id}.nii"),
        "tumor": os.path.join(patient_dir, "SEG", f"Tumor-{patient_id}.nii"),
        "arteries": os.path.join(patient_dir, "SEG", f"Arterias-{patient_id}.nii"),
        "veins": os.path.join(patient_dir, "SEG", f"Venas-{patient_id}.nii"),
    }

    image_nii = nib.load(image_path)

    # Only the shape is needed here, so avoid decoding the whole CT to float64
    masks = np.zeros(image_nii.shape, dtype=np.uint8)

    # Combine the segmentation masks
    mask_nii = None
    for label, (structure, mask_path) in enumerate(mask_paths.items(), start=1):
        mask_nii = nib.load(mask_path)
        if mask_nii.shape != image_nii.shape:
            raise ValueError(
                f"Mask '{structure}' of patient '{patient_id}' has shape "
                f"{mask_nii.shape}, expected {image_nii.shape}"
            )
        # Any positive voxel belongs to the structure
        masks[mask_nii.get_fdata() > 0] = label

    # Geometry is taken from the last loaded mask (veins).
    # NOTE(review): assumes all masks share the CT's affine -- confirm on the raw data.
    masks_nii = nib.Nifti1Image(
        dataobj=masks,
        affine=mask_nii.affine,
        header=mask_nii.header
    )

    return image_nii, masks_nii

In [None]:
def save_patient_data(folder, patient_id, vol, msk,
                      image_subdir='images', mask_subdir='masks'):
    """Save a patient's volume and label mask as ``rtumNNN.nii.gz`` files.

    Both files share the same name, so they are written to two different
    sub-folders of ``folder``; writing them to the same location would make
    the mask overwrite the volume.

    Parameters
    ----------
    folder : str
        Root output folder.
    patient_id : str
        Identifier of the form ``rtumN`` (e.g. ``'rtum1'`` or ``'rtum01'``);
        the number is zero-padded to three digits in the file name.
    vol, msk
        Image objects accepted by ``nib.save``.
    image_subdir, mask_subdir : str, optional
        Sub-folders of ``folder`` that receive the volume and the mask.
        They are created if they do not exist.

    Raises
    ------
    ValueError
        If the volume and the mask would end up in the same file.
    """
    # Get number of the patient
    n = int(patient_id.split('m')[1])   # rtumN
    file_name = f'rtum{n:03d}.nii.gz'

    vol_dir = os.path.join(folder, image_subdir)
    msk_dir = os.path.join(folder, mask_subdir)
    vol_path = os.path.join(vol_dir, file_name)
    msk_path = os.path.join(msk_dir, file_name)
    if os.path.normpath(vol_path) == os.path.normpath(msk_path):
        raise ValueError(
            "image_subdir and mask_subdir must differ, otherwise the mask "
            "overwrites the volume"
        )

    os.makedirs(vol_dir, exist_ok=True)
    os.makedirs(msk_dir, exist_ok=True)

    # Save the volume and mask
    nib.save(vol, vol_path)
    nib.save(msk, msk_path)

## Test functions

In [11]:
# Smoke test on a single patient. Paths are relative to the project root,
# which the first cell makes the working directory.
patient_dir = os.path.join('data', 'raw', 'train', 'rtum1')
patient_id = os.path.basename(patient_dir)
image_nii, masks_nii = get_patient_data(patient_dir)
print(image_nii.shape)
print(masks_nii.shape)
# Save the data
save_folder = os.path.join('data', 'prepared', 'train')
os.makedirs(os.path.join(save_folder, 'images'), exist_ok=True)
os.makedirs(os.path.join(save_folder, 'masks'), exist_ok=True)
save_patient_data(save_folder, patient_id, image_nii, masks_nii)
# The saved files are checked in the next cell

(512, 512, 91)
(512, 512, 91)


In [12]:
# Round-trip check: reload the files written by the previous cell and compare
# their voxel data with the in-memory images (image first, then mask).
saved_name = 'rtum001.nii.gz'
image_nii2 = nib.load(os.path.join(save_folder, 'images', saved_name))
masks_nii2 = nib.load(os.path.join(save_folder, 'masks', saved_name))
for original, reloaded in ((image_nii, image_nii2), (masks_nii, masks_nii2)):
    print(np.array_equal(original.get_fdata(), reloaded.get_fdata()))

True
True


# __Prepare data__

## Train data

In [None]:
# Convert every raw training patient into a CT volume (imagesTr) and a
# combined label map (labelsTr).
data_dir = 'data/raw/train'
save_folder = 'data/prepared/'
images_dir = os.path.join(save_folder, 'imagesTr')
labels_dir = os.path.join(save_folder, 'labelsTr')
os.makedirs(images_dir, exist_ok=True)
os.makedirs(labels_dir, exist_ok=True)
# Get all patient directories (sorted so the processing order is reproducible)
patient_dirs = sorted(
    os.path.join(data_dir, d)
    for d in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, d))
)
print(f"Found {len(patient_dirs)} patient directories.")
# Process each patient directory
for patient_dir in tqdm(patient_dirs):
    # Get the patient ID
    patient_id = os.path.basename(patient_dir)
    # Get the image and mask data
    image_nii, masks_nii = get_patient_data(patient_dir)
    # Same naming scheme as save_patient_data: rtumN -> rtumNNN.nii.gz
    n = int(patient_id.split('m')[1])
    file_name = f'rtum{n:03d}.nii.gz'
    # Write straight into the split-specific folders created above; image and
    # label share the file name but live in different folders.
    nib.save(image_nii, os.path.join(images_dir, file_name))
    nib.save(masks_nii, os.path.join(labels_dir, file_name))

Found 88 patient directories.


  0%|          | 0/88 [00:00<?, ?it/s]

## Test data

In [None]:
# Convert every raw test patient into a CT volume (imagesTs) and a combined
# label map (labelsTs).
data_dir = 'data/raw/test'
save_folder = 'data/prepared/'
images_dir = os.path.join(save_folder, 'imagesTs')
labels_dir = os.path.join(save_folder, 'labelsTs')
os.makedirs(images_dir, exist_ok=True)
os.makedirs(labels_dir, exist_ok=True)
# Get all patient directories (sorted so the processing order is reproducible)
patient_dirs = sorted(
    os.path.join(data_dir, d)
    for d in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, d))
)
print(f"Found {len(patient_dirs)} patient directories.")
# Process each patient directory
for patient_dir in tqdm(patient_dirs):
    # Get the patient ID
    patient_id = os.path.basename(patient_dir)
    # Get the image and mask data
    image_nii, masks_nii = get_patient_data(patient_dir)
    # Same naming scheme as save_patient_data: rtumN -> rtumNNN.nii.gz
    n = int(patient_id.split('m')[1])
    file_name = f'rtum{n:03d}.nii.gz'
    # Write straight into the split-specific folders created above; image and
    # label share the file name but live in different folders.
    nib.save(image_nii, os.path.join(images_dir, file_name))
    nib.save(masks_nii, os.path.join(labels_dir, file_name))

Found 28 patient directories.


  0%|          | 0/28 [00:00<?, ?it/s]