https://github.com/Artsplendr/Medical-Images-Liver-Segmentation-NIfTI/blob/master/Medical_Images_(Liver)_Segmentation_UNet.ipynb

In [1]:
import numpy as np
import os
import nibabel as nib
from nibabel.testing import data_path

In [10]:
import numpy as np
import os
from glob import glob

def get_pixels_hu(scans):
    """
    Build an int16 volume in Hounsfield units (HU) from a sequence of scans.

    Each element of ``scans`` must expose ``pixel_array``; the rescale
    parameters (``RescaleSlope`` / ``RescaleIntercept``) are read from the
    first element only and applied to the whole stacked volume.

    Returns: int16 array with one leading axis entry per scan
    """
    # Stack the slices and work on an int16 copy, so the inputs stay untouched.
    volume = np.stack([scan.pixel_array for scan in scans]).astype(np.int16)

    # -2000 marks pixels outside the scan; they are reset to 0 before rescaling.
    volume[volume == -2000] = 0

    first_scan = scans[0]
    intercept = first_scan.RescaleIntercept
    slope = first_scan.RescaleSlope

    # Scale in float64 only when needed; the cast back to int16 truncates.
    if slope != 1:
        volume = (slope * volume.astype(np.float64)).astype(np.int16)

    volume += np.int16(intercept)

    return np.array(volume, dtype=np.int16)

def get_substrings(path_to_folder: list[str], str_match: str = '_liver'):
    """
    Keep only the entries of a list of strings that contain ``str_match``.

    Despite its name, ``path_to_folder`` is the list of candidate strings
    (e.g. file paths), not a folder path. The input order is preserved.

    Returns: List of the entries that contain str_match
    """
    return [entry for entry in path_to_folder if str_match in entry]

def _case_tags(file_path: str) -> set:
    """Return the "e01" .. "e99" style case tags contained in the file name."""
    file_name = os.path.basename(file_path)
    candidates = (f"e{number:02d}" for number in range(1, 100))
    return {tag for tag in candidates if tag in file_name}


def preprocess(path_folder_with_raw_data: str, 
               path_folder_processed_data: str, 
               file_name_processed_images: str = 'imgs_train.npy',
               file_name_processed_masks: str = 'masks_train.npy',
               training_files: bool = True,
               image_rows: int = int(512/2), 
               image_cols: int = int(512/2)):
    """
    Convert paired NIfTI volumes into two .npy stacks of 2D axial slices.

    Files in ``path_folder_with_raw_data`` whose path contains '_liver' are
    treated as segmentation masks and those containing '_orig' as the source
    images; after sorting, the two lists are paired position by position.
    Every volume is subsampled by a factor of 2 along its first two axes and
    cut into 2D slices along the third axis.

    Args:
        path_folder_with_raw_data: folder that holds the raw NIfTI files.
        path_folder_processed_data: folder the two .npy files are written to.
        file_name_processed_images: file name of the saved image stack.
        file_name_processed_masks: file name of the saved mask stack.
        training_files: if True, keep only slices whose mask holds more than
            one distinct value; if False, keep every slice and assert that
            mask and image have the same number of slices.
        image_rows: number of rows of each stored slice.
        image_cols: number of columns of each stored slice.

    Returns: None. The arrays are saved with np.save and the target paths
    are printed.
    """
    nifti_files = sorted(glob(os.path.join(path_folder_with_raw_data, '*')))
    # file names corresponding to the segmentation masks
    segm_masks = get_substrings(nifti_files, '_liver')
    # file names corresponding to the source images
    source_images = get_substrings(nifti_files, '_orig')

    # Double check that each mask is paired with the image of the same case:
    # both file names must carry the same "eNN" tag. This is diagnostic only,
    # processing continues after the warning.
    if len(segm_masks) != len(source_images):
        print("indices mismatch!!!!")
    else:
        for liver, orig in zip(segm_masks, source_images):
            tags = _case_tags(liver)
            if not tags or tags != _case_tags(orig):
                print("indices mismatch!!!!")
                break

    masks_list = []
    images_list = []

    for liver_mask, orig_scan in zip(segm_masks, source_images):
        # glob() already returned paths that include the raw-data folder, so
        # they are loaded as-is; joining the folder again would duplicate it
        # whenever the folder is given as a relative path.
        mask_nifti = nib.load(liver_mask)
        image_nifti = nib.load(orig_scan)

        if not training_files:
            assert mask_nifti.shape[2] == image_nifti.shape[2]

        # Fetch the voxel data once per volume instead of once per slice.
        mask_volume = mask_nifti.get_fdata()
        image_volume = image_nifti.get_fdata()

        # Visit every axial slice, including the last one.
        for k in range(mask_nifti.shape[2]):
            # axial cut along the third axis, undersampled by 2 in-plane
            mask_2d = np.array(mask_volume[::2, ::2, k])
            # For training, skip slices whose mask is constant: a single
            # distinct value means there is nothing to segment in the slice.
            if training_files and len(np.unique(mask_2d)) == 1:
                continue
            masks_list.append(mask_2d)
            images_list.append(np.array(image_volume[::2, ::2, k]))

    # NOTE(review): slices are cast to uint8 on assignment below; intensities
    # outside 0..255 will not survive the cast - confirm the raw value range.
    imgs = np.ndarray(
            (len(images_list), image_rows, image_cols), dtype='uint8'
            )
    imgs_mask = np.ndarray(
            (len(masks_list), image_rows, image_cols), dtype='uint8'
            )

    for index, img in enumerate(images_list):
        imgs[index, :, :] = img

    for index, mask in enumerate(masks_list):
        imgs_mask[index, :, :] = mask

    images_path = os.path.join(path_folder_processed_data, file_name_processed_images)
    masks_path = os.path.join(path_folder_processed_data, file_name_processed_masks)

    np.save(get_clean_path(images_path), imgs)
    np.save(get_clean_path(masks_path), imgs_mask)

    print('Images and masks saved to .npy files at:',
          '\n',
          images_path,
          '\n', 
          masks_path
          )
    

def dice_coef(y_true, y_pred):
    """
    Compute the smoothed Dice coefficient between two tensors.

    Both inputs are flattened with the backend ``K``. The result is
    (2 * sum(y_true * y_pred) + smooth) / (sum(y_true) + sum(y_pred) + smooth),
    where ``smooth`` is a module-level value defined outside this function.
    """
    truth_flat = K.flatten(y_true)
    pred_flat = K.flatten(y_pred)
    overlap = K.sum(truth_flat * pred_flat)
    numerator = 2. * overlap + smooth
    denominator = K.sum(truth_flat) + K.sum(pred_flat) + smooth
    return numerator / denominator

def dice_coef_loss(y_true, y_pred):
    """Return the negated Dice coefficient, so a higher overlap gives a lower loss."""
    score = dice_coef(y_true, y_pred)
    return -score

def get_clean_path(path: str):
    """
    Normalise a path string.

    Backslashes become forward slashes, a carriage return becomes "/r" and a
    line feed becomes "/n" (presumably to repair Windows paths whose "\\r" or
    "\\n" was read as an escape sequence - confirm); the result is then passed
    through os.path.normpath.
    """
    substitutions = (("\\", "/"), ("\r", "/r"), ("\n", "/n"))
    cleaned = path
    for found, replacement in substitutions:
        cleaned = cleaned.replace(found, replacement)
    return os.path.normpath(cleaned)

In [3]:
# Hard-coded absolute folders for the raw train / eval data
# (presumably the inputs handed to preprocess() - confirm against the caller).
data_path_raw_train = "F:/MA/ircad-dataset/raw/train"
data_path_raw_eval = "F:/MA/ircad-dataset/raw/eval"

# Hard-coded absolute folders for the preprocessed train / eval data
# (presumably where preprocess() writes its .npy files - confirm).
data_path_preprocessed_train = "F:/MA/ircad-dataset/preprocessed/train"
data_path_preprocessed_eval  = "F:/MA/ircad-dataset/preprocessed/eval"