# Preprocessing code

In [13]:
import os
import numpy as np
import matplotlib.pyplot as plt
import glob
from skimage import transform, util
import nibabel as nib
import tqdm
from PIL import Image
from skimage.restoration import denoise_tv_chambolle

def normalize_img(img):
    """Scale an image so that its largest value becomes 1.

    Parameters
    ----------
    img : array_like
        Image intensities.

    Returns
    -------
    numpy.ndarray
        ``img`` divided by its maximum. If the maximum is 0 (for example an
        empty, all-zero slice) an all-zero floating point array of the same
        shape is returned instead of dividing by zero.
    """
    img = np.asarray(img)
    peak = np.max(img)

    if peak == 0:
        # Dividing by a zero maximum would raise a RuntimeWarning and fill the
        # result with NaN/inf, which later turns into garbage when the slice
        # is cast to uint8. There is nothing to scale, so return zeros.
        out_dtype = img.dtype if np.issubdtype(img.dtype, np.inexact) else np.float64
        return np.zeros(img.shape, dtype=out_dtype)

    return np.divide(img, peak)


def _centered_spans(size, target):
    """Return the (source, destination) slices for one axis.

    When the axis is longer than ``target`` the source is a centered window of
    length ``target`` and the destination is the full axis. Otherwise the full
    source is placed in a centered window of the destination.
    """
    if size > target:
        start = (size - target) // 2
        return slice(start, start + target), slice(None)

    start = (target - size) // 2
    return slice(None), slice(start, start + size)


def crop_pad_resize(image, nx, ny):
    """Center-crop and/or zero-pad a 2D image to exactly ``(nx, ny)``.

    Each axis is handled on its own: an axis longer than the target is cropped
    around its center, a shorter (or equal) axis is centered inside zeros.

    Parameters
    ----------
    image : numpy.ndarray
        2D array to resize.
    nx, ny : int
        Target size along the first and second axis.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(nx, ny)``. When both axes are strictly larger than
        the target this is a slice of ``image`` (same dtype); in every other
        case it is a new array created with ``np.zeros`` and filled in.
    """
    rows, cols = image.shape
    src_rows, dst_rows = _centered_spans(rows, nx)
    src_cols, dst_cols = _centered_spans(cols, ny)

    # Pure crop: no padding needed, hand back the window directly.
    if rows > nx and cols > ny:
        return image[src_rows, src_cols]

    # At least one axis needs padding: paste the (possibly cropped) image
    # into a zero canvas of the target size.
    canvas = np.zeros((nx, ny))
    canvas[dst_rows, dst_cols] = image[src_rows, src_cols]
    return canvas


def preprocess(input_folder, target_resolution, target_size, denoise=False, alphaTV=0.1):
    '''
    Preprocess ACDC data and write every slice as a PNG image.

    Every image slice is normalized, rescaled to the target resolution and
    cropped/padded to the target size. The matching ground-truth mask gets the
    same rescaling and cropping. The output folder ('preprocessed', or
    'preprocessed_denoised' when denoise is True) is created in the current
    working directory and mirrors the training/testing/patient layout of the
    input folder. A patient folder is only filled when it is empty.

    input_folder: the folder where raw ACDC data is located. It must contain
        a 'training' and a 'testing' folder with one folder per patient.
    target_resolution: desired resolution, should be a tuple with 2 items (x- and y-dimensions).
    target_size: desired size. Should be a tuple with 2 items (x- and y-dimensions).
    denoise: when True, apply total-variation (Chambolle) denoising to each
        cropped image slice before saving it. Masks are never denoised.
    alphaTV: weight passed to the total-variation denoiser.

    Returns None. Output is written to disk; progress and diagnostics are printed.
    '''
    nx, ny = target_size
    data_folder = input_folder

    # Patients whose mask label values are printed for manual inspection.
    inspected_patients = ('patient038', 'patient057')

    foldername = 'preprocessed_denoised' if denoise else 'preprocessed'

    if not os.path.exists(foldername):
        os.mkdir(foldername)
    else:
        print(foldername+' folder already exists. Continuing regardless.')

    # Loop over train and test folders
    for train_test in ['training', 'testing']:

        input_folder = os.path.join(data_folder, train_test)
        split_out = os.path.join(foldername, train_test)

        # Make train and test folders in preprocessed folder
        if not os.path.exists(split_out):
            os.mkdir(split_out)
        else:
            print('T'+train_test[1:]+' folder already exists. Continuing regardless.')

        # Loop over patient folders
        for folder in os.listdir(input_folder):

            folder_path = os.path.join(input_folder, folder)

            # Skip notebook checkpoint folders and stray files: only real
            # patient directories should get an output folder.
            if folder == '.ipynb_checkpoints' or not os.path.isdir(folder_path):
                continue

            # Make patient folders in preprocessed folder
            patient_out = os.path.join(split_out, folder)
            if not os.path.exists(patient_out):
                os.mkdir(patient_out)
            else:
                print('Folder for '+folder+' already exists. Continuing regardless.')

            # An automatically created checkpoint folder would make the
            # output folder look non-empty, so remove it first.
            checkpoint_dir = os.path.join(patient_out, '.ipynb_checkpoints')
            if os.path.exists(checkpoint_dir):
                os.rmdir(checkpoint_dir)

            # Only create files if the designated folder is empty
            if os.listdir(patient_out):
                print('Folder for '+folder+' is not empty. No files were written to this folder.')
                continue

            frame_files = glob.glob(os.path.join(folder_path, 'patient???_frame??.nii.gz'))
            if not frame_files:
                continue

            # The patient information is the same for every frame file, so
            # read it once per patient. The first line ends in the ED frame
            # number; every frame that is not ED is labelled ES.
            with open(os.path.join(folder_path, 'Info.cfg')) as f:
                lines = f.readlines()
            ED = int(lines[0].strip()[-2:])

            for file in frame_files:

                # 'patientXXX_frameYY' without the '.nii.gz' extension
                file_stem = os.path.basename(file)[:-len('.nii.gz')]
                file_mask = file.split('.nii.gz')[0] + '_gt.nii.gz'

                # Load data from .nii.gz files
                img_nii = nib.load(file)
                img = img_nii.get_fdata()
                mask = nib.load(file_mask).get_fdata()

                inspect_labels = any(patient in file for patient in inspected_patients)
                if inspect_labels:
                    print(np.unique(mask.astype(int)))

                pixel_size = img_nii.header.get_zooms()

                # Make vector to make all images have the same resolution
                scale_vector = [pixel_size[0] / target_resolution[0],
                                pixel_size[1] / target_resolution[1]]

                phase = 'ED' if 'frame{:02}'.format(ED) in file else 'ES'

                for zz in tqdm.tqdm(range(img.shape[2])):

                    # Normalize, rescale and crop the image
                    slice_img = normalize_img(np.squeeze(img[:, :, zz]))
                    img_rescaled = transform.rescale(slice_img,
                                                     scale_vector,
                                                     order=1,
                                                     preserve_range=True,
                                                     mode='constant')

                    # Rescale the mask with nearest-neighbour interpolation.
                    # anti_aliasing must be disabled explicitly: the mask is a
                    # float array, so the default would Gaussian-smooth the
                    # labels when downsampling and rounding would then create
                    # wrong labels along the structure boundaries.
                    slice_mask = np.squeeze(mask[:, :, zz])
                    mask_rescaled = np.around(transform.rescale(slice_mask,
                                                                scale_vector,
                                                                order=0,
                                                                preserve_range=True,
                                                                anti_aliasing=False,
                                                                mode='constant'))

                    img_cropped = crop_pad_resize(img_rescaled, nx, ny)
                    mask_cropped = crop_pad_resize(mask_rescaled, nx, ny)

                    if inspect_labels:
                        print(np.unique(mask_rescaled))

                    if denoise:
                        img_cropped = denoise_tv_chambolle(img_cropped, eps=1e-6, weight=alphaTV, max_num_iter=1000)

                    # Save images in PNG format
                    slice_name = file_stem+'_slice{:01}_'.format(zz)+phase

                    img_loc = os.path.join(patient_out, slice_name+'.png')
                    img_fin = Image.fromarray(np.uint8(255 * img_cropped), mode="L")
                    img_fin.save(img_loc, format='PNG')

                    # NOTE(review): labels are multiplied by 255 before the
                    # uint8 cast, so every label above 1 exceeds the uint8
                    # range and the stored value depends on NumPy's
                    # out-of-range cast behaviour. Kept unchanged because
                    # existing consumers of these PNGs may rely on it - confirm.
                    mask_loc = os.path.join(patient_out, slice_name+'_gt.png')
                    mask_fin = Image.fromarray(np.uint8(255 * mask_cropped), mode="L")
                    mask_fin.save(mask_loc, format='PNG')

    print("Preprocessing Finished")



#### Preprocess the data

In [14]:
# Resolution (x, y) every slice is rescaled to; presumably in the same unit
# as the NIfTI header zooms (mm per pixel) - TODO confirm.
target_resolution = (1.36719, 1.36719)
# Size (x, y) in pixels every slice is cropped or zero-padded to.
target_size = (212, 212)
# Folder that holds the raw data; must contain 'training' and 'testing'.
data_path = './database'

# Run the pipeline without denoising; output goes to the 'preprocessed' folder.
preprocess(data_path, target_resolution, target_size)

100%|██████████| 10/10 [00:00<00:00, 59.17it/s]
100%|██████████| 10/10 [00:00<00:00, 43.67it/s]
100%|██████████| 8/8 [00:00<00:00, 59.19it/s]
100%|██████████| 8/8 [00:00<00:00, 58.90it/s]
100%|██████████| 11/11 [00:00<00:00, 50.13it/s]
100%|██████████| 11/11 [00:00<00:00, 50.60it/s]
100%|██████████| 10/10 [00:00<00:00, 58.48it/s]
100%|██████████| 10/10 [00:00<00:00, 59.66it/s]
100%|██████████| 10/10 [00:00<00:00, 60.10it/s]
100%|██████████| 10/10 [00:00<00:00, 32.47it/s]
100%|██████████| 6/6 [00:00<00:00, 62.38it/s]
100%|██████████| 6/6 [00:00<00:00, 62.62it/s]
100%|██████████| 10/10 [00:00<00:00, 68.40it/s]
100%|██████████| 10/10 [00:00<00:00, 68.55it/s]
100%|██████████| 10/10 [00:00<00:00, 64.13it/s]
100%|██████████| 10/10 [00:00<00:00, 63.20it/s]
100%|██████████| 7/7 [00:00<00:00, 62.67it/s]
100%|██████████| 7/7 [00:00<00:00, 61.97it/s]
100%|██████████| 10/10 [00:00<00:00, 64.88it/s]
100%|██████████| 10/10 [00:00<00:00, 64.20it/s]
100%|██████████| 18/18 [00:00<00:00, 29.30it/s]
100%

[0 1 2 3]


100%|██████████| 8/8 [00:00<00:00, 63.62it/s]


[0. 1. 2. 3.]
[0. 1. 2. 3.]
[0. 1. 2. 3.]
[0. 1. 2. 3.]
[0. 1. 2. 3.]
[0. 1. 2. 3.]
[0. 2. 3.]
[0. 2. 3.]
[0 1 2 3]


  0%|          | 0/8 [00:00<?, ?it/s]

[0. 1. 2. 3.]
[0. 1. 2. 3.]


100%|██████████| 8/8 [00:00<00:00, 62.83it/s]


[0. 1. 2. 3.]
[0. 1. 2. 3.]
[0. 1. 2. 3.]
[0. 1. 2. 3.]
[0. 1. 2. 3.]
[0. 2. 3.]


100%|██████████| 11/11 [00:00<00:00, 77.21it/s]
100%|██████████| 11/11 [00:00<00:00, 76.25it/s]
100%|██████████| 10/10 [00:00<00:00, 69.42it/s]
100%|██████████| 10/10 [00:00<00:00, 69.12it/s]
100%|██████████| 9/9 [00:00<00:00, 71.22it/s]
100%|██████████| 9/9 [00:00<00:00, 70.47it/s]


[0 1 2 3]


100%|██████████| 8/8 [00:00<00:00, 43.92it/s]

[0. 1. 2. 3.]
[0. 1. 2. 3.]
[0. 1. 2. 3.]
[0. 1. 2. 3.]
[0. 1. 2. 3.]
[0. 1. 2. 3.]
[0. 1. 2. 3.]
[0. 2. 3.]





[0 1 2 3]


100%|██████████| 8/8 [00:00<00:00, 43.90it/s]

[0. 2. 3.]
[0. 1. 2. 3.]
[0. 1. 2. 3.]
[0. 1. 2. 3.]
[0. 1. 2. 3.]
[0. 1. 2. 3.]
[0. 2. 3.]
[0. 2.]



100%|██████████| 9/9 [00:00<00:00, 55.10it/s]
100%|██████████| 9/9 [00:00<00:00, 67.27it/s]
100%|██████████| 10/10 [00:00<00:00, 56.27it/s]
100%|██████████| 10/10 [00:00<00:00, 54.84it/s]
100%|██████████| 6/6 [00:00<00:00, 59.03it/s]
100%|██████████| 6/6 [00:00<00:00, 58.72it/s]
100%|██████████| 11/11 [00:00<00:00, 67.51it/s]
100%|██████████| 11/11 [00:00<00:00, 66.61it/s]
100%|██████████| 9/9 [00:00<00:00, 62.88it/s]
100%|██████████| 9/9 [00:00<00:00, 62.86it/s]
100%|██████████| 8/8 [00:00<00:00, 59.22it/s]
100%|██████████| 8/8 [00:00<00:00, 30.96it/s]
100%|██████████| 8/8 [00:00<00:00, 70.44it/s]
100%|██████████| 8/8 [00:00<00:00, 69.89it/s]
100%|██████████| 8/8 [00:00<00:00, 67.19it/s]
100%|██████████| 8/8 [00:00<00:00, 66.84it/s]
100%|██████████| 10/10 [00:00<00:00, 62.64it/s]
100%|██████████| 10/10 [00:00<00:00, 62.85it/s]
100%|██████████| 10/10 [00:00<00:00, 51.23it/s]
100%|██████████| 10/10 [00:00<00:00, 50.83it/s]
100%|██████████| 10/10 [00:00<00:00, 57.38it/s]
100%|██████████

Preprocessing Finished





#### Denoise the data

In [None]:
# Run the same pipeline with total-variation denoising enabled; output goes to
# the 'preprocessed_denoised' folder.
preprocess(data_path, target_resolution, target_size, denoise=True, alphaTV=0.2)
# alpha = 0.1 works fine but still leaves a lot of detail; 0.3 is perhaps just a bit too high.