# Preprocessing code

In [44]:
import os
import numpy as np
import matplotlib.pyplot as plt
import glob
from skimage import transform
import nibabel as nib
import tqdm
from PIL import Image
from skimage.restoration import denoise_tv_chambolle


def normalize_img(img):
    '''
    Scale an image by its maximum value.

    img: array-like image data.

    Returns img divided element-wise by np.max(img). When that maximum is
    exactly 0 (for example an all-zero slice) the division is undefined and
    would fill the result with NaN/inf, so an all-zero float array of the
    same shape is returned instead.
    '''
    peak = np.max(img)
    if peak == 0:
        # Nothing to scale by: avoid 0/0 and the NaNs it would propagate.
        return np.zeros(np.shape(img), dtype=float)
    return np.divide(img, peak)


def crop_pad_resize(image, nx, ny):
    '''
    Bring a 2D image to shape (nx, ny) by centre-cropping and/or zero-padding.

    Code from Christian F. Baumgartner and Lisa M. Koch from
    "An Exploration of 2D and 3D Deep Learning Techniques for
    Cardiac MR Image Segmentation" (2017).
    Comments by us.

    image: 2D array.
    nx, ny: desired number of rows and columns.

    When the image is larger than the target in both directions, the
    returned array is a slice of `image` itself (same dtype). In every
    other case a new np.zeros((nx, ny)) array is filled and returned.
    '''
    rows, cols = image.shape

    # Offsets are half of the size difference, because the surplus (or the
    # padding) is split over the two sides of each axis.
    crop_row = (rows - nx) // 2
    crop_col = (cols - ny) // 2
    pad_row = (nx - rows) // 2
    pad_col = (ny - cols) // 2

    too_tall = rows > nx
    too_wide = cols > ny

    if too_tall and too_wide:
        # Larger in both directions: cut out the central window.
        return image[crop_row:crop_row + nx, crop_col:crop_col + ny]

    # At least one direction needs zero-padding.
    canvas = np.zeros((nx, ny))

    if too_wide:
        # Rows fit (pad them), columns are too many (crop them).
        canvas[pad_row:pad_row + rows, :] = image[:, crop_col:crop_col + ny]
    elif too_tall:
        # Rows are too many (crop them), columns fit (pad them).
        canvas[:, pad_col:pad_col + cols] = image[crop_row:crop_row + nx, :]
    else:
        # Not larger in either direction: centre the whole image on the canvas.
        canvas[pad_row:pad_row + rows, pad_col:pad_col + cols] = image

    return canvas


def _make_output_dir(path, exists_message):
    '''Create directory `path`, or print `exists_message` when it already exists.'''
    if not os.path.exists(path):
        os.mkdir(path)
    else:
        print(exists_message)


def preprocess(input_folder, target_resolution, target_size, denoise=False, alphaTV=0.2):
    '''
    This function preprocesses ACDC data. Every slice of every frame volume is
    normalized, rescaled to the target resolution and cropped/padded to the
    target size, then written as a PNG together with its ground-truth mask.
    The output folder ('preprocessed', or 'preprocessed_denoised' when denoise
    is True) is created in the current working directory and mirrors the
    training/testing/patient layout of the input. Patient folders that already
    contain files are left untouched.
    If you want both denoised and non-denoised data, run the function twice
    with denoise on False and True.

    input_folder: the folder where raw ACDC data is located. It must contain
        'training' and 'testing' subfolders with one folder per patient.
    target_resolution: desired resolution, a tuple with 2 items (x- and y-dimensions).
    target_size: desired size, a tuple with 2 items (x- and y-dimensions).
    denoise: if True, TV-denoise every image slice before saving.
    alphaTV: weight parameter used in the TV denoising.

    Returns (scale_vectors, original_image_size) so masks can be transformed
    back to the original input format. Row 0 of both is a [0, 0] placeholder;
    after it follows one row per frame file that was processed, in sorted
    patient/file order. Note that this is one row per frame file, not per
    patient. When no file was processed both are the plain list [[0, 0]].
    '''
    import shutil  # local import: only needed to clear notebook checkpoint folders

    nx, ny = target_size
    data_folder = input_folder
    # Placeholder first rows; callers slice them off with [1:].
    scale_vectors = [[0,0]]
    original_image_size = [[0,0]]

    foldername = 'preprocessed_denoised' if denoise else 'preprocessed'
    _make_output_dir(foldername, foldername+' folder already exists. Continuing regardless.')

    # Loop over train and test folders
    for train_test in ['training', 'testing']:

        split_in = os.path.join(data_folder, train_test)

        # Make train and test folders in preprocessed folder
        _make_output_dir(os.path.join(foldername+'/', train_test),
                         'T'+train_test[1:]+' folder already exists. Continuing regardless.')

        # Loop over patient folders (sorted so the returned rows have a reproducible order)
        for folder in sorted(os.listdir(split_in)):

            patient_in = os.path.join(split_in, folder)

            # Skip automatically made notebook folders and stray files:
            # only directories can hold patient volumes.
            if folder == '.ipynb_checkpoints' or not os.path.isdir(patient_in):
                continue

            # Make patient folder in preprocessed folder
            patient_out = os.path.join(foldername+'/'+train_test, folder)
            _make_output_dir(patient_out, 'Folder for '+folder+' already exists. Continuing regardless.')

            # A checkpoint folder may contain files, so remove it recursively.
            checkpoints = os.path.join(patient_out, '.ipynb_checkpoints')
            if os.path.exists(checkpoints):
                shutil.rmtree(checkpoints)

            # Only create files if the designated folder is empty
            if os.listdir(patient_out):
                print('Folder for '+folder+' is not empty. No files were written to this folder.')
                continue

            frame_files = sorted(glob.glob(os.path.join(patient_in, 'patient???_frame??.nii.gz')))
            if not frame_files:
                continue

            # Patient information is the same for every frame file, so read it once.
            # The last two characters of the first line are parsed as the ED frame number.
            with open(os.path.join(patient_in, 'Info.cfg')) as f:
                lines = f.readlines()
            ED = int(lines[0].strip()[-2:])

            for file in frame_files:

                # The ground-truth mask sits next to the image with a '_gt' suffix
                file_mask = file.split('.nii.gz')[0] + '_gt.nii.gz'

                # Load data from .nii.gz files
                img_nii = nib.load(file)
                img = img_nii.get_fdata()
                mask = nib.load(file_mask).get_fdata()

                pixel_size = img_nii.header.get_zooms()

                # Save original image size before transforming
                original_image_size = np.append(original_image_size, [[img.shape[0], img.shape[1]]], axis=0)

                # Make vector to make all images have the same resolution
                scale_vector = [pixel_size[0] / target_resolution[0], pixel_size[1] / target_resolution[1]]
                scale_vectors = np.append(scale_vectors, [scale_vector], axis=0)

                print(scale_vectors)

                # File name without the '.nii.gz' extension
                stem = os.path.basename(file)[:-len('.nii.gz')]
                # The frame whose number matches ED is end-diastole; any other frame is labelled ES.
                phase = 'ED' if 'frame{:02}'.format(ED) in stem else 'ES'

                for zz in tqdm.tqdm(range(img.shape[2])):

                    # Normalize, rescale and crop the image and mask
                    slice_img = normalize_img(np.squeeze(img[:, :, zz]))
                    img_rescaled = transform.rescale(slice_img,
                                                     scale_vector,
                                                     order=1,
                                                     preserve_range=True,
                                                     mode='constant')

                    slice_mask = np.squeeze(mask[:, :, zz])
                    # Masks hold class labels: nearest-neighbour (order=0) and no
                    # anti-aliasing filter, otherwise downscaling would blur the
                    # labels into values that are not valid classes.
                    mask_rescaled = transform.rescale(slice_mask,
                                                      scale_vector,
                                                      order=0,
                                                      preserve_range=True,
                                                      anti_aliasing=False,
                                                      mode='constant')

                    img_cropped = crop_pad_resize(img_rescaled, nx, ny)
                    mask_cropped = crop_pad_resize(mask_rescaled, nx, ny)

                    if denoise:
                        img_cropped = denoise_tv_chambolle(img_cropped, eps=1e-6, weight=alphaTV, max_num_iter=1000)

                    # Save image and mask in PNG format
                    slice_base = os.path.join(patient_out, stem+'_slice{:01}_'.format(zz)+phase)

                    img_fin = Image.fromarray(np.uint8(255 * img_cropped), mode="L")
                    img_fin.save(slice_base+'.png', format='PNG')

                    mask_fin = Image.fromarray(np.uint8(mask_cropped), mode="L")
                    mask_fin.save(slice_base+'_gt.png', format='PNG')

    return scale_vectors, original_image_size



#### Preprocess the data

In [45]:
# Location of the raw data and the common grid every slice is brought to.
data_path = 'dummy_data'
target_size = (212, 212)
target_resolution = (1.36719, 1.36719)

# Run the (non-denoised) preprocessing and keep what is needed to undo it later.
scale_vectors, original_image_size = preprocess(
    input_folder=data_path,
    target_resolution=target_resolution,
    target_size=target_size,
)

preprocessed folder already exists. Continuing regardless.
Training folder already exists. Continuing regardless.
Folder for patient001 already exists. Continuing regardless.
[[0.         0.        ]
 [1.14285505 1.14285505]]


 60%|██████    | 6/10 [00:00<00:00, 52.24it/s]

[0.]
[0. 1. 2. 3.]
[0. 1. 2. 3.]
[0. 1. 2. 3.]
[0. 1. 2. 3.]
[0. 1. 2. 3.]
[0. 1. 2. 3.]
[0. 1. 2. 3.]
[0. 1. 2. 3.]


100%|██████████| 10/10 [00:00<00:00, 52.13it/s]

[0. 1. 2. 3.]
Testing folder already exists. Continuing regardless.





In [8]:
# Show the scale vectors and the original image sizes returned by preprocess,
# one after the other.
print(scale_vectors, original_image_size, sep='\n')

[[0.         0.        ]
 [1.14285505 1.14285505]]
[[  0   0]
 [216 256]]


#### Denoise the data

In [None]:
# alphaTV = 0.1 works fine but still leaves a lot of detail; 0.3 is perhaps just too high.
preprocess(
    data_path,
    target_resolution,
    target_size,
    denoise=True,
    alphaTV=0.2,
)

### Back to original format

In [50]:
def backtoformat(scale_vectors, original_image_size, mask_folder):
    '''
    Transform masks back to the size and resolution of the original images and
    write them, under the same file names, to the folder 'finalmasks' in the
    current working directory. Nothing is written when that folder already
    contains files.

    Preprocessing rescales a slice and then crops/pads it to the target size.
    This function undoes those steps in reverse order: crop/pad back to the
    size the slice had right after rescaling, rescale with the inverse factor,
    and finally crop/pad to exactly the original size to absorb rounding.

    scale_vectors: the vectors that were used in preprocessing to reach desired
        resolution
    original_image_size: the original sizes of the images
    mask_folder: the folder where the masks (output from network) are located

    Masks are visited in sorted file-name order. Row i of scale_vectors and
    original_image_size is used for the i-th distinct 11-character file-name
    prefix, so one row per prefix is expected and the [0, 0] placeholder row
    must already be removed.
    NOTE(review): preprocess appends one row per frame file, which can be more
    than one row per prefix - confirm the rows passed in line up with the masks.

    Raises ValueError when a scale factor is not positive.
    '''
    import shutil  # local import: only needed to clear notebook checkpoint folders

    foldername = 'finalmasks'
    prev_file = '00000000000'
    i = -1

    # make folder if it doesn't exist yet
    if not os.path.exists(foldername):
        os.mkdir(foldername)
    else:
        print(foldername+' folder already exists. Continuing regardless.')

    # remove automatically made files (recursively: the folder may not be empty)
    checkpoints = os.path.join(mask_folder, '.ipynb_checkpoints')
    if os.path.exists(checkpoints):
        shutil.rmtree(checkpoints)

    # only write files if the folder is empty
    if os.listdir(foldername):
        print(foldername+' folder was not empty. No files were written.')
        return

    # Sorted, so that files sharing a prefix are consecutive; the prefix
    # comparison below relies on that.
    for file in sorted(os.listdir(mask_folder)):

        # update iterator if we go to the next patient
        if prev_file[:11] != file[:11]:
            i += 1

        px, py = scale_vectors[i]
        nx, ny = (int(v) for v in original_image_size[i])

        if px <= 0 or py <= 0:
            raise ValueError('Scale factors must be positive, got '+str([px, py])+' for '+file)

        # Read the mask and release the file handle straight away
        with Image.open(os.path.join(mask_folder, file)) as opened:
            mask = np.array(opened.convert('L'), dtype=np.uint8)
        print(np.unique(mask))

        # Size of the slice directly after the rescale step of preprocessing
        rescaled_nx = int(np.round(nx * px))
        rescaled_ny = int(np.round(ny * py))

        # undo the crop/pad of preprocessing
        mask = crop_pad_resize(mask, rescaled_nx, rescaled_ny)

        # scale back; labels need nearest-neighbour and no anti-aliasing filter,
        # otherwise downscaling would blur them into invalid class values
        mask = transform.rescale(mask,
                                 [1/px, 1/py],
                                 order=0,
                                 preserve_range=True,
                                 anti_aliasing=False,
                                 mode='constant')

        # rounding in the two rescales can leave the size a pixel off
        mask = crop_pad_resize(mask, nx, ny)

        # save file
        mask_loc = os.path.join(foldername, file)
        mask_fin = Image.fromarray(np.uint8(mask), mode="L")
        mask_fin.save(mask_loc, format='PNG')

        prev_file = file

In [52]:
# Rebuild by hand the arrays preprocess returned for the dummy data:
# a [0, 0] placeholder row followed by the values of the single volume.
mask_folder = 'dummy_masks'
scale_vectors = np.vstack(([[0, 0]], [[1.14285505, 1.14285505]]))
original_image_size = np.vstack(([[0, 0]], [[216, 256]]))

# Drop the placeholder rows before transforming the masks back.
backtoformat(scale_vectors[1:], original_image_size[1:], mask_folder)

[0 1 2 3]
[0 1 2 3]
[0 1 2 3]
[0 1 2 3]
[0 1 2 3]
[0 1 2 3]
[0 1 2 3]
[0 1 2 3]
[0 1 2 3]
[0 1 2 3]
[0 1 2 3]
[0 1 2 3]
[0 1 2 3]
[0 1 2 3]
[0 1 2 3]
[0 1 2 3]
[0 1 2 3]
[0 1 2 3]
[0 1 2 3]
[0 1 2 3]
[0 1 2 3]
[0]
[0]
[0]
[0 1 2 3]
[0 1 2 3]
[0 1 2 3]
[0 1 2 3]
[0 1 2 3]
[0 1 2 3]
