# Preprocessing code

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import glob
from skimage import transform
import nibabel as nib
import tqdm
from PIL import Image
from skimage.restoration import denoise_tv_chambolle
from statistics import mean, median


def normalize_img(img):
    # Warning for when dividing NaN value??
    norm_img = np.divide(img,np.max(img))
    return norm_img


def crop_pad_resize(image, nx, ny):
    '''
    Code from Christian F. Baumgartner and Lisa M. Koch from
    "An Exploration of 2D and 3D Deep Learning Techniques for
    Cardiac MR Image Segmentation" (2017).
    Comments by us.
    '''
    x, y = image.shape

    # difference in nr of pixels (divide by 2 since we have 2 sides)
    x_s = (x - nx) // 2
    y_s = (y - ny) // 2
    x_c = (nx - x) // 2
    y_c = (ny - y) // 2

    if x > nx and y > ny:
        # if image is larger in both dimensions cut a slice
        slice_cropped = image[x_s:x_s + nx, y_s:y_s + ny]

    else:
        # if one dim is smaller fill that side up with 0's
        slice_cropped = np.zeros((nx, ny))

        if x <= nx and y > ny:
            # fill up x direction with 0's, cut in y direction
            slice_cropped[x_c:x_c + x, :] = image[:, y_s:y_s + ny]
        elif x > nx and y <= ny:
            # fill up y direction with 0's, cut in x direction
            slice_cropped[:, y_c:y_c + y] = image[x_s:x_s + nx, :]
        else:
            # if dimensions are as desired, keep the original slice
            slice_cropped[x_c:x_c + x, y_c:y_c + y] = image[:, :]

    return slice_cropped


def preprocess(input_folder, target_resolution, target_size, denoise=False, alphaTV=0.2):
    '''
    This function preprocesses ACDC data. It crops all images to the same size,
    transforms everything to the same resolution and normalizes the images.
    It automatically makes the folder where preprocessed data is written to,
    in the same format as the ACDC data is given. The images are in PNG-format.
    If wanted, it can denoise the data as well. It will put this in a different folder.
    If you want to have denoised and non-denoised data, run the function twice with denoise
    on False and True.
    The function outputs the scale vectors and original image sizes so we can transform the masks
    back to the original input format.
    
    input_folder: the folder where raw ACDC data is located.
    target_resolution: desired resolution, should be a tuple with 2 items (x- and y-dimensions).
    target_size: desired size. Should be a tuple wiht 2 items (x- and y-dimensions).
    alphaTV: parameter used in the TV denoising.
    '''
    nx, ny = target_size
    data_folder = input_folder
    # i = 0  # iterator for saving original resolution and size
    scale_vectors = [[0, 0]]
    original_image_size = [[0, 0]]
    
    if denoise:
        foldername = 'preprocessed_denoised'
    else:
        foldername = 'preprocessed'
    
    if not os.path.exists(foldername):
        os.mkdir(foldername)
    else:
        print(foldername+' folder already exists. Continuing regardless.')
        
    # print(target_resolution)
    
    # Loop over train and test folders
    heightImage = []
    widthImage = []
    voxelImage = []
    depthImage = []
    groupNOR = []
    groupMINF = []
    groupDCM = []
    groupHCM = []
    groupRV = []
    maxInt = []
    minInt = []
    meanInt = []
    for train_test in ['training', 'testing']:

        input_folder = os.path.join(data_folder, train_test)
        len_inp = len(input_folder)+1
        
#         if os.path.exists(os.path.join(input_folder+'/'+train_test, '.ipynb_checkpoints')):
#             n = len(os.listdir(input_folder)) - 1
#         else:
#             n = len(os.listdir(input_folder))
            
#         original_pixel_size = np.zeros((n, 2))
#         original_image_size = np.zeros((n, 2))
        
        # Make train and test folders in preprocessed folder
        if not os.path.exists(os.path.join(foldername+'/', train_test)):
            os.mkdir(os.path.join(foldername+'/', train_test))
        else:
            print('T'+train_test[1:]+' folder already exists. Continuing regardless.')
        
        # Loop over patient folders
        for folder in os.listdir(input_folder):
            
            if folder != '.ipynb_checkpoints':  # Sometimes trouble with automatically made files

                folder_path = os.path.join(input_folder, folder)
                
                # Make patient folders in preprocessed folder
                if not os.path.exists(os.path.join(foldername+'/'+train_test, folder_path[len_inp:])):
                    os.mkdir(os.path.join(foldername+'/'+train_test, folder_path[len_inp:]))
                else:
                    print('Folder for '+folder_path[len_inp:]+' already exists. Continuing regardless.')
                
                if os.path.exists(foldername+'/'+train_test+'/'+folder_path[len_inp:]+'/.ipynb_checkpoints'):
                    os.rmdir(foldername+'/'+train_test+'/'+folder_path[len_inp:]+'/.ipynb_checkpoints')
                    
                lst = os.listdir(foldername+'/'+train_test+'/'+folder_path[len_inp:])
                
                if len(lst) == 0:  # Only create files if the designated folder is empty
                    
                    for file in glob.glob(os.path.join(folder_path, 'patient???_frame??.nii.gz')):

                        # Save information about patient
                        with open(os.path.join(folder_path, 'Info.cfg')) as f:
                            lines = f.readlines()

                        ED = int(lines[0].strip()[-2:])
                        ES = int(lines[1].strip()[-2:])
                        Group = lines[2].strip().split(":")
                        Group2 = lines[5].strip().split(":")
                        Group3 = float(Group2[1].strip())
                        # print(Group)
                       
                        
                        # Split file name
                        file_base = file.split('.nii.gz')[0]
                        file_mask = file_base + '_gt.nii.gz'

                        # Load data from .nii.gz files
                        img_nii = nib.load(file)
                        img_dat = img_nii.get_fdata()
                        
                        heightImage.append(img_dat.shape[0])
                        widthImage.append(img_dat.shape[1])
                        depthImage.append(img_dat.shape[2])
                        # print(np.amax(np.uint8(img_dat)))
                        # print(np.amin(np.uint8(img_dat)))
                        maxInt.append(np.amax(np.uint8(img_dat)))
                        minInt.append(np.amin(np.uint8(img_dat)))
                        meanInt.append(np.mean(np.uint8(img_dat)))
                                      
                        mask_nii = nib.load(file_mask)
                        mask_dat = mask_nii.get_fdata()

                        img = img_nii.get_fdata()
                        mask = mask_nii.get_fdata()

                        pixel_size = img_nii.header.get_zooms()
                        
                        sx, sy, sz  = img_nii.header.get_zooms()
                        volume_vox = target_resolution[0]*target_resolution[1]*sz
                        voxelImage.append(volume_vox)
                        
                        
                        # print(str(volume_vox) + ' mm^3')
                        # print(str(sx) + ' x')
                        # print(str(sy) + ' y')
                        # Save original pixel and image size before transforming
                        # original_pixel_size = np.append(original_pixel_size, [[pixel_size[0], pixel_size[1]]], axis = 0)
                        original_image_size = np.append(original_image_size, [[img.shape[0], img.shape[1]]], axis=0)
                            
                        # Make vector to make all images have the same resolution
                        scale_vector = [pixel_size[0] / target_resolution[0], pixel_size[1] / target_resolution[1]] 
                        scale_vectors = np.append(scale_vectors, [scale_vector], axis=0)
                        
                        for zz in tqdm.tqdm(range(img.shape[2])):
                            
                            if(Group[1] == ' NOR'):
                                groupNOR.append(Group3)
                            elif(Group[1] == ' MINF'):
                                groupMINF.append(Group3)
                            elif(Group[1] == ' DCM'):
                                groupDCM.append(Group3)    
                            elif(Group[1] == ' HCM'):
                                groupHCM.append(Group3)
                            elif(Group[1] == ' RV'):
                                groupRV.append(Group3)

                            # Normalize, rescale and crop the image and  mask

                            slice_img = np.squeeze(img[:, :, zz])
                            slice_img = normalize_img(np.squeeze(img[:, :, zz]))
                            img_rescaled = transform.rescale(slice_img,
                                                             scale_vector,
                                                             order=1,
                                                             preserve_range=True,
                                                             mode='constant')

                            slice_mask = np.squeeze(mask[:, :, zz])
                            
                            # slice_mask = normalize_img(np.squeeze(mask[:, :, zz]))
                            mask_rescaled = transform.rescale(slice_mask,
                                                              scale_vector,
                                                              order=0,
                                                              preserve_range=True,
                                                              mode='constant')

                            img_cropped = crop_pad_resize(img_rescaled, nx, ny)
                            mask_cropped = crop_pad_resize(mask_rescaled, nx, ny)
                            
                            if denoise:
                                img_cropped = denoise_tv_chambolle(img_cropped, eps=1e-6, weight=alphaTV, max_num_iter=1000)

                            # Save images in PNG format
                            if 'frame{:02}'.format(ED) in file:
                                img_loc = os.path.join(foldername+'/'+train_test, file[len_inp:-7]+'_slice{:01}_ED_'.format(zz)+ str(volume_vox) +'.png')
                                img_fin = Image.fromarray(np.uint8(255 * img_cropped),mode="L")
                                img_fin.save(img_loc, format='PNG')

                                mask_loc = os.path.join(foldername+'/'+train_test, file[len_inp:-7]+'_slice{:01}_ED_'.format(zz)+ str(volume_vox) + '_gt.png')
                                mask_fin = Image.fromarray(np.uint8(mask_cropped), mode="L")
                                mask_fin.save(mask_loc, format='PNG')
                            else:
                                img_loc = os.path.join(foldername+'/'+train_test, file[len_inp:-7]+'_slice{:01}_ES_'.format(zz)+  str(volume_vox) + '.png')
                                img_fin = Image.fromarray(np.uint8(255 * img_cropped), mode="L")
                                img_fin.save(img_loc, format='PNG')

                                mask_loc = os.path.join(foldername+'/'+train_test, file[len_inp:-7]+'_slice{:01}_ES_'.format(zz)+ str(volume_vox) + '_gt.png')
                                mask_fin = Image.fromarray(np.uint8(mask_cropped),mode="L")
                                mask_fin.save(mask_loc, format='PNG')
                    
                else:
                    print('Folder for '+folder_path[len_inp:]+' is not empty. No files were written to this folder.')
    
    scale_vectors = np.delete(scale_vectors, 0, axis=0)
    original_image_size = np.delete(original_image_size, 0, axis=0)
    # print(max(depthImage))
    # print(min(depthImage))
    # # print(mean(depthImage))
    # print(max(groupNOR))
    # print(min(groupNOR))
    # print(mean(groupNOR))
    # print(max(groupMINF))
    # print(min(groupMINF))
    # print(mean(groupMINF))
    # print(max(groupDCM))
    # print(min(groupDCM))
    # print(mean(groupDCM))
    # print(max(groupHCM))
    # print(min(groupHCM))
    # print(mean(groupHCM))
    # print(max(groupRV))
    # print(min(groupRV))
    # print(mean(groupRV))
    print("Int")
    print(max(maxInt))
    print(max(minInt))
    print(max(meanInt))

    print(min(maxInt))
    print(min(minInt))
    print(min(meanInt))

    print(mean(maxInt))
    print(mean(minInt))
    print(mean(meanInt))
    # print(min(heightImage))
    # print(min(widthImage))
    # print(min(voxelImage))
    # print(mean(heightImage))
    # print(mean(widthImage))
    # print(mean(voxelImage))
    print('Preprocessed Finished!')
   
    
    return scale_vectors, original_image_size



In [2]:
def secret_preprocess(input_folder, target_resolution, target_size, denoise=False, alphaTV=0.2):
    '''
    This function preprocesses ACDC data. It crops all images to the same size,
    transforms everything to the same resolution and normalizes the images.
    It automatically makes the folder where preprocessed data is written to,
    in the same format as the ACDC data is given. The images are in PNG-format.
    If wanted, it can denoise the data as well. It will put this in a different folder.
    If you want to have denoised and non-denoised data, run the function twice with denoise
    on False and True.
    The function outputs the scale vectors and original image sizes so we can transform the masks
    back to the original input format.
    
    input_folder: the folder where raw ACDC data is located.
    target_resolution: desired resolution, should be a tuple with 2 items (x- and y-dimensions).
    target_size: desired size. Should be a tuple wiht 2 items (x- and y-dimensions).
    alphaTV: parameter used in the TV denoising.
    '''
    nx, ny = target_size
    data_folder = input_folder
    # i = 0  # iterator for saving original resolution and size
    scale_vectors = [[0, 0]]
    original_image_size = [[0, 0]]
    
    if denoise:
        foldername = 'preprocessed_denoised'
    else:
        foldername = 'secret_preprocessed'
    
    if not os.path.exists(foldername):
        os.mkdir(foldername)
    else:
        print(foldername+' folder already exists. Continuing regardless.')
    
    # Loop over train and test folders
    train_test = 'secret_test'

    input_folder = os.path.join(data_folder, train_test)
    len_inp = len(input_folder)+1

    # Make train and test folders in preprocessed folder
    if not os.path.exists(os.path.join(foldername+'/', train_test)):
        os.mkdir(os.path.join(foldername+'/', train_test))
    else:
        print('T'+train_test[1:]+' folder already exists. Continuing regardless.')

    # Loop over patient folders
    for i, folder in enumerate(os.listdir(input_folder)):

        if folder != '.ipynb_checkpoints':  # Sometimes trouble with automatically made files

            folder_path = os.path.join(input_folder, folder)

            # Make patient folders in preprocessed folder
            if not os.path.exists(os.path.join(foldername+'/'+train_test, folder_path[len_inp:])):
                os.mkdir(os.path.join(foldername+'/'+train_test, folder_path[len_inp:]))
            else:
                print('Folder for '+folder_path[len_inp:]+' already exists. Continuing regardless.')

            if os.path.exists(foldername+'/'+train_test+'/'+folder_path[len_inp:]+'/.ipynb_checkpoints'):
                os.rmdir(foldername+'/'+train_test+'/'+folder_path[len_inp:]+'/.ipynb_checkpoints')

            lst = os.listdir(foldername+'/'+train_test+'/'+folder_path[len_inp:])

            if len(lst) == 0:  # Only create files if the designated folder is empty

                for file in glob.glob(os.path.join(folder_path, 'patient???_frame??.nii.gz')):
                    # Load data from .nii.gz files
                    img_nii = nib.load(file)
                    img = img_nii.get_fdata()
                    
                    # Take the slices of the i'th patient (out of the 25)
                    img = img[:,:,:,i]
                    
                    pixel_size = img_nii.header.get_zooms()

                    # Save original pixel and image size before transforming
                    original_image_size = np.append(original_image_size, [[img.shape[0], img.shape[1]]], axis=0)

                    # Make vector to make all images have the same resolution
                    scale_vector = [pixel_size[0] / target_resolution[0], pixel_size[1] / target_resolution[1]]
                    scale_vectors = np.append(scale_vectors, [scale_vector], axis=0)

                    for sliice in tqdm.tqdm(range(img.shape[2])):

                        # Normalize, rescale and crop the image and  mask
                        slice_img = np.squeeze(img[:, :, sliice])
                        slice_img = normalize_img(np.squeeze(img[:, :, sliice]))
                        img_rescaled = transform.rescale(slice_img,
                                                         scale_vector,
                                                         order=1,
                                                         preserve_range=True,
                                                         mode='constant')

                        img_cropped = crop_pad_resize(img_rescaled, nx, ny)

                        if denoise:
                            img_cropped = denoise_tv_chambolle(img_cropped, eps=1e-6, weight=alphaTV, max_num_iter=1000)

                        # Save images in PNG format
                        img_loc = os.path.join(foldername+'/'+train_test, file[len_inp:-7]+'_slice{:02}'.format(sliice)+'.png')
                        img_fin = Image.fromarray(np.uint8(255 * img_cropped),mode="L")
                        img_fin.save(img_loc, format='PNG')

            else:
                print('Folder for '+folder_path[len_inp:]+' is not empty. No files were written to this folder.')
    
    scale_vectors = np.delete(scale_vectors, 0, axis=0)
    original_image_size = np.delete(original_image_size, 0, axis=0)
    
    return scale_vectors, original_image_size

#### Preprocess the data

In [3]:
target_resolution = (1.36719, 1.36719)
target_size = (256, 256)
data_path = './database/'

scale_vectors, original_image_size = preprocess(data_path, target_resolution, target_size)

100%|██████████| 10/10 [00:00<00:00, 43.02it/s]
100%|██████████| 10/10 [00:00<00:00, 43.46it/s]
100%|██████████| 8/8 [00:00<00:00, 43.78it/s]
100%|██████████| 8/8 [00:00<00:00, 43.41it/s]
100%|██████████| 11/11 [00:00<00:00, 30.27it/s]
100%|██████████| 11/11 [00:00<00:00, 30.96it/s]
100%|██████████| 10/10 [00:00<00:00, 44.98it/s]
100%|██████████| 10/10 [00:00<00:00, 44.53it/s]
100%|██████████| 10/10 [00:00<00:00, 45.03it/s]
100%|██████████| 10/10 [00:00<00:00, 44.35it/s]
100%|██████████| 6/6 [00:00<00:00, 45.00it/s]
100%|██████████| 6/6 [00:00<00:00, 44.44it/s]
100%|██████████| 10/10 [00:00<00:00, 53.13it/s]
100%|██████████| 10/10 [00:00<00:00, 53.60it/s]
100%|██████████| 10/10 [00:00<00:00, 46.46it/s]
100%|██████████| 10/10 [00:00<00:00, 47.43it/s]
100%|██████████| 7/7 [00:00<00:00, 25.68it/s]
100%|██████████| 7/7 [00:00<00:00, 47.26it/s]
100%|██████████| 10/10 [00:00<00:00, 48.24it/s]
100%|██████████| 10/10 [00:00<00:00, 48.05it/s]
100%|██████████| 18/18 [00:00<00:00, 52.42it/s]
100%

Int
255
32
96.39805385044643
175
0
38.8757357893319
251
10
56.65650121521835
Preprocessed Finished!





In [None]:
scale_vectors, original_imsize = preprocess(data_path, target_resolution, target_size)

In [222]:
print(scale_vectors)

print(original_image_size)

[[1.14285505 1.14285505]
 [0.99999817 0.99999817]]
[[216 256]
 [232 256]]


#### Denoise the data

In [None]:
preprocess(data_path, target_resolution, target_size, denoise=True, alphaTV=0.2)
# alpha = 0.1 geeft wel prima maar nog veel details, 0.3 is misschien net hoog.

### Back to original format

In [254]:
def backtoformat(scale_vectors, original_image_size, mask_folder):
    '''
    scale_vectors: the vectors that were used in preprocessing to reach desired
        resolution
    original_image_size: the orginal sizes of the images
    mask_folder: the folder where the masks (output from network) are located
    '''
    
    foldername = 'finalmasks'
    prev_file = '00000000000'  # so that the first new_patient is always true
    i = -1
    new_patient = True

    # make folder if it doesn't exist yet
    if not os.path.exists(foldername):
        os.mkdir(foldername)
    else:
        print(foldername+' folder already exists. Continuing regardless.')
    
    # remove automatically made files
    if os.path.exists(os.path.join(mask_folder, '.ipynb_checkpoints')):
        os.rmdir(os.path.join(mask_folder, '.ipynb_checkpoints'))
    
    # only write files if the folder is empty
    if len(os.listdir(foldername)) == 0:
        # loop over masks
        for file in sorted(os.listdir(mask_folder)):
            
            # update iterator if we go to the next patient
            if prev_file[:11] != file[:11]:
                i += 1
                new_patient = True
            else:
                new_patient = False

            px, py = scale_vectors[i][0], scale_vectors[i][1]
            nx, ny = original_image_size[i][0], original_image_size[i][1]

            file_path = os.path.join(mask_folder, file)
            
            mask = Image.open(file_path).convert('L')
            mask = np.array(mask, dtype=np.uint8)
            
            scale_vector = [1/px, 1/py]
            
            # scale back
            mask = transform.rescale(mask,
                                     scale_vector,
                                     order=0,
                                     preserve_range=True,
                                     mode='constant')
            
            # pad or crop back
            mask = crop_pad_resize(mask, nx, ny)
            
            if new_patient:
                if i > 0:
                    # save the previous 3D np array (if there is one)
                    niftimage = nib.Nifti1Image(threedimage, affine=np.eye(4))
                    nib.save(niftimage, os.path.join('finalmasks', prev_file[:-17]+'_gt'+'.nii.gz'))
                # if we have a new page, make new np array for new nii.gz file
                threedimage = mask.reshape(nx, ny, 1)

            else:
                # add mask to 3D np array
                mask_threed = mask.reshape(nx, ny, 1)
                threedimage = np.concatenate([threedimage, mask_threed], 2)


            # save file as PNG if wanted
#             mask_loc = os.path.join(foldername, file)
#             mask_fin = Image.fromarray(np.uint8(mask), mode="L")
            
#             mask_fin.save(mask_loc, format='PNG')
            prev_file = file
        
        # save last 3D np array
        niftimage = nib.Nifti1Image(threedimage, affine=np.eye(4))
        nib.save(niftimage, os.path.join('finalmasks', prev_file[:-17]+'_gt'+'.nii.gz'))
    else:
        print(foldername+' folder was not empty. No files were written.')
        
    

In [255]:
mask_folder = 'dummy_masks'

backtoformat(scale_vectors, original_image_size, mask_folder)

#### Test if saving as nii.gz went well

In [261]:
file = 'finalmasks/patient001_frame01_gt.nii.gz'
img_nii = nib.load(file)
img = img_nii.get_fdata()
img.shape

(216, 256, 10)

### Testing 

In [139]:
for file in sorted(os.listdir('dummy_masks')):
    print(os.path.join('finalmasks', file[:-17]+'_gt'+'.nii.gz'))

finalmasks/patient001_frame01_gt.nii.gz
finalmasks/patient001_frame01_gt.nii.gz
finalmasks/patient001_frame01_gt.nii.gz
finalmasks/patient001_frame01_gt.nii.gz
finalmasks/patient001_frame01_gt.nii.gz
finalmasks/patient001_frame01_gt.nii.gz
finalmasks/patient001_frame01_gt.nii.gz
finalmasks/patient001_frame01_gt.nii.gz
finalmasks/patient001_frame01_gt.nii.gz
finalmasks/patient001_frame01_gt.nii.gz


In [162]:
nx, ny = (216, 256)
threedimage = np.zeros((nx, ny, 1), int)
print(threedimage.shape)
mask = np.ones(216*256)
mask_threed = mask.reshape(nx, ny, 1)
print(mask_threed.shape)

threedimage = np.concatenate([threedimage, mask_threed], 2)
print(threedimage.shape)

again = np.concatenate([threedimage, mask_threed], 2)
print(again.shape)

(216, 256, 1)
(216, 256, 1)
(216, 256, 2)
(216, 256, 3)


In [200]:
a1 = np.zeros((3, 4, 1), dtype=np.uint8)

a2 = np.full((3, 4, 1), 2, dtype=np.uint8)

x = np.concatenate([a1, a2], 2)

a3 = np.full((3, 4, 1), 3, dtype=np.uint8)

y = np.concatenate([x, a3], 2)
print(y.shape)

y = np.delete(y, 0, axis=2)
print(y.shape)

# x = np.arange(4*4*3).reshape(4,4,3)
# print(x.shape)
# ni_img = nib.Nifti1Image(x, affine=np.eye(4))
# niftimage = nib.Nifti1Image(y, affine=np.eye(4))

# nib.save(niftimage, os.path.join('finalmasks', 'test3d.nii.gz'))

(3, 4, 3)
(3, 4, 2)


In [208]:
y = [[0, 0]]
y = np.append(scale_vectors, [[1.14285505, 1.14285505]], axis=0)
x = [[0, 0]]
x = np.append(original_image_size, [[216, 256]], axis=0)

print(y.shape)
print(x.shape)

y = np.delete(y, 0, axis=1)
print(y.shape)

(4, 2)
(4, 2)
(4, 1)


In [232]:
x, y = scale_vectors[0][0], scale_vectors[0][1]
print(x)
print(y)

1.1428550530650459
1.1428550530650459


In [235]:
scale_vectors

array([[1.14285505, 1.14285505],
       [0.99999817, 0.99999817]])