In [1]:
import os
import numpy as np
import nibabel as nib
import tensorflow as tf
import matplotlib.pyplot as plt
from skimage import measure
from skimage.transform import resize
from keras_unet.metrics import dice_coef
from keras_unet.models import custom_unet
from keras_unet.losses import jaccard_distance
from sklearn.model_selection import train_test_split
from PIL import Image
from PIL import ImageOps
import fnmatch
import nibabel as nib
import shutil

-----------------------------------------
keras-unet init: TF version is >= 2.0.0 - using `tf.keras` instead of `Keras`
-----------------------------------------


In [2]:
# Work out how many rows/columns are missing and pad the image up to a square target.
def padding(img, expected_size):
    """Pad ``img`` on all sides so that it becomes ``expected_size`` x ``expected_size``.

    Parameters
    ----------
    img : PIL image
        Only ``img.size[0]`` and ``img.size[1]`` are read here; the image is
        then handed to ``ImageOps.expand``.
    expected_size : int
        Target side length, applied to both dimensions.

    Returns
    -------
    The result of ``ImageOps.expand(img, border)`` where ``border`` is a
    4-tuple built from the missing width/height.
    """
    missing_w = expected_size - img.size[0]
    missing_h = expected_size - img.size[1]

    # Floor-divide for the first half; the second half takes whatever is left,
    # so an odd difference is still fully accounted for.
    half_w = missing_w // 2
    half_h = missing_h // 2
    border = (half_w, half_h, missing_w - half_w, missing_h - half_h)
    return ImageOps.expand(img, border)

# Fit the image inside the target box, then pad rows/columns to reach the exact target size.
def resize_with_padding(img, expected_size):
    """Fit ``img`` into ``expected_size`` and pad it out to exactly that size.

    Parameters
    ----------
    img : PIL image
        ``img.thumbnail`` is called on it directly, so the caller's image
        object is modified by this call.
    expected_size : sequence of two ints
        ``expected_size[0]`` is compared against ``img.size[0]`` and
        ``expected_size[1]`` against ``img.size[1]``.

    Returns
    -------
    The result of ``ImageOps.expand(img, border)`` where ``border`` is a
    4-tuple built from the remaining difference in each dimension.
    """
    box_w, box_h = expected_size[0], expected_size[1]
    img.thumbnail((box_w, box_h))

    gap_w = box_w - img.size[0]
    gap_h = box_h - img.size[1]
    # First half is floored; the second half absorbs any odd remainder.
    first_w, first_h = gap_w // 2, gap_h // 2
    return ImageOps.expand(img, (first_w, first_h, gap_w - first_w, gap_h - first_h))

In [3]:
def gather_by_name(orignal_path, new_path, phrase):
    """Copy every file under ``orignal_path`` whose name contains ``phrase`` into ``new_path``.

    Parameters
    ----------
    orignal_path : str
        Root folder that is walked recursively. The (misspelled) parameter
        name is kept so existing keyword callers keep working.
    new_path : str
        Destination passed to ``shutil.copy`` for every match.
    phrase : str
        Substring that must appear in the file name (not the folder name).

    Returns
    -------
    None. The number of matched files is printed.
    """
    # The body previously read the undefined name `original_path`, which raised
    # NameError (or silently used an unrelated global); use the real parameter.
    source_root = os.path.normpath(orignal_path)
    file_list = [
        os.path.join(root, name)
        for root, _dirs, files in os.walk(source_root, topdown=True)
        for name in files
        if phrase in name
    ]

    for filename in file_list:
        shutil.copy(filename, new_path)

    print('copied and moved '+ str(len(file_list))+' files')

In [55]:
def gather_filenames(path):
    """Return the full paths of all files under ``path`` whose basename contains ``'ROI'``.

    The folder tree is walked recursively. Only the file's own name is
    checked, so a folder called "ROI" does not make its contents match.
    Progress messages and the number of matches are printed.

    Parameters
    ----------
    path : str
        Root folder to search.

    Returns
    -------
    list of str
        One ``os.path.join(root, name)`` entry per matching file.
    """
    every_file = [
        os.path.join(folder, fname)
        for folder, _subdirs, fnames in os.walk(os.path.normpath(path), topdown=True)
        for fname in fnames
    ]
    print('\nPatient Folders have been identified\n')

    # Keep only the ROI files; per the original author's note this drops the
    # tiff and 3D files that sit in the same folders.
    keyword = 'ROI'
    ROI_list = [full for full in every_file if keyword in os.path.basename(full)]
    print('\nFilenames have been found and added\n')
    print('copied and moved '+ str(len(ROI_list))+' files')

    return ROI_list

In [45]:
def convert_NPY_NII(ROI_list):
    """Convert raw uint8 ROI volumes to padded ``.npy`` arrays and NIfTI images.

    For every path in ``ROI_list`` the function:

    1. reads the volume dimensions and labels from the file name,
    2. loads the raw bytes as ``(num_slice, num_width, num_height)`` uint8,
    3. runs each slice through ``resize_with_padding`` to ``new_size`` x ``new_size``,
    4. moves the slice axis last, giving ``(new_size, new_size, num_slice)``,
    5. saves the array with ``np.save`` under ``new_path`` and, as float32 with
       an identity affine, with ``nib.save`` under ``final_path``.

    The file name is expected to look like
    ``"101934 y0 t3 Cyst ROI Left 8bit 137 178 96"``: characters 0-5 are the
    patient number, character 8 the year digit, and the last three
    whitespace-separated tokens are height, width and slice count.

    Reads the notebook-level names ``new_size``, ``new_path``, ``final_path``
    and ``resize_with_padding``; they must be defined before this is called.

    Parameters
    ----------
    ROI_list : list of str
        Paths of the raw ROI files to convert.

    Raises
    ------
    ValueError
        If the name does not end in three integers, if it contains none of
        'Cyst'/'Kidney'/'Image' or none of 'Right'/'Left', or if the file size
        does not match the dimensions in the name (raised by ``reshape``).
    """
    print('Converting', str(len(ROI_list)), 'files')
    for roi_path in ROI_list:
        orig_fname = os.path.basename(roi_path)
        print(orig_fname)

        # Parse the trailing "<height> <width> <slices>" tokens. The previous
        # fixed-width slicing guessed the digit count of the slice number from
        # its last two digits, which misread e.g. 168 slices as 68 and broke
        # on two-digit heights such as 97.
        num_height, num_width, num_slice = (int(tok) for tok in orig_fname.split()[-3:])

        pt_numb = orig_fname[0:6]
        yr_numb = orig_fname[8]

        # Labels are derived per file; failing loudly avoids silently reusing
        # the label of the previously processed file.
        if 'Cyst' in orig_fname:
            img_type = 'C'
        elif 'Kidney' in orig_fname:
            img_type = 'M_K'
        elif 'Image' in orig_fname:
            img_type = 'M'
        else:
            raise ValueError("no Cyst/Kidney/Image keyword in file name: %r" % orig_fname)

        if 'Right' in orig_fname:
            side = 'R'
        elif 'Left' in orig_fname:
            side = 'L'
        else:
            raise ValueError("no Right/Left keyword in file name: %r" % orig_fname)

        # Raw file is a flat uint8 buffer; reshape raises if the size disagrees
        # with the dimensions taken from the name.
        data = np.fromfile(str(roi_path), dtype='uint8').reshape(num_slice, num_width, num_height)

        resized = np.zeros((num_slice, new_size, new_size), dtype='uint8')
        for slice_idx in range(num_slice):
            slice_img = Image.fromarray(data[slice_idx])
            resized[slice_idx] = resize_with_padding(slice_img, (new_size, new_size))

        # Slice axis goes last: (num_slice, H, W) -> (H, W, num_slice).
        transposed = np.ascontiguousarray(resized.transpose(1, 2, 0))

        new_fname = pt_numb + '_' + yr_numb + '_' + str(num_slice) + '_' + side + '_' + img_type

        # No extension is given here; np.save appends ".npy", and the later
        # listing of final_path in this notebook shows the NIfTI files end in ".nii".
        np.save(os.path.join(new_path, new_fname), transposed)
        converted_array = np.array(transposed, dtype=np.float32)
        nifti_file = nib.Nifti1Image(converted_array, np.eye(4))
        nib.save(nifti_file, os.path.join(final_path, new_fname))

    print("complete --- nice job")

In [46]:
# Folder locations and target slice size read as globals by the conversion code.
raw_path = r'C:\Users\UAB\data\KU'  # root folder passed to gather_filenames
# Destination for the .npy arrays written by np.save.
# NOTE(review): in a raw string the trailing `\\` is two literal backslashes.
new_path = r'C:\Users\UAB\data\AllNPY\\'
final_path = r'C:\Users\UAB\data\AllNIIimages'  # destination for the NIfTI files written by nib.save
cyst_path = r'C:\Users\UAB\data\CystNII'  # folder the '_C' files are moved into later
new_size = 256  # side length of the square slices produced via resize_with_padding

In [58]:
# Collect the full paths of every ROI file found under raw_path.
this_list = gather_filenames(raw_path)


Patient Folders have been identified


Filenames have been found and added

copied and moved 744 files


In [59]:
# Sanity check: how many ROI files were collected.
print(len(this_list))

744


In [60]:
# Convert every collected ROI file; each file name is printed as it is processed.
convert_NPY_NII(this_list)

Converting 744 files
101934 y0 t3 Cyst ROI Left 8bit 137 178 96
101934 y0 t3 Cyst ROI Right 8bit 125 221 96
101934 y0 t3 Image ROI Left 8bit 137 178 96
101934 y0 t3 Image ROI Right 8bit 125 221 96
101934 y0 t3 Kidney ROI Left 8bit 137 178 96
101934 y0 t3 Kidney ROI Right 8bit 125 221 96
101934 y1 t3 Cyst ROI Left 8bit 134 167 96
101934 y1 t3 Cyst ROI Right 8bit 119 205 96
101934 y1 t3 Image ROI Left 8bit 134 167 96
101934 y1 t3 Image ROI Right 8bit 119 205 96
101934 y1 t3 Kidney ROI Left 8bit 134 167 96
101934 y1 t3 Kidney ROI Right 8bit 119 205 96
101934 y2 t3 Cyst ROI Left 8bit 142 202 96
101934 y2 t3 Cyst ROI Right 8bit 125 221 96
101934 y2 t3 Image ROI Left 8bit 142 202 96
101934 y2 t3 Image ROI Right 8bit 125 221 96
101934 y2 t3 Kidney ROI Left 8bit 142 202 96
101934 y2 t3 Kidney ROI Right 8bit 125 221 96
101934 y3 t3 Cyst ROI Left 8bit 153 186 96
101934 y3 t3 Cyst ROI Right 8bit 130 214 96
101934 y3 t3 Image ROI Left 8bit 153 186 96
101934 y3 t3 Image ROI Right 8bit 130 214 96
10

120395 y2 t3 Kidney ROI Right 8bit 130 179 111
120395 y3 t3 Cyst ROI Left 8bit 146 208 120
120395 y3 t3 Cyst ROI Right 8bit 134 179 120
120395 y3 t3 Image ROI Left 8bit 146 208 120
120395 y3 t3 Image ROI Right 8bit 134 179 120
120395 y3 t3 Kidney ROI Left 8bit 146 208 120
120395 y3 t3 Kidney ROI Right 8bit 134 179 120
120777 y0 t3 Cyst ROI Left 8bit 142 177 87
120777 y0 t3 Cyst ROI Right 8bit 138 186 87
120777 y0 t3 Image ROI Left 8bit 142 177 87
120777 y0 t3 Image ROI Right 8bit 138 186 87
120777 y0 t3 Kidney ROI Left 8bit 142 177 87
120777 y0 t3 Kidney ROI Right 8bit 138 186 87
120777 y1 t3 Cyst ROI Left 8bit 155 198 99
120777 y1 t3 Cyst ROI Right 8bit 144 184 99
120777 y1 t3 Image ROI Left 8bit 155 198 99
120777 y1 t3 Image ROI Right 8bit 144 184 99
120777 y1 t3 Kidney ROI Left 8bit 155 198 99
120777 y1 t3 Kidney ROI Right 8bit 144 184 99
120777 y2 t3 Cyst ROI Left 8bit 148 195 93
120777 y2 t3 Cyst ROI Right 8bit 139 183 93
120777 y2 t3 Image ROI Left 8bit 148 195 93
120777 y2 t3 Im

146563 y1 t3 Kidney ROI Right 8bit 124 139 99
146563 y2 t3 Cyst ROI Left 8bit 120 138 93
146563 y2 t3 Cyst ROI Right 8bit 128 148 93
146563 y2 t3 Image ROI Left 8bit 120 138 93
146563 y2 t3 Image ROI Right 8bit 128 148 93
146563 y2 t3 Kidney ROI Left 8bit 120 138 93
146563 y2 t3 Kidney ROI Right 8bit 128 148 93
146563 y3 t3 Cyst ROI Left 8bit 125 152 99
146563 y3 t3 Cyst ROI Right 8bit 125 139 99
146563 y3 t3 Image ROI Left 8bit 125 152 99
146563 y3 t3 Image ROI Right 8bit 125 139 99
146563 y3 t3 Kidney ROI Left 8bit 125 152 99
146563 y3 t3 Kidney ROI Right 8bit 125 139 99
148014 y0 t3 Cyst ROI Left 8bit 115 145 84
148014 y0 t3 Cyst ROI Right 8bit 119 186 84
148014 y0 t3 Image ROI Left 8bit 115 145 84
148014 y0 t3 Image ROI Right 8bit 119 186 84
148014 y0 t3 Kidney ROI Left 8bit 115 145 84
148014 y0 t3 Kidney ROI Right 8bit 119 186 84
148014 y1 t3 Cyst ROI Left 8bit 112 145 96
148014 y1 t3 Cyst ROI Right 8bit 126 179 96
148014 y1 t3 Image ROI Left 8bit 112 145 96
148014 y1 t3 Image ROI

161547 y0 t3 Image ROI Left 8bit 144 170 117
161547 y0 t3 Image ROI Right 8bit 133 173 117
161547 y0 t3 Kidney ROI Left 8bit 144 170 117
161547 y0 t3 Kidney ROI Right 8bit 133 173 117
161547 y1 t3 Cyst ROI Left 8bit 150 173 135
161547 y1 t3 Cyst ROI Right 8bit 138 180 135
161547 y1 t3 Image ROI Left 8bit 150 173 135
161547 y1 t3 Image ROI Right 8bit 138 180 135
161547 y1 t3 Kidney ROI Left 8bit 150 173 135
161547 y1 t3 Kidney ROI Right 8bit 138 180 135
161547 y2 t3 Cyst ROI Left 8bit 139 166 132
161547 y2 t3 Cyst ROI Right 8bit 138 181 132
161547 y2 t3 Image ROI Left 8bit 139 166 132
161547 y2 t3 Image ROI Right 8bit 138 181 132
161547 y2 t3 Kidney ROI Left 8bit 139 166 132
161547 y2 t3 Kidney ROI Right 8bit 138 181 132
161547 y3 t3 Cyst ROI Left 8bit 142 167 135
161547 y3 t3 Cyst ROI Right 8bit 144 200 135
161547 y3 t3 Image ROI Left 8bit 142 167 135
161547 y3 t3 Image ROI Right 8bit 144 200 135
161547 y3 t3 Kidney ROI Left 8bit 142 167 135
161547 y3 t3 Kidney ROI Right 8bit 144 200 1

193273 y2 t3 Kidney ROI Right 8bit 136 156 111
193273 y3 t3 Cyst ROI Left 8bit 125 150 111
193273 y3 t3 Cyst ROI Right 8bit 131 156 111
193273 y3 t3 Image ROI Left 8bit 125 150 111
193273 y3 t3 Image ROI Right 8bit 131 156 111
193273 y3 t3 Kidney ROI Left 8bit 125 150 111
193273 y3 t3 Kidney ROI Right 8bit 131 156 111
complete --- nice job


## There are two exceptions to the above code: Pt 136055 and Pt 170121
Deal with these manually to utilize the full set.


In [24]:
Problem_path = r'C:\Users\UAB\data\Problems'

# NOTE(review): this cell repeats the logic of gather_filenames for a different folder.
# Every file below the problem folder, as a full path.
pt_fnames = [
    os.path.join(folder, fname)
    for folder, _subdirs, fnames in os.walk(os.path.normpath(Problem_path), topdown=True)
    for fname in fnames
]
print('\nPatient Folders have been identified \n')

# Keep only files whose basename contains 'ROI'; per the original author's
# note this eliminates the tiff and 3D files.
ROI_list = [full for full in pt_fnames if 'ROI' in os.path.basename(full)]
print('\nFilenames have been found and added\n')


Patient Folders have been identified 


Filenames have been found and added



In [25]:

# Manual conversion of the files gathered into ROI_list by the previous cell
# (the recorded output shows patient 136055, names ending "97 151 126").
# Their two-digit height breaks the fixed-width slicing in convert_NPY_NII, so
# the values were hard-coded here for every file. They are now read from each
# file name instead, which yields the same numbers for those files.
# NOTE(review): this cell otherwise repeats the body of convert_NPY_NII.
print('Converting', str(len(ROI_list)), 'files')
for call_file in ROI_list:
    call_file = str(call_file)
    orig_fname = os.path.basename(call_file)
    print(orig_fname)

    # Name ends in three whitespace-separated integers: height, width, slices.
    num_height, num_width, num_slice = (int(tok) for tok in orig_fname.split()[-3:])
    pt_numb = orig_fname[0:6]   # patient number, first six characters
    yr_numb = orig_fname[8]     # year digit, e.g. the "0" in "y0"

    # Derive labels per file and fail loudly rather than reuse a stale label
    # left over from a previous iteration or cell.
    if 'Cyst' in orig_fname:
        img_type = 'C'
    elif 'Kidney' in orig_fname:
        img_type = 'M_K'
    elif 'Image' in orig_fname:
        img_type = 'M'
    else:
        raise ValueError("no Cyst/Kidney/Image keyword in file name: %r" % orig_fname)

    if 'Right' in orig_fname:
        side = 'R'
    elif 'Left' in orig_fname:
        side = 'L'
    else:
        raise ValueError("no Right/Left keyword in file name: %r" % orig_fname)

    # Raw uint8 buffer -> (slices, width, height); reshape raises on a size mismatch.
    data = np.fromfile(call_file, dtype='uint8').reshape(num_slice, num_width, num_height)

    resized = np.zeros((num_slice, new_size, new_size), dtype='uint8')
    for slice_idx in range(num_slice):
        re_slice = Image.fromarray(data[slice_idx])
        resized[slice_idx] = resize_with_padding(re_slice, (new_size, new_size))

    # Slice axis goes last: (num_slice, H, W) -> (H, W, num_slice).
    transposed = np.ascontiguousarray(resized.transpose(1, 2, 0))

    new_fname = str(pt_numb) + '_' + str(yr_numb) + '_' + str(num_slice) + '_' + side + '_' + img_type
    # No extension is given; np.save appends ".npy".
    np.save(os.path.join(new_path, new_fname), transposed)
    converted_array = np.array(transposed, dtype=np.float32)
    affine = np.eye(4)
    nifti_file = nib.Nifti1Image(converted_array, affine)
    nib.save(nifti_file, os.path.join(final_path, new_fname))


print("complete --- nice job")

Converting 3 files
136055 y0 t3 Cyst ROI Left 8bit 97 151 126
136055 y0 t3 Image ROI Left 8bit 97 151 126
136055 y0 t3 Kidney ROI Left 8bit 97 151 126
complete --- nice job


In [35]:
Problem_path = r'C:\Users\UAB\data\dciacj'

# NOTE(review): this cell repeats the logic of gather_filenames for a different folder.
# Every file below the problem folder, as a full path.
pt_fnames = [
    os.path.join(folder, fname)
    for folder, _subdirs, fnames in os.walk(os.path.normpath(Problem_path), topdown=True)
    for fname in fnames
]
print('\nPatient Folders have been identified \n')

# Keep only files whose basename contains 'ROI'; per the original author's
# note this eliminates the tiff and 3D files.
ROI_list = [full for full in pt_fnames if 'ROI' in os.path.basename(full)]
print('\nFilenames have been found and added\n')


Patient Folders have been identified 


Filenames have been found and added



In [38]:

    # Manual conversion of the files gathered into ROI_list by the previous cell
    # (the recorded output shows patient 170121, names ending e.g. "141 146 168").
    # The earlier two-digit heuristic read a slice count of 168 as 68, giving a
    # wrong reshape and a wrong output name; the dimensions are now taken from
    # the last three whitespace-separated tokens of the name.
    # NOTE(review): this cell otherwise repeats the body of convert_NPY_NII, and
    # its leading indentation is kept exactly as in the original cell.
    print('Converting', str(len(ROI_list)), 'files')
    for call_file in ROI_list:
        call_file = str(call_file)
        orig_fname = os.path.basename(call_file)
        print(orig_fname)

        # Name ends in three integers: height, width, slice count.
        num_height, num_width, num_slice = (int(tok) for tok in orig_fname.split()[-3:])
        pt_numb = orig_fname[0:6]   # patient number, first six characters
        yr_numb = orig_fname[8]     # year digit, e.g. the "3" in "y3"

        # Derive labels per file and fail loudly rather than reuse a stale
        # label left over from a previous iteration or cell.
        if 'Cyst' in orig_fname:
            img_type = 'C'
        elif 'Kidney' in orig_fname:
            img_type = 'M_K'
        elif 'Image' in orig_fname:
            img_type = 'M'
        else:
            raise ValueError("no Cyst/Kidney/Image keyword in file name: %r" % orig_fname)

        if 'Right' in orig_fname:
            side = 'R'
        elif 'Left' in orig_fname:
            side = 'L'
        else:
            raise ValueError("no Right/Left keyword in file name: %r" % orig_fname)

        # Raw uint8 buffer -> (slices, width, height); reshape raises on a size mismatch.
        data = np.fromfile(call_file, dtype='uint8').reshape(num_slice, num_width, num_height)

        resized = np.zeros((num_slice, new_size, new_size), dtype='uint8')
        for slice_idx in range(num_slice):
            re_slice = Image.fromarray(data[slice_idx])
            resized[slice_idx] = resize_with_padding(re_slice, (new_size, new_size))

        # Slice axis goes last: (num_slice, H, W) -> (H, W, num_slice).
        transposed = np.ascontiguousarray(resized.transpose(1, 2, 0))

        new_fname = pt_numb + '_' + yr_numb + '_' + str(num_slice) + '_' + side + '_' + img_type
        # No extension is given; np.save appends ".npy".
        np.save(os.path.join(new_path, new_fname), transposed)
        converted_array = np.array(transposed, dtype=np.float32)
        affine = np.eye(4)
        nifti_file = nib.Nifti1Image(converted_array, affine)
        nib.save(nifti_file, os.path.join(final_path, new_fname))


    print("complete --- nice job")

Converting 6 files
170121 y3 t3 Cyst ROI Left 8bit 141 146 168
170121 y3 t3 Cyst ROI Right 8bit 142 208 168
170121 y3 t3 Image ROI Left 8bit 141 146 168
170121 y3 t3 Image ROI Right 8bit 142 208 168
170121 y3 t3 Kidney ROI Left 8bit 141 146 168
170121 y3 t3 Kidney ROI Right 8bit 142 208 168
complete --- nice job


In [61]:
# Move the cyst volumes (file names containing '_C') out of final_path into cyst_path.
path = final_path

# Full path of every file below final_path. The list is built completely
# before anything is moved, so the walk is not disturbed by the moves.
cyst_names = [
    os.path.join(root, name)
    for root, _dirs, files in os.walk(os.path.normpath(path), topdown=True)
    for name in files
]

# Only the file's own name is tested, not the folders leading to it.
C_name = '_C'
C_list = [full for full in cyst_names if C_name in os.path.basename(full)]

# shutil.move treats a non-existent destination as the new *file name*, so
# without this the first file would be renamed to cyst_path itself.
os.makedirs(cyst_path, exist_ok=True)
for cyst_file in C_list:
    shutil.move(cyst_file, cyst_path)

## Break out into different sized training batches

In [62]:
# Pair each image volume in final_path with the name of its kidney segmentation.
data_path = final_path
images = []
segmentations = []

# os.listdir returns entries in arbitrary order; sorting makes the slices taken
# from `images` (e.g. images[0:20]) reproducible across runs and machines.
for f in sorted(os.listdir(data_path)):
    # Names containing '_K' are the segmentations themselves; they are not
    # listed as images but derived by name from their image partner below.
    if '_K' in f:
        continue
    images.append(f)
    segmentations.append(f.replace('.nii', '_K.nii'))

# NOTE(review): the segmentation names are constructed, not checked on disk.
print(images[0], segmentations[0])
images = np.array(images)
segmentations = np.array(segmentations)

indices = np.arange(len(images)) # we will use this in the next step.

101934_0_96_L_M.nii 101934_0_96_L_M_K.nii


In [63]:
# Number of image volumes available (indices has one entry per image).
print(len(indices))

264


In [68]:
# First 20 image file names form the 20-volume batch.
Batch_20 = images[0:20]
print(Batch_20.shape)
# The batch starts at the beginning of `images`, so these two lines print the same name.
print(Batch_20[0])
print(images[0])

(20,)
101934_0_96_L_M.nii
101934_0_96_L_M.nii
