# This code is meant to take the ImageJ raw files and convert them to .nii files
## It will walk you through opening the files, zero padding them, renaming, and saving


### First import libraries
 For those new to Jupyter Notebook, use shift + enter to run the cell or the play button in the toolbar above


In [29]:
import os
import numpy as np
import nibabel as nib
import tensorflow as tf
import matplotlib.pyplot as plt
from skimage import measure
from skimage.transform import resize
from keras_unet.metrics import dice_coef
from keras_unet.models import custom_unet
from keras_unet.losses import jaccard_distance
from sklearn.model_selection import train_test_split
from PIL import Image
from PIL import ImageOps
import fnmatch
import nibabel as nib
import shutil

Here we are pre-defining some functions to use later

In [30]:
# figure out difference that needs to be made up in rows/columns
def padding(img, expected_size):
    desired_size = expected_size
    delta_width = desired_size - img.size[0]
    delta_height = desired_size - img.size[1]
    pad_width = delta_width // 2
    pad_height = delta_height // 2
    padding = (pad_width, pad_height, delta_width - pad_width, delta_height - pad_height)
    return ImageOps.expand(img, padding)

#after calculating the padding, add in the padding to rows and columns to meet new expected size
def resize_with_padding(img, expected_size):
    img.thumbnail((expected_size[0], expected_size[1]))
    # print(img.size)
    delta_width = expected_size[0] - img.size[0]
    delta_height = expected_size[1] - img.size[1]
    pad_width = delta_width // 2
    pad_height = delta_height // 2
    padding = (pad_width, pad_height, delta_width - pad_width, delta_height - pad_height)
    return ImageOps.expand(img, padding)

In [31]:
def gather_by_name(orignal_path, new_path, phrase):
    n=0
    file_list=[]
    for root, dirs, files in os.walk(os.path.normpath(original_path), topdown=True):
        for name in files:
            if phrase in name:
                file_list.append(os.path.join(root,name))                
                
    for j in range(len(file_list)):
        filename = file_list[j]
        shutil.copy(filename, new_path)

   

    print('copied and moved '+ str(len(file_list))+' files')

In [32]:
def gather_filenames(path):
    raw_path = path
    patient_folders = []
    pt_fnames = []

    import os
    for root, dirs, files in os.walk(os.path.normpath(raw_path), topdown=True):
        for name in files:
            #print(os.path.join(root, name))
            pt_fnames.append(os.path.join(root, name))
    print('\nPatient Folders have been identified\n')
    #sort through and get only the files with ROI in them
    #this eliminates the tiff and 3D files 
    #%%
    ROI_list = []
    for j in range(len(pt_fnames)):
        ROI_name = 'ROI'
        filename = os.path.basename(pt_fnames[j])
        if ROI_name in filename:
            ROI_list.append(pt_fnames[j])
    print('\nFilenames have been found and added\n')
    print('copied and moved '+ str(len(ROI_list))+' files')
    
    return ROI_list

In [49]:
def convert_NPY_NII(ROI_list):
    #new_size= new_size
    print('Converting', str(len(ROI_list)), 'files')
    for i in range(len(ROI_list)): # loop through all the available files from the list that had our keyword
        orig_fname = os.path.basename(ROI_list[i])# grab the ith filename in the list
        print(orig_fname)
        #extract information from the filename
        num_slice = int(orig_fname[-3:])
        #print(num_slice)
        if num_slice < 15:
            orig_fname = this_list[2]
            num_slice = int(orig_fname[-3:])
            num_width = int((orig_fname[-7:-4]))
            #print(num_width)
            num_height = int((orig_fname[-11:-8]))
        elif num_slice < 50:
            #print('over 99')
            num_slice = int(orig_fname[-3:])
            num_width = int((orig_fname[-8:-4]))
            #print(num_width)
            num_height = int((orig_fname[-12:-8]))
            #print(num_height)
        else:
            #print('less than 99')
            num_width = int((orig_fname[-7:-3]))
            #print(num_width)
            num_height = int((orig_fname[-11:-7]))
            #print(num_height)
        pt_numb =(orig_fname[0:6])
        yr_numb = (orig_fname[8])
        if 'Cyst' in orig_fname:
            img_type = 'C'
        elif 'Kidney' in orig_fname:
            img_type = 'M_K'
        elif 'Image' in orig_fname:
            img_type = 'M'
        if 'Right' in orig_fname:
            side = 'R'
        elif 'Left' in orig_fname:
            side = 'L'
        call_file = str(ROI_list[i]) #define our filename with path to open (working_path+'/'+orig_fname)
        resized = np.zeros((num_slice,new_size,new_size), dtype ='uint8')
        transposed = np.zeros((new_size, new_size, num_slice), dtype='uint8')
        with open(r'%s' %call_file, 'rb') as file: #read in raw uint8 and resize correctly
             data = np.fromfile(file, dtype = 'uint8').reshape(num_slice,num_width,num_height)
             for j in range(num_slice):
                 orig_slice = data[j]
                 re_slice = Image.fromarray(orig_slice)
                 resized[j] = resize_with_padding(re_slice, (new_size, new_size))
             for i in range(resized.shape[0]):
                 old_slice = resized[i,:,:]
                 transposed[:,:,i] = old_slice

                 # now we need to rename this resized array and save it as a .npy
        #new_fname = str('%s' %orig_fname + '_RESIZED_') #keep the original name for now 
        new_fname = str(pt_numb +'_'+ yr_numb +'_'+ str(num_slice) +'_'+ side + '_' +  img_type )
        file_name = "%s" %new_fname # add our extension
        np.save(os.path.join(new_path, file_name), transposed) # save in the new file folder
        converted_array = np.array(transposed, dtype=np.float32)
        affine = np.eye(4)
        nifti_file = nib.Nifti1Image(converted_array, affine)
        nib.save(nifti_file, os.path.join(final_path, "%s" %new_fname))


    print("complete --- nice job")

## FilePath Input
Fill in the filepaths for the raw data, the new path to store the npy files, and the final path for the images and annotations 
Make sure to fill out the new sizing as the complete size with zero padding

In [50]:
raw_path = r'C:\Users\UAB\data\UAB\Pt 457036\Pt 457036'
new_path = r'C:\Users\UAB\data\UAB\AllNPY\\'
final_path = r'C:\Users\UAB\data\UAB\AllNIIimages'
cyst_path = r'C:\Users\UAB\data\UAB\CystNII'
new_size = 512

In [51]:
#remove .tif files

In [52]:
this_list = gather_filenames(raw_path)


Patient Folders have been identified


Filenames have been found and added

copied and moved 24 files


In [53]:
orig_fname = this_list[2]
num_slice = int(orig_fname[-3:])
num_width = int((orig_fname[-7:-4]))
            #print(num_width)
num_height = int((orig_fname[-11:-8]))

print(num_slice, num_width, num_height)

105 169 110


In [54]:
print(len(this_list))
print(this_list[1])

24
C:\Users\UAB\data\UAB\Pt 457036\Pt 457036\dciaca\457036 y0 Cyst ROI Right 8bit 102 142 105


In [55]:
convert_NPY_NII(this_list)

Converting 24 files
457036 y0 Cyst ROI Left 8bit 110 169 105
457036 y0 Cyst ROI Right 8bit 102 142 105
457036 y0 Image ROI Left 8bit 110 169 105
457036 y0 Image ROI Right 8bit 102 142 105
457036 y0 Kidney ROI Left 8bit 110 169 105
457036 y0 Kidney ROI Right 8bit 102 142 105
457036 y1 Cyst ROI Left 8bit 112 163 105
457036 y1 Cyst ROI Right 8bit 101 146 105
457036 y1 Image ROI Left 8bit 112 163 105
457036 y1 Image ROI Right 8bit 101 146 105
457036 y1 Kidney ROI Left 8bit 112 163 105
457036 y1 Kidney ROI Right 8bit 101 146 105
457036 y2 Cyst ROI Left 8bit 118 158 105
457036 y2 Cyst ROI Right 8bit 101 141 105
457036 y2 Image ROI Left 8bit 118 158 105
457036 y2 Image ROI Right 8bit 101 141 105
457036 y2 Kidney ROI Left 8bit 118 158 105
457036 y2 Kidney ROI Right 8bit 101 141 105
457036 y3 Cyst ROI Left 8bit 118 161 111
457036 y3 Cyst ROI Right 8bit 106 138 111
457036 y3 Image ROI Left 8bit 118 161 111
457036 y3 Image ROI Right 8bit 106 138 111
457036 y3 Kidney ROI Left 8bit 118 161 111
4570

## There are two exceptions to the above code- Pt 136055 and Pt 170121
Deal with these manually to utalize the full set


In [13]:
Problem_path = r'C:\Users\UAB\data\Problems'
pt_fnames=[]
ROI_list = []
import os
for root, dirs, files in os.walk(os.path.normpath(Problem_path), topdown=True):
    for name in files:
        #print(os.path.join(root, name))
        pt_fnames.append(os.path.join(root, name))
print('\nPatient Folders have been identified \n')
#sort through and get only the files with ROI in them
#this eliminates the tiff and 3D files 
#%%
ROI_list = []
for j in range(len(pt_fnames)):
    ROI_name = 'ROI'
    filename = os.path.basename(pt_fnames[j])
    if ROI_name in filename:
        ROI_list.append(pt_fnames[j])
print('\nFilenames have been found and added\n')


Patient Folders have been identified 


Filenames have been found and added



In [14]:

print('Converting', str(len(ROI_list)), 'files')
for i in range(len(ROI_list)): # loop through all the available files from the list that had our keyword
    orig_fname = os.path.basename(ROI_list[i])# grab the ith filename in the list
    print(orig_fname)
    num_slice = 126
    num_width = 151
    num_height = 97
    pt_numb = 136055
    yr_numb = 0
    if 'Cyst' in orig_fname:
        img_type = 'C'
    elif 'Kidney' in orig_fname:
        img_type = 'M_K'
    elif 'Image' in orig_fname:
        img_type = 'M'
    if 'Right' in orig_fname:
        side = 'R'
    elif 'Left' in orig_fname:
        side = 'L'
    call_file = str(ROI_list[i]) #define our filename with path to open (working_path+'/'+orig_fname)
    resized = np.zeros((num_slice,new_size,new_size), dtype ='uint8')
    transposed = np.zeros((new_size, new_size, num_slice), dtype='uint8')
    with open(r'%s' %call_file, 'rb') as file: #read in raw uint8 and resize correctly
         data = np.fromfile(file, dtype = 'uint8').reshape(num_slice,num_width,num_height)
         for j in range(num_slice):
             orig_slice = data[j]
             re_slice = Image.fromarray(orig_slice)
             resized[j] = resize_with_padding(re_slice, (new_size, new_size))
         for i in range(resized.shape[0]):
             old_slice = resized[i,:,:]
             transposed[:,:,i] = old_slice
             
             # now we need to rename this resized array and save it as a .npy
    #new_fname = str('%s' %orig_fname + '_RESIZED_') #keep the original name for now 
    new_fname = str(str(pt_numb) +'_'+ str(yr_numb) +'_'+ str(num_slice) +'_'+ side + '_' +  img_type )
    file_name = "%s" %new_fname # add our extension
    np.save(os.path.join(new_path, file_name), transposed) # save in the new file folder
    converted_array = np.array(transposed, dtype=np.float32)
    affine = np.eye(4)
    nifti_file = nib.Nifti1Image(converted_array, affine)
    nib.save(nifti_file, os.path.join(final_path, "%s" %new_fname))


print("complete --- nice job")

Converting 3 files
136055 y0 t3 Cyst ROI Left 8bit 97 151 126
136055 y0 t3 Image ROI Left 8bit 97 151 126
136055 y0 t3 Kidney ROI Left 8bit 97 151 126
complete --- nice job


In [9]:
Problem_path = r'C:\Users\UAB\data\UAB\Pt 456283\Pt 456283'
pt_fnames=[]
ROI_list = []
import os
for root, dirs, files in os.walk(os.path.normpath(Problem_path), topdown=True):
    for name in files:
        #print(os.path.join(root, name))
        pt_fnames.append(os.path.join(root, name))
print('\nPatient Folders have been identified \n')
#sort through and get only the files with ROI in them
#this eliminates the tiff and 3D files 
#%%
ROI_list = []
for j in range(len(pt_fnames)):
    ROI_name = 'ROI'
    filename = os.path.basename(pt_fnames[j])
    if ROI_name in filename:
        ROI_list.append(pt_fnames[j])
print('\nFilenames have been found and added\n')


Patient Folders have been identified 


Filenames have been found and added



In [24]:
print(ROI_list[3])
print(ROI_list[0][-6:-3])

C:\Users\UAB\data\UAB\Pt 456283\Pt 456283\Pt 456283 2002 06 26 F 19Y\456283 y1 Image ROI Right 8bit 200 200 9
200


In [27]:
new_path = r'C:\Users\UAB\data\UAB\AllNPY\\'
final_path = r'C:\Users\UAB\data\UAB\AllNIIimages'
new_size = 512

In [28]:
    print('Converting', str(len(ROI_list)), 'files')
    for i in range(len(ROI_list)): # loop through all the available files from the list that had our keyword
        orig_fname = os.path.basename(ROI_list[i])# grab the ith filename in the list
        print(orig_fname)
        #extract information from the filename
        num_slice = int(orig_fname[-2:])
        #print(num_slice)
        if num_slice < 10:
            #print('over 99')
            num_slice = int(orig_fname[-1:])
            num_width = int((orig_fname[-5:-2]))
            #print(num_width)
            num_height = int((orig_fname[-9:-6]))
            #print(num_height)
        else:
            #print('over 99')
            num_slice = int(orig_fname[-2:])
            num_width = int((orig_fname[-6:-3]))
            #print(num_width)
            num_height = int((orig_fname[-10:-7]))
            #print(num_height)
        pt_numb =(orig_fname[0:6])
        yr_numb = (orig_fname[8])
        if 'Cyst' in orig_fname:
            img_type = 'C'
        elif 'Kidney' in orig_fname:
            img_type = 'M_K'
        elif 'Image' in orig_fname:
            img_type = 'M'
        if 'Right' in orig_fname:
            side = 'R'
        elif 'Left' in orig_fname:
            side = 'L'
        call_file = str(ROI_list[i]) #define our filename with path to open (working_path+'/'+orig_fname)
        resized = np.zeros((num_slice,new_size,new_size), dtype ='uint8')
        transposed = np.zeros((new_size, new_size, num_slice), dtype='uint8')
        with open(r'%s' %call_file, 'rb') as file: #read in raw uint8 and resize correctly
             data = np.fromfile(file, dtype = 'uint8').reshape(num_slice,num_width,num_height)
             for j in range(num_slice):
                 orig_slice = data[j]
                 re_slice = Image.fromarray(orig_slice)
                 resized[j] = resize_with_padding(re_slice, (new_size, new_size))
             for i in range(resized.shape[0]):
                 old_slice = resized[i,:,:]
                 transposed[:,:,i] = old_slice

                 # now we need to rename this resized array and save it as a .npy
        #new_fname = str('%s' %orig_fname + '_RESIZED_') #keep the original name for now 
        new_fname = str(pt_numb +'_'+ yr_numb +'_'+ str(num_slice) +'_'+ side + '_' +  img_type )
        file_name = "%s" %new_fname # add our extension
        np.save(os.path.join(new_path, file_name), transposed) # save in the new file folder
        converted_array = np.array(transposed, dtype=np.float32)
        affine = np.eye(4)
        nifti_file = nib.Nifti1Image(converted_array, affine)
        nib.save(nifti_file, os.path.join(final_path, "%s" %new_fname))


    print("complete --- nice job")

Converting 18 files
456283 y1 Cyst ROI Left 8bit 200 200 10
456283 y1 Cyst ROI Right 8bit 200 200 9
456283 y1 Image ROI Left 8bit 200 200 10
456283 y1 Image ROI Right 8bit 200 200 9
456283 y1 Kidney ROI Left 8bit 200 200 10
456283 y1 Kidney ROI Right 8bit 200 200 9
456283 y2 Cyst ROI Left 8bit 220 220 10
456283 y2 Cyst ROI Right 8bit 200 200 9
456283 y2 Image ROI Left 8bit 220 220 10
456283 y2 Image ROI Right 8bit 200 200 9
456283 y2 Kidney ROI Left 8bit 220 220 10
456283 y2 Kidney ROI Right 8bit 200 200 9
456283 y3 Cyst ROI Left 8bit 230 230 10
456283 y3 Cyst ROI Right 8bit 200 200 9
456283 y3 Image ROI Left 8bit 230 230 10
456283 y3 Image ROI Right 8bit 200 200 9
456283 y3 Kidney ROI Left 8bit 230 230 10
456283 y3 Kidney ROI Right 8bit 200 200 9
complete --- nice job


## Re-assign Masks
Move the masks to a new location in the file structure desired

In [56]:
path = r'C:\Users\UAB\data\UAB\AllNPY'
cyst_path = r'C:\Users\UAB\data\UAB\CystNII'
cyst_names=[]
for root, dirs, files in os.walk(os.path.normpath(path), topdown=True):
    for name in files:
        #print(os.path.join(root, name))
        cyst_names.append(os.path.join(root, name))

C_list = []
for j in range(len(cyst_names)):
    C_name = '_C'
    filename = os.path.basename(cyst_names[j])
    if C_name in filename:
        C_list.append(cyst_names[j])

for i in range(len(C_list)):
    shutil.move(C_list[i],cyst_path )

## Binarize masks
first gather the set based on the naming convention above, then binarize and save in a different location

In [57]:
def gather_set(data_path, phrase):
    set_of = []
    path = data_path + '\\'
    for f in os.listdir(data_path):
      if phrase in f:
        set_of.append(f)
      else:
        continue
    set_of = np.array(set_of)

    indices = np.array(range(len(set_of))) # we will use this in the next step.

    return set_of



In [58]:
data_path = r'C:\Users\UAB\data\UAB\AllNPY'
final_path = r'C:\Users\UAB\data\UAB\AllNPY'
kidney_non = gather_set(data_path, '_K')

In [59]:
seg_list=kidney_non

for i in range(len(seg_list)):
    segment = nib.load(data_path+"\\"+seg_list[i])
    seg_data = segment.get_fdata()
    binarized = np.where(seg_data>1,1,seg_data)
    affine = np.eye(4)
    nifti_file = nib.Nifti1Image(binarized, affine)
    nib.save(nifti_file, os.path.join(final_path, "%s" %seg_list[i]))
    

ImageFileError: Cannot work out file type of "C:\Users\UAB\data\UAB\AllNPY\407132_0_10_L_M_K.npy"

# Save as single slices


In [1]:
import numpy as np
import os 



def gather_set(data_path, phrase):
    set_of = []
    path = data_path + '\\'
    for f in os.listdir(data_path):
      if phrase in f:
        set_of.append(f)
      else:
        continue
    #set_of = np.array(set_of)

    indices = np.array(range(len(set_of))) # we will use this in the next step.

    return set_of

In [62]:
data_path = r"C:\Users\UAB\data\UAB\AllNPY"

images = gather_set(data_path, '_M.')
labels = gather_set(data_path, '_K')


In [None]:
import nibabel as nib
new_path = r"C:\Users\UAB\Kidney-Segmentation-Jupyter\data\TwoDim"
for i in range(len(images)):
    working_img = nib.load(data_path + '\\' + images[i])
    working_img = working_img.get_fdata()
    file_name = images[i][:-5]
    for j in range(working_img.shape[-1]):
        save_slice = working_img[:,:,j]
        new_fname = str(file_name + str(j) +'_M')
        np.save(os.path.join(new_path, new_fname), save_slice)

In [None]:
for i in range(len(labels)):
    working_img = nib.load(data_path + '\\' + labels[i])
    working_img = working_img.get_fdata()
    file_name = labels[i][:-7]
    for j in range(working_img.shape[-1]):
        save_slice = working_img[:,:,j]
        new_fname = str(file_name + str(j) +'_K')
        np.save(os.path.join(new_path, new_fname), save_slice)

In [None]:
data_path = r"C:\Users\UAB\data\UAB\AllNPY"
new_path = r"C:\Users\UAB\data\UAB\new"

images = gather_set(data_path, '_M.')
labels = gather_set(data_path, '_K')


for i in range(len(labels)):
    working_img = np.load(data_path + '\\' + labels[i])
    file_name = labels[i][:-7]
    for j in range(working_img.shape[-1]):
        save_slice = working_img[:,:,j]
        new_fname = str(file_name + str(j) +'_K')
        np.save(os.path.join(new_path, new_fname), save_slice)
        
for i in range(len(images)):
    working_img = np.load(data_path + '\\' + images[i])
    file_name = images[i][:-5]
    for j in range(working_img.shape[-1]):
        save_slice = working_img[:,:,j]
        new_fname = str(file_name + str(j) +'_M')
        np.save(os.path.join(new_path, new_fname), save_slice)

check sizes and range


In [8]:
image_test = np.load(r"C:\Users\UAB\Kidney-Segmentation-Jupyter\data\TwoDim\105005_0_84_L_0_M.npy")
print(image_test.max(), image_test.shape)
label_test = np.load(r"C:\Users\UAB\Kidney-Segmentation-Jupyter\data\TwoDim\150534_2_141_L_81_K.npy")
print(label_test.max(), label_test.shape)

141.0 (512, 512)
1.0 (512, 512)


binarize labels

In [60]:
final_path = r"C:\Users\UAB\data\UAB\AllNPY"
seg_list = gather_set(final_path, '_K')


for i in range(len(seg_list)):
    seg_data = np.load(final_path+"\\"+seg_list[i])
    file_name = seg_list[i][:-4]
    binarized = np.where(seg_data>1,1,seg_data)
    np.save(os.path.join(final_path, file_name), binarized)

In [61]:
label_test = np.load(r"C:\Users\UAB\data\UAB\AllNPY\457036_3_111_R_M_K.npy")
print(label_test.max(), label_test.shape)

1 (512, 512, 111)
