# This code will train the model and save weights  using a data generator

First Import libraries

In [86]:
import os
import numpy as np
import keras
import tensorflow
from sklearn.model_selection import train_test_split
import nibabel as nib
import tensorflow as tf
from tensorflow.python.keras.utils.data_utils import Sequence
from keras.utils import np_utils
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt
from skimage import measure
from skimage.transform import resize
from keras_unet.metrics import dice_coef
from keras_unet.models import custom_unet
from keras_unet.losses import jaccard_distance
from sklearn.model_selection import train_test_split
from PIL import Image
from PIL import ImageOps
import fnmatch
import nibabel as nib
import shutil

Define our Data Generator and other functions to be used later

In [119]:
class DataGenerator(tensorflow.keras.utils.Sequence):
    'Generates data for Keras'
    def __init__(self, list_IDs, labels, batch_size=12, dim=(512,512,1), n_channels=1,
                 n_classes=2, shuffle=True):
        'Initialization'
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.list_IDs) / self.batch_size))
    #we have rounded the number of total options (list_IDs)/batch size 
    #to get an integer for the length

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        print("indexes are as follows:"+str(indexes))
        #index*batch_size:index+1*batch size - block off a section the size of batchsize

        # Find list of IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]
        print("list IDs are as follows:"+str(list_IDs_temp))
        
        
        

        # Generate data
        X, y = self.__data_generation(list_IDs_temp)

        return X, y

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        'Generates data containing batch_size samples' # X : (n_samples, *dim, n_channels)
        # Initialization
        X = np.empty((self.batch_size, *self.dim, self.n_channels))
        # X shape should be (12,(512,512,1),1)
        y = np.empty((self.batch_size, *self.dim, self.n_channels))
        # y shape should be (12,(512,512,1),1)

        # Generate data
        for i, ID in enumerate(list_IDs_temp):
            # Store sample
            #X[i,] = np.load('data/' + ID + '.npy')
            X[i, ...,0] = np.load('data\\' + ID)

            # Store class
            #y[i,] = np.load(self.labels[ID])
            y[i] = self.labels[ID]
            print(y.shape)

        return X, to_categorical(y, num_classes=self.n_classes)



In [120]:
def gather_set(data_path, phrase):
    set_of = []
    path = data_path + '\\'
    for f in os.listdir(data_path):
      if phrase in f:
        set_of.append(f)
      else:
        continue
    #set_of = np.array(set_of)

    indices = np.array(range(len(set_of))) # we will use this in the next step.

    return set_of

Define the paths where images and segmentations are located. Gather into groups and arrange into a dictionary that can be split up for training, testing, and validation for the model

In [121]:
data_path = r"C:\Users\UAB\Kidney-Segmentation-Jupyter\data\\"

images = gather_set(data_path, '_M.')
labels = gather_set(data_path, '_K')
print(images[0:2], labels[0:2])


['101934_1_96_L_0_M.npy', '101934_1_96_L_10_M.npy'] ['101934_1_96_L_0_K.npy', '101934_1_96_L_10_K.npy']


In [122]:
test_img = np.load(data_path + images[1])
print(test_img.shape)
test_lab = np.load(data_path + labels[1])
print(test_lab.shape)


(512, 512, 1)
(512, 512, 1)


In [123]:
d = {}
for i in images:
    if i not in d:
        d[i] = len(d)

labels_mapping = list(map(d.get, images))
#print(labels_mapping)

labels = {images[i]:labels_mapping[i] for i in range(len(images))}

In [116]:
from sklearn.model_selection import train_test_split
train, val = train_test_split(list(labels.keys()),train_size = 0.8)
partition = {'train':train, 'validation':val}

#print(partition)
#print(labels)

Create our data generators 

In [124]:
params = {'dim': (512,512,1),
          'batch_size': 12,
          'n_classes': 2,
          'n_channels': 1,
          'shuffle': True}
training_generator = DataGenerator(partition['train'], labels, **params)
validation_generator = DataGenerator(partition['validation'], labels, **params)

In [125]:
X,y = training_generator[0]

indexes are as follows:[110  60 154  92 141  68  24 212  41 158 214 222]
list IDs are as follows:['101934_1_96_L_7_M.npy', '101934_1_96_L_72_M.npy', '101934_2_96_L_34_M.npy', '101934_1_96_L_44_M.npy', '101934_1_96_R_94_M.npy', '101934_1_96_R_36_M.npy', '101934_2_96_L_9_M.npy', '101934_1_96_R_45_M.npy', '101934_2_96_L_17_M.npy', '101934_1_96_L_28_M.npy', '101934_1_96_R_54_M.npy', '101934_1_96_L_54_M.npy']
(12, 512, 512, 1, 1)
(12, 512, 512, 1, 1)
(12, 512, 512, 1, 1)
(12, 512, 512, 1, 1)
(12, 512, 512, 1, 1)
(12, 512, 512, 1, 1)
(12, 512, 512, 1, 1)
(12, 512, 512, 1, 1)
(12, 512, 512, 1, 1)
(12, 512, 512, 1, 1)
(12, 512, 512, 1, 1)
(12, 512, 512, 1, 1)


IndexError: index 77 is out of bounds for axis 1 with size 3

In [63]:
model = custom_unet(input_shape=(512,512,1))


In [64]:
# define the checkpoint
from keras.callbacks import ModelCheckpoint
filepath = "data_gen_test.h5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]

In [65]:
model.compile(
  optimizer=tf.keras.optimizers.Adam(learning_rate=3e-4),
  loss=jaccard_distance,
  metrics=[dice_coef])

In [66]:
# Train model on dataset
model.fit(training_generator,
                    validation_data = validation_generator,
                    use_multiprocessing=True,
                    workers=6)

TypeError: expected str, bytes or os.PathLike object, not int

In [39]:
new_list = list(labels.items())
print(new_list[189])

('101934_1_96_R_94_M.npy', 189)


In [43]:
print(new_list[189][0])
problem_child = np.load(data_path + '\\' +new_list[189][0])

101934_1_96_R_94_M.npy


In [44]:
print(problem_child.shape)

(512, 512, 1)
