## 11. Final Functions

In [1]:
import numpy as np
import pandas as pd
from PIL import Image
import tensorflow as tf
import time

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
path = '/content/drive/MyDrive/cs2/'

In [13]:
#Loading Unet++ trained model
objects = {'bce_dice_loss':bce_dice_loss, 'dice_coefficient': dice_coefficient}
model = tf.keras.models.load_model(path + 'unet_pp', custom_objects = objects)

In [8]:
#loading Validation Data dataframe
vali_data = pd.read_csv(path + "data/validtn_data.csv").fillna('')
x= vali_data['ImageId'].tolist()
y = list(vali_data[['rle_1', 'rle_2', 'rle_3', 'rle_4']].to_records(index = False))

### 11.1 Final Function - 1

In [15]:
#Implementing custom data generator for predictions
#https://towardsdatascience.com/implementing-custom-data-generators-in-keras-de56f013581c
class PredictDataGenerator(tf.keras.utils.Sequence):
    def __init__(self,total_img_list, img_folder_path, list_idcs, batch_size=32 ):
        self.batch_size = batch_size
        self.img_list = total_img_list
        self.path = img_folder_path
        self.list_idcs = list_idcs
        self.indices = list(range(0,len(total_img_list)))
        #self.rem = len(self.list_idcs) % (self.batch_size)
        self.on_epoch_end()

    def __len__(self):
        return len(self.list_idcs) // (self.batch_size)

    def __getitem__(self, index):
        index = self.index[index * self.batch_size:(index + 1) * self.batch_size]
        batch = [self.indices[k] for k in index]
        X = self.__get_data(batch)
        return X
    
    def on_epoch_end(self):
        self.index = np.arange(len(self.list_idcs))

    def __get_data(self, batch):
        X = np.empty((self.batch_size,256,1600,3),dtype=np.float32) # image place-holders
              
        for i, id in enumerate(batch):
            img = Image.open( self.path + str(self.img_list[id]))
            X[i,] = img#input image

        return X
        
def final_fun_1(X):
    '''
    X: List of Image Ids
    returns: List of tuples(containing rles of predicted masks) for the corresponding Image Ids
    '''        
    predictions_list = []
    for i in range(0,len(X),320):
        batch_idcs =  list(range(i, min(len(X), i + 320)))
        if len(batch_idcs)== 320:        
            sub_batch = PredictDataGenerator(total_img_list = X,
                                          img_folder_path = path + 'data/train_images/',
                                          list_idcs = batch_idcs)
        else:
            sub_batch = PredictDataGenerator(total_img_list = X,
                                          img_folder_path = path + 'data/train_images/',
                                          list_idcs = batch_idcs,
                                          batch_size= len(batch_idcs))

        subbatch_pred_masks = model.predict(sub_batch)

        for j, idx in enumerate(batch_idcs):
            rle1 = mask2rle(subbatch_pred_masks[j,:,:,0].round().astype(int))
            rle2 = mask2rle(subbatch_pred_masks[j,:,:,1].round().astype(int))
            rle3 = mask2rle(subbatch_pred_masks[j,:,:,2].round().astype(int))
            rle4 = mask2rle(subbatch_pred_masks[j,:,:,3].round().astype(int))
            predictions_list.append((rle1, rle2, rle3, rle4))
    
    return predictions_list

In [17]:
start_time = time.time()
predicted_masks = final_fun_1(x[0:1000])
print("Total time taken for prediction: %s seconds" % (time.time() - start_time))

Total time taken for prediction: 33.25752305984497 seconds


### 11.1 Final Function - 2

In [10]:
# Implementing custom data generator for validation or evaluation
#https://towardsdatascience.com/implementing-custom-data-generators-in-keras-de56f013581c
class ValidationDataGenerator(tf.keras.utils.Sequence):
    def __init__(self,total_img_list, total_mask_list, img_folder_path, list_idcs, batch_size=32 ):
        self.batch_size = batch_size
        self.img_list = total_img_list
        self.mask_list = total_mask_list
        self.path = img_folder_path
        self.list_idcs = list_idcs
        self.indices = list(range(0,len(total_img_list)))
        self.on_epoch_end()

    def __len__(self):
        return len(self.list_idcs) // (self.batch_size)

    def __getitem__(self, index):
        index = self.index[index * self.batch_size:(index + 1) * self.batch_size]
        batch = [self.indices[k] for k in index]
        X,Y = self.__get_data(batch)
        return X,Y

    def on_epoch_end(self):
        self.index = np.arange(len(self.list_idcs))

    def __get_data(self, batch):
        X = np.empty((self.batch_size,256,1600,3),dtype=np.float32) # image place-holders
        Y = np.empty((self.batch_size,256,1600,4),dtype=np.float32)# 4 masks place-holders

        for i, id in enumerate(batch):
            img = Image.open( self.path + str(self.img_list[id]))
            X[i,] = img#input image
            for j in range(4): #looping for each class
                Y[i,:,:,j] = rle2mask(self.mask_list[id][j])#mask for each class

        return X, Y

        
def final_fun_2(X, Y):
    '''
    X: List of Image Ids
    Y: List of tuples(containing rles of actual/ground-truth masks) for the corresponding Image Ids
    returns: dice-coeeficient calculated based on the the predictions
    '''
       
    validtn_batches = ValidationDataGenerator(total_img_list = x,
                                          total_mask_list = y,
                                          img_folder_path = path + 'data/train_images/',
                                          list_idcs = list(range(0,len(x))))
    loss, dice_coefficient = model.evaluate(validtn_batches,verbose=None)
    return dice_coefficient


In [11]:
start_time = time.time()
dice_coeff = final_fun_2(x, y)
print('Dice Coefficient for validation data: ', dice_coeff)
print("Total time taken for metric computation: %s seconds" % (time.time() - start_time))

Dice Coefficient for validation data:  0.5786705017089844
Total time taken for metric computation: 468.059677362442 seconds


In [12]:
start_time = time.time()
dice_coeff = final_fun_2(x, y)
print('Dice Coefficient for validation data: ', dice_coeff)
print("Total time taken for metric computation: %s seconds" % (time.time() - start_time))


Dice Coefficient for validation data:  0.5786705017089844
Total time taken for metric computation: 33.57013773918152 seconds
