## Import Libraries

In [1]:
from keras.models import load_model
from keras.callbacks import Callback, ModelCheckpoint
from keras.losses import binary_crossentropy
from keras.layers.merge import concatenate
from keras.layers.pooling import MaxPooling2D
from tensorflow.keras.metrics import *
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import *
from tensorflow.keras import *
from tensorflow import keras
import tensorflow as tf
from PIL import Image
from pathlib import Path
import matplotlib as mpl
import matplotlib.patches as mpatches
import matplotlib.gridspec as gridspec
import seaborn as sns
import matplotlib.pyplot as plt
from glob import glob
from sklearn.model_selection import train_test_split
import pandas as pd
import itertools
from itertools import groupby
import random
import cv2
from tqdm.notebook import tqdm
from tqdm import tqdm
import os
from skimage import measure

import numpy as np
import pandas as pd 

import warnings
warnings.filterwarnings("ignore")
# from google.colab.patches import cv2_imshow

## Helper function

In [2]:
# ref: https://www.kaggle.com/paulorzp/run-length-encode-and-decode
# modified from: https://www.kaggle.com/inversion/run-length-decoding-quick-start
def rle_decode(mask_rle, shape, color=1):
    """ TBD
    
    Args:
        mask_rle (str): run-length as string formated (start length)
        shape (tuple of ints): (height,width) of array to return 
    
    Returns: 
        Mask (np.array)
            - 1 indicating mask
            - 0 indicating background

    """
    # Split the string by space, then convert it into a integer array
    s = np.array(mask_rle.split(), dtype=int)

    # Every even value is the start, every odd value is the "run" length
    starts = s[0::2] - 1
    lengths = s[1::2]
    ends = starts + lengths

    # The image image is actually flattened since RLE is a 1D "run"
    if len(shape)==3:
        h, w, d = shape
        img = np.zeros((h * w, d), dtype=np.float32)
    else:
        h, w = shape
        img = np.zeros((h * w,), dtype=np.float32)

    # The color here is actually just any integer you want!
    for lo, hi in zip(starts, ends):
        img[lo : hi] = color
        
    # Don't forget to change the image back to the original shape
    return img.reshape(shape)


def rle_encode(arr):
    arr = arr.reshape(-1)
    indexes = (np.where(arr[1:] != arr[:-1])[0])+1
    final = []
    one = indexes[0:-1:2]
    two = indexes[1::2]
    for start,end in zip(one, two):
        final.append(start)
        final.append(end-start)
    return ' '.join(map(str,final))

def open_gray16(_path, normalize=True, to_rgb=False):
    """ Helper to open files """
    if normalize:
        if to_rgb:
            return np.tile(np.expand_dims(cv2.imread(_path, cv2.IMREAD_ANYDEPTH)/65535., axis=-1), 3)
        else:
            return cv2.imread(_path, cv2.IMREAD_ANYDEPTH)/65535.
    else:
        if to_rgb:
            return np.tile(np.expand_dims(cv2.imread(_path, cv2.IMREAD_ANYDEPTH), axis=-1), 3)
        else:
            return cv2.imread(_path, cv2.IMREAD_ANYDEPTH)

#### Metrics

In [3]:
def iou_coef(y_true, y_pred, smooth=1):
  intersection = K.sum(K.abs(y_true * y_pred), axis=[1,2,3])
  union = K.sum(y_true,[1,2,3])+K.sum(y_pred,[1,2,3])-intersection
  iou = K.mean((intersection + smooth) / (union + smooth), axis=0)

  return iou
def dice_coef(y_true, y_pred, smooth=1):
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)

def mean_iou(y_true, y_pred):
    yt0 = y_true[:,:,:,0]
    yp0 = K.cast(y_pred[:,:,:,0] > 0.5, 'float32')
    inter = tf.count_nonzero(tf.logical_and(tf.equal(yt0, 1), tf.equal(yp0, 1)))
    union = tf.count_nonzero(tf.add(yt0, yp0))
    iou = tf.where(tf.equal(union, 0), 1., tf.cast(inter/union, 'float32'))
    return iou

def dice_loss(y_true, y_pred):
    smooth = 1.
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = y_true_f * y_pred_f
    score = (2. * K.sum(intersection) + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)
    return 1. - score

def bce_dice_loss(y_true, y_pred):
    return binary_crossentropy(tf.cast(y_true, tf.float32), y_pred) + 0.5 * dice_loss(tf.cast(y_true, tf.float32), y_pred)    

## Load model

In [4]:
from tensorflow import keras

class FixedDropout(keras.layers.Dropout):
    def _get_noise_shape(self, inputs):
        if self.noise_shape is None:
            return self.noise_shape

        symbolic_shape = K.shape(inputs)
        noise_shape = [symbolic_shape[axis] if shape is None else shape
                       for axis, shape in enumerate(self.noise_shape)]
        return tuple(noise_shape)
    
    
custom_objects = {
#     'FixedDropout':FixedDropout,
    'dice_coef': dice_coef,
    'iou_coef': iou_coef,
    'bce_dice_loss': bce_dice_loss
}


model = keras.models.load_model('../input/unet-model/file/file/model_32', custom_objects=custom_objects)

2022-07-03 12:30:40.598033: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


## DATA

In [5]:
df1 = pd.read_csv('../input/uw-madison-gi-tract-image-segmentation/train.csv')
print(df1.head())

                         id        class segmentation
0  case123_day20_slice_0001  large_bowel          NaN
1  case123_day20_slice_0001  small_bowel          NaN
2  case123_day20_slice_0001      stomach          NaN
3  case123_day20_slice_0002  large_bowel          NaN
4  case123_day20_slice_0002  small_bowel          NaN


In [6]:
test_df = pd.read_csv('../input/uw-madison-gi-tract-image-segmentation/sample_submission.csv')

if len(test_df)==0:
    DEBUG=True
    test_df = df1.iloc[:300,:]
    test_df = test_df[["id", "class"]]
    test_df["predicted"] = ""
   
else:
    DEBUG=False

submission1 = test_df.copy()
print(test_df.head())


                         id        class predicted
0  case123_day20_slice_0001  large_bowel          
1  case123_day20_slice_0001  small_bowel          
2  case123_day20_slice_0001      stomach          
3  case123_day20_slice_0002  large_bowel          
4  case123_day20_slice_0002  small_bowel          


In [7]:
submission1.head()

Unnamed: 0,id,class,predicted
0,case123_day20_slice_0001,large_bowel,
1,case123_day20_slice_0001,small_bowel,
2,case123_day20_slice_0001,stomach,
3,case123_day20_slice_0002,large_bowel,
4,case123_day20_slice_0002,small_bowel,


In [8]:
# Metadata
def preprocessing(df, subset="train"):
    df["case"] = df["id"].apply(lambda x: int(x.split("_")[0].replace("case", "")))
    df["day"] = df["id"].apply(lambda x: int(x.split("_")[1].replace("day", "")))
    df["slice"] = df["id"].apply(lambda x: x.split("_")[3])
    
    if (subset=="train") or (DEBUG):
        DIR="../input/uw-madison-gi-tract-image-segmentation/train"
        #DIR="/content/drive/MyDrive/Kaggle/uw-madison-gi-tract-image-segmentation/train"
    else:
        DIR="../input/uw-madison-gi-tract-image-segmentation/test"
        #DIR="/content/drive/MyDrive/Kaggle/uw-madison-gi-tract-image-segmentation/test"
    
    all_images = glob(os.path.join(DIR, "**", "*.png"), recursive=True)
    x = all_images[0].rsplit("/", 4)[0] 

    path_partial_list = []
    for i in range(0, df.shape[0]):
        path_partial_list.append(os.path.join(x,
                              "case"+str(df["case"].values[i]),
                              "case"+str(df["case"].values[i])+"_"+ "day"+str(df["day"].values[i]),
                              "scans",
                              "slice_"+str(df["slice"].values[i])))
    df["path_partial"] = path_partial_list
    #--------------------------------------------------------------------------
    path_partial_list = []
    for i in range(0, len(all_images)):
        path_partial_list.append(str(all_images[i].rsplit("_",4)[0]))

    tmp_df = pd.DataFrame()
    tmp_df['path_partial'] = path_partial_list
    tmp_df['path'] = all_images

    #--------------------------------------------------------------------------
    df = df.merge(tmp_df, on="path_partial").drop(columns=["path_partial"])
    #print(df['id'])
    #--------------------------------------------------------------------------
    df["filename"] = df["path"].apply(lambda x: x.split('/')[-1])
    df['unique_filename'] = df.apply(lambda row: str(row.case) + '_' + str(row.day)+'_'+str(row.filename), axis=1)

    df["width"] = df["path"].apply(lambda x: int(x[:-4].rsplit("_",4)[1]))
    df["height"] = df["path"].apply(lambda x: int(x[:-4].rsplit("_",4)[2]))
    #--------------------------------------------------------------------------
    df["px_spacing_h"] = df["path"].apply(lambda x: float(x[:-4].rsplit("_",4)[3]))
    df["px_spacing_w"] = df["path"].apply(lambda x: float(x[:-4].rsplit("_",4)[4][:4]))
    #--------------------------------------------------------------------------
    del x, path_partial_list, tmp_df
    #--------------------------------------------------------------------------
    # new_col_order = ["id", "path", "segmentation"
    #                  "height", "width", "px_spacing_h", 
    #                  "px_spacing_w",  "case",  "day", "slice",]

    # if subset!="train": 
    #   new_col_order.insert(1, "class")

    # new_col_order = [_c for _c in new_col_order if _c in df.columns]
    # df = df[new_col_order]
    return df

In [9]:
test_df = preprocessing(test_df, subset="test")
print(test_df.shape)
test_df.head()

(300, 13)


Unnamed: 0,id,class,predicted,case,day,slice,path,filename,unique_filename,width,height,px_spacing_h,px_spacing_w
0,case123_day20_slice_0001,large_bowel,,123,20,1,../input/uw-madison-gi-tract-image-segmentatio...,slice_0001_266_266_1.50_1.50.png,123_20_slice_0001_266_266_1.50_1.50.png,266,266,1.5,1.5
1,case123_day20_slice_0001,small_bowel,,123,20,1,../input/uw-madison-gi-tract-image-segmentatio...,slice_0001_266_266_1.50_1.50.png,123_20_slice_0001_266_266_1.50_1.50.png,266,266,1.5,1.5
2,case123_day20_slice_0001,stomach,,123,20,1,../input/uw-madison-gi-tract-image-segmentatio...,slice_0001_266_266_1.50_1.50.png,123_20_slice_0001_266_266_1.50_1.50.png,266,266,1.5,1.5
3,case123_day20_slice_0002,large_bowel,,123,20,2,../input/uw-madison-gi-tract-image-segmentatio...,slice_0002_266_266_1.50_1.50.png,123_20_slice_0002_266_266_1.50_1.50.png,266,266,1.5,1.5
4,case123_day20_slice_0002,small_bowel,,123,20,2,../input/uw-madison-gi-tract-image-segmentatio...,slice_0002_266_266_1.50_1.50.png,123_20_slice_0002_266_266_1.50_1.50.png,266,266,1.5,1.5


In [10]:
train_df = preprocessing(df1, subset="train")
train_df.head()

Unnamed: 0,id,class,segmentation,case,day,slice,path,filename,unique_filename,width,height,px_spacing_h,px_spacing_w
0,case123_day20_slice_0001,large_bowel,,123,20,1,../input/uw-madison-gi-tract-image-segmentatio...,slice_0001_266_266_1.50_1.50.png,123_20_slice_0001_266_266_1.50_1.50.png,266,266,1.5,1.5
1,case123_day20_slice_0001,small_bowel,,123,20,1,../input/uw-madison-gi-tract-image-segmentatio...,slice_0001_266_266_1.50_1.50.png,123_20_slice_0001_266_266_1.50_1.50.png,266,266,1.5,1.5
2,case123_day20_slice_0001,stomach,,123,20,1,../input/uw-madison-gi-tract-image-segmentatio...,slice_0001_266_266_1.50_1.50.png,123_20_slice_0001_266_266_1.50_1.50.png,266,266,1.5,1.5
3,case123_day20_slice_0002,large_bowel,,123,20,2,../input/uw-madison-gi-tract-image-segmentatio...,slice_0002_266_266_1.50_1.50.png,123_20_slice_0002_266_266_1.50_1.50.png,266,266,1.5,1.5
4,case123_day20_slice_0002,small_bowel,,123,20,2,../input/uw-madison-gi-tract-image-segmentatio...,slice_0002_266_266_1.50_1.50.png,123_20_slice_0002_266_266_1.50_1.50.png,266,266,1.5,1.5


In [11]:
# Remove misslabeled training data
train_df = train_df[(train_df['case'] != 7) | (train_df['case'] != 0)].reset_index(drop=True)
train_df = train_df[(train_df['case'] != 81) | (train_df['case'] != 30)].reset_index(drop=True)


In [12]:
def segment(df, subset="train"):
    # RESTRUCTURE  DATAFRAME
    df_out = pd.DataFrame({'id': df['id'][::3]})

    if subset == "train":
        df_out['large_bowel'] = df['segmentation'][::3].values
        df_out['small_bowel'] = df['segmentation'][1::3].values
        df_out['stomach'] = df['segmentation'][2::3].values

    df_out['path'] = df['path'][::3].values
    df_out['case'] = df['case'][::3].values
    df_out['day'] = df['day'][::3].values
    df_out['slice'] = df['slice'][::3].values
    df_out['width'] = df['width'][::3].values
    df_out['height'] = df['height'][::3].values
    df_out['px_spacing_h'] =  df['px_spacing_h'][::3].values
    df_out['px_spacing_w'] =  df['px_spacing_w'][::3].values
    df_out['filename'] = df['filename'][::3].values
    df_out['unique_filename'] = df['unique_filename'][::3].values
  # final_df = final_df.drop_duplicates(subset=["id"])

    df_out.reset_index(inplace=True,drop=True)
    df_out.fillna('',inplace=True)
    if subset=="train":
        df_out['count'] = np.sum(df_out.iloc[:,1:4]!='',axis=1).values
    
    return df_out
    
train_df = segment(train_df)

In [13]:
train_df.sample(5)

Unnamed: 0,id,large_bowel,small_bowel,stomach,path,case,day,slice,width,height,px_spacing_h,px_spacing_w,filename,unique_filename,count
15331,case154_day16_slice_0100,30913 3 31178 7 31443 10 31708 13 31973 15 321...,22491 5 22755 9 23010 3 23020 12 23274 7 23284...,,../input/uw-madison-gi-tract-image-segmentatio...,154,16,100,266,266,1.5,1.5,slice_0100_266_266_1.50_1.50.png,154_16_slice_0100_266_266_1.50_1.50.png,2
21151,case148_day20_slice_0016,,,,../input/uw-madison-gi-tract-image-segmentatio...,148,20,16,360,310,1.5,1.5,slice_0016_360_310_1.50_1.50.png,148_20_slice_0016_360_310_1.50_1.50.png,0
24579,case108_day13_slice_0020,,,,../input/uw-madison-gi-tract-image-segmentatio...,108,13,20,266,266,1.5,1.5,slice_0020_266_266_1.50_1.50.png,108_13_slice_0020_266_266_1.50_1.50.png,0
12267,case113_day12_slice_0060,39474 9 39832 14 40191 17 40550 19 40909 20 41...,,30434 9 30791 15 31149 20 31508 22 31867 24 32...,../input/uw-madison-gi-tract-image-segmentatio...,113,12,60,360,310,1.5,1.5,slice_0060_360_310_1.50_1.50.png,113_12_slice_0060_360_310_1.50_1.50.png,2
11821,case113_day19_slice_0046,,,,../input/uw-madison-gi-tract-image-segmentatio...,113,19,46,360,310,1.5,1.5,slice_0046_360_310_1.50_1.50.png,113_19_slice_0046_360_310_1.50_1.50.png,0


## Data Generator

In [14]:
BATCH_SIZE = 32
im_height = 128
im_width = 128

In [15]:

class DataGenerator(tf.keras.utils.Sequence):
    def __init__(self, df, batch_size = BATCH_SIZE, subset="train", shuffle=False):
        super().__init__()
        self.df = df
        self.shuffle = shuffle
        self.subset = subset
        self.batch_size = batch_size
        self.indexes = np.arange(len(df))
        
        self.on_epoch_end()

    def __len__(self):
        return int(np.ceil(len(self.df) / self.batch_size))
    
    def on_epoch_end(self):
      # 'Updates indexes after each epoch'
      # self.indexes = np.arrange(len(self.df))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)
    
    def __getitem__(self, index):
      #  GET ONE BATCH
        X = np.empty((self.batch_size,im_height,im_width,3))
        y = np.empty((self.batch_size,im_height,im_width,3))
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
   
        ids = list(self.df['id'].iloc[indexes])
        #print(self.ids)
        for i, img_path in enumerate(self.df['path'].iloc[indexes]):
            w=self.df['width'].iloc[indexes[i]]
            h=self.df['height'].iloc[indexes[i]]
            img = self.__load_grayscale(img_path)  # shape: (128,128,1)
            X[i,] =img   # broadcast to shape: (128,128,3)
            if self.subset == 'train':
                for k,j in enumerate(["large_bowel","small_bowel","stomach"]):
                    rles = self.df[j].iloc[indexes[i]]
                    # ids = ids.append([self.df['id'].iloc[indexes[i]], self.df[j].iloc[indexes[i]]])
                    mask = rle_decode(rles, shape=(h, w, 1))
                    mask = cv2.resize(mask, (im_height,im_width))
                    y[i,:,:,k] = mask
        if self.subset == 'train':
            return X, y
        else: 
            return X, ids
        
    def __load_grayscale(self, img_path):
        img = cv2.imread(img_path, cv2.IMREAD_ANYDEPTH)
        dsize = (im_height,im_width)
        img = cv2.resize(img, dsize)
        img = img.astype(np.float32) / 255.
        img = np.expand_dims(img, axis=-1)
        return img

## Test set generation

In [16]:
val_generator = DataGenerator(test_df, batch_size = BATCH_SIZE, subset="test", shuffle=False)

In [17]:
print(len(val_generator))
print(BATCH_SIZE)

10
32


In [18]:
_,idsss = val_generator[1]
print(idsss)

['case123_day20_slice_0011', 'case123_day20_slice_0012', 'case123_day20_slice_0012', 'case123_day20_slice_0012', 'case123_day20_slice_0013', 'case123_day20_slice_0013', 'case123_day20_slice_0013', 'case123_day20_slice_0014', 'case123_day20_slice_0014', 'case123_day20_slice_0014', 'case123_day20_slice_0015', 'case123_day20_slice_0015', 'case123_day20_slice_0015', 'case123_day20_slice_0016', 'case123_day20_slice_0016', 'case123_day20_slice_0016', 'case123_day20_slice_0017', 'case123_day20_slice_0017', 'case123_day20_slice_0017', 'case123_day20_slice_0018', 'case123_day20_slice_0018', 'case123_day20_slice_0018', 'case123_day20_slice_0019', 'case123_day20_slice_0019', 'case123_day20_slice_0019', 'case123_day20_slice_0020', 'case123_day20_slice_0020', 'case123_day20_slice_0020', 'case123_day20_slice_0021', 'case123_day20_slice_0021', 'case123_day20_slice_0021', 'case123_day20_slice_0022']


## Test set predictions

In [19]:
ids=[]
classs=[]
predics=[]
num_batches=len(val_generator)
for i in range(num_batches):
    # Predict
    X, id = val_generator[i]
    preds= model.predict(X,verbose=0)     # shape: (16,128,128,3)
    
    # Rle encode
    for j in range(len(id)):

            if not(len(ids) == len(classs) and len(classs)==len(predics)):
              print('**********************************')
            if not(id[j] in ids):
                w = int(test_df['width'][test_df['id'] == id[j]].unique())
                h = int(test_df['height'][test_df['id'] == id[j]].unique())
                
                #ids.append(id[j])
                ids.extend((id[j],id[j],id[j]))
                classs.extend(('large_bowel','small_bowel','stomach'))

                for k in range(3):
                    pred_img = cv2.resize(preds[j,:,:,k], (w, h), interpolation=cv2.INTER_NEAREST) # resize probabilities to original shape
                    pred_img = (pred_img>0.5).astype(dtype='uint8')    # classify
                    predics.append(rle_encode(pred_img))

2022-07-03 12:31:52.100872: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


In [20]:
dict = {'id': ids, 'class': classs, 'predicted': predics}
submission_df = pd.DataFrame(dict)
print(submission_df.shape)

if submission1.shape[0] > 0:
    del submission1['predicted']
    submission_df = submission1.merge(submission_df, on=['id', 'class'])
    
submission_df.to_csv("submission.csv", index=False)

(300, 3)


In [21]:
submission_df.head(10)

Unnamed: 0,id,class,predicted
0,case123_day20_slice_0001,large_bowel,
1,case123_day20_slice_0001,small_bowel,
2,case123_day20_slice_0001,stomach,
3,case123_day20_slice_0002,large_bowel,
4,case123_day20_slice_0002,small_bowel,
5,case123_day20_slice_0002,stomach,
6,case123_day20_slice_0003,large_bowel,
7,case123_day20_slice_0003,small_bowel,
8,case123_day20_slice_0003,stomach,
9,case123_day20_slice_0004,large_bowel,


In [22]:
submission_df.sample(10)

Unnamed: 0,id,class,predicted
106,case123_day20_slice_0036,small_bowel,
28,case123_day20_slice_0010,small_bowel,
241,case123_day20_slice_0081,small_bowel,
158,case123_day20_slice_0053,stomach,
52,case123_day20_slice_0018,small_bowel,
6,case123_day20_slice_0003,large_bowel,
164,case123_day20_slice_0055,stomach,
65,case123_day20_slice_0022,stomach,
291,case123_day20_slice_0098,large_bowel,14788 11 15054 11 15320 25 15586 25 15792 8 15...
135,case123_day20_slice_0046,large_bowel,
