In [1]:
## -- UW-Madison GI Tract Image Segmentation - MobileNetv2 + Data Augmentation + 2.5d version
## -- Authors: Sergio Dominguez Rodriguez & Alberto Fernandez Hernandez
## -- Objective: Second Model -> UNet

# [UW-Madison GI Tract Image Segmentation - Notebook for train phase: MobileNetV2](https://www.kaggle.com/competitions/uw-madison-gi-tract-image-segmentation/)
> Track healthy organs in medical scans to improve cancer treatment

<img src="https://storage.googleapis.com/kaggle-competitions/kaggle/27923/logos/header.png?t=2021-06-02-20-30-25">

# 🛠 Install Libraries

In [2]:
!pip install -q segmentation-models-pytorch
!pip install -qU wandb

# 📚 Import Libraries 

In [3]:
# -- Libraries
from   matplotlib              import pyplot as plt
from   sklearn.model_selection import StratifiedKFold, KFold, StratifiedGroupKFold
from   sklearn.model_selection import train_test_split
from   skimage                 import color
from   matplotlib.patches      import Rectangle
from   tqdm                    import tqdm
import matplotlib.gridspec     as gridspec
import matplotlib.patches      as mpatches
import segmentation_models_pytorch as smp
import tqdm.notebook           as tq
import matplotlib              as mpl
import numpy                   as np
import pandas                  as pd
import torch.nn                as nn
import albumentations          as A
import itertools
import warnings
import wandb
import random
import torch
import glob
import math
import gc
import cv2
import ast
import re
import os

warnings.filterwarnings('ignore')
plt.style.use('ggplot')

# -- Constants
# Paths are built by plain string concatenation; each glob.glob call returns a
# list of matching paths (an empty list when nothing matches).
ROOT_PATH    = '../input/uw-madison-gi-tract-image-segmentation/'
TRAIN_PATH   = glob.glob(ROOT_PATH + '/train/*')
TEST_PATH    = glob.glob(ROOT_PATH + '/test/')
MASKS_PATH   = glob.glob('../input/uwmgi-mask-dataset/np/uw-madison-gi-tract-image-segmentation/train/*/*/*/*.npy')
# File name used when the best checkpoint is saved by the training loop.
# NOTE(review): the name mentions "manet" while the model built below is smp.Unet -- confirm.
MODEL_NAME   = '03_uw_madison_seg_manet_25d_bce_dice_loss_error_plus_data_aug.pth'
# Encoder backbone and pretrained weights passed to smp.Unet.
ENCODER         = 'efficientnet-b1'
ENCODER_WEIGHTS = 'imagenet'
# NOTE(review): not passed to the model in this notebook (smp.Unet is built with
# activation=None and sigmoid is applied explicitly) -- confirm it is still needed.
ACTIVATION      = 'sigmoid'
# (height, width) every image and mask is resized to.
IMG_SIZE     = (224, 224)
# Minimum decrease of the validation loss that counts as an improvement.
LOSS_IMPROVE = 1e-04
# Number of consecutive non-improving epochs after which training stops.
PATIENT      = 6
# Seed shared by numpy, random, torch and the fold splitter.
RANDOM_STATE = 1234
# DataLoader settings.
NUM_WORKERS  = 4
BATCH_SIZE   = 32
# Upper bound on the number of training epochs.
EPOCHS       = 50

# ⭐ WandB: Weights & Biases

Weights & Biases (W&B) is an MLOps platform for tracking our experiments. We can use it to build better models faster with experiment tracking, dataset versioning, and model management. Some of the cool features of W&B:

* Track, compare, and visualize ML experiments

* Get live metrics, terminal logs, and system stats streamed to the centralized dashboard.

* Explain how your model works, show graphs of how model versions improved, discuss bugs, and demonstrate progress towards milestones.

In [4]:
# -- wandb connection
# The API key is read from the WANDB_API_KEY environment variable so that no
# credential is stored in the notebook. Any key that was previously committed
# in this cell should be revoked at https://wandb.ai/authorize.
try:
    wandb.login(key=os.environ["WANDB_API_KEY"])
    anonymous = None
except Exception:
    # Missing variable or failed login: fall back to anonymous mode instead of
    # aborting the notebook (KeyboardInterrupt is deliberately not swallowed).
    anonymous = "must"
    print('To use your W&B account,\nset the WANDB_API_KEY environment variable to your W&B access token. \nGet your W&B access token from here: https://wandb.ai/authorize')

# 🌱 Set seed for reproducibility

In [5]:
# Seed every random number generator used in the notebook with the same value
for seed_fn in (np.random.seed, random.seed, torch.manual_seed, torch.cuda.manual_seed):
    seed_fn(RANDOM_STATE)
# CuDNN needs two extra switches: deterministic kernels and no auto-tuning
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
# Export a fixed hash seed as well
os.environ['PYTHONHASHSEED'] = str(RANDOM_STATE)
print('> SEEDING DONE')

# Utils

In [6]:
def get_metadata(row):
    """Parse case, day and slice numbers out of ``row['id']``.

    The id is split on ``'_'``: the first token carries the case number
    (prefixed with ``case``), the second the day number (prefixed with
    ``day``) and the last token is the slice number. The three integers are
    written back into ``row`` under ``'case'``, ``'day'`` and ``'slice'``.

    Returns the same ``row`` object, updated in place.
    """
    tokens = row['id'].split('_')
    parsed = {
        'case': int(tokens[0].replace('case', '')),
        'day': int(tokens[1].replace('day', '')),
        'slice': int(tokens[-1]),
    }
    for key, value in parsed.items():
        row[key] = value
    return row

def path2info(row):
    """Extract slice, case, day, width and height from ``row['image_path']``.

    The file name (last path component) is split on ``'_'``; its tokens 1, 2
    and 3 are read as slice number, width and height. The directory three
    levels from the end is split on ``'_'`` to obtain the case and day
    numbers (prefixed with ``case`` / ``day``).

    Returns the same ``row`` object with the five integer fields added.
    """
    segments = row['image_path'].split('/')
    file_tokens = segments[-1].split('_')
    slice_ = int(file_tokens[1])
    scan_tokens = segments[-3].split('_')
    case = int(scan_tokens[0].replace('case', ''))
    day = int(scan_tokens[1].replace('day', ''))
    width = int(file_tokens[2])
    height = int(file_tokens[3])
    for key, value in (('height', height), ('width', width),
                       ('case', case), ('day', day), ('slice', slice_)):
        row[key] = value
    return row

In [7]:
# ref: https://www.kaggle.com/paulorzp/run-length-encode-and-decode
def rle_decode(mask_rle, shape):
    '''
    Turn a run-length encoded string into a binary mask.

    mask_rle: whitespace separated "start length start length ..." string,
              with 1-based starts into the flattened mask
    shape: (height, width) of the array to return
    Returns a uint8 numpy array of that shape: 1 - mask, 0 - background
    '''
    tokens = mask_rle.split()
    # Even positions hold the starts (made 0-based here), odd positions the lengths
    run_starts = np.asarray(tokens[0::2], dtype=int) - 1
    run_lengths = np.asarray(tokens[1::2], dtype=int)
    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, length in zip(run_starts, run_lengths):
        flat[begin:begin + length] = 1
    return flat.reshape(shape)  # row-major reshape, matching the RLE direction


# ref.: https://www.kaggle.com/stainsby/fast-tested-rle
def rle_encode(img):
    '''
    Run-length encode a binary mask.

    img: numpy array, 1 - mask, 0 - background
    Returns the runs as a "start length start length ..." string with
    1-based starts into the flattened array (empty string for an empty mask)
    '''
    # Pad with a zero on both sides so runs touching the borders are closed
    padded = np.concatenate([[0], img.flatten(), [0]])
    # 1-based positions where the value changes: run starts and run ends alternate
    changes = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Turn every end position into a run length
    changes[1::2] = changes[1::2] - changes[::2]
    return ' '.join(map(str, changes))

* First label: __Large bowel__
* Second label: __Small bowel__
* Third label: __Stomach__

In [8]:
# -- Read .csv train dataframe
train_df = pd.read_csv('../input/uwmgi-mask-dataset/train.csv')
# Rows whose id contains 'case7_day0' or 'case81_day30' are dropped. The
# .copy() makes the filtered frame independent, so the column assignments
# below do not write into a slice of the original frame.
train_df = train_df[~(train_df['id'].str.contains('case7_day0')) & \
                    ~(train_df['id'].str.contains('case81_day30'))].copy()
train_df['segmentation'] = train_df.segmentation.fillna('')
train_df['rle_len']      = train_df.segmentation.map(len)
# regex=False: both patterns are literal substrings ('.' must not act as a wildcard)
train_df['mask_path']    = (train_df.mask_path
                            .str.replace('/png/', '/np', regex=False)
                            .str.replace('.png', '.npy', regex=False))

# One row per image id: list of the per-class RLE strings and their total length
train_df_2 = train_df.groupby(['id'])['segmentation'].agg(list).to_frame().reset_index()
train_df_2 = train_df_2.merge(train_df.groupby(['id'])['rle_len'].agg('sum').to_frame().reset_index())

# -- First label: Large bowel
# -- Second label: Small bowel
# -- Third label: Stomach
train_df_prepared = train_df.drop(columns=['segmentation', 'class', 'rle_len'])
train_df_prepared = train_df_prepared.groupby(['id']).head(1).reset_index(drop=True)
train_df_prepared = train_df_prepared.merge(train_df_2, on=['id'])
train_df_prepared['empty'] = (train_df_prepared.rle_len==0) # empty masks
# Replace the RLE list by a readable tag naming the classes that are present,
# e.g. 'LargeBowel_Stomach'; images without any mask are tagged 'Empty'.
train_df_prepared['segmentation'] = train_df_prepared['segmentation'].apply(lambda x: '_'.join([label for i, label \
                                                                              in enumerate(['LargeBowel', 'SmallBowel', 'Stomach'])\
                                                                              if x[i] != '']))
train_df_prepared['segmentation'] = train_df_prepared['segmentation'].apply(lambda x: 'Empty' if x == '' else x)
# 2.5D input: for every slice collect the paths of `channels` slices of the
# same case/day, `stride` rows apart. Shifting past the end of a scan yields
# NaN, which is forward-filled from the preceding rows.
channels=3
stride=1
for i in range(channels):
    train_df_prepared[f'image_path_{i:02}'] = train_df_prepared.groupby(['case','day'])['image_path'].shift(-i*stride).ffill()
train_df_prepared['image_paths'] = train_df_prepared[[f'image_path_{i:02d}' for i in range(channels)]].values.tolist()
train_df_prepared.head()

## Create folds

In [9]:
N_FOLD = 5
# Stratify on the `empty` flag and group by `case`, so StratifiedGroupKFold
# keeps all rows of a case in the same fold.
skf = StratifiedGroupKFold(n_splits=N_FOLD, shuffle=True, random_state=RANDOM_STATE)
for fold, (train_idx, val_idx) in enumerate(skf.split(train_df_prepared, train_df_prepared['empty'], groups = train_df_prepared["case"])):
    # val_idx holds positions; .loc is label-based, so this relies on the
    # frame having a default 0..n-1 index at this point.
    train_df_prepared.loc[val_idx, 'fold'] = fold
# Number of empty / non-empty images per fold
display(train_df_prepared.groupby(['fold','empty'])['id'].count())

## Add extra Data Augmentation

In [10]:
# Albumentations pipelines keyed by name.
#   "train"        : random augmentation; crops to 85% of IMG_SIZE and resizes back
#   "train_resize" : plain resize to IMG_SIZE (used when loading images and masks)
#   "valid_resize" : same single resize step as "train_resize"
#   "valid"        : centre crop to 85% of IMG_SIZE followed by a resize
# NOTE(review): TractImageDataset below only looks up "train", "train_resize"
# and "valid_resize"; "valid" appears unused in this notebook -- confirm.
data_transforms = {
    "train": A.Compose([
        A.HorizontalFlip(p=0.2),
        A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.2, rotate_limit=25, p=0.4),
        A.RandomCrop(height=round(IMG_SIZE[0] * 0.85), width=round(IMG_SIZE[1] * 0.85), always_apply=True),
        A.Blur(blur_limit=3, p=0.1),
        A.GaussNoise(var_limit=0.001, p=0.1),
        A.Resize(*IMG_SIZE, interpolation=cv2.INTER_NEAREST),
        A.CoarseDropout(max_holes=8, max_height=IMG_SIZE[0]//20, max_width=IMG_SIZE[1]//20,
                        min_holes=1, fill_value=0, mask_fill_value=0, p=0.1)
    ]),
    "train_resize": A.Compose([
        A.Resize(*IMG_SIZE, interpolation=cv2.INTER_NEAREST)
    ], p=1.0),
    "valid_resize": A.Compose([
        A.Resize(*IMG_SIZE, interpolation=cv2.INTER_NEAREST)
    ], p=1.0),
    "valid": A.Compose([
        A.CenterCrop(height=round(IMG_SIZE[0] * 0.85), width=round(IMG_SIZE[1] * 0.85)),
        A.Resize(*IMG_SIZE, interpolation=cv2.INTER_NEAREST)
    ], p=1.0)
}

## Custom Data Loader

In [11]:
def show_img(img, mask=None):
    """Draw ``img`` with the 'bone' colormap on the current pyplot axes.

    When ``mask`` is given it is overlaid at 50% opacity and a legend with
    one coloured patch per organ (Large Bowel, Small Bowel, Stomach) is
    added. The axes are hidden in both cases.
    """
    plt.imshow(img, cmap='bone')
    if mask is None:
        plt.axis('off')
        return

    plt.imshow(mask, alpha=0.5)
    legend_colors = ((0.667,0.0,0.0), (0.0,0.667,0.0), (0.0,0.0,0.667))
    handles = [Rectangle((0,0),1,1, color=rgb) for rgb in legend_colors]
    plt.legend(handles, [ "Large Bowel", "Small Bowel", "Stomach"])
    plt.axis('off')

In [38]:
class TractImageDataset(torch.utils.data.Dataset):
    """2.5D dataset: each item stacks several neighbouring slices as channels.

    Parameters
    ----------
    df : DataFrame with an ``image_paths`` column (list of slice paths per
        row) and, for labelled data, a ``mask_path`` column (``.npy`` file).
    label : when true, ``__getitem__`` returns ``(image, mask)``; otherwise
        only the image.
    transforms : either a dict of albumentations pipelines containing a
        ``'train'`` entry, or a single pipeline. Only applied when ``train``
        is truthy.
    train : enables the augmentation pipeline.

    Images are returned as float tensors in channel-first layout, each
    channel scaled by its own maximum.
    """

    def __init__(self, df, label=True, transforms=None, train=None):
        self.df         = df
        self.label      = label
        self.img_paths  = df['image_paths']
        # Unlabelled frames are allowed to lack the mask column.
        self.msk_paths  = df['mask_path'] if 'mask_path' in df else None
        self.transforms = transforms
        self.train      = train

    def __len__(self):
        return len(self.df)

    def __load_img(self, path):
        """Read one slice unchanged, cast to float32 and resize to IMG_SIZE."""
        img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
        if img is None:
            # cv2.imread signals failure with None instead of raising
            raise FileNotFoundError(f"Could not read image: {path}")
        img = img.astype('float32') # original is uint16
        return data_transforms['train_resize'](image=img)['image']

    def __adjust_gamma(self, image, gamma):
        """Min-max scale ``image`` to uint8 and optionally gamma-correct it.

        A constant image (max == min) is mapped to all zeros instead of
        dividing by zero. A falsy ``gamma`` skips the gamma correction.
        """
        low, high = np.min(image), np.max(image)
        span = high - low
        if span == 0:
            image = np.zeros(np.shape(image), dtype='uint8')
        else:
            image = ((image - low) * 255 / span).astype('uint8')
        if not gamma:
            return image
        invGamma = 1.0 / gamma
        table = np.array([((i / 255.0) ** invGamma) * 255
            for i in np.arange(0, 256)]).astype("uint8")
        # apply gamma correction using the lookup table
        return cv2.LUT(image, table)

    def __load_msk(self, path, transform_type):
        """Load a ``.npy`` mask, scale it by 1/255 and resize it.

        ``transform_type`` selects the entry of ``data_transforms`` used for
        the resize.
        """
        msk = np.load(path)
        msk = msk.astype('float32')
        msk = msk / 255.0
        # The pipeline needs an image argument; the placeholder has the same
        # spatial size as the mask so that image and mask shapes agree.
        placeholder = np.zeros((*msk.shape[:2], 1), dtype=np.uint16)
        return data_transforms[transform_type](image=placeholder, mask=msk)['mask']

    @staticmethod
    def _normalize_channels(imgs):
        """Divide every channel of an (H, W, C) array by its own maximum.

        Channels that are entirely zero are left at zero rather than being
        divided by zero (which would produce NaN inputs for the model).
        """
        peak = np.max(imgs, axis=(0, 1))
        peak = np.where(peak == 0, 1, peak)
        return imgs / peak

    def _train_pipeline(self):
        """Return the augmentation pipeline, or None when there is none."""
        if isinstance(self.transforms, dict):
            return self.transforms['train']
        return self.transforms

    def __getitem__(self, index):
        # Positional access keeps this correct even if df has a non-default index.
        img_paths = self.img_paths.iloc[index]
        imgs = np.zeros((*IMG_SIZE, len(img_paths)), dtype=np.uint16)
        for i, img_path in enumerate(img_paths):
            img = self.__load_img(img_path)
            imgs[..., i] = self.__adjust_gamma(img, gamma=2)

        pipeline = self._train_pipeline() if self.train else None

        if self.label:
            transform_type = 'train_resize' if self.train else 'valid_resize'
            msk = self.__load_msk(self.msk_paths.iloc[index], transform_type=transform_type)
            if pipeline is not None:
                data = pipeline(image=imgs, mask=msk)
                imgs = data['image']
                msk  = data['mask']
            imgs = self._normalize_channels(imgs)
            imgs = np.transpose(imgs, (2, 0, 1))
            msk  = np.transpose(msk, (2, 0, 1))
            return torch.FloatTensor(imgs), torch.FloatTensor(msk)

        # Unlabelled data goes through the same normalisation and layout
        # change as labelled data so the tensor can be fed to the model.
        if pipeline is not None:
            imgs = pipeline(image=imgs)['image']
        imgs = np.transpose(self._normalize_channels(imgs), (2, 0, 1))
        return torch.FloatTensor(imgs)

## Plot images + masks

In [13]:
def show_image_plus_masks(img, masks):
    """Plot an image next to the same image with its three masks overlaid.

    ``img`` is moved from channel-first to channel-last layout for plotting.
    ``masks`` is indexed along its first axis: channel 0 is drawn in yellow
    (Large Bowel), channel 1 in green (Small Bowel) and channel 2 in red
    (Stomach); zero-valued mask pixels are left transparent.
    """
    img   = torch.moveaxis(img.squeeze(dim=0), 0, -1)
    masks = masks.squeeze(dim=0)
    fig   = plt.figure(figsize=(10, 5))
    grid  = gridspec.GridSpec(nrows=1, ncols=2)

    # Left panel: the image alone
    image_ax = fig.add_subplot(grid[0, 0])
    image_ax.imshow(img, cmap='bone')
    image_ax.set_title("Image", fontsize=15, weight='bold', y=1.02)

    # Right panel: the image with one translucent layer per organ
    mask_ax = fig.add_subplot(grid[0, 1])
    mask_ax.set_title("Mask", fontsize=15, weight='bold', y=1.02)
    mask_ax.imshow(img, cmap='bone')

    organ_styles = (("Large Bowel", 'yellow'), ("Small Bowel", 'green'), ("Stomach", 'red'))
    overlays = []
    for channel, (_, colour) in enumerate(organ_styles):
        layer = masks[channel]
        overlays.append(mask_ax.imshow(np.ma.masked_where(layer == 0, layer),
                                       cmap=mpl.colors.ListedColormap([colour]), alpha=0.3))

    for axis in (image_ax, mask_ax):
        axis.set_axis_off()

    # Legend patches take the colour each overlay actually renders for value 1
    patches = [mpatches.Patch(color=overlay.cmap(overlay.norm(1)), label=f"{name}")
               for overlay, (name, _) in zip(overlays, organ_styles)]

    plt.legend(handles=patches, bbox_to_anchor=(1.1, 0.65), loc=2, borderaxespad=0.4,fontsize = 14,
               title='Mask Labels', title_fontsize=14, edgecolor="black",  facecolor='#c5c6c7')
    plt.suptitle("", fontsize=20, weight='bold')
    plt.show()

# Data Loaders

__Beforehand, let's split DataFrame into train and validation__

In [14]:
# Fold 0 becomes the validation set; the remaining folds form the training set.
# NOTE(review): train_df_prepared is overwritten here, so re-running this cell
# on its own yields an empty validation frame -- re-run from the fold cell instead.
val_df_prepared   = train_df_prepared[train_df_prepared['fold'] == 0]
train_df_prepared = train_df_prepared[train_df_prepared['fold'] != 0]

In [15]:
train_df_prepared.segmentation.value_counts()

In [16]:
val_df_prepared.segmentation.value_counts()

In [17]:
train_df_prepared.shape

In [18]:
val_df_prepared.shape

In [19]:
# Give both frames a fresh 0..n-1 index; TractImageDataset looks rows up by
# the integer it receives from the DataLoader.
train_df_prepared.reset_index(drop=True, inplace=True)
val_df_prepared.reset_index(drop=True, inplace=True)

In [20]:
# - Train and val loader
# train=True enables the "train" augmentation pipeline inside the dataset;
# the validation dataset (train=False) only resizes.
train_dataset = TractImageDataset(train_df_prepared, label=True, transforms=data_transforms, train=True)
train_loader  = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE,
                                            num_workers=NUM_WORKERS, shuffle=True, drop_last=False)
val_dataset   = TractImageDataset(val_df_prepared, label=True, transforms=data_transforms, train=False)
val_loader    = torch.utils.data.DataLoader(val_dataset, batch_size=BATCH_SIZE,
                                            num_workers=NUM_WORKERS, shuffle=False, drop_last=False)

In [21]:
# Visual sanity check: plot the first 10 image/mask pairs of the first
# training batch, then stop iterating the loader.
print("TRAIN DATASET SAMPLE")
for img_batch, mask_batch in train_loader:
    cont = 0
    for img, masks in zip(img_batch, mask_batch):
        show_image_plus_masks(img, masks)
        cont+=1
        if cont == 10:
            break
    # Only the first batch is needed
    break

# Create model

![](https://miro.medium.com/max/640/1*ZS7xxm9jkGIcRnH3QKs02g.gif)

In [22]:
# -- Create segmentation model with pretrained encoder
# 3 input channels (one per stacked slice) and 3 output channels (one per organ).
# activation=None: the model returns raw outputs; sigmoid is applied explicitly
# in the train/val/test functions.
model = smp.Unet(
    encoder_name=ENCODER,
    in_channels=3,
    encoder_weights=ENCODER_WEIGHTS, 
    classes=3, 
    activation=None
)

# Loss functions

In [23]:
# Define error criterion and optimize functions
dice_loss = smp.losses.DiceLoss(mode='multilabel')
bce_loss  = smp.losses.SoftBCEWithLogitsLoss()
def criterion(y_pred, y_true):
    """Return the equally weighted sum of the Dice loss and the BCE loss.

    y_pred: model output -- presumably raw (pre-sigmoid) values, since the
            model is built with activation=None; TODO confirm
    y_true: target masks
    """
    return 0.5 * dice_loss(y_pred, y_true) + 0.5 * bce_loss(y_pred, y_true)

optimizer = torch.optim.Adam(model.parameters(), lr = 1e-03, weight_decay=0.001)
# Define callbacks
# The scheduler is stepped with the validation loss in the training loop:
# after 2 epochs without improvement the learning rate is multiplied by 0.3.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=2, factor=0.3, verbose=True)

# Metric: Dice coefficient

In [24]:
def dice_coef(y_true, y_pred, thr=0.5, dim=(2,3), epsilon=0.001):
    """Mean Dice coefficient between target masks and thresholded predictions.

    y_true:  target tensor, cast to float32
    y_pred:  prediction tensor, binarised with ``y_pred > thr``
    dim:     axes summed over for each individual Dice value
    epsilon: smoothing term added to numerator and denominator, so two
             empty masks score 1
    Returns a 0-dim tensor: the mean over axes 1 and 0 of the per-item values.
    """
    target = y_true.to(torch.float32)
    binarized = (y_pred > thr).to(torch.float32)
    overlap = torch.sum(target * binarized, dim=dim)
    total = torch.sum(target, dim=dim) + torch.sum(binarized, dim=dim)
    per_item = (2 * overlap + epsilon) / (total + epsilon)
    return per_item.mean(dim=(1, 0))

In [25]:
# Smoke test of dice_coef on two random tensors of shape (10, 3, 256, 256).
# NOTE(review): torch.rand draws from [0, 1), so sigmoid(...) is >= 0.5 and both
# tensors round to (almost) all ones; the result is therefore ~1 -- confirm
# that this is the intended check.
dice_coef(torch.round(torch.sigmoid(torch.rand(10,3,256,256))).int(), 
            torch.round(torch.sigmoid(torch.rand(10,3,256,256))).int())

# Define train and validation functions

In [26]:
# Define train function
def train(train_data, model, criterion):
    """Run one training epoch and return ``(mean loss, mean Dice)``.

    train_data: iterable of ``(input, label)`` batches
    model:      network to optimise; it must already be on the GPU when CUDA
                is available, because the batches are moved there
    criterion:  callable ``criterion(outputs, label)`` returning the loss

    Uses the module-level ``optimizer``. The loss is a float averaged over
    the batches; the Dice value is a CPU tensor averaged the same way.
    Raises ValueError when ``train_data`` yields no batch.
    """
    print('Training...')
    model.train()
    counter = 0
    train_running_loss  = 0.0
    train_running_dice  = 0.0
    pbar = tqdm(train_data)
    for input_data, label in pbar:
        # Switch to GPU if available
        if torch.cuda.is_available():
            input_data, label = input_data.cuda(), label.cuda()
        optimizer.zero_grad()
        outputs = model(input_data)

        # Loss is computed on the device the outputs live on; copying both
        # tensors to the CPU every step only adds transfer time.
        loss = criterion(outputs, label)

        # Backpropagation
        loss.backward()

        # Update optimizer parameters
        optimizer.step()

        # The metric is for monitoring only, so keep it out of the autograd graph.
        # Sigmoid maps the outputs to [0, 1]; rounding binarises them.
        with torch.no_grad():
            predicted = torch.round(torch.sigmoid(outputs))
            mean_dice = dice_coef(label, predicted).cpu()

        loss_item = float(loss.item())
        counter += 1
        pbar.set_description("Loss: {} - Dice coef: {}".format(loss_item, mean_dice))

        train_running_loss += loss_item
        train_running_dice += mean_dice

    if counter == 0:
        raise ValueError("train_data did not yield any batch")

    train_loss = train_running_loss  / counter
    train_dice  = train_running_dice / counter
    torch.cuda.empty_cache()
    gc.collect()
    return train_loss, train_dice

In [27]:
# Define val function
def val(val_data, model, criterion):
    """Evaluate the model on ``val_data`` and return ``(mean loss, mean Dice)``.

    val_data:  iterable of ``(input, label)`` batches
    model:     network to evaluate; it must already be on the GPU when CUDA
               is available, because the batches are moved there
    criterion: callable ``criterion(outputs, label)`` returning the loss

    No gradients are computed. The loss is a float averaged over the
    batches; the Dice value is a CPU tensor averaged the same way.
    Raises ValueError when ``val_data`` yields no batch.
    """
    print('Validating...')
    model.eval()
    counter = 0
    val_running_loss = 0.0
    val_running_dice = 0.0
    pbar = tqdm(val_data)
    with torch.no_grad():
        for input_data, label in pbar:
            # Again, switch to GPU if available
            if torch.cuda.is_available():
                input_data, label = input_data.cuda(), label.cuda()
            outputs  = model(input_data)

            # Sigmoid maps the outputs to [0, 1]; rounding binarises them
            predicted = torch.round(torch.sigmoid(outputs))
            mean_dice = dice_coef(label, predicted).cpu()

            # Loss, computed on the device the outputs live on
            counter += 1
            loss_item = criterion(outputs, label).item()
            pbar.set_description("Loss: {} - Dice coef: {}".format(loss_item, mean_dice))

            val_running_loss += loss_item
            val_running_dice += mean_dice

    if counter == 0:
        raise ValueError("val_data did not yield any batch")

    val_loss = val_running_loss  / counter
    val_dice  = val_running_dice / counter
    torch.cuda.empty_cache()
    gc.collect()
    return val_loss, val_dice

In [None]:
# Start a W&B run; metrics logged by the training loop are attached to it.
# NOTE(review): the `anonymous` flag computed in the login cell is not passed
# here -- confirm whether anonymous mode is meant to be supported.
run = wandb.init(project='uw-maddison-gi-tract', 
                 name='unet-2022-07-07', 
                 group='unet-224x224-model')

In [None]:
###### -- Start the training and validation
# Per-epoch history (the "accuracy" lists hold mean Dice values)
train_loss     = []
train_accuracy = []
valid_loss     = []
valid_accuracy = []
total_train_predictions = []
total_val_predictions   = []

best_val_loss = float('inf')
best_val_auc  = float(0)

# Consecutive epochs without a sufficient validation-loss improvement
patient_counter = 0

if torch.cuda.is_available():
    model = model.cuda()

# -- To automatically log gradients
wandb.watch(model, log_freq=100)

for epoch in range(EPOCHS):
    print(f"Epoch {epoch+1} of {EPOCHS}")
    train_epoch_loss, train_epoch_accuracy = train(
        train_loader, model, criterion
    )
    val_epoch_loss, val_epoch_accuracy = val(
        val_loader, model, criterion
    )

    # Call ReduceLR Callback (after validation step)
    scheduler.step(val_epoch_loss)

    # Report and record the epoch BEFORE the early-stopping check, so the
    # metrics of the last epoch are not lost when the loop breaks.
    print(f"Train mean Dice: {train_epoch_accuracy:.4f}")
    print(f'Val mean Dice: {val_epoch_accuracy:.4f}')
    print(f"Train mean Loss: {train_epoch_loss:.4f}")
    print(f'Val mean Loss: {val_epoch_loss:.4f}')
    print("-"*80)

    wandb.log({"Train Loss": train_epoch_loss, 
               "Valid Loss": val_epoch_loss,
               "Train Dice": train_epoch_accuracy,
               "Valid Dice": val_epoch_accuracy,
               "LR":optimizer.param_groups[0]['lr']})

    train_loss.append(train_epoch_loss)
    train_accuracy.append(train_epoch_accuracy)
    valid_loss.append(val_epoch_loss)
    valid_accuracy.append(val_epoch_accuracy)

    # Checkpoint on improvement of at least LOSS_IMPROVE, otherwise count
    # towards early stopping.
    if best_val_loss - val_epoch_loss >= LOSS_IMPROVE:
        print("Val mean loss has improved. From {} to {}. Saving model...".format(best_val_loss, val_epoch_loss))
        best_val_loss   = val_epoch_loss
        patient_counter = 0
        # The whole model object is saved; the evaluation cells load it with torch.load
        torch.save(model, MODEL_NAME)
        wandb.save(MODEL_NAME)
    else:
        print("Val mean loss did not improve")
        patient_counter+=1
        if patient_counter >= PATIENT:
            break

# Testing some validation images

__FPN architecture__

In [29]:
# Load the saved FPN model object and count its trainable parameters.
# NOTE(review): `model` is rebound by the UNet loading cell further down, so in
# a top-to-bottom run every evaluation cell below sees the UNet, not this model.
model = torch.load('../input/fpn-2022-06-18-efficientnet-b1/03_uw_madison_seg_fpn_25d_bce_dice_loss_error_plus_data_aug.pth')
model_parameters = filter(lambda p: p.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print("Total number of parameters: {}".format(params))

__UNet architecture__

In [37]:
# Load the saved UNet model object and count its trainable parameters.
# This rebinds `model`, replacing whatever was loaded before.
model = torch.load('../input/efficientnet-b1-25d-gamma-contrast-stride-1/03_uw_madison_seg_unet_efficientnetb1_25d_dice_loss_error_plus_data_aug (1).pth')
model_parameters = filter(lambda p: p.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print("Total number of parameters: {}".format(params))

### Dice coef

![Dice loss formula](https://miro.medium.com/max/1400/1*Z1hkDvyhFBogT9EkzVkX2A.png)

In [30]:
def dice_coef_by_label(y_true, y_pred, dim=(1,2), epsilon=0.001):
    """Dice coefficient of each of the first three channels, averaged over axis 0.

    y_true, y_pred: tensors indexed as ``[:, label, ...]``; they are used as
                    given (no thresholding is applied here)
    dim:            axes of the per-label slice that are summed over
    epsilon:        smoothing term added to numerator and denominator
    Returns a list with three entries, one per label index 0, 1 and 2.
    """
    def _label_dice(channel):
        truth = y_true[:, channel, ...]
        guess = y_pred[:, channel, ...]
        overlap = (truth * guess).sum(dim=dim)
        total = truth.sum(dim=dim) + guess.sum(dim=dim)
        return 0. + ((2 * overlap + epsilon) / (total + epsilon)).mean(dim=(0))

    return [_label_dice(channel) for channel in range(0, 3)]

def competition_score(y_true, y_pred):
    """Round both tensors to integers and return their mean Dice coefficient.

    y_true, y_pred: tensors such as (32, 3, 224, 224); each is rounded and
    cast to int before being handed to ``dice_coef``.
    """
    rounded_true, rounded_pred = (torch.round(t).int() for t in (y_true, y_pred))
    return dice_coef(rounded_true, rounded_pred)

In [31]:
# Define test function
def test(test_data, model, criterion):
    """Score a model per organ and collect its worst predictions.

    test_data: iterable of ``(input, label)`` batches (any batch size)
    model:     network to evaluate; moved to the GPU when CUDA is available
    criterion: callable ``criterion(label, predicted)`` returning a score for
               the batch (higher is better); batches scoring below 0.1 are kept

    Each predicted mask is binarised and cleaned with an 8x8 morphological
    closing before scoring.

    Returns ``(avg_dice, pred_list, real_mask_list, original_img, dice_coef_list)``:
    the per-label Dice averaged over batches, the predictions / labels /
    inputs of the low-scoring batches, and the per-batch per-label Dice values.
    Raises ValueError when ``test_data`` yields no batch.
    """
    print('Testing...')
    model.eval()
    counter = 0
    total_dice_score_list  = [0.,0.,0.]
    pred_list = []
    real_mask_list = []
    pbar = tqdm(test_data)
    dice_coef_list = []
    original_img = []

    kernel = np.ones((8,8),np.uint8)
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        model = model.cuda()
    with torch.no_grad():
        for input_data, label in pbar:
            # Again, switch to GPU if available
            if use_cuda:
                input_data, label = input_data.cuda(), label.cuda()
            outputs  = model(input_data)

            # Sigmoid maps the outputs to [0, 1]; rounding binarises them
            binary = torch.round(torch.sigmoid(outputs)).cpu().numpy()

            # Close small holes in every mask of the batch. The buffer takes
            # its shape from the prediction instead of assuming (1, 3, 224, 224).
            closed = np.zeros(binary.shape)
            for item in range(binary.shape[0]):
                for channel in range(binary.shape[1]):
                    closed[item, channel, ...] = cv2.morphologyEx(binary[item, channel, ...], cv2.MORPH_CLOSE, kernel)
            # Scoring happens on the CPU, so the result is not sent back to the GPU
            predicted = torch.from_numpy(closed)
            label_cpu = label.cpu()

            # Competition score
            batch_score = criterion(label_cpu, predicted).detach().cpu()

            if batch_score < 0.1:
                original_img.append(input_data.cpu())
                pred_list.append(predicted)
                real_mask_list.append(label_cpu)

            label_dice = dice_coef_by_label(label_cpu, predicted)
            dice_coef_list.append(label_dice)
            total_dice_score_list = np.add(total_dice_score_list, label_dice)
            counter += 1

    if counter == 0:
        raise ValueError("test_data did not yield any batch")

    avg_dice = total_dice_score_list / counter
    torch.cuda.empty_cache()
    gc.collect()
    return avg_dice, pred_list, real_mask_list, original_img, dice_coef_list

## Average Dice score by label: large bowel, small bowel and stomach

### Validation

__Non-empty masks__

In [39]:
# -- Rebuild the validation loader with batch size 1, restricted to the rows
# -- whose `empty` flag is False (images that have at least one mask).
# The index is reset because the dataset looks rows up by integer position.
val_dataset   = TractImageDataset(val_df_prepared[~val_df_prepared['empty']].reset_index(drop=True), 
                                  label=True, transforms=data_transforms, train=False)
val_loader    = torch.utils.data.DataLoader(val_dataset, batch_size=1,
                                            num_workers=NUM_WORKERS, shuffle=False, drop_last=False)

__FPN__

In [33]:
# Per-organ Dice on the current val_loader for whatever is bound to `model`.
# NOTE(review): in a top-to-bottom run `model` was last loaded from the UNet
# checkpoint, so confirm this cell really scores the FPN.
avg_dice, pred_list, real_mask_list, original_img, dice_coef_list = test(val_loader, model, competition_score)
for label, dice in zip(['Large bowel', 'Small bowel', 'Stomach'], avg_dice):
    print("Label: {} - Average Dice score: {}".format(label, dice))

__UNet__

In [40]:
# Per-organ Dice on the current val_loader for whatever is bound to `model`.
# The returned lists overwrite those produced by the previous evaluation cell.
avg_dice, pred_list, real_mask_list, original_img, dice_coef_list = test(val_loader, model, competition_score)
for label, dice in zip(['Large bowel', 'Small bowel', 'Stomach'], avg_dice):
    print("Label: {} - Average Dice score: {}".format(label, dice))

__Empty masks__

In [41]:
# -- Rebuild the validation loader with batch size 1, restricted to the rows
# -- whose `empty` flag is True (images without any mask).
# The index is reset because the dataset looks rows up by integer position.
val_dataset   = TractImageDataset(val_df_prepared[val_df_prepared['empty']].reset_index(drop=True), 
                                  label=True, transforms=data_transforms, train=False)
val_loader    = torch.utils.data.DataLoader(val_dataset, batch_size=1,
                                            num_workers=NUM_WORKERS, shuffle=False, drop_last=False)

__FPN__

In [35]:
# Per-organ Dice on the empty-mask loader for whatever is bound to `model`.
# NOTE(review): in a top-to-bottom run `model` was last loaded from the UNet
# checkpoint, so confirm this cell really scores the FPN.
avg_dice, pred_list, real_mask_list, original_img, dice_coef_list = test(val_loader, model, competition_score)
for label, dice in zip(['Large bowel', 'Small bowel', 'Stomach'], avg_dice):
    print("Label: {} - Average Dice score: {}".format(label, dice))

__UNet__

In [42]:
# Per-organ Dice on the empty-mask loader for whatever is bound to `model`.
# The returned lists overwrite those produced by the previous evaluation cell.
avg_dice, pred_list, real_mask_list, original_img, dice_coef_list = test(val_loader, model, competition_score)
for label, dice in zip(['Large bowel', 'Small bowel', 'Stomach'], avg_dice):
    print("Label: {} - Average Dice score: {}".format(label, dice))

In [43]:
torch.cuda.empty_cache()

# Export patient sample to DICOM

In [None]:
# Scan files of one case/day, sorted by file name, read without any conversion
patient_sample_files = sorted(glob.glob('../input/uw-madison-gi-tract-image-segmentation/train/case123/case123_day20/scans/*'))
image_sample = np.array([cv2.imread(scan_path, cv2.IMREAD_UNCHANGED) for scan_path in patient_sample_files])

In [None]:
!mkdir sample

In [None]:
import pydicom
from pydicom.dataset import Dataset, FileDataset
from pydicom.uid import ExplicitVRLittleEndian
import pydicom._storage_sopclass_uids

# Make the cell re-runnable even if the output directory is missing
os.makedirs("./sample", exist_ok=True)

# All slices belong to one study / series / frame of reference, so these UIDs
# are generated once. Generating them per slice would make every file its own
# study and series.
study_instance_uid      = pydicom.uid.generate_uid()
series_instance_uid     = pydicom.uid.generate_uid()
frame_of_reference_uid  = pydicom.uid.generate_uid()
n_slices = len(image_sample)

for i, image2d in tqdm(enumerate(image_sample), total=n_slices):
    # Populate required values for file meta information.
    # Each slice is its own SOP instance and therefore gets its own UID.
    sop_instance_uid = pydicom.uid.generate_uid()

    meta = pydicom.Dataset()
    meta.MediaStorageSOPClassUID = pydicom._storage_sopclass_uids.MRImageStorage
    meta.MediaStorageSOPInstanceUID = sop_instance_uid
    meta.TransferSyntaxUID = pydicom.uid.ExplicitVRLittleEndian  

    ds = Dataset()
    ds.file_meta = meta

    ds.is_little_endian = True
    ds.is_implicit_VR = False

    ds.SOPClassUID = pydicom._storage_sopclass_uids.MRImageStorage
    # Must match MediaStorageSOPInstanceUID in the file meta information
    ds.SOPInstanceUID = sop_instance_uid
    ds.PatientName = "Test^GI^Tract^Image^Segmentation"
    ds.PatientID = "123456"

    ds.Modality = "MR"
    ds.SeriesInstanceUID = series_instance_uid
    ds.StudyInstanceUID = study_instance_uid
    ds.FrameOfReferenceUID = frame_of_reference_uid

    ds.BitsStored = 16
    ds.BitsAllocated = 16
    ds.SamplesPerPixel = 1
    ds.HighBit = 15

    ds.ImagesInAcquisition = str(n_slices)

    ds.Rows = image2d.shape[0]
    ds.Columns = image2d.shape[1]
    # 1-based position of the slice within the series
    ds.InstanceNumber = i + 1

    # Placeholder geometry: unit spacing, slices stacked along z by index so
    # that viewers can order them. The real spacing is not available here.
    ds.ImagePositionPatient = r"0\0\{}".format(i)
    ds.ImageOrientationPatient = r"1\0\0\0\-1\0"
    ds.ImageType = r"ORIGINAL\PRIMARY\AXIAL"

    ds.RescaleIntercept = "0"
    ds.RescaleSlope = "1"
    ds.PixelSpacing = r"1\1"
    ds.PhotometricInterpretation = "MONOCHROME2"
    # 0 = unsigned, 1 = two's complement; follow the dtype of the pixel array
    ds.PixelRepresentation = 0 if image2d.dtype.kind == 'u' else 1

    pydicom.dataset.validate_file_meta(ds.file_meta, enforce_standard=True)
    ds.PixelData = image2d.tobytes()
    ds.save_as(r"./sample/sample_slice_{}.dcm".format(i))

In [None]:
!zip -r sample.zip ./sample