# PyTorch UNet Model for Pneumothorax Segmentation

In [1]:
# Imports
import numpy as np
import pandas as pd
from glob import glob
import pydicom
import random

# import image manipulation
from PIL import Image

# import matplotlib for visualization
from matplotlib.pyplot import imshow
import matplotlib.pyplot as plt

# Import PyTorch
import torch
from torch import nn
from torch import optim
from torch.optim import lr_scheduler
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms.functional as TF
from torch.utils.data.sampler import SubsetRandomSampler
from torch.autograd import Variable

from skimage.morphology import binary_opening, disk, label

# Import rle utils
import sys
sys.path.insert(0, '../input/siim-acr-pneumothorax-segmentation')
from mask_functions import rle2mask, mask2rle # import mask utilities

from tqdm import tqdm_notebook

In [2]:
! pip install albumentations



In [3]:
import cv2
from albumentations import (
    Compose, HorizontalFlip, CLAHE, HueSaturationValue,
    RandomBrightness, RandomContrast, RandomGamma,OneOf,
    ToFloat, ShiftScaleRotate,GridDistortion, ElasticTransform, JpegCompression, HueSaturationValue,
    RGBShift, RandomBrightness, RandomContrast, Blur, MotionBlur, MedianBlur, GaussNoise,CenterCrop,
    IAAAdditiveGaussianNoise,GaussNoise,OpticalDistortion
)
from albumentations.pytorch import ToTensor

## Dataset Utility Functions

In [4]:
# Data loading utility
def load_data(datafilepath = '../input/siim-train-test/siim/', healthy_num = 2000):
    '''
    Function to load the dataset.
    INPUT:
        datafilepath - path to directory containing the dataset.
    OUTPUT:
        train_fns - train dataset
        train_fns - test dataset
        df_masks - pandas dataframe containing masks for train dataset
    '''
    # Load full training and test sets
    train_fns = sorted(glob(datafilepath + 'dicom-images-train/*/*/*.dcm'))
    test_fns = sorted(glob(datafilepath + 'dicom-images-test/*/*/*.dcm'))
    # Load csv masks
    df_masks = pd.read_csv(datafilepath + 'train-rle.csv', index_col='ImageId')
    # create a list of filenames with images to use
    
    counter = 0
    files_list = []
    for fname in train_fns:
        try:
            if '-1' in df_masks.loc[fname.split('/')[-1][:-4],' EncodedPixels']:
                if counter <= healthy_num:
                    files_list.append(fname)
                    counter += 1
            else:
                files_list.append(fname)
        except:
            pass

    return train_fns, test_fns, df_masks, files_list

In [5]:
def normalize(arr):
    """
    Function performs the linear normalizarion of the array.
    https://stackoverflow.com/questions/7422204/intensity-normalization-of-image-using-pythonpil-speed-issues
    http://en.wikipedia.org/wiki/Normalization_%28image_processing%29
    INPUT:
        arr - orginal numpy array
    OUTPUT:
        arr - normalized numpy array
    """
    arr = arr.astype('float')
    # Do not touch the alpha channel
    for i in range(3):
        minval = arr[...,i].min()
        maxval = arr[...,i].max()
        if minval != maxval:
            arr[...,i] -= minval
            arr[...,i] *= (255.0/(maxval-minval))
    return arr

def normalize_image(img):
    """
    Function performs the normalization of the image.
    https://stackoverflow.com/questions/7422204/intensity-normalization-of-image-using-pythonpil-speed-issues
    INPUT:
        image - PIL image to be normalized
    OUTPUT:
        new_img - PIL image normalized
    """
    arr = np.array(img)
    new_img = Image.fromarray(normalize(arr).astype('uint8'),'RGB')
    return new_img

In [6]:
# Define the dataset
class PneumothoraxDataset(Dataset):
    '''
    The dataset for pneumothorax segmentation.
    '''

    def __init__(self, fns, df_masks, files_list, transform=True, size = (224, 224), mode = 'train'):
        '''
        INPUT:
            fns - glob containing the images
            df_masks - dataframe containing image masks
            transform (optional) - enable transforms for the images
        '''
        self.labels_frame = df_masks
        self.fns = fns
        self.transform = transform
        self.size = size
        self.transforms_mask = transforms.Compose([transforms.Resize(self.size), transforms.ToTensor()])
        self.transforms_image = transforms.Compose([transforms.Resize(self.size), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
        self.alb_transforms = Compose([HorizontalFlip(p=0.5),
                                             OneOf([RandomContrast(),
                                                    RandomGamma(),
                                                    RandomBrightness(),], p=0.3),
                                             OneOf([ElasticTransform(alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03),
                                                    GridDistortion(),
                                                    OpticalDistortion(distort_limit=2, shift_limit=0.5),], p=0.3)])
        self.mode = mode
        self.files_list = files_list

    def __len__(self):
        if (self.mode == 'validation'):
            return len(self.fns)
        else:
            return len(self.files_list)

    def __getitem__(self, idx):
        '''
        Function to get items from dataset by idx.
        INPUT:
            idx - id of the image in the dataset
        '''
        # image height and width
        im_height = 1024
        im_width = 1024
        # image channels
        im_chan = 1

        # get train image and mask
        np_image = np.zeros((im_height, im_width, im_chan), dtype=np.uint8)
        np_mask = np.zeros((im_height, im_width, 1), dtype=np.bool)
        
        # if validation then return filename instead of mask
        if self.mode == 'validation':
            # read dcm file with image
            dataset = pydicom.read_file(self.fns[idx])
            np_image = np.expand_dims(dataset.pixel_array, axis=2)
        
            image = Image.fromarray(np_image.reshape(im_height, im_width) , 'L')
            image = image.convert('RGB')
            
            image = self.transforms_image(image)
            return [image, self.fns[idx].split('/')[-1][:-4]]
        
        # read dcm file with image
        dataset = pydicom.read_file(self.files_list[idx])
        np_image = np.expand_dims(dataset.pixel_array, axis=2)

        # load mask
        try:
            # no pneumothorax
            if '-1' in self.labels_frame.loc[self.files_list[idx].split('/')[-1][:-4],' EncodedPixels']:
                np_mask = np.zeros((im_height, im_width, 1), dtype=np.bool)
            else:
                # there is pneumothorax
                if type(self.labels_frame.loc[self.files_list[idx].split('/')[-1][:-4],' EncodedPixels']) == str:
                    np_mask = np.expand_dims(rle2mask(self.labels_frame.loc[self.files_list[idx].split('/')[-1][:-4],' EncodedPixels'], im_height, im_width), axis=2)
                else:
                    np_mask = np.zeros((1024, 1024, 1))
                    for x in self.labels_frame.loc[self.files_list[idx].split('/')[-1][:-4],' EncodedPixels']:
                        np_mask =  np_mask + np.expand_dims(rle2mask(x, 1024, 1024), axis=2)
        except KeyError:
            # couldn't find mask in dataframe
            np_mask = np.zeros((im_height, im_width, 1), dtype=np.bool) # Assume missing masks are empty masks.

        # convert to PIL
        image = Image.fromarray(np_image.reshape(im_height, im_width) , 'L')
        image = image.convert('RGB')
        
        np_mask = np.transpose(np_mask)
        mask = Image.fromarray(np_mask.reshape(im_height, im_width).astype(np.uint8) , 'L')
        
        if self.transform:
            augmented = self.alb_transforms(image=np.array(image), mask=np.array(mask))
            image = Image.fromarray(augmented['image'], 'RGB')
            
            mask = Image.fromarray(augmented['mask'], 'L')
        
        # apply required transforms normalization, reshape and convert to tensor
        #image = normalize_image(image)
        image = self.transforms_image(image)
        mask = self.transforms_mask(mask)
        
        # convert to tensor and clip mask
        mask = torch.from_numpy(np.array(mask, dtype=np.int64))
        mask = np.clip(mask, 0, 1)

        return image, mask

## Training Utilities

U-Net Model:

In [7]:
# https://www.kaggle.com/cpmpml/fast-iou-metric-in-numpy-and-tensorflow
def get_iou_vector(A, B):    
    batch_size = A.shape[0]
    metric = 0.0
        
    for batch in range(batch_size):
        p, t = A[batch].reshape(224,224), B[batch]
        true = np.sum(t)
        pred = np.sum(p)
        
        # deal with empty mask first
        if true == 0:
            metric += (pred == 0)
            continue
        
        # non empty mask case.  Union is never empty 
        # hence it is safe to divide by its number of pixels
        
        intersection = np.sum(t * p)
        union = true + pred - intersection
        iou = intersection / union
        
        # iou metric is a stepwise approximation of the real iou over 0.5
        iou = np.floor(max(0, (iou - 0.45)*20)) / 10
              
        metric += iou
        
    # teake the average over all images in batch
    metric /= batch_size
    return metric

def my_iou_metric(pred, label):
    return get_iou_vector((pred > 0.5).float().detach().cpu().numpy(), label.detach().cpu().numpy())

In [8]:
def dice_coef(y_pred, y_true):
    y_true_f = y_true.view(-1)
    y_pred = y_pred.float()
    y_pred_f = (y_pred.view(-1) >= 0.5).float()
    intersection = y_true_f * y_pred_f
    score = 2. * torch.sum(intersection) / (torch.sum(y_true_f) + torch.sum(y_pred_f))
    return score

def dice_loss(y_pred, y_true):
    smooth = 1.
    y_true_f = y_true.view(-1)
    y_pred_f = y_pred.view(-1)
    intersection = y_true_f * y_pred_f
    score = (2. * torch.sum(intersection) + smooth) / (torch.sum(y_true_f) + torch.sum(y_pred_f) + smooth)
    return 1. - score

def bce_dice_loss(y_pred, y_true):
    criterion = nn.BCELoss()
    return criterion(y_pred.reshape(-1,224,224), y_true) + dice_loss(y_pred.reshape(-1,224,224), y_true)

def bce_logdice_loss(y_pred, y_true):
    criterion = nn.BCELoss()
    return criterion(y_pred.reshape(-1,224,224), y_true) - torch.log(1. - dice_loss(y_pred.reshape(-1,224,224), y_true))

In [9]:
def conv3x3(in_, out):
    return nn.Conv2d(in_, out, 3, padding=1)


class ConvRelu(nn.Module):
    def __init__(self, in_, out):
        super().__init__()
        self.conv = conv3x3(in_, out)
        self.activation = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.conv(x)
        x = self.activation(x)
        return x


class DecoderBlock(nn.Module):
    def __init__(self, in_channels, middle_channels, out_channels):
        super().__init__()

        self.block = nn.Sequential(
            ConvRelu(in_channels, middle_channels),
            nn.ConvTranspose2d(middle_channels, out_channels, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.block(x)


class UNet11(nn.Module):
    def __init__(self, num_filters=32, pretrained=False):
        """
        :param num_classes:
        :param num_filters:
        :param pretrained:
            False - no pre-trained network is used
            True  - encoder is pre-trained with VGG11
        """
        super().__init__()
        self.pool = nn.MaxPool2d(2, 2)

        self.encoder = models.vgg11(pretrained=pretrained).features

        self.relu = self.encoder[1]
        self.conv1 = self.encoder[0]
        self.conv2 = self.encoder[3]
        self.conv3s = self.encoder[6]
        self.conv3 = self.encoder[8]
        self.conv4s = self.encoder[11]
        self.conv4 = self.encoder[13]
        self.conv5s = self.encoder[16]
        self.conv5 = self.encoder[18]

        self.center = DecoderBlock(num_filters * 8 * 2, num_filters * 8 * 2, num_filters * 8)
        self.dec5 = DecoderBlock(num_filters * (16 + 8), num_filters * 8 * 2, num_filters * 8)
        self.dec4 = DecoderBlock(num_filters * (16 + 8), num_filters * 8 * 2, num_filters * 4)
        self.dec3 = DecoderBlock(num_filters * (8 + 4), num_filters * 4 * 2, num_filters * 2)
        self.dec2 = DecoderBlock(num_filters * (4 + 2), num_filters * 2 * 2, num_filters)
        self.dec1 = ConvRelu(num_filters * (2 + 1), num_filters)

        self.final = nn.Conv2d(num_filters, 1, kernel_size=1)

    def forward(self, x):
        conv1 = self.relu(self.conv1(x))
        conv2 = self.relu(self.conv2(self.pool(conv1)))
        conv3s = self.relu(self.conv3s(self.pool(conv2)))
        conv3 = self.relu(self.conv3(conv3s))
        conv4s = self.relu(self.conv4s(self.pool(conv3)))
        conv4 = self.relu(self.conv4(conv4s))
        conv5s = self.relu(self.conv5s(self.pool(conv4)))
        conv5 = self.relu(self.conv5(conv5s))

        center = self.center(self.pool(conv5))

        dec5 = self.dec5(torch.cat([center, conv5], 1))
        dec4 = self.dec4(torch.cat([dec5, conv4], 1))
        dec3 = self.dec3(torch.cat([dec4, conv3], 1))
        dec2 = self.dec2(torch.cat([dec3, conv2], 1))
        dec1 = self.dec1(torch.cat([dec2, conv1], 1))
        return self.final(dec1)


def unet11(pretrained=False, **kwargs):
    """
    pretrained:
            False - no pre-trained network is used
            True  - encoder is pre-trained with VGG11
            carvana - all weights are pre-trained on
                Kaggle: Carvana dataset https://www.kaggle.com/c/carvana-image-masking-challenge
    """
    model = UNet11(pretrained=pretrained, **kwargs)
    return model


class Interpolate(nn.Module):
    def __init__(self, size=None, scale_factor=None, mode='nearest', align_corners=False):
        super(Interpolate, self).__init__()
        self.interp = nn.functional.interpolate
        self.size = size
        self.mode = mode
        self.scale_factor = scale_factor
        self.align_corners = align_corners
        
    def forward(self, x):
        x = self.interp(x, size=self.size, scale_factor=self.scale_factor, 
                        mode=self.mode, align_corners=self.align_corners)
        return x


class DecoderBlockV2(nn.Module):
    def __init__(self, in_channels, middle_channels, out_channels, is_deconv=True):
        super(DecoderBlockV2, self).__init__()
        self.in_channels = in_channels

        if is_deconv:
            """
                Paramaters for Deconvolution were chosen to avoid artifacts, following
                link https://distill.pub/2016/deconv-checkerboard/
            """

            self.block = nn.Sequential(
                ConvRelu(in_channels, middle_channels),
                nn.ConvTranspose2d(middle_channels, out_channels, kernel_size=4, stride=2,
                                   padding=1),
                nn.ReLU(inplace=True)
            )
        else:
            self.block = nn.Sequential(
                Interpolate(scale_factor=2, mode='bilinear'),
                ConvRelu(in_channels, middle_channels),
                ConvRelu(middle_channels, out_channels),
            )

    def forward(self, x):
        return self.block(x)


class AlbuNet(nn.Module):
    """
        UNet (https://arxiv.org/abs/1505.04597) with Resnet34(https://arxiv.org/abs/1512.03385) encoder
        Proposed by Alexander Buslaev: https://www.linkedin.com/in/al-buslaev/
        """

    def __init__(self, num_classes=1, num_filters=32, pretrained=False, is_deconv=False):
        """
        :param num_classes:
        :param num_filters:
        :param pretrained:
            False - no pre-trained network is used
            True  - encoder is pre-trained with resnet34
        :is_deconv:
            False: bilinear interpolation is used in decoder
            True: deconvolution is used in decoder
        """
        super().__init__()
        self.num_classes = num_classes

        self.pool = nn.MaxPool2d(2, 2)

        self.encoder = models.resnet34(pretrained=pretrained)

        self.relu = nn.ReLU(inplace=True)

        self.conv1 = nn.Sequential(self.encoder.conv1,
                                   self.encoder.bn1,
                                   self.encoder.relu,
                                   self.pool)

        self.conv2 = self.encoder.layer1

        self.conv3 = self.encoder.layer2

        self.conv4 = self.encoder.layer3

        self.conv5 = self.encoder.layer4

        self.center = DecoderBlockV2(512, num_filters * 8 * 2, num_filters * 8, is_deconv)

        self.dec5 = DecoderBlockV2(512 + num_filters * 8, num_filters * 8 * 2, num_filters * 8, is_deconv)
        self.dec4 = DecoderBlockV2(256 + num_filters * 8, num_filters * 8 * 2, num_filters * 8, is_deconv)
        self.dec3 = DecoderBlockV2(128 + num_filters * 8, num_filters * 4 * 2, num_filters * 2, is_deconv)
        self.dec2 = DecoderBlockV2(64 + num_filters * 2, num_filters * 2 * 2, num_filters * 2 * 2, is_deconv)
        self.dec1 = DecoderBlockV2(num_filters * 2 * 2, num_filters * 2 * 2, num_filters, is_deconv)
        self.dec0 = ConvRelu(num_filters, num_filters)
        self.final = nn.Conv2d(num_filters, num_classes, kernel_size=1)

    def forward(self, x):
        conv1 = self.conv1(x)
        conv2 = self.conv2(conv1)
        conv3 = self.conv3(conv2)
        conv4 = self.conv4(conv3)
        conv5 = self.conv5(conv4)

        center = self.center(self.pool(conv5))

        dec5 = self.dec5(torch.cat([center, conv5], 1))

        dec4 = self.dec4(torch.cat([dec5, conv4], 1))
        dec3 = self.dec3(torch.cat([dec4, conv3], 1))
        dec2 = self.dec2(torch.cat([dec3, conv2], 1))
        dec1 = self.dec1(dec2)
        dec0 = self.dec0(dec1)

        if self.num_classes > 1:
            x_out = F.log_softmax(self.final(dec0), dim=1)
        else:
            x_out = self.final(dec0)

        return x_out


class UNet16(nn.Module):
    def __init__(self, num_classes=1, num_filters=32, pretrained=False, is_deconv=False):
        """
        :param num_classes:
        :param num_filters:
        :param pretrained:
            False - no pre-trained network used
            True - encoder pre-trained with VGG16
        :is_deconv:
            False: bilinear interpolation is used in decoder
            True: deconvolution is used in decoder
        """
        super().__init__()
        self.num_classes = num_classes

        self.pool = nn.MaxPool2d(2, 2)

        self.encoder = models.vgg16(pretrained=pretrained).features

        self.relu = nn.ReLU(inplace=True)

        self.conv1 = nn.Sequential(self.encoder[0],
                                   self.relu,
                                   self.encoder[2],
                                   self.relu)

        self.conv2 = nn.Sequential(self.encoder[5],
                                   self.relu,
                                   self.encoder[7],
                                   self.relu)

        self.conv3 = nn.Sequential(self.encoder[10],
                                   self.relu,
                                   self.encoder[12],
                                   self.relu,
                                   self.encoder[14],
                                   self.relu)

        self.conv4 = nn.Sequential(self.encoder[17],
                                   self.relu,
                                   self.encoder[19],
                                   self.relu,
                                   self.encoder[21],
                                   self.relu)

        self.conv5 = nn.Sequential(self.encoder[24],
                                   self.relu,
                                   self.encoder[26],
                                   self.relu,
                                   self.encoder[28],
                                   self.relu)

        self.center = DecoderBlockV2(512, num_filters * 8 * 2, num_filters * 8, is_deconv)

        self.dec5 = DecoderBlockV2(512 + num_filters * 8, num_filters * 8 * 2, num_filters * 8, is_deconv)
        self.dec4 = DecoderBlockV2(512 + num_filters * 8, num_filters * 8 * 2, num_filters * 8, is_deconv)
        self.dec3 = DecoderBlockV2(256 + num_filters * 8, num_filters * 4 * 2, num_filters * 2, is_deconv)
        self.dec2 = DecoderBlockV2(128 + num_filters * 2, num_filters * 2 * 2, num_filters, is_deconv)
        self.dec1 = ConvRelu(64 + num_filters, num_filters)
        self.final = nn.Conv2d(num_filters, num_classes, kernel_size=1)

    def forward(self, x):
        conv1 = self.conv1(x)
        conv2 = self.conv2(self.pool(conv1))
        conv3 = self.conv3(self.pool(conv2))
        conv4 = self.conv4(self.pool(conv3))
        conv5 = self.conv5(self.pool(conv4))

        center = self.center(self.pool(conv5))

        dec5 = self.dec5(torch.cat([center, conv5], 1))

        dec4 = self.dec4(torch.cat([dec5, conv4], 1))
        dec3 = self.dec3(torch.cat([dec4, conv3], 1))
        dec2 = self.dec2(torch.cat([dec3, conv2], 1))
        dec1 = self.dec1(torch.cat([dec2, conv1], 1))

        if self.num_classes > 1:
            x_out = F.log_softmax(self.final(dec1), dim=1)
        else:
            x_out = self.final(dec1)

        return x_out

## Train the Model

In [10]:
def train(model, device, trainloader, testloader, optimizer, epochs):
    model.to(device)
    steps = 0
    running_loss = 0
    running_iou = 0
    print_every = 100
    
    # learning rate cosine annealing
    scheduler = lr_scheduler.CosineAnnealingLR(optimizer, len(trainloader), eta_min=0.0000001)

    for epoch in range(epochs):
        scheduler.step()
        
        for inputs, labels in trainloader:

            steps += 1
            # Move input and label tensors to the default device
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = model.forward(inputs)
            loss = bce_dice_loss(torch.sigmoid(outputs), labels.reshape(-1, 224, 224).float())
            loss.backward()
            optimizer.step()

            running_loss += loss
            running_iou += my_iou_metric(torch.sigmoid(outputs), labels.reshape(-1, 224, 224).float())

            if steps % print_every == 0:
                test_loss = 0
                iou = 0
                model.eval()
                with torch.no_grad():
                    for inputs, labels in testloader:
                        inputs, labels = inputs.to(device), labels.to(device)
                        outputs = model.forward(inputs)
                        
                        test_loss += bce_dice_loss(torch.sigmoid(outputs), labels.reshape(-1, 224, 224).float())
                        
                        iou += my_iou_metric(torch.sigmoid(outputs), labels.reshape(-1, 224, 224).float())

                print(f"Epoch {epoch+1}/{epochs}.. "
                      f"Train loss: {running_loss/print_every:.3f}.. "
                      f"Test loss: {test_loss/len(testloader):.3f}.. "
                      f"Train IOU: {running_iou/print_every:.3f}.. "
                      f"Test IOU: {iou/len(testloader):.3f}.. ")
 
                running_loss = 0
                running_iou = 0
                   
                model.train()

In [11]:
# Load data
print('Loading data: \n')
train_fns, test_fns, df_masks, files_list = load_data()

# Training presets
batch_size = 16
epochs = 80
learning_rate = 0.00001
test_split = .1

original_size = 1024
width = 224
height = 224

# Create dataset and data loader
print('Preparing the dataset: \n')
train_ds = PneumothoraxDataset(train_fns, df_masks, files_list, transform=True, size = (height, width), mode = 'train')

# Creating data indices for training and validation splits:
dataset_size = len(train_ds)
indices = list(range(dataset_size))
split = int(np.floor(test_split * dataset_size))
np.random.seed(42)
np.random.shuffle(indices)
train_indices, test_indices = indices[split:], indices[:split]

train_sampler = SubsetRandomSampler(train_indices)
test_sampler = SubsetRandomSampler(test_indices)

trainloader = torch.utils.data.DataLoader(train_ds, batch_size=batch_size, sampler=train_sampler, num_workers=4)
testloader = torch.utils.data.DataLoader(train_ds, batch_size=batch_size, sampler=test_sampler, num_workers=4)

valid_ds = PneumothoraxDataset(test_fns, None, None, transform=False, size = (height, width), mode = 'validation')
validloader = DataLoader(valid_ds, batch_size=8, shuffle=False, num_workers=1)

torch.cuda.empty_cache()

# Prepare for training: initialize model, loss function, optimizer
class param:
    unet_depth = 6
    unet_start_filters = 8
model = UNet16(num_classes=1, pretrained = True)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Setup device for training
device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")

# Train the model
print('Train the model: \n')

train_stats_df = pd.DataFrame(columns = ['Epoch','Train loss','Test loss'])
train(model, device, trainloader, testloader, optimizer, epochs)

# Save the model
#print('Save the model: \n')
#filepath = 'simple_unet.pth'
#checkpoint = {'state_dict': model.state_dict()}
#torch.save(checkpoint, filepath)

Loading data: 

Preparing the dataset: 



Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /tmp/.cache/torch/checkpoints/vgg16-397923af.pth
100%|██████████| 553433881/553433881 [00:21<00:00, 25993590.70it/s]


Train the model: 

Epoch 1/80.. Train loss: 1.326.. Test loss: 1.043.. Train IOU: 0.196.. Test IOU: 0.499.. 
Epoch 1/80.. Train loss: 1.010.. Test loss: 0.965.. Train IOU: 0.474.. Test IOU: 0.495.. 
Epoch 2/80.. Train loss: 0.932.. Test loss: 0.910.. Train IOU: 0.392.. Test IOU: 0.412.. 
Epoch 2/80.. Train loss: 0.880.. Test loss: 0.830.. Train IOU: 0.257.. Test IOU: 0.324.. 
Epoch 3/80.. Train loss: 0.838.. Test loss: 0.802.. Train IOU: 0.279.. Test IOU: 0.228.. 
Epoch 3/80.. Train loss: 0.762.. Test loss: 0.786.. Train IOU: 0.316.. Test IOU: 0.201.. 
Epoch 3/80.. Train loss: 0.791.. Test loss: 0.843.. Train IOU: 0.308.. Test IOU: 0.135.. 
Epoch 4/80.. Train loss: 0.737.. Test loss: 0.755.. Train IOU: 0.329.. Test IOU: 0.400.. 
Epoch 4/80.. Train loss: 0.711.. Test loss: 0.759.. Train IOU: 0.340.. Test IOU: 0.412.. 
Epoch 5/80.. Train loss: 0.702.. Test loss: 0.708.. Train IOU: 0.334.. Test IOU: 0.388.. 
Epoch 5/80.. Train loss: 0.696.. Test loss: 0.769.. Train IOU: 0.348.. Test IOU: 

## Create Submission

In [12]:
submission = {'ImageId': [], 'EncodedPixels': []}

model.eval()
torch.cuda.empty_cache()

for X, fns in tqdm_notebook(validloader):
    X = Variable(X).cuda()
    output = model(X)
    
    for i, fname in enumerate(fns):
        mask = torch.sigmoid(output[i].reshape(224,224)).data.cpu().numpy()
        mask = binary_opening(mask > 0.9, disk(2))
        
        im = Image.fromarray(((mask)*255).astype(np.uint8)).resize((original_size,original_size))
        im = np.transpose(np.asarray(im))
        
        submission['EncodedPixels'].append(mask2rle(im, original_size, original_size))
        submission['ImageId'].append(fname)

HBox(children=(IntProgress(value=0, max=173), HTML(value='')))




In [13]:
submission_df = pd.DataFrame(submission, columns=['ImageId', 'EncodedPixels'])
submission_df.loc[submission_df.EncodedPixels=='', 'EncodedPixels'] = '-1'
submission_df.to_csv('submission09.csv', index=False)
submission_df.sample(10)

Unnamed: 0,ImageId,EncodedPixels
602,1.2.276.0.7230010.3.1.4.8323329.6347.151787519...,-1
749,1.2.276.0.7230010.3.1.4.8323329.6482.151787519...,-1
1142,1.2.276.0.7230010.3.1.4.8323329.684.1517875164...,-1
590,1.2.276.0.7230010.3.1.4.8323329.6336.151787519...,-1
1028,1.2.276.0.7230010.3.1.4.8323329.6736.151787519...,-1
561,1.2.276.0.7230010.3.1.4.8323329.631.1517875163...,-1
50,1.2.276.0.7230010.3.1.4.8323329.5842.151787519...,-1
1092,1.2.276.0.7230010.3.1.4.8323329.6794.151787520...,-1
617,1.2.276.0.7230010.3.1.4.8323329.6360.151787519...,411858 5 1019 5 1019 5 1019 5 1019 5 1015 13 1...
177,1.2.276.0.7230010.3.1.4.8323329.5959.151787519...,-1


In [14]:
torch.cuda.empty_cache()

for X, fns in tqdm_notebook(validloader):
    X = Variable(X).cuda()
    output = model(X)
    
    for i, fname in enumerate(fns):
        mask = torch.sigmoid(output[i].reshape(224,224)).data.cpu().numpy()
        mask = binary_opening(mask > 0.5, disk(2))
        
        im = Image.fromarray(((mask)*255).astype(np.uint8)).resize((original_size,original_size))
        im = np.transpose(np.asarray(im))
        
        submission['EncodedPixels'].append(mask2rle(im, original_size, original_size))
        submission['ImageId'].append(fname)

HBox(children=(IntProgress(value=0, max=173), HTML(value='')))




In [15]:
submission_df = pd.DataFrame(submission, columns=['ImageId', 'EncodedPixels'])
submission_df.loc[submission_df.EncodedPixels=='', 'EncodedPixels'] = '-1'
submission_df.to_csv('submission05.csv', index=False)
submission_df.sample(10)

Unnamed: 0,ImageId,EncodedPixels
1106,1.2.276.0.7230010.3.1.4.8323329.6807.151787520...,932658 5 32 4 983 5 32 4 983 5 32 4 983 5 32 4...
439,1.2.276.0.7230010.3.1.4.8323329.6198.151787519...,-1
1253,1.2.276.0.7230010.3.1.4.8323329.6940.151787520...,-1
1062,1.2.276.0.7230010.3.1.4.8323329.6767.151787520...,683182 4 1020 4 1020 4 1020 4 1020 4 1015 14 1...
2410,1.2.276.0.7230010.3.1.4.8323329.6740.151787519...,-1
2228,1.2.276.0.7230010.3.1.4.8323329.6575.151787519...,-1
964,1.2.276.0.7230010.3.1.4.8323329.6678.151787519...,-1
222,1.2.276.0.7230010.3.1.4.8323329.600.1517875163...,-1
943,1.2.276.0.7230010.3.1.4.8323329.6659.151787519...,-1
2419,1.2.276.0.7230010.3.1.4.8323329.6749.151787519...,-1
