In [179]:
import torch.optim as optim

import torch
from torch.utils.data import DataLoader
from torch.utils.data.sampler import RandomSampler, SequentialSampler

import os
import datetime
import time

from multiprocessing import cpu_count

import multiprocessing

import cv2
import numpy as np

import torch.nn as nn
import torch.nn.functional as F

import scipy.misc as scipy
from tensorboardX import SummaryWriter
import gzip
import csv

from torch.autograd import Variable
from tqdm import tqdm
from collections import OrderedDict

from PIL import Image, ImageDraw
from sklearn.model_selection import train_test_split

from kaggle_data.downloader import KaggleDataDownloader

import torch.utils.data as data

import matplotlib.pylab as plt

CUDA_DEVICE = 2

# transformer

In [180]:
def image_to_tensor(image, mean=0, std=1.):
    """
    Transforms an image to a tensor
    Args:
        image (np.ndarray): A RGB array image
        mean: The mean of the image values
        std: The standard deviation of the image values

    Returns:
        tensor: A Pytorch tensor
    """
    image = image.astype(np.float32)
    image = (image - mean) / std
    image = image.transpose((2, 0, 1))
    tensor = torch.from_numpy(image)
    return tensor


def mask_to_tensor(mask, threshold):
    """
    Transforms a mask to a tensor
    Args:
        mask (np.ndarray): A greyscale mask array
        threshold: The threshold used to consider the mask present or not

    Returns:
        tensor: A Pytorch tensor
    """
    mask = mask
    mask = (mask > threshold).astype(np.float32)
    tensor = torch.from_numpy(mask).type(torch.FloatTensor)
    return tensor

# dataset

In [181]:
# Reference: https://github.com/pytorch/vision/blob/master/torchvision/datasets/folder.py#L66
class TrainImageDataset(data.Dataset):
    def __init__(self, X_data, y_data=None, input_img_resize=(128, 128), output_img_resize=(128, 128),
                 X_transform=None, y_transform=None, threshold=0.5):
        """
            A dataset loader taking images paths as argument and return
            as them as tensors from getitem()

            Args:
                threshold (float): The threshold used to consider the mask present or not
                X_data (list): List of paths to the training images
                y_data (list, optional): List of paths to the target images
                input_img_resize (tuple): Tuple containing the new size of the input images
                output_img_resize (tuple): Tuple containing the new size of the output images
                X_transform (callable, optional): A function/transform that takes in 2 numpy arrays.
                    Assumes X_data and y_data are not None.
                    (train_img, mask_img) and returns a transformed version with the same signature
                y_transform (callable, optional): A function/transform that takes in 2 numpy arrays.
                    Assumes X_data and y_data are not None.
                    (train_img, mask_img) and returns a transformed version with the same signature
        """
        self.threshold = threshold
        self.X_train = X_data
        self.y_train_masks = y_data
        self.input_img_resize = input_img_resize
        self.output_img_resize = output_img_resize
        self.y_transform = y_transform
        self.X_transform = X_transform

    def __getitem__(self, index):
        """
            Args:
                index (int): Index
            Returns:
                tuple: (image, target) where target is class_index of the target class.
        """
        img = Image.open(self.X_train[index])
        img = img.resize(self.input_img_resize, Image.ANTIALIAS)
        img = np.asarray(img.convert("RGB"), dtype=np.float32)

        # Pillow reads gifs
        mask = Image.open(self.y_train_masks[index])
        mask = mask.resize(self.output_img_resize, Image.ANTIALIAS)
        mask = np.asarray(mask.convert("L"), dtype=np.float32)  # GrayScale

        if self.X_transform:
            img, mask = self.X_transform(img, mask)

        if self.y_transform:
            img, mask = self.y_transform(img, mask)

        img = image_to_tensor(img)
        mask = mask_to_tensor(mask, self.threshold)
        return img, mask

    def __len__(self):
        assert len(self.X_train) == len(self.y_train_masks)
        return len(self.X_train)


class TestImageDataset(data.Dataset):
    def __init__(self, X_data, img_resize=(128, 128)):
        """
            A dataset loader taking images paths as argument and return
            as them as tensors from getitem()
            Args:
                X_data (list): List of paths to the training images
                img_resize (tuple): Tuple containing the new size of the images
        """
        self.img_resize = img_resize
        self.X_train = X_data

    def __getitem__(self, index):
        """
        Args:
            index (int): Index
        Returns:
            tuple: (image, target) where target is class_index of the target class.
        """
        img_path = self.X_train[index]
        img = Image.open(img_path)
        img = img.resize(self.img_resize, Image.ANTIALIAS)
        img = np.asarray(img.convert("RGB"), dtype=np.float32)

        img = image_to_tensor(img)
        return img, img_path.split("/")[-1]

    def __len__(self):
        return len(self.X_train)

# Helpers

In [182]:
def st_time(show_func_name=True):
    """
        Decorator to calculate the total time of a func

    Args:
        show_func_name (bool): Whether to show the function name or not
    """

    def wrapper(func):
        def st_func(*args, **keyArgs):
            t1 = time.time()
            r = func(*args, **keyArgs)
            t2 = time.time()
            if show_func_name:
                print("Function=%s, Time elapsed = %ds" % (func.__name__, t2 - t1))
            else:
                print("Time elapsed = %ds" % (t2 - t1))
            return r

        return st_func

    return wrapper


def clear_logs_folder():
    """
        Clear the output directories such
        as output/ and logs/

    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    folder = os.path.join(script_dir, '../logs/')
    for the_file in folder:
        file_path = os.path.join(folder, the_file)
        try:
            if os.path.isfile(file_path):
                os.unlink(file_path)
        except Exception as e:
            print(e)


def get_model_timestamp():
    """
        Returns a timestamp string formatted for
        file names
    Returns:
        str: Timestamp string
    """
    ts = time.time()
    return datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d_%Hh%M')


class DatasetFetcher:
    def __init__(self):
        """
            A tool used to automatically download, check, split and get
            relevant information on the dataset
        """
        self.train_data = None
        self.test_data = None
        self.train_masks_data = None
        self.train_files = None
        self.test_files = None
        self.train_masks_files = None
        self.train_ids = None
        self.masks_ids = None
        self.test_ids = None

    def download_dataset(self, hq_files=False):
        """
        Downloads the dataset and return the input paths
        Args:
            hq_files (bool): Whether to download the hq files or not

        Returns:
            list: [train_data, test_data, metadata_csv, train_masks_csv, train_masks_data]

        """
        competition_name = "carvana-image-masking-challenge"

        destination_path = '../input/'
        prefix = ""
        if hq_files:
            prefix = "_hq"
        files = ["train" + prefix + ".zip", "test" + prefix + ".zip", "metadata.csv.zip",
                 "train_masks.csv.zip", "train_masks.zip"]
        datasets_path = [destination_path + "train" + prefix, destination_path + "test" + prefix,
                         destination_path + "metadata.csv", destination_path + "train_masks.csv",
                         destination_path + "train_masks"]
        is_datasets_present = True

        for dir_path in datasets_path:
            if not os.path.exists(dir_path):
                is_datasets_present = False

        if not is_datasets_present:
            downloader = KaggleDataDownloader('frostics', os.getenv("KAGGLE_PASSWD"), competition_name)

            for file in files:
                output_path = downloader.download_dataset(file, destination_path)
                downloader.decompress(output_path, destination_path)
                os.remove(output_path)
        else:
            print("All datasets are present.")

        self.train_data = datasets_path[0]
        self.test_data = datasets_path[1]
        self.train_masks_data = datasets_path[4]
        self.train_files = sorted(os.listdir(self.train_data))
        self.test_files = sorted(os.listdir(self.test_data))
        self.train_masks_files = sorted(os.listdir(self.train_masks_data))
        self.train_ids = list(set(t.split("_")[0] for t in self.train_files))
        self.masks_ids = list(set(t.split("_")[0] for t in self.train_masks_files))
        self.test_ids = list(set(t.split("_")[0] for t in self.test_files))
        return datasets_path

    def get_car_image_files(self, car_image_id, test_file=False, get_mask=False):
        if get_mask:
            if car_image_id in self.masks_ids:
                return [self.train_masks_data + "/" + s for s in self.train_masks_files if car_image_id in s]
            else:
                raise Exception("No mask with this ID found")
        elif test_file:
            if car_image_id in self.test_ids:
                return [self.test_data + "/" + s for s in self.test_files if car_image_id in s]
        else:
            if car_image_id in self.train_ids:
                return [self.train_data + "/" + s for s in self.train_files if car_image_id in s]
        raise Exception("No image with this ID found")

    def get_image_matrix(self, image_path):
        img = Image.open(image_path)
        return np.asarray(img, dtype=np.uint8)

    def get_image_size(self, image):
        img = Image.open(image)
        return img.size

    def get_train_files(self, validation_size=0.2, sample_size=None):
        """

        Args:
            validation_size (float):
                 Value between 0 and 1
            sample_size (float, None):
                Value between 0 and 1 or None.
                Whether you want to have a sample of your dataset.

        Returns:
            list :
                Returns the dataset in the form:
                [train_data, train_masks_data, valid_data, valid_masks_data]
        """
        train_ids = self.train_ids

        if sample_size:
            rnd = np.random.choice(self.train_ids, int(len(self.train_ids) * sample_size))
            train_ids = rnd.ravel()

        if validation_size:
            ids_train_split, ids_valid_split = train_test_split(train_ids, test_size=validation_size)
        else:
            ids_train_split = train_ids
            ids_valid_split = []

        train_ret = []
        train_masks_ret = []
        valid_ret = []
        valid_masks_ret = []

        for id in ids_train_split:
            train_ret.append(self.get_car_image_files(id))
            train_masks_ret.append(self.get_car_image_files(id, get_mask=True))

        for id in ids_valid_split:
            valid_ret.append(self.get_car_image_files(id))
            valid_masks_ret.append(self.get_car_image_files(id, get_mask=True))

        return [np.array(train_ret).ravel(), np.array(train_masks_ret).ravel(),
                np.array(valid_ret).ravel(), np.array(valid_masks_ret).ravel()]

    def get_test_files(self, sample_size):
        test_files = self.test_files

        if sample_size:
            rnd = np.random.choice(self.test_files, int(len(self.test_files) * sample_size))
            test_files = rnd.ravel()

        ret = [None] * len(test_files)
        for i, file in enumerate(test_files):
            ret[i] = self.test_data + "/" + file

        return np.array(ret)

# Tools

In [183]:
def get_learning_rate(optimizer):
    lr = []
    for param_group in optimizer.param_groups:
        lr += [param_group['lr']]
    return lr


class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

# Callbacks

In [184]:
class Callback:
    def __call__(self, *args, **kwargs):
        raise NotImplementedError


class TensorboardVisualizerCallback(Callback):
    def __init__(self, path_to_files):
        """
            Callback intended to be executed at each epoch
            of the training which goal is to display the result
            of the last validation batch in Tensorboard
        Args:
            path_to_files (str): The path where to store the log files
        """
        self.path_to_files = path_to_files

    def _apply_mask_overlay(self, image, mask, color=(0, 255, 0)):
        mask = np.dstack((mask, mask, mask)) * np.array(color)
        mask = mask.astype(np.uint8)
        return cv2.addWeighted(mask, 0.5, image, 0.5, 0.)  # image * α + mask * β + λ

    def _get_mask_representation(self, image, mask):
        """
         Given a mask and an image this method returns
         one image representing 3 patches of the same image.
         These patches represent:
            - The original image
            - The original mask
            - The mask applied to the original image
        Args:
            image (np.ndarray): The original image
            mask (np.ndarray): The predicted mask

        Returns (np.ndarray):
            An image of size (original_image_height, (original_image_width * 3))
            showing 3 patches of the original image
        """

        H, W, C = image.shape
        mask = cv2.resize(mask, (H, W))
        results = np.zeros((H, 3 * W, 3), np.uint8)
        p = np.zeros((H * W, 3), np.uint8)

        m = np.zeros((H * W), np.uint8)
        l = mask.reshape(-1)
        masked_img = self._apply_mask_overlay(image, mask)

        a = (2 * l + m)
        miss = np.where(a == 2)[0]
        hit = np.where(a == 3)[0]
        fp = np.where(a == 1)[0]
        p[miss] = np.array([0, 0, 255])
        p[hit] = np.array([64, 64, 64])
        p[fp] = np.array([0, 255, 0])
        p = p.reshape(H, W, 3)

        results[:, 0:W] = image
        results[:, W:2 * W] = p
        results[:, 2 * W:3 * W] = masked_img
        return results

    def __call__(self, *args, **kwargs):
        if kwargs['step_name'] != "epoch":
            return

        epoch_id = kwargs['epoch_id']
        last_images, last_targets, last_preds = kwargs['last_val_batch']
        writer = SummaryWriter(self.path_to_files)

        for i, (image, target_mask, pred_mask) in enumerate(zip(last_images, last_targets, last_preds)):

            image = image.data.float().cpu().numpy().astype(np.uint8)
            image = np.transpose(image, (1, 2, 0))  # Invert c, h, w to h, w, c
            target_mask = target_mask.float().data.cpu().numpy().astype(np.uint8)
            pred_mask = pred_mask.float().data.cpu().numpy().astype(np.uint8)
            expected_result = self._get_mask_representation(image, target_mask)
            pred_result = self._get_mask_representation(image, pred_mask)
            writer.add_image("Epoch_" + str(epoch_id) + '-Image_' + str(i + 1) + '-Expected', expected_result, epoch_id)
            writer.add_image("Epoch_" + str(epoch_id) + '-Image_' + str(i + 1) + '-Predicted', pred_result, epoch_id)
            if i == 1:  # 2 Images are sufficient
                break
        writer.close()


class TensorboardLoggerCallback(Callback):
    def __init__(self, path_to_files):
        """
            Callback intended to be executed at each epoch
            of the training which goal is to add valuable
            information to the tensorboard logs such as the losses
            and accuracies
        Args:
            path_to_files (str): The path where to store the log files
        """
        self.path_to_files = path_to_files

    def __call__(self, *args, **kwargs):
        if kwargs['step_name'] != "epoch":
            return

        epoch_id = kwargs['epoch_id']

        writer = SummaryWriter(self.path_to_files)
        writer.add_scalar('data/train_loss', kwargs['train_loss'], epoch_id)
        writer.add_scalar('data/train_dice_coeff', kwargs['train_dice_coeff'], epoch_id)
        writer.add_scalar('data/val_loss', kwargs['val_loss'], epoch_id)
        writer.add_scalar('data/val_dice_coeff', kwargs['val_dice_coeff'], epoch_id)
        writer.close()


class ModelSaverCallback(Callback):
    def __init__(self, path_to_model, verbose=False):
        """
            Callback intended to be executed each time a whole train pass
            get finished. This callback saves the model in the given path
        Args:
            verbose (bool): True or False to make the callback verbose
            path_to_model (str): The path where to store the model
        """
        self.verbose = verbose
        self.path_to_model = path_to_model
        self.suffix = ""

    def set_suffix(self, suffix):
        """

        Args:
            suffix (str): The suffix to append to the model file name
        """
        self.suffix = suffix

    def __call__(self, *args, **kwargs):
        if kwargs['step_name'] != "train":
            return

        pth = self.path_to_model + self.suffix
        net = kwargs['net']
        torch.save(net.state_dict(), pth)

        if self.verbose:
            print("Model saved in {}".format(pth))


class PredictionsSaverCallback(Callback):
    def __init__(self, to_file, origin_img_size, threshold):
        self.threshold = threshold
        self.origin_img_size = origin_img_size
        self.to_file = to_file
        self.file = gzip.open(to_file, "wt", newline="")
        self.writer = csv.writer(self.file)
        self.writer.writerow(["img", "rle_mask"])

    # https://www.kaggle.com/stainsby/fast-tested-rle
    def run_length_encode(self, mask):
        """
        Args:
            mask (np.ndarray): 1 = mask, 0 = background

        Returns:
            str: run length as string formated
        """
        inds = mask.flatten()
        runs = np.where(inds[1:] != inds[:-1])[0] + 2
        runs[1::2] = runs[1::2] - runs[:-1:2]
        rle = ' '.join([str(r) for r in runs])
        return rle

    def get_mask_rle(self, prediction, name):
        """

        Args:
            prediction (np.ndarray): An array of predicted values

        Returns:
            str: A length encoded version of the passed prediction
        """
        mask = cv2.resize(prediction, self.origin_img_size)
        mask = mask > self.threshold
        prnt_mask(mask, self.origin_img_size, name)
        return self.run_length_encode(mask)

    def __call__(self, *args, **kwargs):
        if kwargs['step_name'] != "predict":
            return

        probs = kwargs['probs']
        files_name = kwargs['files_name']
        for (pred, name) in zip(probs, files_name):
            rle = self.get_mask_rle(pred, name)
            self.writer.writerow([name, rle])

    def close_saver(self):
        self.file.flush()
        self.file.close()
        print("Predictions wrote in {} file".format(self.to_file))

# Augmentations

In [185]:
def random_shift_scale_rotate(image, angle, scale, aspect, shift_dx, shift_dy,
                              borderMode=cv2.BORDER_CONSTANT, u=0.5):
    if np.random.random() < u:
        if len(image.shape) == 3:  # Img or mask
            height, width, channels = image.shape
        else:
            height, width = image.shape

        sx = scale * aspect / (aspect ** 0.5)
        sy = scale / (aspect ** 0.5)
        dx = round(shift_dx * width)
        dy = round(shift_dy * height)

        cc = np.math.cos(angle / 180 * np.math.pi) * sx
        ss = np.math.sin(angle / 180 * np.math.pi) * sy
        rotate_matrix = np.array([[cc, -ss], [ss, cc]])

        box0 = np.array([[0, 0], [width, 0], [width, height], [0, height], ])
        box1 = box0 - np.array([width / 2, height / 2])
        box1 = np.dot(box1, rotate_matrix.T) + np.array([width / 2 + dx, height / 2 + dy])

        box0 = box0.astype(np.float32)
        box1 = box1.astype(np.float32)
        mat = cv2.getPerspectiveTransform(box0, box1)

        image = cv2.warpPerspective(image, mat, (width, height), flags=cv2.INTER_LINEAR,
                                    borderMode=borderMode, borderValue=(0, 0, 0, 0))
    return image


def random_horizontal_flip(image, mask, u=0.5):
    if np.random.random() < u:
        image = cv2.flip(image, 1)
        mask = cv2.flip(mask, 1)

    return image, mask


def augment_img(img, mask):
    rotate_limit = (-45, 45)
    aspect_limit = (0, 0)
    scale_limit = (-0.1, 0.1)
    shift_limit = (-0.0625, 0.0625)
    shift_dx = np.random.uniform(shift_limit[0], shift_limit[1])
    shift_dy = np.random.uniform(shift_limit[0], shift_limit[1])
    angle = np.random.uniform(rotate_limit[0], rotate_limit[1])  # degree
    scale = np.random.uniform(1 + scale_limit[0], 1 + scale_limit[1])
    aspect = np.random.uniform(1 + aspect_limit[0], 1 + aspect_limit[1])

    img = random_shift_scale_rotate(img, angle, scale, aspect, shift_dx, shift_dy)
    mask = random_shift_scale_rotate(mask, angle, scale, aspect, shift_dx, shift_dy)

    img, mask = random_horizontal_flip(img, mask)
    return img, mask

# Losses

In [186]:
class BinaryCrossEntropyLoss2d(nn.Module):
    def __init__(self, weight=None, size_average=True):
        """
        Binary cross entropy loss 2D
        Args:
            weight:
            size_average:
        """
        super(BinaryCrossEntropyLoss2d, self).__init__()
        self.bce_loss = nn.BCELoss(weight, size_average)

    def forward(self, logits, targets):
        probs = F.sigmoid(logits)
        probs_flat = probs.view(-1)  # Flatten
        targets_flat = targets.view(-1)  # Flatten
        return self.bce_loss(probs_flat, targets_flat)


class SoftDiceLoss(nn.Module):
    def __init__(self, weight=None, size_average=True):
        super(SoftDiceLoss, self).__init__()

    def forward(self, logits, targets):
        smooth = 1
        num = targets.size(0)
        probs = F.sigmoid(logits)
        m1 = probs.view(num, -1)
        m2 = targets.view(num, -1)
        intersection = (m1 * m2)

        score = 2. * (intersection.sum(1) + smooth) / (m1.sum(1) + m2.sum(1) + smooth)
        score = 1 - score.sum() / num
        return score


# https://github.com/pytorch/pytorch/issues/1249
def dice_coeff(pred, target):
    smooth = 1.
    num = pred.size(0)
    m1 = pred.view(num, -1)  # Flatten
    m2 = target.view(num, -1)  # Flatten
    intersection = (m1 * m2).sum()

    return (2. * intersection + smooth) / (m1.sum() + m2.sum() + smooth)

# Classifier

In [187]:
class CarvanaClassifier:
    def __init__(self, net, max_epochs):
        """
        The classifier for carvana used for training and launching predictions
        Args:
            net (nn.Module): The neural net module containing the definition of your model
            max_epochs (int): The maximum number of epochs on which the model will train
        """
        self.net = net
        self.max_epochs = max_epochs
        self.epoch_counter = 0
        self.use_cuda = torch.cuda.is_available()

    def restore_model(self, model_path):
        """
            Restore a model parameters from the one given in argument
        Args:
            model_path (str): The path to the model to restore

        """
        self.net.load_state_dict(torch.load(model_path))
        if self.use_cuda:
            self.net.cuda(CUDA_DEVICE)

    def _criterion(self, logits, labels):
        return BinaryCrossEntropyLoss2d().forward(logits, labels) + \
            SoftDiceLoss().forward(logits, labels)

    def _validate_epoch(self, valid_loader, threshold):
        losses = AverageMeter()
        dice_coeffs = AverageMeter()

        it_count = len(valid_loader)
        batch_size = valid_loader.batch_size

        images = None  # To save the last images batch
        targets = None  # To save the last target batch
        preds = None  # To save the last prediction batch
        with tqdm(total=it_count, desc="Validating", leave=False) as pbar:
            for ind, (images, targets) in enumerate(valid_loader):
                if self.use_cuda:
                    images = images.cuda(CUDA_DEVICE)
                    targets = targets.cuda(CUDA_DEVICE)

                # Volatile because we are in pure inference mode
                # http://pytorch.org/docs/master/notes/autograd.html#volatile
                images = Variable(images, volatile=True)
                targets = Variable(targets, volatile=True)

                logits = self.net(images)
                probs = F.sigmoid(logits)
                preds = (probs > threshold).float()

                loss = self._criterion(logits, targets)
                acc = dice_coeff(preds, targets)
                losses.update(loss.data[0], batch_size)
                dice_coeffs.update(acc.data[0], batch_size)
                pbar.update(1)

        return losses.avg, dice_coeffs.avg, images, targets, preds

    def _train_epoch(self, train_loader, optimizer, threshold):
        losses = AverageMeter()
        dice_coeffs = AverageMeter()

        batch_size = train_loader.batch_size
        it_count = len(train_loader)
        with tqdm(total=it_count,
                  desc="Epochs {}/{}".format(self.epoch_counter + 1, self.max_epochs),
                  bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [{remaining}{postfix}]'
                  ) as pbar:
            for ind, (inputs, target) in enumerate(train_loader):

                if self.use_cuda:
                    inputs = inputs.cuda(CUDA_DEVICE)
                    target = target.cuda(CUDA_DEVICE)
                inputs, target = Variable(inputs), Variable(target)

                logits = self.net.forward(inputs)
                probs = F.sigmoid(logits)
                pred = (probs > threshold).float()

                loss = self._criterion(logits, target)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                acc = dice_coeff(pred, target)

                losses.update(loss.data[0], batch_size)
                dice_coeffs.update(acc.data[0], batch_size)

                pbar.set_postfix(OrderedDict(loss='{0:1.5f}'.format(loss.data[0]),
                                             dice_coeff='{0:1.5f}'.format(acc.data[0])))
                pbar.update(1)
        return losses.avg, dice_coeffs.avg

    @st_time(show_func_name=False)
    def _run_epoch(self, train_loader: DataLoader, valid_loader: DataLoader,
                   optimizer, threshold=0.5, callbacks=None):
        self.net.train()

        train_loss, train_dice_coeff = self._train_epoch(train_loader, optimizer, threshold)

        self.net.eval()

        val_loss, val_dice_coeff, last_images, last_targets, last_preds = \
            self._validate_epoch(valid_loader, threshold)

        if callbacks:
            for cb in callbacks:
                cb(step_name="epoch",
                   net=self.net,
                   last_val_batch=(last_images, last_targets, last_preds),
                   epoch_id=self.epoch_counter + 1,
                   train_loss=train_loss, train_dice_coeff=train_dice_coeff,
                   val_loss=val_loss, val_dice_coeff=val_dice_coeff
                   )
        print("train_loss = {:03f}, train_dice_coeff = {:03f}\n"
              "val_loss   = {:03f}, val_dice_coeff   = {:03f}"
              .format(train_loss, train_dice_coeff, val_loss, val_dice_coeff))
        self.epoch_counter += 1

    def train(self, train_loader: DataLoader, valid_loader: DataLoader,
              optimizer, epochs, threshold=0.5, callbacks=None):
        """
            Trains the neural net
        Args:
            train_loader (DataLoader): The Dataloader for training
            valid_loader (DataLoader): The Dataloader for validation
            optimizer (Optimizer): The nn optimizer
            epochs (int): number of epochs
            threshold (float): The threshold used to consider the mask present or not
            callbacks (list): List of callbacks functions to call at each epoch
        Returns:
            str, None: The path where the model was saved, or None if it wasn't saved
        """
        if self.use_cuda:
            self.net.cuda(CUDA_DEVICE)

        for epoch in range(epochs):
            self._run_epoch(train_loader, valid_loader, optimizer, threshold, callbacks)

        if callbacks:
            for cb in callbacks:
                cb(step_name="train",
                   net=self.net,
                   epoch_id=self.epoch_counter + 1,
                   )

    def predict(self, test_loader, callbacks=None):
        """
            Launch the prediction on the given loader and pass
            each predictions to the given callbacks.
        Args:
            test_loader (DataLoader): The loader containing the test dataset
            callbacks (list): List of callbacks functions to call at prediction pass
        """
        self.net.eval()

        it_count = len(test_loader)

        with tqdm(total=it_count, desc="Classifying") as pbar:
            for ind, (images, files_name) in enumerate(test_loader):
                if self.use_cuda:
                    images = images.cuda(CUDA_DEVICE)

                images = Variable(images, volatile=True)

                logits = self.net(images)
                probs = F.sigmoid(logits)
                probs = probs.data.cpu().numpy()

                if callbacks:
                    for cb in callbacks:
                        cb(step_name="predict",
                           net=self.net,
                           probs=probs,
                           files_name=files_name
                           )

                pbar.update(1)

# Unet1024

In [188]:
BN_EPS = 1e-4


class ConvBnRelu2d(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=(3, 3), padding=1):
        super(ConvBnRelu2d, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, padding=padding, bias=False)
        self.bn = nn.BatchNorm2d(out_channels, eps=BN_EPS)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x


class StackEncoder1024(nn.Module):
    def __init__(self, x_channels, y_channels, kernel_size=(3, 3)):
        super(StackEncoder1024, self).__init__()
        padding = (kernel_size - 1) // 2
        self.encode = nn.Sequential(
            ConvBnRelu2d(x_channels, y_channels, kernel_size=kernel_size, padding=padding),
            ConvBnRelu2d(y_channels, y_channels, kernel_size=kernel_size, padding=padding),
        )

    def forward(self, x):
        x = self.encode(x)
        x_small = F.max_pool2d(x, kernel_size=2, stride=2)
        return x, x_small


class StackDecoder1024(nn.Module):
    def __init__(self, x_big_channels, x_channels, y_channels, kernel_size=3):
        super(StackDecoder1024, self).__init__()
        padding = (kernel_size - 1) // 2

        self.decode = nn.Sequential(
            ConvBnRelu2d(x_big_channels + x_channels, y_channels, kernel_size=kernel_size, padding=padding),
            ConvBnRelu2d(y_channels, y_channels, kernel_size=kernel_size, padding=padding),
            ConvBnRelu2d(y_channels, y_channels, kernel_size=kernel_size, padding=padding),
        )

    def forward(self, x, down_tensor):
        _, channels, height, width = down_tensor.size()
        x = F.upsample(x, size=(height, width), mode='bilinear')
        x = torch.cat([x, down_tensor], 1)
        x = self.decode(x)
        return x


# 1024x1024
class UNet1024(nn.Module):
    def __init__(self, in_shape):
        super(UNet1024, self).__init__()
        channels, height, width = in_shape

        # 1024
        self.down1 = StackEncoder1024(channels, 24, kernel_size=3)  # 512
        self.down2 = StackEncoder1024(24, 64, kernel_size=3)  # 256
        self.down3 = StackEncoder1024(64, 128, kernel_size=3)  # 128
        self.down4 = StackEncoder1024(128, 256, kernel_size=3)  # 64
        self.down5 = StackEncoder1024(256, 512, kernel_size=3)  # 32
        self.down6 = StackEncoder1024(512, 768, kernel_size=3)  # 16

        self.center = nn.Sequential(
            ConvBnRelu2d(768, 768, kernel_size=3, padding=1),
        )

        # 8
        self.up6 = StackDecoder1024(768, 768, 512, kernel_size=3)  # 16
        self.up5 = StackDecoder1024(512, 512, 256, kernel_size=3)  # 32
        self.up4 = StackDecoder1024(256, 256, 128, kernel_size=3)  # 64
        self.up3 = StackDecoder1024(128, 128, 64, kernel_size=3)  # 128
        self.up2 = StackDecoder1024(64, 64, 24, kernel_size=3)  # 256
        self.up1 = StackDecoder1024(24, 24, 24, kernel_size=3)  # 512
        self.classify = nn.Conv2d(24, 1, kernel_size=1, bias=True)

    def forward(self, x):
        out = x
        down1, out = self.down1(out)
        down2, out = self.down2(out)
        down3, out = self.down3(out)
        down4, out = self.down4(out)
        down5, out = self.down5(out)
        down6, out = self.down6(out)

        out = self.center(out)
        out = self.up6(out, down6)
        out = self.up5(out, down5)
        out = self.up4(out, down4)
        out = self.up3(out, down3)
        out = self.up2(out, down2)
        out = self.up1(out, down1)

        out = self.classify(out)
        out = torch.squeeze(out, dim=1)
        return out

# Unet Original

In [189]:
class ConvBnRelu(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, padding, stride):
        super(ConvBnRelu, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, padding=padding, stride=stride)
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x


class StackEncoder(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(StackEncoder, self).__init__()
        self.convr1 = ConvBnRelu(in_channels, out_channels, kernel_size=(3, 3), stride=1, padding=0)
        self.convr2 = ConvBnRelu(out_channels, out_channels, kernel_size=(3, 3), stride=1, padding=0)
        self.maxPool = nn.MaxPool2d(kernel_size=(2, 2), stride=2)

    def forward(self, x):
        x = self.convr1(x)
        x = self.convr2(x)
        x_trace = x
        x = self.maxPool(x)
        return x, x_trace


class StackDecoder(nn.Module):
    def __init__(self, in_channels, out_channels, upsample_size):
        super(StackDecoder, self).__init__()

        self.upSample = nn.Upsample(size=upsample_size, scale_factor=(2, 2), mode="bilinear")
        self.convr1 = ConvBnRelu(in_channels, out_channels, kernel_size=(3, 3), stride=1, padding=0)
        self.convr2 = ConvBnRelu(in_channels, out_channels, kernel_size=(3, 3), stride=1, padding=0)

    def _crop_concat(self, upsampled, bypass):
        """
         Crop y to the (h, w) of x and concat them.
         Used for the expansive path.
        Returns:
            The concatenated tensor
        """
        c = (bypass.size()[2] - upsampled.size()[2]) // 2
        bypass = F.pad(bypass, (-c, -c, -c, -c))

        return torch.cat((upsampled, bypass), 1)

    def forward(self, x, down_tensor):
        x = self.upSample(x)
        x = self.convr1(x)
        x = self._crop_concat(x, down_tensor)
        x = self.convr2(x)
        return x


class UNetOriginal(nn.Module):
    def __init__(self, in_shape):
        super(UNetOriginal, self).__init__()
        channels, height, width = in_shape

        self.down1 = StackEncoder(channels, 64)
        self.down2 = StackEncoder(64, 128)
        self.down3 = StackEncoder(128, 256)
        self.down4 = StackEncoder(256, 512)

        self.center = nn.Sequential(
            ConvBnRelu(512, 1024, kernel_size=(3, 3), stride=1, padding=0),
            ConvBnRelu(1024, 1024, kernel_size=(3, 3), stride=1, padding=0)
        )

        self.up1 = StackDecoder(in_channels=1024, out_channels=512, upsample_size=(56, 56))
        self.up2 = StackDecoder(in_channels=512, out_channels=256, upsample_size=(104, 104))
        self.up3 = StackDecoder(in_channels=256, out_channels=128, upsample_size=(200, 200))
        self.up4 = StackDecoder(in_channels=128, out_channels=64, upsample_size=(392, 392))

        self.output_seg_map = nn.Conv2d(64, 1, kernel_size=(1, 1), padding=0, stride=1)

    def forward(self, x):
        x, x_trace1 = self.down1(x) 
        x, x_trace2 = self.down2(x)
        x, x_trace3 = self.down3(x)
        x, x_trace4 = self.down4(x)

        x = self.center(x)

        x = self.up1(x, x_trace4)
        x = self.up2(x, x_trace3)
        x = self.up3(x, x_trace2)
        x = self.up4(x, x_trace1)

        out = self.output_seg_map(x)
        out = torch.squeeze(out, dim=1)
        return out

 # Hyper parameters

In [205]:
# Hyperparameters
# input_img_resize = (572, 572)  # The resize size of the input images of the neural net
# output_img_resize = (388, 388)  # The resize size of the output images of the neural net
input_img_resize = (1024, 1024)
output_img_resize = (1024, 1024)
batch_size = 1
epochs = 20
threshold = 0.5
validation_size = 0.2
sample_size = None  # Put 'None' to work on full dataset or a value between 0 and 1
# -- Optional parameters
threads = 0
use_cuda = torch.cuda.is_available()
script_dir = '..'
# Training callbacks
tb_viz_cb = TensorboardVisualizerCallback('../logs/tb_viz')
tb_logs_cb = TensorboardLoggerCallback('../logs/tb_logs')
model_saver_cb = ModelSaverCallback('../output/models/model_' + get_model_timestamp(), verbose=True)

In [206]:
# Download the datasets
ds_fetcher = DatasetFetcher()
ds_fetcher.download_dataset()

# Get the path to the files for the neural net
X_train, y_train, X_valid, y_valid = ds_fetcher.get_train_files(sample_size=sample_size,
                                                                validation_size=validation_size)
full_x_test = ds_fetcher.get_test_files(sample_size)

# -- Computed parameters
# Get the original images size (assuming they are all the same size)
origin_img_size = ds_fetcher.get_image_size(X_train[0])

# Testing callbacks
pred_saver_cb = PredictionsSaverCallback('../output/submit.csv.gz', origin_img_size, threshold)

All datasets are present.


In [207]:
# -- Define our neural net architecture
net = UNet1024((3, *input_img_resize))
# net = UNetOriginal((3, *input_img_resize))
classifier = CarvanaClassifier(net, epochs)
optimizer = optim.RMSprop(net.parameters(), lr=0.0002)
# optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.99)

train_ds = TrainImageDataset(X_train, y_train, input_img_resize, output_img_resize,
                             X_transform=augment_img)
train_loader = DataLoader(train_ds, batch_size,
                          sampler=RandomSampler(train_ds),
                          num_workers=threads,
                          pin_memory=use_cuda)

valid_ds = TrainImageDataset(X_valid, y_valid, input_img_resize, output_img_resize,
                             threshold=threshold)
valid_loader = DataLoader(valid_ds, batch_size,
                          sampler=SequentialSampler(valid_ds),
                          num_workers=threads,
                          pin_memory=use_cuda)

print("Training on {} samples and validating on {} samples "
      .format(len(train_loader.dataset), len(valid_loader.dataset)))

Training on 4064 samples and validating on 1024 samples 


In [15]:
# Train the classifier
classifier.train(train_loader, valid_loader, optimizer,
                 epochs, callbacks=[tb_viz_cb, tb_logs_cb, model_saver_cb])

Epochs 1/20: 100%|██████████| 4064/4064 [00:00, loss=0.46439, dice_coeff=0.74792]
Epochs 2/20:   0%|          | 0/4064 [?]                       

train_loss = 0.413764, train_dice_coeff = 0.859783
val_loss   = 0.204594, val_dice_coeff   = 0.929871
Time elapsed = 1901s


Epochs 2/20: 100%|██████████| 4064/4064 [00:00, loss=0.07686, dice_coeff=0.98761]
Epochs 3/20:   0%|          | 0/4064 [?]                       

train_loss = 0.309429, train_dice_coeff = 0.878662
val_loss   = 0.094185, val_dice_coeff   = 0.979300
Time elapsed = 1878s


Epochs 3/20: 100%|██████████| 4064/4064 [00:00, loss=0.37151, dice_coeff=0.84923]
Epochs 4/20:   0%|          | 0/4064 [?]                       

train_loss = 0.300634, train_dice_coeff = 0.882374
val_loss   = 0.135845, val_dice_coeff   = 0.969714
Time elapsed = 1970s


Epochs 4/20: 100%|██████████| 4064/4064 [00:00, loss=0.09236, dice_coeff=0.98857]
Epochs 5/20:   0%|          | 0/4064 [?]                       

train_loss = 0.296027, train_dice_coeff = 0.883612
val_loss   = 0.096553, val_dice_coeff   = 0.980308
Time elapsed = 1866s


Epochs 5/20: 100%|██████████| 4064/4064 [00:00, loss=0.64470, dice_coeff=0.70972]
Epochs 6/20:   0%|          | 0/4064 [?]                       

train_loss = 0.293514, train_dice_coeff = 0.883937
val_loss   = 0.188762, val_dice_coeff   = 0.948449
Time elapsed = 1850s


Epochs 6/20: 100%|██████████| 4064/4064 [00:00, loss=0.06974, dice_coeff=0.98991]
Epochs 7/20:   0%|          | 0/4064 [?]                       

train_loss = 0.294005, train_dice_coeff = 0.882317
val_loss   = 0.086634, val_dice_coeff   = 0.982344
Time elapsed = 1892s


Epochs 7/20: 100%|██████████| 4064/4064 [00:00, loss=0.37855, dice_coeff=0.84020]
Epochs 8/20:   0%|          | 0/4064 [?]                       

train_loss = 0.288630, train_dice_coeff = 0.884417
val_loss   = 0.091588, val_dice_coeff   = 0.981333
Time elapsed = 1894s


Epochs 8/20: 100%|██████████| 4064/4064 [00:00, loss=0.64806, dice_coeff=0.80330]  
Epochs 9/20:   0%|          | 0/4064 [?]                       

train_loss = 0.280389, train_dice_coeff = 0.887728
val_loss   = 0.060502, val_dice_coeff   = 0.987392
Time elapsed = 2086s


Epochs 9/20: 100%|██████████| 4064/4064 [00:00, loss=0.56640, dice_coeff=0.70240]
Epochs 10/20:   0%|          | 0/4064 [?]                      

train_loss = 0.278680, train_dice_coeff = 0.887551
val_loss   = 0.103730, val_dice_coeff   = 0.975881
Time elapsed = 2233s


Epochs 10/20: 100%|██████████| 4064/4064 [00:00, loss=0.32159, dice_coeff=0.83467]
Epochs 11/20:   0%|          | 0/4064 [?]                      

train_loss = 0.273902, train_dice_coeff = 0.889257
val_loss   = 0.104832, val_dice_coeff   = 0.984185
Time elapsed = 2158s


Epochs 11/20: 100%|██████████| 4064/4064 [00:00, loss=0.35978, dice_coeff=0.79825]
Epochs 12/20:   0%|          | 0/4064 [?]                      

train_loss = 0.274221, train_dice_coeff = 0.888115
val_loss   = 0.097211, val_dice_coeff   = 0.979512
Time elapsed = 1862s


Epochs 12/20: 100%|██████████| 4064/4064 [00:00, loss=0.27385, dice_coeff=0.84521]
Epochs 13/20:   0%|          | 0/4064 [?]                      

train_loss = 0.274172, train_dice_coeff = 0.887405
val_loss   = 0.071034, val_dice_coeff   = 0.988548
Time elapsed = 1827s


Epochs 13/20: 100%|██████████| 4064/4064 [00:00, loss=0.09271, dice_coeff=0.99388]
Epochs 14/20:   0%|          | 0/4064 [?]                      

train_loss = 0.275440, train_dice_coeff = 0.885557
val_loss   = 0.087194, val_dice_coeff   = 0.989322
Time elapsed = 1835s


Epochs 14/20: 100%|██████████| 4064/4064 [00:00, loss=0.32414, dice_coeff=0.85696]
Epochs 15/20:   0%|          | 0/4064 [?]                      

train_loss = 0.267366, train_dice_coeff = 0.889260
val_loss   = 0.080708, val_dice_coeff   = 0.978500
Time elapsed = 1832s


Epochs 15/20: 100%|██████████| 4064/4064 [00:00, loss=0.32251, dice_coeff=0.85530]
Epochs 16/20:   0%|          | 0/4064 [?]                      

train_loss = 0.272494, train_dice_coeff = 0.885511
val_loss   = 0.189972, val_dice_coeff   = 0.927945
Time elapsed = 1828s


Epochs 16/20: 100%|██████████| 4064/4064 [00:00, loss=0.11866, dice_coeff=0.98563]
Epochs 17/20:   0%|          | 0/4064 [?]                      

train_loss = 0.268286, train_dice_coeff = 0.886893
val_loss   = 0.121162, val_dice_coeff   = 0.976839
Time elapsed = 1830s


Epochs 17/20: 100%|██████████| 4064/4064 [00:00, loss=0.15426, dice_coeff=0.96162]
Epochs 18/20:   0%|          | 0/4064 [?]                      

train_loss = 0.266104, train_dice_coeff = 0.887602
val_loss   = 0.094164, val_dice_coeff   = 0.986987
Time elapsed = 1838s


Epochs 18/20: 100%|██████████| 4064/4064 [00:00, loss=0.31795, dice_coeff=0.87613]
Epochs 19/20:   0%|          | 0/4064 [?]                      

train_loss = 0.263719, train_dice_coeff = 0.889251
val_loss   = 0.093368, val_dice_coeff   = 0.986791
Time elapsed = 1849s


Epochs 19/20: 100%|██████████| 4064/4064 [00:00, loss=0.06892, dice_coeff=0.97744]
Epochs 20/20:   0%|          | 0/4064 [?]                      

train_loss = 0.259693, train_dice_coeff = 0.890328
val_loss   = 0.071182, val_dice_coeff   = 0.985885
Time elapsed = 1866s


Epochs 20/20: 100%|██████████| 4064/4064 [00:00, loss=0.25377, dice_coeff=0.88411]
                                                               

train_loss = 0.257975, train_dice_coeff = 0.891184
val_loss   = 0.069470, val_dice_coeff   = 0.991247
Time elapsed = 1848s
Model saved in ../output/models/model_2018-05-22_22h16


In [208]:
classifier.restore_model('../output/models/model_2018-05-22_22h16')

In [209]:
def prnt_mask(mask, origin_image_size, name):
    image = Image.new("L", origin_image_size, (1))
    draw = ImageDraw.Draw(image)
    for i in range(origin_image_size[0]):
        for j in range(origin_image_size[1]):
            if mask[j][i]:
                draw.point((i,j), 255)
    del draw
    image.save(name + ".png", "PNG")

In [210]:
test_ds = TestImageDataset(full_x_test, input_img_resize)
test_loader = DataLoader(test_ds, batch_size,
                         sampler=SequentialSampler(test_ds),
                         num_workers=threads,
                         pin_memory=use_cuda)

# Predict & save
classifier.predict(test_loader, callbacks=[pred_saver_cb])
pred_saver_cb.close_saver()

Classifying: 100%|██████████| 1/1 [00:03<00:00,  3.31s/it]

Predictions wrote in ../output/submit.csv.gz file



