See https://www.kaggle.com/rishabhiitbhu/unet-starter-kernel-pytorch-lb-0-88

In [1]:
import os
import cv2
cv = cv2
import torch
import time
import numpy as np
import pandas as pd

import torch.nn as nn
from torch.optim import Adam, SGD
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import Dataset, DataLoader
from torch.backends import cudnn

from sklearn.model_selection import train_test_split

import segmentation_models_pytorch as smp

from albumentations import HorizontalFlip, Normalize, Compose
from albumentations.pytorch import ToTensor

import warnings
warnings.filterwarnings("ignore")

import random

from catalyst.dl import SupervisedRunner, MetricCallback

seed = 69
random.seed(seed)
os.environ["PYTHONHASHSEED"] = str(seed)
np.random.seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True

# RLE-Mask utility functions

In [2]:
#https://www.kaggle.com/paulorzp/rle-functions-run-lenght-encode-decode
def mask2rle(img):
    '''
    img: numpy array, 1 - mask, 0 - background
    Returns run length as string formated
    '''
    pixels= img.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)

def make_mask(row_id, df):
    '''Given a row index, return image_id and mask (256, 1600, 4)'''
    fname = df.iloc[row_id].name
    labels = df.iloc[row_id][:4]
    masks = np.zeros((256, 1600, 4), dtype=np.float32) # float32 is V.Imp
    # 4:class 1～4 (ch:0～3)

    for idx, label in enumerate(labels.values):
        if label is not np.nan:
            label = label.split(" ")
            positions = map(int, label[0::2])
            length = map(int, label[1::2])
            mask = np.zeros(256 * 1600, dtype=np.uint8)
            for pos, le in zip(positions, length):
                mask[pos:(pos + le)] = 1
            masks[:, :, idx] = mask.reshape(256, 1600, order='F')
    return fname, masks



# Dataloader

In [3]:
class SteelDataset(Dataset):
    def __init__(self, df, data_folder, mean, std, phase, catalyst=False):
        self.df = df
        self.root = data_folder
        self.mean = mean
        self.std = std
        self.phase = phase
        self.transforms = get_transforms(phase, mean, std)
        self.fnames = self.df.index.tolist()
        self.catalyst = catalyst

    def __getitem__(self, idx):
        image_id, mask = make_mask(idx, self.df)
        image_path = os.path.join(self.root, "train_images",  image_id)
        img = cv2.imread(image_path)
        augmented = self.transforms(image=img, mask=mask)
        img = augmented['image']
        mask = augmented['mask'] # 1x256x1600x4
        mask = mask[0].permute(2, 0, 1) # 1x4x256x1600
        
        if self.catalyst:
            return {'targets': mask, 'features': img}
        else:
            return img, mask

    def __len__(self):
        return len(self.fnames)


def get_transforms(phase, mean, std):
    list_transforms = []
    if phase == "train":
        list_transforms.extend(
            [
                HorizontalFlip(), # only horizontal flip as of now
            ]
        )
    list_transforms.extend(
        [
            Normalize(mean=mean, std=std, p=1),
            ToTensor(),
        ]
    )
    list_trfms = Compose(list_transforms)
    return list_trfms

def provider(
    data_folder,
    df_path,
    mean=None,
    std=None,
    batch_size=8,
    num_workers=4,
    catalyst=False,
):
    '''Returns dataloader for the model training'''
    df = pd.read_csv(df_path)
    # some preprocessing
    # https://www.kaggle.com/amanooo/defect-detection-starter-u-net
    df['ImageId'], df['ClassId'] = zip(*df['ImageId_ClassId'].str.split('_'))
    df['ClassId'] = df['ClassId'].astype(int)
    df = df.pivot(index='ImageId',columns='ClassId',values='EncodedPixels')
    df['defects'] = df.count(axis=1)
    
    train_df, val_df = train_test_split(df, test_size=0.2, stratify=df["defects"])
    
    train_dataset = SteelDataset(train_df, data_folder, mean, std, 'train', catalyst=catalyst)
    train_dataloader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=True,
        shuffle=True,   
    )
    
    val_dataset = SteelDataset(val_df, data_folder, mean, std, 'val', catalyst=catalyst)
    val_dataloader = DataLoader(
        val_dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=True,
        shuffle=True,   
    )

    return train_dataloader, val_dataloader

# Train

In [5]:
from tqdm import tqdm_notebook

In [6]:
sample_submission_path = './dataset/sample_submission.csv'
train_df_path = './dataset/train.csv'
data_folder = "./dataset/"
test_data_folder = "./dataset/"

In [7]:
from catalyst.dl.callbacks import metrics
from catalyst.utils import get_activation_fn

In [8]:
class DiceCallback(MetricCallback):
    """
    Dice metric callback.
    """

    def __init__(
        self,
        input_key: str = "targets",
        output_key: str = "logits",
        prefix: str = "dice",
        eps: float = 1e-7,
        threshold: float = None,
        activation: str = "Sigmoid"
    ):
        """
        :param input_key: input key to use for dice calculation;
            specifies our `y_true`.
        :param output_key: output key to use for dice calculation;
            specifies our `y_pred`.
        """
        super().__init__(
            prefix=prefix,
            metric_fn=self.dice,
            input_key=input_key,
            output_key=output_key,
            eps=eps,
            threshold=threshold,
            activation=activation
        )
        
    def dice(self, outputs, targets, eps=1e-7, threshold=0.5, activation='Sigmoid'):
        activation_fn = get_activation_fn(activation)
        outputs = activation_fn(outputs)

        if threshold is not None:
            outputs = (outputs > threshold).float()
            
        batch_size = len(targets)
        outputs = outputs.view(batch_size, -1)
        targets = targets.view(batch_size, -1)

        intersection = torch.sum(targets * outputs, dim=1)
        union = torch.sum(targets, dim=1) + torch.sum(outputs, dim=1)
        dice = (2 * intersection / (union + eps)).cpu().numpy()
        
        result = []
        for i, d in enumerate(dice):
            if d >= eps:
                result.append(d)
                continue
            
            s = torch.sum(targets[i]).cpu().numpy()
            result.append(1 if s < eps else d)

        return np.mean(result)

In [9]:
class IouCallback(MetricCallback):
    """
    IoU (Jaccard) metric callback.
    """

    def __init__(
        self,
        input_key: str = "targets",
        output_key: str = "logits",
        prefix: str = "iou",
        eps: float = 1e-7,
        threshold: float = None,
        activation: str = "Sigmoid",
    ):
        """
        Args:
            input_key (str): input key to use for iou calculation
                specifies our ``y_true``.
            output_key (str): output key to use for iou calculation;
                specifies our ``y_pred``
            prefix (str): key to store in logs
            eps (float): epsilon to avoid zero division
            threshold (float): threshold for outputs binarization
            activation (str): An torch.nn activation applied to the outputs.
                Must be one of ['none', 'Sigmoid', 'Softmax2d']
        """
        super().__init__(
            prefix=prefix,
            metric_fn=self.iou,
            input_key=input_key,
            output_key=output_key,
            eps=eps,
            threshold=threshold,
            activation=activation
        )
        
    def iou(self, outputs: torch.Tensor,
            targets: torch.Tensor,
            eps: float = 1e-7,
            threshold: float = None,
            activation: str = "Sigmoid"
           ):
        """
        Args:
            outputs (torch.Tensor): A list of predicted elements
            targets (torch.Tensor):  A list of elements that are to be predicted
            eps (float): epsilon to avoid zero division
            threshold (float): threshold for outputs binarization
            activation (str): An torch.nn activation applied to the outputs.
                Must be one of ["none", "Sigmoid", "Softmax2d"]

        Returns:
            float: IoU (Jaccard) score
        """
        activation_fn = get_activation_fn(activation)
        outputs = activation_fn(outputs)

        if threshold is not None:
            outputs = (outputs > threshold).float()
            
        batch_size = len(targets)
        outputs = outputs.view(batch_size, -1)
        targets = targets.view(batch_size, -1)

        intersection = torch.sum(targets * outputs, dim=1)
        union = torch.sum(targets, dim=1) + torch.sum(outputs, dim=1)
        iou = (intersection / (union - intersection + eps)).cpu().numpy()
        
        result = []
        for i, d in enumerate(iou):
            if d >= eps:
                result.append(d)
                continue
            
            s = torch.sum(targets[i]).cpu().numpy()
            result.append(1 if s < eps else d)

        return np.mean(result)

In [10]:
model = smp.Unet("resnet50", encoder_weights="imagenet", classes=4, activation=None)

In [11]:
logdir = "./logdir/1.1.resnet50_1e4_adam_weightdecay"
num_epochs = 50
batch_size = 8
default_batch_size = 32
lr = 1e-4

In [12]:
train_dataloader, val_dataloader =  provider(
                data_folder=data_folder,
                df_path=train_df_path,
                mean=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225),
                batch_size=batch_size,
                num_workers=6,
    catalyst=True
            )

loaders = {"train": train_dataloader, "valid": val_dataloader}

In [13]:
criterion = nn.BCEWithLogitsLoss()
optimizer = Adam(model.parameters(), lr=lr, weight_decay=1e-5)
# optimizer = SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=1e-5)
scheduler = ReduceLROnPlateau(optimizer, mode="min", patience=3, verbose=True)

In [14]:
runner = SupervisedRunner()

In [15]:
from catalyst.dl.callbacks import OptimizerCallback, CriterionCallback

In [None]:
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    logdir=logdir,
    num_epochs=num_epochs,
    verbose=True,
    callbacks=[
#         CriterionCallback(),
#         OptimizerCallback(accumulation_steps=32 // batch_size),
        DiceCallback(threshold=0.5),
               IouCallback(threshold=0.5),
              ],
    fp16=True,
)

Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
0/50 * Epoch (train): 100% 1257/1257 [09:42<00:00,  2.16it/s, _timers/_fps=609.870, dice=1.000, iou=1.000, loss=0.050]
0/50 * Epoch (valid): 100% 315/315 [00:41<00:00,  7.50it/s, _timers/_fps=877.171, dice=0.245, iou=0.162, loss=0.066]
[2019-09-02 15:51:05,794] 
0/50 * Epoch 0 (train): _base/lr=0.0001 | _base/mo

10/50 * Epoch (valid): 100% 315/315 [00:41<00:00,  7.55it/s, _timers/_fps=868.588, dice=1.000, iou=1.000, loss=5.933e-04]
[2019-09-02 17:36:28,360] 
10/50 * Epoch 10 (train): _base/lr=0.0001 | _base/momentum=0.9000 | _timers/_fps=499.1667 | _timers/batch_time=0.0172 | _timers/data_time=0.0011 | _timers/model_time=0.0161 | dice=0.8178 | iou=0.7539 | loss=0.0090
10/50 * Epoch 10 (valid): _base/lr=0.0001 | _base/momentum=0.9000 | _timers/_fps=632.7639 | _timers/batch_time=0.0176 | _timers/data_time=0.0032 | _timers/model_time=0.0144 | dice=0.8068 | iou=0.7409 | loss=0.0108
11/50 * Epoch (train):  51% 646/1257 [05:00<04:40,  2.18it/s, _timers/_fps=411.090, dice=0.798, iou=0.718, loss=0.004]    