### кратко
Задачей в соревновании была сегментация облаков на 4 класса, причем на одном снимке могли возникать несколько облаков разного класса, маски могли пересекаться.

Первоначально выбиралась архитектура сегментатора, выбор был между PSPNet и Unet, с энкодером ResNet34, лучше себя показал Unet.
После прочтения решения Heng стало понятно, что этот вопрос стоило проработать глубже. Он использовал обе архитектуры, просто с разными энкодерами.

Были испробованы различные архитектуры энкодеров: ResNet18, ResNet34, ResNet50, ResNext50, EfficientNet-b0, EfficientNet-b2. Так как маски выглядели просто как пятна, оказалось, что лучше всего работали не очень глубокие архитектуры.

В качетсве лоссов тестились BCE,BCE-Dice, Focal-Dice, лучше всего показал себя BCE-Dice, для каждого из лоссов был выбран свой вес, также каждый класс имел свой вес согласно частоте возникновения в датасете.

Энкодером был выбран ResNet34, предобученный на Imagenet, encoder_lr = 5e-4, decoder_lr = 5e-4, scheduler = reduceOnPlateau.
Лучший результат выбирался по positive dice за эпоху( потому что для улучшения качества предсказания использовался классификатор).

В результате, как показали решения других участников, стоило пробовать InceptionNet в качестве энкодера, но по каким-то даже мне неведомым причинам я его не попробовал.

В результате хорошие места заняли решения на Unet+resnet/inceptionnet blend Pspnet+resnet/inceptionnet, все сети были получены на out-of-fold-cv.

## Imports

In [4]:
import torch
import torchvision
from segmentation_models_pytorch import Unet, PSPNet
import os
import cv2
import pdb
import time
import warnings
import random
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook as tqdm
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader, Dataset, sampler
from matplotlib import pyplot as plt
from albumentations import (HorizontalFlip, VerticalFlip, ShiftScaleRotate, Normalize, Resize, Compose, GaussNoise, RandomBrightness, RandomContrast)
from albumentations.pytorch import ToTensor
warnings.filterwarnings("ignore")
seed = 69
random.seed(seed)
os.environ["PYTHONHASHSEED"] = str(seed)
np.random.seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True

## RLE-Mask utility functions

In [5]:
#https://www.kaggle.com/paulorzp/rle-functions-run-lenght-encode-decode
def mask2rle(img):
    '''
    img: numpy array, 1 -> mask, 0 -> background
    Returns run length as string formated
    '''
    pixels= img.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)

def make_mask(row_id, df):
    '''Given a row index, return image_id and mask (1400, 2100, 4) from the dataframe `df`'''
    fname = df.iloc[row_id].name
    labels = df.iloc[row_id][:4]
    masks = np.zeros((1400, 2100, 4), dtype=np.float32) # float32 is V.Imp
    # 4:class 1～4 (ch:0～3)

    for idx, label in enumerate(labels.values):
        if label is not np.nan:
            label = label.split(" ")
            positions = map(int, label[0::2])
            length = map(int, label[1::2])
            mask = np.zeros(1400 * 2100, dtype=np.uint8)
            for pos, le in zip(positions, length):
                mask[pos:(pos + le)] = 1
            masks[:, :, idx] = mask.reshape(1400, 2100, order='F')
    return fname, masks

## Dataloader

In [6]:
class CloudDataset(Dataset):
    def __init__(self, df, data_folder, mean, std, phase):
        self.df = df
        self.root = data_folder
        self.mean = mean
        self.std = std
        self.phase = phase
        self.transforms = get_transforms(phase, mean, std)
        self.fnames = self.df.index.tolist()

    def __getitem__(self, idx):
        image_id, mask = make_mask(idx, self.df)
        image_path = os.path.join(self.root, "train_images",  image_id)
        img = cv2.imread(image_path)
        augmented = self.transforms(image=img, mask=mask)
        img = augmented['image']
        mask = augmented['mask'] 
        mask = mask[0].permute(2, 0, 1) 
        return img, mask

    def __len__(self):
        return len(self.fnames)


def get_transforms(phase, mean, std):
    list_transforms = []
    if phase == "train":
        list_transforms.extend(
            [   
                
                HorizontalFlip(p=0.5),
                VerticalFlip(p=0.5),
                RandomBrightness(),
                RandomContrast(),
                GaussNoise()
            ]
        )
    list_transforms.extend(
        [
            Resize(512,768),
            Normalize(mean=mean, std=std, p=1),
            ToTensor(),
        ]
    )
    list_trfms = Compose(list_transforms)
    return list_trfms

def provider(
    data_folder,
    df_path,
    phase,
    mean=None,
    std=None,
    batch_size=8,
    num_workers=4,
):
    '''Returns dataloader for the model training'''
    df = pd.read_csv(df_path)
    # https://www.kaggle.com/amanooo/defect-detection-starter-u-net
    df['Image'], df['Label'] = df['Image_Label'].str.split('_').str
    df = df.pivot(index='Image',columns='Label',values='EncodedPixels')
    df['defects'] = df.count(axis=1)
    
    train_df, val_df = train_test_split(df, test_size=0.2, stratify=df["defects"], random_state=69)#
    df = train_df if phase == "train" else val_df
    image_dataset = CloudDataset(df, data_folder, mean, std, phase)
    dataloader = DataLoader(
        image_dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=True,
        shuffle=True,   
    )

    return dataloader


## Some more utility functions

Dice and IoU metric implementations, metric logger for training and validation.

In [8]:
def metric(probability, truth, threshold=0.5, reduction='none'):
    '''Calculates dice of positive and negative images seperately'''
    '''probability and truth must be torch tensors'''
    batch_size = len(truth)
    with torch.no_grad():
        probability = probability.view(batch_size, -1)
        truth = truth.view(batch_size, -1)
        assert(probability.shape == truth.shape)

        p = (probability > threshold).float()
        t = (truth > 0.5).float()

        t_sum = t.sum(-1)
        p_sum = p.sum(-1)
        neg_index = torch.nonzero(t_sum == 0)
        pos_index = torch.nonzero(t_sum >= 1)

        dice_neg = (p_sum == 0).float()
        dice_pos = 2 * (p*t).sum(-1)/((p+t).sum(-1))

        dice_neg = dice_neg[neg_index]
        dice_pos = dice_pos[pos_index]
        dice = torch.cat([dice_pos, dice_neg])

        dice_neg = np.nan_to_num(dice_neg.mean().item(), 0)
        dice_pos = np.nan_to_num(dice_pos.mean().item(), 0)
        dice = dice.mean().item()

        num_neg = len(neg_index)
        num_pos = len(pos_index)

    return dice, dice_neg, dice_pos, num_neg, num_pos

class Meter:
    '''A meter to keep track of iou and dice scores throughout an epoch'''
    def __init__(self, phase, epoch):
        self.base_threshold = 0.5 # <<<<<<<<<<< here's the threshold
        self.base_dice_scores = []
        self.dice_neg_scores = []
        self.dice_pos_scores = []

    def update(self, targets, outputs):
        probs = torch.sigmoid(outputs)
        dice, dice_neg, dice_pos, _, _ = metric(probs, targets, self.base_threshold)
        self.base_dice_scores.append(dice)
        self.dice_pos_scores.append(dice_pos)
        self.dice_neg_scores.append(dice_neg)
        preds = predict(probs, self.base_threshold)

    def get_metrics(self):
        dice = np.mean(self.base_dice_scores)
        dice_neg = np.mean(self.dice_neg_scores)
        dice_pos = np.mean(self.dice_pos_scores)
        dices = [dice, dice_neg, dice_pos]
        return dices

def epoch_log(phase, epoch, epoch_loss, meter, start):
    '''logging the metrics at the end of an epoch'''
    dices = meter.get_metrics()
    dice, dice_neg, dice_pos = dices
    print("Loss: %0.4f | dice: %0.4f | dice_neg: %0.4f | dice_pos: %0.4f" % (epoch_loss, dice, dice_neg, dice_pos))
    return dice, dice_pos

In [9]:
import torch
from torch import nn
from torch.nn import functional as F

def diceloss(input, target, eps=1e-7):
    if type(target) is not torch.long:
        target = target.long()
    true_1_hot = torch.eye(2)[target.squeeze(1)].to(input.device) # B x C x H x W x 2
    true_1_hot = true_1_hot.permute(0, 4, 1, 2, 3).float() # B x 2 x C x H x W
    true_1_hot_f = true_1_hot[:, 0:1, ...] # B x 1 x C x H x W, falses
    true_1_hot_s = true_1_hot[:, 1:2, ...] # B x 1 x C x H x W, true
    true_1_hot = torch.cat([true_1_hot_s, true_1_hot_f], dim=1)
    pos_prob = torch.sigmoid(input)
    neg_prob = 1. - pos_prob
    probas = torch.cat([pos_prob[:, None, ...], neg_prob[:, None, ...]], dim=1)
    intersection = torch.sum(probas * true_1_hot, dim=(3, 4))
    cardinality = torch.sum(probas + true_1_hot, dim=(3, 4))
    dice_loss = (2. * intersection / (cardinality + eps)).mean(dim=(0, 1))
    return (1 - dice_loss)

class FocalLoss(nn.Module):
    def __init__(self, gamma):
        super().__init__()
        self.gamma = gamma
        
    def forward(self, input, target):
        # Inspired by the implementation of binary_cross_entropy_with_logits
        batch_size, num_classes = input.shape[0], input.shape[1]
        
        if not (target.size() == input.size()):
            raise ValueError("Target size ({}) must be the same as input size ({})".format(target.size(), input.size()))
        
        input, target = input.reshape((batch_size, num_classes, -1)), target.reshape((batch_size, num_classes, -1))
        
        max_val = (-input).clamp(min=0)
        loss = input - input * target + max_val + ((-max_val).exp() + (-input - max_val).exp()).log()

        # This formula gives us the log sigmoid of 1-p if y is 0 and of p if y is 1
        invprobs = F.logsigmoid(-input * (target * 2 - 1))
        loss = (invprobs * self.gamma).exp() * loss
        return loss.mean(dim=(0, 2))
    
class DiceLoss(nn.Module):
    
    def __init__(self, eps=1e-7):
        super().__init__()
        self.eps = eps
        
    def forward(self, input, target):
        if not (target.size() == input.size()):
            raise ValueError("Target size ({}) must be the same as input size ({})".format(target.size(), input.size()))
        dice_loss_class = diceloss(input, target, eps=self.eps)
        return dice_loss_class
    
class BCELoss(nn.Module):
    
    def __init__(self):
        super(BCELoss, self).__init__()
        
    def forward(self, input, target):
        batch_size, num_classes = input.shape[0], input.shape[1]
        if not (target.size() == input.size()):
            raise ValueError("Target size ({}) must be the same as input size ({})".format(target.size(), input.size()))
        
        input, target = input.reshape((batch_size, num_classes, -1)), target.reshape((batch_size, num_classes, -1))
        
        loss =  F.binary_cross_entropy_with_logits(input, target, reduce=False)
        return loss.mean(dim=(0, 2))
    
    
class BCEDiceLoss(nn.Module):
    
    def __init__(self, weight, eps):
        super().__init__()
        if not isinstance(weight, torch.Tensor):
            weight = torch.tensor(weight)
        self.register_buffer('weight', weight)
        self.bce = BCELoss()
        self.dice = DiceLoss(eps=eps)
        
    def forward(self, input, target):
        if input.device != self.weight.device:
            self.weight = self.weight.to(input.device)
        bce_loss = self.bce(input, target)
        dice_loss = self.dice(input, target)
        return self.weight[0] * bce_loss + self.weight[1] * dice_loss
    
class FocalDiceLoss(nn.Module):
    
    def __init__(self, weight, gamma, eps):
        super().__init__()
        if not isinstance(weight, torch.Tensor):
            weight = torch.tensor(weight)
        self.register_buffer('weight', weight)
        self.focal = FocalLoss(gamma)
        self.dice = DiceLoss(eps=eps)
        
    def forward(self, input, target):
        focal_loss = self.focal(input, target)
        dice_loss = self.dice(input, target)
        return self.weight[0] * focal_loss + self.weight[1] * dice_loss
    
losses_type = {
    'bce': BCELoss,
    'dice': DiceLoss,
    'focal': FocalLoss,
    'bce_dice': BCEDiceLoss,
    'focal_dice': FocalDiceLoss
}

class WeightedLoss(nn.Module):
    
    def __init__(self, weight, loss_type, loss_params):
        super().__init__()
        if not isinstance(weight, torch.Tensor):
            weight = torch.tensor(weight)
        self.register_buffer('weight', weight)
        self.loss = losses_type[loss_type](**loss_params)
        
    def forward(self, input, target):
        weight = self.weight
        if input.device != weight.device:
            weight = weight.to(input.device)
        loss_per_class = self.loss(input, target)
        return (weight * loss_per_class).sum()

## Model Initialization

In [12]:
model = Unet("resnet34", encoder_weights='loaded', classes=4, activation=None,attention_type='scse',center=True)

In [15]:
model # a *deeper* look

Unet(
  (encoder): ResNetEncoder(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_s

### Training and Validation

In [21]:
class Trainer(object):
    '''This class takes care of training and validation of our model'''
    def __init__(self, model):
        self.num_workers = 6
        self.batch_size = {"train": 4, "val": 4}
        self.accumulation_steps = 8 // self.batch_size['train']
        self.lr = 5e-4
        self.num_epochs = 30
        self.best_loss = float("inf")
        self.best_dice = float(0)
        self.phases = ["train", "val"]
        self.device = torch.device("cuda:1")
        torch.set_default_tensor_type("torch.cuda.FloatTensor")
        self.net = model
        self.criterion = WeightedLoss([0.25, 0.32, 0.23, 0.2], 'bce_dice',{'weight':torch.tensor([2.7,0.7], device=self.device),
                                                                          'eps':1e-7})
        self.optimizer = optim.Adam([
                                        {"params": model.encoder.parameters(), "lr": 5e-4},
                                        {"params": model.decoder.parameters(), "lr": 5e-3}
                                    ], lr=self.lr)
        self.scheduler = ReduceLROnPlateau(self.optimizer, mode="min", patience=3, verbose=True)
        self.net = self.net.to(self.device)
        cudnn.benchmark = True
        self.dataloaders = {
            phase: provider(
                data_folder=data_folder,
                df_path=train_df_path,
                phase=phase,
                mean=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225),
                batch_size=self.batch_size[phase],
                num_workers=self.num_workers,
            )
            for phase in self.phases
        }
        self.losses = {phase: [] for phase in self.phases}
        self.dice_scores = {phase: [] for phase in self.phases}
        
    def forward(self, images, targets):
        images = images.to(self.device)
        masks = targets.to(self.device)
        outputs = self.net(images)
        loss = self.criterion(outputs, masks)
        return loss, outputs

    def iterate(self, epoch, phase):
        meter = Meter(phase, epoch)
        start = time.strftime("%H:%M:%S")
        print(f"Starting epoch: {epoch} | phase: {phase} | ⏰: {start}")
        batch_size = self.batch_size[phase]
        self.net.train(phase == "train")
        dataloader = self.dataloaders[phase]
        running_loss = 0.0
        total_batches = len(dataloader)
#         tk0 = tqdm(dataloader, total=total_batches)
        self.optimizer.zero_grad()
        for itr, batch in enumerate(dataloader): # replace `dataloader` with `tk0` for tqdm
            images, targets = batch
            loss, outputs = self.forward(images, targets)
            loss = loss / self.accumulation_steps
            if phase == "train":
                loss.backward()
                if (itr + 1 ) % self.accumulation_steps == 0:
                    self.optimizer.step()
                    self.optimizer.zero_grad()
            running_loss += loss.item()
            outputs = outputs.detach().cpu()
            meter.update(targets, outputs)
#             tk0.set_postfix(loss=(running_loss / ((itr + 1))))
        epoch_loss = (running_loss * self.accumulation_steps) / total_batches
        dice, dice_pos = epoch_log(phase, epoch, epoch_loss, meter, start)
        self.losses[phase].append(epoch_loss)
        self.dice_scores[phase].append(dice)
        torch.cuda.empty_cache()
        return epoch_loss, dice

    def start(self):
        for epoch in range(self.num_epochs):
            self.iterate(epoch, "train")
            state = {
                "epoch": epoch,
                "best_loss": self.best_loss,
                "state_dict": self.net.state_dict(),
                "optimizer": self.optimizer.state_dict(),
                'best_dice': self.best_dice
            }
            with torch.no_grad():
                val_loss, dice = self.iterate(epoch, "val")
                self.scheduler.step(val_loss)
            if dice > self.best_dice:
                print("******** New optimal found, saving state ********")
                state["best_dice"] = self.best_dice = dice
                torch.save(state, "./model/Unet34_bcedice_enc_dec_all_pic.pth")
            print()


In [22]:
sample_submission_path = '/var/home/a.kulikov/clouds/data/sample_submission.csv'
train_df_path = '/var/home/a.kulikov/clouds/data/train.csv'
data_folder = "/var/home/a.kulikov/clouds/data/"
test_data_folder = "/var/home/a.kulikov/clouds/data/test_images"

In [23]:
from torch.autograd import Variable

In [22]:
#resnet_34_aug_30epochs+bce_dice_loss
model_trainer = Trainer(model)
model_trainer.start()

Starting epoch: 0 | phase: train | ⏰: 13:55:32
Loss: 1.3051 | dice: 0.2752 | dice_neg: 0.0000 | dice_pos: 0.2752
Starting epoch: 0 | phase: val | ⏰: 14:08:16
Loss: 1.1544 | dice: 0.3950 | dice_neg: 0.0000 | dice_pos: 0.3950
******** New optimal found, saving state ********

Starting epoch: 1 | phase: train | ⏰: 14:09:58
Loss: 1.1764 | dice: 0.4118 | dice_neg: 0.0000 | dice_pos: 0.4118
Starting epoch: 1 | phase: val | ⏰: 14:22:38
Loss: 1.1429 | dice: 0.4750 | dice_neg: 0.0000 | dice_pos: 0.4750
******** New optimal found, saving state ********

Starting epoch: 2 | phase: train | ⏰: 14:24:19
Loss: 1.1432 | dice: 0.4424 | dice_neg: 0.0000 | dice_pos: 0.4424
Starting epoch: 2 | phase: val | ⏰: 14:37:00
Loss: 1.0805 | dice: 0.4794 | dice_neg: 0.0000 | dice_pos: 0.4794
******** New optimal found, saving state ********

Starting epoch: 3 | phase: train | ⏰: 14:38:39
Loss: 1.1233 | dice: 0.4548 | dice_neg: 0.0000 | dice_pos: 0.4548
Starting epoch: 3 | phase: val | ⏰: 14:51:13
Loss: 1.0624 | di