### кратко
Первоначально в соревновании стояла задача сегментации, для улучшения модели было решено обучить классификатор, который отсекал бы те снимки, на которых нет объектов искомых классов. Для этого из датасета получались макси, после чего сводились к метке 1 или 0 в зависимости от того, есть ли пиксели со значением 1 в масках. 

Были испробованы различные архитектуры: ResNet18, ResNet34, ResNet50, ResNext50, EfficientNet-b0, EfficientNet-b2. Так как маски выглядели просто как пятна, оказалось, что лучше всего работали не очень глубокие архитектуры.

В качестве лоссов тестились BCE,BCE-Dice, Focal-Dice, лучше всего показал себя BCE.
Был выбран ResNet34, предобученный на Imagenet, lr = 5e-3, scheduler = reduceOnPlateau.
Лучший результат выбирался по validation accuracy за эпоху.

## Imports

In [4]:
import torch
import torchvision
import os
import cv2
import pdb
import time
import warnings
import random
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook as tqdm
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader, Dataset, sampler
from matplotlib import pyplot as plt
from albumentations import (HorizontalFlip, VerticalFlip, ShiftScaleRotate, Normalize, Resize, Compose, GaussNoise, RandomBrightness, RandomContrast)
from albumentations.pytorch import ToTensor
warnings.filterwarnings("ignore")
seed = 69
random.seed(seed)
os.environ["PYTHONHASHSEED"] = str(seed)
np.random.seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True

## RLE-Mask utility functions

In [5]:
#https://www.kaggle.com/paulorzp/rle-functions-run-lenght-encode-decode
def mask2rle(img):
    '''
    img: numpy array, 1 -> mask, 0 -> background
    Returns run length as string formated
    '''
    pixels= img.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)

def make_mask(row_id, df):
    '''Given a row index, return image_id and mask (1400, 2100, 4) from the dataframe `df`'''
    fname = df.iloc[row_id].name
    labels = df.iloc[row_id][:4]
    masks = np.zeros((1400, 2100, 4), dtype=np.float32) # float32 is V.Imp
    # 4:class 1～4 (ch:0～3)

    for idx, label in enumerate(labels.values):
        if label is not np.nan:
            label = label.split(" ")
            positions = map(int, label[0::2])
            length = map(int, label[1::2])
            mask = np.zeros(1400 * 2100, dtype=np.uint8)
            for pos, le in zip(positions, length):
                mask[pos:(pos + le)] = 1
            masks[:, :, idx] = mask.reshape(1400, 2100, order='F')
    return fname, masks

## Dataloader

In [6]:
class CloudDataset(Dataset):
    def __init__(self, df, data_folder, mean, std, phase):
        self.df = df
        self.root = data_folder
        self.mean = mean
        self.std = std
        self.phase = phase
        self.transforms = get_transforms(phase, mean, std)
        self.fnames = self.df.index.tolist()

    def __getitem__(self, idx):
#         print(idx)
        image_id, mask = make_mask(idx, self.df)
        image_path = os.path.join(self.root, "train_images",  image_id)
        img = cv2.imread(image_path)
        augmented = self.transforms(image=img, mask=mask)
        img = augmented['image']
        mask = augmented['mask'] # 1x256x1600x4
        mask = mask[0].permute(2, 0, 1) # 1x4x256x1600
        return img, mask

    def __len__(self):
        return len(self.fnames)


def get_transforms(phase, mean, std):
    list_transforms = []
    if phase == "train":
        list_transforms.extend(
            [   
                
                HorizontalFlip(p=0.5),
                VerticalFlip(p=0.5),
                RandomBrightness(),
                RandomContrast(),
                GaussNoise()
            ]
        )
    list_transforms.extend(
        [
            Resize(512,768),
            Normalize(mean=mean, std=std, p=1),
            ToTensor(),
        ]
    )
    list_trfms = Compose(list_transforms)
    return list_trfms

def provider(
    data_folder,
    df_path,
    phase,
    mean=None,
    std=None,
    batch_size=8,
    num_workers=4,
):
    '''Returns dataloader for the model training'''
    df = pd.read_csv(df_path)
    # https://www.kaggle.com/amanooo/defect-detection-starter-u-net
    df['Image'], df['Label'] = df['Image_Label'].str.split('_').str
    df = df.pivot(index='Image',columns='Label',values='EncodedPixels')
    df['defects'] = df.count(axis=1)
    
    train_df, val_df = train_test_split(df, test_size=0.1, stratify=df["defects"], random_state=69)#
    df = train_df if phase == "train" else val_df
    image_dataset = CloudDataset(df, data_folder, mean, std, phase)
    dataloader = DataLoader(
        image_dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=True,
        shuffle=True,   
    )

    return dataloader


## Model Initialization

In [11]:
from torchvision import models

In [12]:
model = models.resnet34( )

In [13]:
PATH = '/var/home/a.kulikov/clouds/model/resnet34-333f7ec4.pth'
model.load_state_dict(torch.load(PATH))

IncompatibleKeys(missing_keys=[], unexpected_keys=[])

In [14]:
in_feats = model.fc.in_features

In [15]:
model.fc

Linear(in_features=512, out_features=1000, bias=True)

In [16]:
model.fc = torch.nn.Linear(in_features=in_feats,out_features=4,bias = True)

In [17]:
for param in model.layer1.parameters():
    param.requires_grad = False

In [18]:
for param in model.layer2.parameters():
    param.requires_grad = False

In [22]:
model # a *deeper* look

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Co

### Training and Validation

In [25]:
def calc_accuracy(input,target,threshold):
    input = torch.sigmoid(input)
    correct_preds_count = int(((input>threshold).reshape(-1).long()==target.reshape(-1).long()).sum())
    return correct_preds_count/(target.shape[0]*target.shape[1]), correct_preds_count

In [30]:
class Trainer(object):
    '''This class takes care of training and validation of our model'''
    def __init__(self, model):
        self.num_workers = 6
        self.batch_size = {"train": 8, "val": 8}
        self.accumulation_steps = 32 // self.batch_size['train']
        self.lr = 5e-3
        self.num_epochs = 30
        self.best_loss = float("inf")
        self.best_accuracy = float(0)
        self.phases = ["train", "val"]
        self.device = torch.device("cuda:0")
        torch.set_default_tensor_type("torch.cuda.FloatTensor")
        self.net = model
        self.criterion = torch.nn.BCEWithLogitsLoss()
        self.optimizer = optim.Adam(self.net.parameters(), lr=self.lr)
        self.scheduler = ReduceLROnPlateau(self.optimizer, mode="min", patience=3, verbose=True)
        self.net = self.net.to(self.device)
        cudnn.benchmark = True
        self.dataloaders = {
            phase: provider(
                data_folder=data_folder,
                df_path=train_df_path,
                phase=phase,
                mean=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225),
                batch_size=self.batch_size[phase],
                num_workers=self.num_workers,
            )
            for phase in self.phases
        }
        self.losses = {phase: [] for phase in self.phases}
        self.iou_scores = {phase: [] for phase in self.phases}
        self.dice_scores = {phase: [] for phase in self.phases}
        
    def forward(self, images, targets):
        images = images.to(self.device)
        targets = targets.to(self.device)
        outputs = self.net(images).float()
        loss = self.criterion(outputs, targets)
        
        return loss, outputs

    def iterate(self, epoch, phase):
        meter = Meter(phase, epoch)
        start = time.strftime("%H:%M:%S")
        print(f"Starting epoch: {epoch} | phase: {phase} | ⏰: {start}")
        batch_size = self.batch_size[phase]
        self.net.train(phase == "train")
        dataloader = self.dataloaders[phase]
        running_loss = 0.0
        total_batches = len(dataloader)
#         tk0 = tqdm(dataloader, total=total_batches)
        self.optimizer.zero_grad()
        accuracy_accum = 0
        correct_answ_accum = 0
        for itr, batch in enumerate(dataloader): # replace `dataloader` with `tk0` for tqdm
            images, targets = batch
            #made classification targets from masks
            targets = (targets.sum(dim=[2,3])>0).float()
            loss, outputs = self.forward(images, targets)
            loss = loss / self.accumulation_steps
            batch_accuracy, correct_answ = calc_accuracy(outputs,targets.to(self.device).long(),0.5)
            accuracy_accum += batch_accuracy
            correct_answ_accum += correct_answ
            if phase == "train":
                loss.backward()
                if (itr + 1 ) % self.accumulation_steps == 0:
                    self.optimizer.step()
                    self.optimizer.zero_grad()
            running_loss += loss.item()
            outputs = outputs.detach().cpu()
#             tk0.set_postfix(loss=(running_loss / ((itr + 1))))
        epoch_loss = (running_loss * self.accumulation_steps) / total_batches
        epoch_accuracy = correct_answ_accum/(total_batches*self.batch_size[phase]*4)
        print("epoch_loss: ",epoch_loss,', mean batch accuracy: ', accuracy_accum / total_batches, ' epoch accuracy: ',epoch_accuracy)
        self.losses[phase].append(epoch_loss)
        torch.cuda.empty_cache()
        return epoch_loss, epoch_accuracy

    def start(self):
        for epoch in range(self.num_epochs):
            self.iterate(epoch, "train")
            state = {
                "epoch": epoch,
                "best_loss": self.best_loss,
                "state_dict": self.net.state_dict(),
                "optimizer": self.optimizer.state_dict(),
                'best_accuracy' : self.best_accuracy
            }
            with torch.no_grad():
                val_loss, epoch_accuracy = self.iterate(epoch, "val")
                self.scheduler.step(val_loss)
            if epoch_accuracy > self.best_accuracy:
                print("******** New optimal found, saving state ********")
                state["best_accuracy"] = self.best_accuracy = epoch_accuracy
                torch.save(state, "./model/classification_model_resnet34_all_data.pth")
            print()


In [27]:
sample_submission_path = '/var/home/a.kulikov/clouds/data/sample_submission.csv'
train_df_path = '/var/home/a.kulikov/clouds/data/train.csv'
data_folder = "/var/home/a.kulikov/clouds/data/"
test_data_folder = "/var/home/a.kulikov/clouds/data/test_images"

In [28]:
from torch.autograd import Variable

In [29]:
#resnet_18_aug_40epochs+focal_loss+from_scratch
model_trainer = Trainer(model)
model_trainer.start()

Starting epoch: 0 | phase: train | ⏰: 22:15:09
epoch_loss:  0.6254506495178622 , mean batch accuracy:  0.6455078125  epoch accuracy:  0.6449908088235294
Starting epoch: 0 | phase: val | ⏰: 22:20:29
epoch_loss:  0.6213184991741881 , mean batch accuracy:  0.6861213235294118  epoch accuracy:  0.6861213235294118
******** New optimal found, saving state ********

Starting epoch: 1 | phase: train | ⏰: 22:20:57
epoch_loss:  0.5953654337345677 , mean batch accuracy:  0.6792279411764706  epoch accuracy:  0.6786534926470589
Starting epoch: 1 | phase: val | ⏰: 22:26:12
epoch_loss:  0.5972999549087357 , mean batch accuracy:  0.7136948529411765  epoch accuracy:  0.7136948529411765
******** New optimal found, saving state ********

Starting epoch: 2 | phase: train | ⏰: 22:26:39
epoch_loss:  0.5763110792921746 , mean batch accuracy:  0.6948529411764706  epoch accuracy:  0.6942210477941176
Starting epoch: 2 | phase: val | ⏰: 22:32:02
epoch_loss:  0.5448783530908472 , mean batch accuracy:  0.7380514705

epoch_loss:  0.49286108946099 , mean batch accuracy:  0.7635569852941176  epoch accuracy:  0.7635569852941176

Starting epoch: 25 | phase: train | ⏰: 01:39:15
epoch_loss:  0.4970493678770521 , mean batch accuracy:  0.7575252757352942  epoch accuracy:  0.7568933823529411
Starting epoch: 25 | phase: val | ⏰: 01:53:21
epoch_loss:  0.4856630815740894 , mean batch accuracy:  0.7637867647058824  epoch accuracy:  0.7637867647058824

Starting epoch: 26 | phase: train | ⏰: 01:54:29
epoch_loss:  0.49876191532787156 , mean batch accuracy:  0.7553423713235294  epoch accuracy:  0.7547679227941176
Starting epoch: 26 | phase: val | ⏰: 02:09:07
epoch_loss:  0.48654372486121517 , mean batch accuracy:  0.7642463235294118  epoch accuracy:  0.7642463235294118

Starting epoch: 27 | phase: train | ⏰: 02:10:16
epoch_loss:  0.4988954532343675 , mean batch accuracy:  0.7549402573529411  epoch accuracy:  0.7541360294117647
Starting epoch: 27 | phase: val | ⏰: 02:24:05
epoch_loss:  0.483316822525333 , mean batch