# Imports

In [7]:
import os
# Select the GPU before torch/fastai are imported further down:
# enumerate devices in PCI bus order and expose only the device with index 1 to this process.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="1"

%reload_ext autoreload
%autoreload 2
%matplotlib inline
import sys
sys.path.append('../fastai/') #fastai version 1.0
from fastai.vision import *
from fastai.vision.learner import model_meta

sys.path.append('../models-pytorch/pretrained-models.pytorch')
import pretrainedmodels
from pretrainedmodels import *

import pandas as pd
import numpy as np
import os
import torch
import torchvision
from torchvision.models import *
from torchsummary import summary
from pathlib import Path
from functools import partial, update_wrapper
from tqdm import tqdm_notebook as tqdm
import PIL
import imageio

# Dataset root; every other location below is derived from it.
PATH = Path('/home/Deep_Learner/work/network/datasets/2018_dsb')
# Folder with the training images (one sub-folder per image id, see the cells below).
TRAIN_NAME = 'stage1_train_fixed'
TRAIN = PATH/TRAIN_NAME
# Folder that receives the combined (one-per-image) masks.
MASKS_NAME = 'stage1_masks'
MASKS = PATH/MASKS_NAME
# CSV with the training labels; only its ImageId column is read in this part of the notebook.
CSV_NAME = 'stage1_train_labels.csv'
# CSV (written below) listing the relative paths of all readable training images.
TRAIN_PATHS_CSV_NAME = 'stage1_train_paths_fixed.csv'
TRAIN_PATHS_CSV = PATH/TRAIN_PATHS_CSV_NAME
LABELS = PATH/CSV_NAME
TEST_NAME = 'stage1_test'
TEST = PATH/TEST_NAME
SAMPLE = PATH/'stage1_sample_submission.csv'
SUBMISSIONS = PATH/'submissions'
# Side effect at import/run time: creates the submissions folder if missing (PATH itself must already exist).
SUBMISSIONS.mkdir(exist_ok=True)

nw = 8   #number of workers for data loader
# Let cuDNN benchmark and pick convolution algorithms per input shape.
torch.backends.cudnn.benchmark=True
seed = 42
# Only NumPy's global RNG is seeded here; torch's RNG is not seeded in this cell.
np.random.seed(seed)

def batch_stats(self, funcs:Collection[Callable]=None)->Tensor:
    "Take one un-normalised training batch and apply every reduction in `funcs` per channel (mean and std by default)"
    reducers = ifnone(funcs, [torch.mean, torch.std])
    # Always sample from the training set and keep the raw (not denormalised) inputs.
    batch = self.one_batch(ds_type=DatasetType.Train, denorm=False)
    inputs = batch[0].cpu()
    # Each reduction is applied along dim 1 of the channel view.
    return [fn(channel_view(inputs), 1) for fn in reducers]

    
# Monkey-patch: make every ImageDataBunch use the batch_stats defined above.
vision.data.ImageDataBunch.batch_stats = batch_stats

def split_path(path:Union[str, Path])-> list:
    "Normalise `path` and return its components, split on the OS path separator"
    normalised = os.path.normpath(path)
    components = normalised.split(os.sep)
    return components
    

# Run configuration. NOTE(review): the meanings below are inferred from the names — confirm against the training cells.
sz = 512  # presumably the image size in pixels; in this part of the notebook it is only passed to get_mask
bs = 8  # presumably the batch size
epochs_freezed = 1  # presumably the number of epochs trained with frozen layers
epochs_unfreezed = 20  # presumably the number of epochs trained after unfreezing

#fastai defaults
tta_beta = 0.4 
tta_scale = 1.35
dropout = 0.5
wd = 0.01

#non defaults
#wd = 0.1 was not better for se_resnext50

# Data

## Train paths CSV

In [None]:
# Load the training labels; only the ImageId column is needed to enumerate the images.
df = pd.read_csv(LABELS)
df.head()

# Unique ids in order of first appearance. A plain set() has no stable order across
# interpreter runs, which would make the CSV row order (and any seeded split of it) irreproducible.
ids = list(df.ImageId.unique())

paths = []
skipped = []  # (image id, error) for every image that could not be opened

for i in tqdm(ids, total=len(ids)):
    # Path is stored relative to PATH so the CSV can be read with path=PATH later.
    p = Path(f'{TRAIN_NAME}/{i}/images/{i}.png')
    try:
        open_image(PATH/p)
    except Exception as e:
        # Best effort: leave out missing/corrupt images, but remember why.
        # `Exception` (not a bare except) so Ctrl-C still interrupts the loop.
        skipped.append((i, e))
        continue
    paths.append(p)

if skipped:
    print(f'{len(skipped)} of {len(ids)} images skipped because they could not be opened')

len(paths)

df_paths = pd.DataFrame(data=paths, columns=['paths'])

# Single-column CSV ('paths') without the index.
df_paths.to_csv(index=False, path_or_buf=TRAIN_PATHS_CSV)

## Combine single-nucleus masks into one mask per image

In [8]:
def get_mask(path:Union[str,Path], size:int)->np.array:
    """
    Merge all single-nucleus masks found in `path` into one combined mask.

    path: path to folder with masks of single nuclei (str or Path)
    size: currently unused - the combined mask keeps the resolution of the
          source masks; kept in the signature for existing callers

    Returns the pixel-wise maximum over all masks as a uint8 array with
    singleton dimensions squeezed out.
    Raises IndexError if `path` contains no entries.
    """
    # Accept plain strings as the annotation promises, and list the folder only once.
    files = list(Path(path).iterdir())
    if not files:
        raise IndexError(f'no mask files found in {path}')
    # The first mask defines the canvas; its shape is indexed as (_, height, width).
    shape = open_mask(files[0]).shape
    mask = np.zeros(shape=(shape[1], shape[2], 1), dtype=np.uint8)
    for m in files:
        nucleus = np.array(PIL.Image.open(m), dtype=np.uint8)
        nucleus = np.expand_dims(nucleus, axis=-1)
        # Pixel-wise maximum keeps every nucleus seen so far.
        mask = np.maximum(mask, nucleus)
    return np.squeeze(mask)

    
def show_mask(mask:np.array):
    "Plot `mask` with matplotlib after dropping its singleton dimensions"
    squeezed = np.squeeze(mask)
    plt.imshow(squeezed)

    
def save_mask(mask:np.array, path:Path, filename:str, suffix:str):
    "Write `mask` to `path`/`filename``suffix`, creating the folder `path` first if it does not exist"
    # Only the last path component is created; parent folders must already exist.
    path.mkdir(exist_ok=True)
    target = f'{path}/{filename}{suffix}'
    imageio.imwrite(target, mask)

# Image ids are the file names (without extension) of the paths stored in the train-paths CSV.
ids = [os.path.splitext(os.path.basename(csv_path))[0]
       for csv_path in pd.read_csv(TRAIN_PATHS_CSV).paths]

# For every image, merge its single-nucleus masks and store the result as <id>.png in MASKS.
for i in tqdm(list(ids)):
    p = PATH/TRAIN_NAME/i/'masks'
    mask = get_mask(p, sz)
    save_mask(mask=mask, path=MASKS, filename=i, suffix='.png')

HBox(children=(IntProgress(value=0, max=664), HTML(value='')))

## Databunch

In [51]:
# Build the segmentation item list from the image paths in the train-paths CSV (relative to PATH);
# the trailing `; s` makes the notebook display the resulting list.
s = SegmentationItemList.from_csv(path=PATH, csv_name=TRAIN_PATHS_CSV_NAME); s

SegmentationItemList (664 items)
Image (3, 1024, 1024),Image (3, 256, 320),Image (3, 1024, 1024),Image (3, 520, 696),Image (3, 520, 696)
Path: /home/Deep_Learner/work/network/datasets/2018_dsb

In [53]:
# Hold out a random 20% of the items for validation; the fixed seed is passed so the split can be repeated.
s = s.split_by_rand_pct(valid_pct=0.2, seed = seed)

In [58]:
def get_y_fn(x):
    "Return the mask path for image `x`: a file with the same file name as `x`, under `PATH/MASKS`"
    image_name = os.path.basename(x)
    return PATH/MASKS/image_name
# Attach the combined mask of each image as its label, with two classes listed as background, nucleus.
# NOTE(review): the combined masks keep the raw pixel values of the source PNGs; if those are 0/255
# rather than 0/1 they will not match two class indices without rescaling — confirm how masks are loaded.
s = s.label_from_func(get_y_fn, classes=array(['background', 'nucleus']))