In [1]:
import numpy as np
import pathlib
import os
import random
import time
import numba
import gc 
import sys
from tqdm.notebook import tqdm 

#import warnings #
#warnings.filterwarnings('ignore') #

#data structure
import pandas as pd

#graphics
import matplotlib.pyplot as plt
%matplotlib inline

#tiff file
import rasterio 
from rasterio.windows import Window 
from rasterio.enums import Resampling

#models
import torch

#data augmentation
import albumentations as A 
import torchvision
from torchvision import transforms as T

In [2]:
!mkdir -p /tmp/pip/cache/
!cp ../input/segmentationmodelspytorch/segmentation_models/efficientnet_pytorch-0.6.3.xyz /tmp/pip/cache/efficientnet_pytorch-0.6.3.tar.gz
!cp ../input/segmentationmodelspytorch/segmentation_models/pretrainedmodels-0.7.4.xyz /tmp/pip/cache/pretrainedmodels-0.7.4.tar.gz
!cp ../input/segmentationmodelspytorch/segmentation_models/segmentation-models-pytorch-0.1.2.xyz /tmp/pip/cache/segmentation_models_pytorch-0.1.2.tar.gz
!cp ../input/segmentationmodelspytorch/segmentation_models/timm-0.1.20-py3-none-any.whl /tmp/pip/cache/
!cp ../input/segmentationmodelspytorch/segmentation_models/timm-0.2.1-py3-none-any.whl /tmp/pip/cache/
!pip install --no-index --find-links /tmp/pip/cache/ efficientnet-pytorch
!pip install --no-index --find-links /tmp/pip/cache/ segmentation-models-pytorch

Looking in links: /tmp/pip/cache/
Processing /tmp/pip/cache/efficientnet_pytorch-0.6.3.tar.gz
Building wheels for collected packages: efficientnet-pytorch
  Building wheel for efficientnet-pytorch (setup.py) ... [?25l- \ done
[?25h  Created wheel for efficientnet-pytorch: filename=efficientnet_pytorch-0.6.3-py3-none-any.whl size=12419 sha256=b625036affb1d67137aa58a2d70f8364e717b338cf6416abcf8bf52c260a95b7
  Stored in directory: /root/.cache/pip/wheels/13/61/ce/bb0a5a0cbdc953125d9e5eac180085769c9298201dcb8f81b4
Successfully built efficientnet-pytorch
Installing collected packages: efficientnet-pytorch
Successfully installed efficientnet-pytorch-0.6.3
Looking in links: /tmp/pip/cache/
Processing /tmp/pip/cache/segmentation_models_pytorch-0.1.2.tar.gz
Processing /tmp/pip/cache/pretrainedmodels-0.7.4.tar.gz
Processing /tmp/pip/cache/timm-0.2.1-py3-none-any.whl
Building wheels for collected packages: segmentation-models-pytorch, pretrainedmodels
  Building wheel for segm

In [3]:
def set_seeds(seed=21):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

In [4]:
BASE_DIR = '../input/hubmap-kidney-segmentation'
SAVE_DIR = "/kaggle/working/"
SEED = 21
WINDOW = 1024
NEW_SIZE = 256
MIN_OVERLAP = 32
OVERLAP = 32 
EPOCHS = 12
BATCH_SIZE = 16
NUM_WORKERS = 4
REDUCE_RATE = 4
THRESHOLD = 50
NFOLDS = 5
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' 

torch.cuda.empty_cache()
set_seeds();

In [5]:
# used for converting the decoded image to rle mask
def mask2rle(im):
    '''
    im: numpy array, 1 - mask, 0 - background
    Returns run length as string formated
    '''
    pixels = im.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = (np.asarray(pixels[1:] != pixels[:-1]).nonzero())[0] + 1
    runs[1::2] -= runs[::2]
    
    return ' '.join(str(x) for x in runs)

def rle2mask(mask_rle, shape=(NEW_SIZE, NEW_SIZE)):
    '''
    mask_rle: run-length as string formated (start length)
    shape: (height,width) of array to return 
    Returns numpy array, 1 - mask, 0 - background

    '''
    s = mask_rle.split()
    starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])]
    starts -= 1
    ends = starts + lengths
    img = np.zeros(shape[0]*shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(shape, order='F')

#voir ce que fait cette fonction --> numba optimisé
@numba.njit()
def rle_numba(pixels):
    size = len(pixels)
    points = []
    if pixels[0] == 1: 
        points.append(0)
    flag = True
    for i in range(1, size):
        if pixels[i] != pixels[i-1]:
            if flag:
                points.append(i+1)
                flag = False
            else:
                points.append(i+1 - points[-1])
                flag = True
    if pixels[-1] == 1: 
        points.append(size-points[-1]+1)    
    return points

def rle_numba_encode(image):
    pixels = image.T.flatten()
    points = rle_numba(pixels)
    return ' '.join(str(x) for x in points)

def make_grid(shape, window=WINDOW, min_overlap=MIN_OVERLAP):
    """
        Return Array of size (N,4), where N - number of tiles,
        2nd axis represente slices: x1,x2,y1,y2 
    """
    x, y = shape
    
    nx = x // (window - min_overlap) + 1 # + 1 seulement si division reste 0?
    x1 = np.linspace(0, x, num=nx, endpoint=False, dtype=np.int64)
    x1[-1] = x - window
    x2 = (x1 + window).clip(0, x) #assert x1.size == x2.size ?
    
    ny = y // (window - min_overlap) + 1
    y1 = np.linspace(0, y, num=ny, endpoint=False, dtype=np.int64)
    y1[-1] = y - window
    y2 = (y1 + window).clip(0, y)
    
    slices = np.zeros((nx,ny, 4), dtype=np.int64)
    for i in range(nx):
        for j in range(ny):
            slices[i,j] = x1[i], x2[i], y1[j], y2[j]    
    return slices.reshape(nx*ny,4)

In [6]:
def to_tensor(x, **kwargs):
    return x.transpose(2, 0, 1).astype('float32') 

def get_preprocessing():
    _transform = [
        #A.Resize(380)
        A.Normalize(mean=(0.485, 0.456, 0.406), 
                    std=(0.229, 0.224, 0.225),
                    max_pixel_value=255.0, always_apply=True, p=1.0),
        A.Lambda(image=to_tensor, mask=to_tensor),
    ]
    return A.Compose(_transform)
#rgb, input range 0,1
#preprocessing_fn = A.Lambda(image=get_preprocessing_fn(encoder_name='efficientnet-b4',
#                                                       pretrained='imagenet'))

In [7]:
def get_training_augmentation():
    train_transform = [
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(p=0.5),
        A.Transpose(p=0.5),
        
        A.ShiftScaleRotate(scale_limit=0.2, rotate_limit=0, shift_limit=0.2, p=0.2, border_mode=0),

        A.IAAAdditiveGaussianNoise(p=0.2),
        A.IAAPerspective(p=0.5),

        A.OneOf(
            [
                A.CLAHE(p=1),
                A.RandomBrightness(p=1),
                A.RandomGamma(p=1),
            ],
            p=0.9,
        ),

        A.OneOf(
            [
                A.IAASharpen(p=1),
                A.Blur(blur_limit=3, p=1),
                A.MotionBlur(blur_limit=3, p=1),
            ],
            p=0.9,
        ),

        A.OneOf(
            [
                A.RandomContrast(p=1),
                A.HueSaturationValue(p=1),
            ],
            p=0.9,
        ),
        
        A.Compose([
            A.VerticalFlip(p=0.5),              
            A.RandomRotate90(p=0.5)]
        )
    ]
    return A.Compose(train_transform)


In [8]:
best_model = torch.load('../input/overlap-and-inference/best_model.pth', map_location=torch.device(DEVICE))

In [9]:
best_model.eval()

Unet(
  (encoder): EfficientNetEncoder(
    (_conv_stem): Conv2dStaticSamePadding(
      3, 48, kernel_size=(3, 3), stride=(2, 2), bias=False
      (static_padding): ZeroPad2d(padding=(0, 1, 0, 1), value=0.0)
    )
    (_bn0): BatchNorm2d(48, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
    (_blocks): ModuleList(
      (0): MBConvBlock(
        (_depthwise_conv): Conv2dStaticSamePadding(
          48, 48, kernel_size=(3, 3), stride=[1, 1], groups=48, bias=False
          (static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
        )
        (_bn1): BatchNorm2d(48, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
        (_se_reduce): Conv2dStaticSamePadding(
          48, 12, kernel_size=(1, 1), stride=(1, 1)
          (static_padding): Identity()
        )
        (_se_expand): Conv2dStaticSamePadding(
          12, 48, kernel_size=(1, 1), stride=(1, 1)
          (static_padding): Identity()
        )
        

In [10]:
p = pathlib.Path(BASE_DIR)
identity = rasterio.Affine(1, 0, 0, 0, 1, 0)
subm = {}
for i, filename in tqdm(enumerate(p.glob('test/*.tiff')), 
                        total = len(list(p.glob('test/*.tiff')))):
    
    print(filename) 
    temp_filename = '/tmp/resampled.tiff'
    test_image_ds = rasterio.open(filename.as_posix(), transform=identity)
    data = test_image_ds.read(
        out_shape=(
            test_image_ds.count,
            int(test_image_ds.height // REDUCE_RATE),
            int(test_image_ds.width // REDUCE_RATE)
        ),
        resampling=Resampling.bilinear
    )
    
    # scale image transform
    transform = test_image_ds.transform * test_image_ds.transform.scale(
        (test_image_ds.width / data.shape[-1]),
        (test_image_ds.height / data.shape[-2])
    )
    
    kwargs = test_image_ds.meta.copy()
    kwargs.update({
        'height': int(test_image_ds.height // REDUCE_RATE),
        'width': int(test_image_ds.width // REDUCE_RATE),
        'transform': transform})
    with rasterio.open(temp_filename, 'w', **kwargs) as dst:
        dst.write(data) 
       
    test_image_ds = rasterio.open(temp_filename, transform=identity)
    
    NEW_SHAPE = test_image_ds.shape#(test_image_ds.height//REDUCE_RATE, test_image_ds.width//REDUCE_RATE)
    slices = make_grid(NEW_SHAPE, window=256, min_overlap=MIN_OVERLAP)
    preds = np.zeros(NEW_SHAPE, dtype=np.uint8)
    weights = np.zeros(NEW_SHAPE, dtype=np.uint8)
    # save GPU quota
    #if len(list(p.glob('test/*.tiff')))==5 and i==0:
    #    subm[i] = {'id':filename.stem, 'predicted': rle_numba_encode(preds)}
    #    break
    
    for (x1,x2,y1,y2) in tqdm(slices):
        #get slice image
        image = test_image_ds.read([1,2,3],
                    window=Window.from_slices((x1,x2),(y1,y2)))
        image = np.moveaxis(image, 0, -1) 
        image = get_preprocessing()(image=image)['image']
        image = torch.from_numpy(image).to(DEVICE).unsqueeze(0)
        with torch.set_grad_enabled(False): #freezing step?
            pred = best_model(image) #.predict?
            pred = pred.squeeze().cpu().numpy().round().astype('uint8')
            
        #merge preds
        preds[x1:x2, y1:y2] += pred
        weights[x1:x2, y1:y2] += 1
    del slices
    gc.collect()
    
    preds = preds/weights
    del weights
    gc.collect()
    
    preds = (preds>0.5).astype(np.uint8)
    subm[i] = {'id':filename.stem, 'predicted': rle_numba_encode(preds)}#mask2rle(preds)}
    del preds
    gc.collect()

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))

../input/hubmap-kidney-segmentation/test/afa5e8098.tiff


  s = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)


HBox(children=(FloatProgress(value=0.0, max=2058.0), HTML(value='')))


../input/hubmap-kidney-segmentation/test/b9a3865fc.tiff


  s = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)


HBox(children=(FloatProgress(value=0.0, max=1610.0), HTML(value='')))


../input/hubmap-kidney-segmentation/test/c68fe75ea.tiff


HBox(children=(FloatProgress(value=0.0, max=1680.0), HTML(value='')))


../input/hubmap-kidney-segmentation/test/b2dc8411c.tiff


HBox(children=(FloatProgress(value=0.0, max=595.0), HTML(value='')))


../input/hubmap-kidney-segmentation/test/26dc41664.tiff


HBox(children=(FloatProgress(value=0.0, max=2064.0), HTML(value='')))





In [11]:
submission = pd.DataFrame.from_dict(subm, orient='index')
submission.to_csv('submission.csv', index=False)
submission.head()

Unnamed: 0,id,predicted
0,afa5e8098,4131487 4 4140686 7 4149885 9 4159083 12 41682...
1,b9a3865fc,3832331 5 3840150 16 3847971 22 3855792 26 386...
2,c68fe75ea,876559 1 883269 2 889979 1 896689 1 1225517 1 ...
3,b2dc8411c,3526171 1 3529875 14 3533584 19 3537292 23 354...
4,26dc41664,15494548 2 15504088 4 15513290 24 15513628 5 1...
