In [1]:
import numpy as np
import pathlib
import os
import random
import time
import numba
import gc 
import sys
from tqdm.notebook import tqdm 
import cv2

#data structure
import pandas as pd

#tiff file
import rasterio 
from rasterio.windows import Window 
from rasterio.enums import Resampling

#models
import torch

#data augmentation
import albumentations as A 
import torchvision
from torchvision import transforms as T

In [2]:
!mkdir -p /tmp/pip/cache/
!cp ../input/segmentationmodelspytorch/segmentation_models/efficientnet_pytorch-0.6.3.xyz /tmp/pip/cache/efficientnet_pytorch-0.6.3.tar.gz
!cp ../input/segmentationmodelspytorch/segmentation_models/pretrainedmodels-0.7.4.xyz /tmp/pip/cache/pretrainedmodels-0.7.4.tar.gz
!cp ../input/segmentationmodelspytorch/segmentation_models/segmentation-models-pytorch-0.1.2.xyz /tmp/pip/cache/segmentation_models_pytorch-0.1.2.tar.gz
!cp ../input/segmentationmodelspytorch/segmentation_models/timm-0.1.20-py3-none-any.whl /tmp/pip/cache/
!cp ../input/segmentationmodelspytorch/segmentation_models/timm-0.2.1-py3-none-any.whl /tmp/pip/cache/
!pip install --no-index --find-links /tmp/pip/cache/ efficientnet-pytorch
!pip install --no-index --find-links /tmp/pip/cache/ segmentation-models-pytorch

Looking in links: /tmp/pip/cache/
Processing /tmp/pip/cache/efficientnet_pytorch-0.6.3.tar.gz
Building wheels for collected packages: efficientnet-pytorch
  Building wheel for efficientnet-pytorch (setup.py) ... [?25l- \ done
[?25h  Created wheel for efficientnet-pytorch: filename=efficientnet_pytorch-0.6.3-py3-none-any.whl size=12419 sha256=ff259bfb2eda042c465739ac59ce99a658bcd7bfa39c08abed052e8021073c2e
  Stored in directory: /root/.cache/pip/wheels/13/61/ce/bb0a5a0cbdc953125d9e5eac180085769c9298201dcb8f81b4
Successfully built efficientnet-pytorch
Installing collected packages: efficientnet-pytorch
Successfully installed efficientnet-pytorch-0.6.3
Looking in links: /tmp/pip/cache/
Processing /tmp/pip/cache/segmentation_models_pytorch-0.1.2.tar.gz
Processing /tmp/pip/cache/pretrainedmodels-0.7.4.tar.gz
Processing /tmp/pip/cache/timm-0.2.1-py3-none-any.whl
Building wheels for collected packages: segmentation-models-pytorch, pretrainedmodels
  Building wheel for segm

In [3]:
def set_seeds(seed=21):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

In [4]:
BASE_DIR = '../input/hubmap-kidney-segmentation'
SAVE_DIR = "/kaggle/working/"
SEED = 21
WINDOW = 1024
NEW_SIZE = 256
OVERLAP = 32 
REDUCE_RATE = 4
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' 

torch.cuda.empty_cache()
set_seeds();

In [5]:
#voir ce que fait cette fonction --> numba optimisé
@numba.njit()
def rle_numba(pixels):
    size = len(pixels)
    points = []
    if pixels[0] == 1: 
        points.append(0)
    flag = True
    for i in range(1, size):
        if pixels[i] != pixels[i-1]:
            if flag:
                points.append(i+1)
                flag = False
            else:
                points.append(i+1 - points[-1])
                flag = True
    if pixels[-1] == 1: 
        points.append(size-points[-1]+1)    
    return points

def rle_numba_encode(image):
    pixels = image.T.flatten()
    points = rle_numba(pixels)
    return ' '.join(str(x) for x in points)

def make_grid(shape, window=WINDOW, min_overlap=OVERLAP):
    """
        Return Array of size (N,4), where N - number of tiles,
        2nd axis represente slices: x1,x2,y1,y2 
    """
    x, y = shape
    
    nx = x // (window - min_overlap) + 1 # + 1 seulement si division reste 0?
    x1 = np.linspace(0, x, num=nx, endpoint=False, dtype=np.int64)
    x1[-1] = x - window
    x2 = (x1 + window).clip(0, x) #assert x1.size == x2.size ?
    
    ny = y // (window - min_overlap) + 1
    y1 = np.linspace(0, y, num=ny, endpoint=False, dtype=np.int64)
    y1[-1] = y - window
    y2 = (y1 + window).clip(0, y)
    
    slices = np.zeros((nx,ny, 4), dtype=np.int64)
    for i in range(nx):
        for j in range(ny):
            slices[i,j] = x1[i], x2[i], y1[j], y2[j]    
    return slices.reshape(nx*ny,4)

In [6]:
def to_tensor(x, **kwargs):
    return x.transpose(2, 0, 1).astype('float32') 

def get_preprocessing():
    _transform = [
        #A.Resize(380)
        A.Normalize(mean=(0.485, 0.456, 0.406), 
                    std=(0.229, 0.224, 0.225),
                    max_pixel_value=255.0, always_apply=True, p=1.0),
        A.Lambda(image=to_tensor, mask=to_tensor),
    ]
    return A.Compose(_transform)

In [7]:
def get_training_augmentation():
    train_transform = [
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(p=0.5),
        A.Transpose(p=0.5),
        
        A.ShiftScaleRotate(scale_limit=0.2, rotate_limit=0, shift_limit=0.2, p=0.2, border_mode=0),

        A.IAAAdditiveGaussianNoise(p=0.2),
        A.IAAPerspective(p=0.5),

        A.OneOf(
            [
                A.CLAHE(p=1),
                A.RandomBrightness(p=1),
                A.RandomGamma(p=1),
            ],
            p=0.9,
        ),

        A.OneOf(
            [
                A.IAASharpen(p=1),
                A.Blur(blur_limit=3, p=1),
                A.MotionBlur(blur_limit=3, p=1),
            ],
            p=0.9,
        ),

        A.OneOf(
            [
                A.RandomContrast(p=1),
                A.HueSaturationValue(p=1),
            ],
            p=0.9,
        ),
        
        A.Compose([
            A.VerticalFlip(p=0.5),              
            A.RandomRotate90(p=0.5)]
        )
    ]
    return A.Compose(train_transform)


In [8]:
p_base = pathlib.Path(BASE_DIR)
p_model = pathlib.Path('../input/overlap-and-inference')
identity = rasterio.Affine(1, 0, 0, 0, 1, 0)
subm = {}
#trfm = A.Compose([
#    A.Resize(NEW_SIZE,NEW_SIZE)])
WINDOW=256
for i, filename in tqdm(enumerate(p_base.glob('test/*.tiff')), 
                        total = len(list(p_base.glob('test/*.tiff')))):
    print(filename) 
    temp_filename = '/tmp/resampled.tiff'
    test_image_ds = rasterio.open(filename.as_posix(), transform=identity)
    original_shape = test_image_ds.shape
    data = test_image_ds.read(
        out_shape=(
            test_image_ds.count,
            int(test_image_ds.height // REDUCE_RATE),
            int(test_image_ds.width // REDUCE_RATE)
        ),
        resampling=Resampling.bilinear
    )
    
    # scale image transform
    transform = test_image_ds.transform * test_image_ds.transform.scale(
        (test_image_ds.width / data.shape[-1]),
        (test_image_ds.height / data.shape[-2])
    )
    
    kwargs = test_image_ds.meta.copy()
    kwargs.update({
        'height': int(test_image_ds.height // REDUCE_RATE),
        'width': int(test_image_ds.width // REDUCE_RATE),
        'transform': transform})
    with rasterio.open(temp_filename, 'w', **kwargs) as dst:
        dst.write(data) 
       
    test_image_ds = rasterio.open(temp_filename, transform=identity)
    slices = make_grid(test_image_ds.shape, window=NEW_SIZE, min_overlap=OVERLAP)
    preds = np.zeros(test_image_ds.shape, dtype=np.uint8)
    weights = np.zeros(test_image_ds.shape, dtype=np.uint8)
    
    for j, model_file in tqdm(enumerate(p_model.glob('*.pth')), 
                        total = len(list(p_model.glob('*.pth')))):
        best_model = torch.load(model_file, map_location=torch.device(DEVICE))
        
        for (x1,x2,y1,y2) in tqdm(slices):
            #get slice image
            image = test_image_ds.read([1,2,3],
                        window=Window.from_slices((x1,x2),(y1,y2)))
            image = np.moveaxis(image, 0, -1) 
            #image = trfm(image=image)['image']
            image = get_preprocessing()(image=image)['image']
            image = torch.from_numpy(image).to(DEVICE).unsqueeze(0)
            with torch.set_grad_enabled(False): 
                pred = best_model(image) #.predict?
                pred = pred.squeeze().cpu().numpy().round().astype('uint8')
                #pred = cv2.resize(pred,(WINDOW, WINDOW), interpolation=cv2.INTER_AREA)
            #merge preds
            preds[x1:x2, y1:y2] += pred
            weights[x1:x2, y1:y2] += 1
        
    del slices,test_image_ds
    gc.collect()
    
    preds = preds/weights
    del weights
    gc.collect()
        
    preds = (preds>0.5).astype(np.uint8)
    preds = cv2.resize(preds, original_shape, interpolation=cv2.INTER_AREA)
    
    subm[i] = {'id':filename.stem, 'predicted': rle_numba_encode(preds)}
    del preds
    gc.collect()

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))

../input/hubmap-kidney-segmentation/test/afa5e8098.tiff


  s = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2058.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2058.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2058.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2058.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2058.0), HTML(value='')))



../input/hubmap-kidney-segmentation/test/b9a3865fc.tiff


  s = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1610.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1610.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1610.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1610.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1610.0), HTML(value='')))



../input/hubmap-kidney-segmentation/test/c68fe75ea.tiff


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1680.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1680.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1680.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1680.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1680.0), HTML(value='')))



../input/hubmap-kidney-segmentation/test/b2dc8411c.tiff


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=595.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=595.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=595.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=595.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=595.0), HTML(value='')))



../input/hubmap-kidney-segmentation/test/26dc41664.tiff


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2064.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2064.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2064.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2064.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2064.0), HTML(value='')))






In [9]:
submission = pd.DataFrame.from_dict(subm, orient='index')
submission.to_csv('submission.csv', index=False)
submission.head()

Unnamed: 0,id,predicted
0,afa5e8098,66548865 5 66592645 5 66636425 5 66680186 29 6...
1,b9a3865fc,61366350 72 61406779 72 61447208 72 61487627 9...
2,c68fe75ea,21205354 74 21255134 74 21304891 112 21354671 ...
3,b2dc8411c,56465197 7 56465205 33 56465239 16 56465256 7 ...
4,26dc41664,245906728 53 245949088 53 245991448 53 2460338...
