In [1]:
%%time
!pip install ../input/smp-021/SMP_0.2.1/efficientnet_pytorch-0.6.3-py3-none-any.whl
!pip install ../input/smp-021/SMP_0.2.1/pretrainedmodels-0.7.4-py3-none-any.whl
!pip install ../input/smp-021/SMP_0.2.1/timm-0.4.12-py3-none-any.whl
!pip install ../input/smp-021/SMP_0.2.1/segmentation_models_pytorch-0.2.1-py3-none-any.whl
!pip install ../input/transformers4162/transformers-4.16.2-py3-none-any.whl
!pip install ../input/torch-171/torch-1.7.1cu101-cp37-cp37m-linux_x86_64.whl
!pip install ../input/torchvision/torchvision-0.8.2cu101-cp37-cp37m-linux_x86_64.whl
!pip install ../input/einop-041/einops-0.4.1-py3-none-any.whl

import sys
sys.path.append('../input/monai-v081/')
import monai
from monai.inferers import sliding_window_inference
import transformers

import torch
import torch.nn as nn
import torchvision.transforms.functional as F
from tqdm import tqdm
import segmentation_models_pytorch as smp

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
from torch.utils.data import DataLoader, Dataset
import random
import numpy as np
import cv2
from matplotlib import pyplot as plt
import os
import json
import pandas as pd
import gc
import glob
from PIL import Image
from tifffile import imread
from skimage.measure import label, regionprops
from skimage import measure
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

def seed_everything(seed=123):
    """Seed every random number generator this notebook touches.

    Args:
        seed: Integer seed handed to ``random``, NumPy, ``torch.manual_seed``
            and ``torch.cuda.manual_seed``; also exported as ``PYTHONHASHSEED``.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seeder(seed)
    # Ask cuDNN to pick deterministic algorithms.
    torch.backends.cudnn.deterministic = True


seed_everything()

Processing /kaggle/input/smp-021/SMP_0.2.1/efficientnet_pytorch-0.6.3-py3-none-any.whl
Installing collected packages: efficientnet-pytorch
Successfully installed efficientnet-pytorch-0.6.3
[0mProcessing /kaggle/input/smp-021/SMP_0.2.1/pretrainedmodels-0.7.4-py3-none-any.whl
Installing collected packages: pretrainedmodels
Successfully installed pretrainedmodels-0.7.4
[0mProcessing /kaggle/input/smp-021/SMP_0.2.1/timm-0.4.12-py3-none-any.whl
Installing collected packages: timm
Successfully installed timm-0.4.12
[0mProcessing /kaggle/input/smp-021/SMP_0.2.1/segmentation_models_pytorch-0.2.1-py3-none-any.whl
Installing collected packages: segmentation-models-pytorch
Successfully installed segmentation-models-pytorch-0.2.1
[0mProcessing /kaggle/input/transformers4162/transformers-4.16.2-py3-none-any.whl
Installing collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.18.0
    Uninstalling transformers-4.18.0

In [2]:
def read_rgb_img(img_path):
    """Read an image with ``tifffile.imread`` and report its size.

    Args:
        img_path: Path passed straight to ``imread``.

    Returns:
        ``(image, size)`` where ``size`` is the first two entries of
        ``image.shape`` in reversed order, i.e. ``(shape[1], shape[0])``.
    """
    image = imread(img_path)
    reversed_size = tuple(reversed(image.shape[:2]))
    return image, reversed_size

def get_test_transform():
    """Return the test-time albumentations pipeline, whose only step is ``ToTensorV2``."""
    steps = [ToTensorV2(p=1.0)]
    return A.Compose(steps)
def get_pad_transform(img_size):
    """Return a pipeline that pads an image up to at least ``img_size`` x ``img_size``.

    Args:
        img_size: Minimum height and width requested from ``A.PadIfNeeded``.
    """
    pad_step = A.PadIfNeeded(
        min_height=img_size,
        min_width=img_size,
        border_mode=0,  # same integer value as cv2.BORDER_CONSTANT
        p=1,
    )
    return A.Compose([pad_step])
def get_crop_transform(img_size):
    """Return a pipeline that center-crops an image to ``img_size`` x ``img_size``.

    Args:
        img_size: Height and width handed to ``A.CenterCrop``.
    """
    crop_step = A.CenterCrop(img_size, img_size, p=1)
    return A.Compose([crop_step])

class Customize_Dataset(Dataset):
    """Test-time dataset: load an image, rescale it by pixel size and pad it if too small.

    Args:
        df: DataFrame with ``image_path``, ``organ``, ``pixel_size`` and
            ``data_source`` columns (one row per image).
        scale: Extra divisor applied to the resize factor.
        img_size: Minimum side length; smaller images are padded up to it.
        transforms: Callable invoked as ``transforms(image=img)`` that returns
            a mapping with an ``"image"`` entry.
    """

    def __init__(self, df, scale, img_size, transforms):
        self.df = df
        self.scale = scale
        self.img_size = img_size
        # Column values are cached as arrays so __getitem__ indexes by position.
        self.image_path = df['image_path'].values
        self.organ_type = df['organ'].values
        self.pixel_size = df['pixel_size'].values
        self.data_source = df['data_source'].values
        self.transforms = transforms

    def __getitem__(self, index):
        """Return the preprocessed sample at position ``index``.

        Returns:
            dict with keys ``img_path``, ``image`` (float32, pixel values
            divided by 255), ``ori_shape`` (the size reported by
            ``read_rgb_img``), ``pad`` (side length before padding, or 0 when
            no padding was applied), ``organ`` and ``data_source``.
        """
        img_path = self.image_path[index]
        img, ori_shape = read_rgb_img(img_path)

        ## scale adjust: factor = pixel_size / 0.4 / scale
        ## (0.4 is presumably the reference pixel size of the training data -- TODO confirm)
        factor = float(self.pixel_size[index]) / 0.4 / self.scale
        # The output is always square, with its side derived from the shorter edge.
        side = int(min(img.shape[0], img.shape[1]) * factor)
        img = cv2.resize(img, (side, side))

        ## need padding: remember the unpadded side so the caller can crop it back
        pad = 0
        if img.shape[0] < self.img_size:
            pad = img.shape[0]
            img = get_pad_transform(self.img_size)(image=img)['image']

        img = self.transforms(image=img)["image"]
        return {
            'img_path': img_path,
            # as_tensor accepts both arrays and tensors; torch.tensor() on an
            # existing tensor emits a copy-construct UserWarning for every sample.
            'image': torch.as_tensor(img / 255, dtype=torch.float32),
            'ori_shape': torch.tensor(ori_shape),
            'pad': pad,
            'organ': self.organ_type[index],
            'data_source': self.data_source[index],
        }

    def __len__(self):
        """Number of rows in the backing DataFrame."""
        return len(self.df)

In [3]:
sys.path.append('../input/coat-net')
from coat import *
from daformer import *
from helper import *
from coatnet_inference import *

class customize_model(nn.Module):
    """Thin wrapper whose forward pass returns the ``'logits'`` entry of ``self.model``'s output.

    NOTE(review): ``__init__`` never assigns ``self.model`` and ignores
    ``model_name``; presumably this class only exists so that ``torch.load``
    can unpickle whole-model checkpoints that already carry ``model`` -- confirm.
    """

    def __init__(self, model_name):
        super().__init__()

    def forward(self, images):
        """Run the wrapped model on ``images`` and return its ``'logits'``."""
        outputs = self.model(images)
        return outputs['logits']

# CFG

In [4]:
# Inference configuration.
# Entries at the same position of 'preprocessing', 'model' and 'model_weight'
# describe one ensemble member (likewise for the three 'lung_*' lists).
# Preprocessing keys read 's<scale>_w<window>'; the window size is parsed from
# the part after '_w' by the prediction cells.
CFG = {
    ## ensemble for the four non-lung organs
    'preprocessing': ['s4_w640', 's4_w768', 's3_w1024', 's3_w1024'],
    'model': [
        '../input/beit-s4-w640',
        '../input/segformer-b5-s4-w768',
        '../input/segformer-b5-s3-w1024',
        '../input/coat-m-s3-w1024',
    ],
    'model_weight': [0.283, 0.283, 0.283, 0.15],

    ## ensemble for lung
    'lung_preprocessing': ['s4_w768', 's4_w768'],
    'lung_model': [
        '../input/effb7-w768-best',
        '../input/effb7-w768-ds-best',
    ],
    'lung_model_weight': [0.5, 0.5],

    ## post-processing arguments
    'TTA': True,
    # per-organ probability threshold used when binarising the ensemble mask
    'mask_thr': {
        'lung': 0.1,
        'spleen': 0.2,
        'prostate': 0.3,
        'kidney': 0.3,
        'largeintestine': 0.2,
    },

    ## only samples whose data source / organ is listed here are predicted
    'data_source': ['HPA', 'Hubmap'],
    'organ': ['lung', 'prostate', 'kidney', 'largeintestine', 'spleen'],
}

# Prepare Dataset

In [5]:
test_df = pd.read_csv('../input/hubmap-organ-segmentation/test.csv')
df = pd.read_csv('../input/hubmap-organ-segmentation/sample_submission.csv')

## attach the image path and the per-image metadata from test.csv to every submission row
# One metadata row per id (first occurrence wins), indexed for direct lookup
# instead of re-filtering test_df three times per row.
meta = test_df.drop_duplicates('id').set_index('id')
ids = df['id'].astype(int)
unknown_ids = ids[~ids.isin(meta.index)]
if len(unknown_ids):
    raise KeyError(f'ids missing from test.csv: {unknown_ids.tolist()}')

df['image_path'] = [
    f'../input/hubmap-organ-segmentation/test_images/{id_}.tiff' for id_ in df['id']
]
for column in ['organ', 'pixel_size', 'data_source']:
    df[column] = ids.map(meta[column])


def make_test_loader(frame, scale, img_size):
    """Build a batch-size-1, unshuffled loader over ``frame`` for one scale / min-size setting."""
    dataset = Customize_Dataset(frame, scale=scale, img_size=img_size,
                                transforms=get_test_transform())
    return DataLoader(dataset, batch_size=1, shuffle=False, num_workers=0)


## one loader per preprocessing variant named in CFG ('s<scale>_w<img_size>')
test_loader_s4_w768 = make_test_loader(df, scale=4, img_size=768)
test_loader_s3_w1024 = make_test_loader(df, scale=3, img_size=1024)
test_loader_s4_w640 = make_test_loader(df, scale=4, img_size=640)
# Kept for compatibility: this name used to end up bound to the last dataset built.
test_dataset = test_loader_s4_w640.dataset
df.head(3)

Unnamed: 0,id,rle,image_path,organ,pixel_size,data_source
0,10078,12 34,../input/hubmap-organ-segmentation/test_images...,spleen,0.4945,Hubmap


# Inference

In [6]:
def fill_hole(m):
    """Return a copy of mask ``m`` in which enclosed background regions are set to 1.

    The mask is zero-padded by 4 pixels on every side, the below-0.5 pixels are
    split into connected components (connectivity 1), and every component whose
    label is not among the two smallest label values is filled. Presumably those
    two are the foreground (0) and the outer background touching the padding --
    TODO confirm against skimage's label ordering.

    Args:
        m: Mask array; it is not modified.

    Returns:
        A new array of the same shape and dtype as ``m``.
    """
    margin = 4
    result = m.copy()
    padded = np.pad(m, margin)
    labelled = measure.label(padded < 0.5, background=0, connectivity=1)
    label_ids = np.unique(labelled)
    if len(label_ids) <= 2:
        # Nothing beyond the two smallest labels: there is no region to fill.
        return result
    interior = labelled[margin:-margin, margin:-margin]
    result[np.isin(interior, label_ids[2:])] = 1
    return result

def rle_encode(img):
    """Run-length encode a binary mask in column-major order.

    Args:
        img: 2-D array whose non-zero pixels are the mask. It is not modified.

    Returns:
        A string of space-separated ``start length`` pairs with 1-based starts,
        or an empty string when the mask has no foreground.
    """
    # Transposing before flattening yields column-major pixel order.
    pixels = img.T.flatten()
    # Pad with a zero on both ends so runs touching the first or last pixel are
    # encoded too, instead of overwriting those two pixels with zero.
    padded = np.concatenate([[0], pixels, [0]])
    runs = np.where(padded[1:] != padded[:-1])[0] + 1
    # Every odd entry is a run end; turn it into a run length.
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)

def inference(model, img, size):
    """Average the sliding-window predictions of several models for one image.

    Args:
        model: Sequence of networks; each is switched to eval mode and used as
            the ``predictor`` of ``sliding_window_inference``.
        img: Input tensor, moved to the GPU with ``.cuda()``. Presumably
            ``(1, C, H, W)`` with ``H == W`` -- the TTA branch concatenates
            90-degree rotations along dim 0, which needs matching shapes.
        size: Side length of the square sliding window.

    Returns:
        NumPy array of probabilities with the channel axis moved last
        (``permute(1, 2, 0)``). Multi-channel outputs go through softmax over
        the channel axis, single-channel outputs through sigmoid.
    """
    img = img.cuda()

    def run_windows(batch, net):
        # Identical sliding-window settings for the TTA and the plain path.
        return sliding_window_inference(batch,
                                        (size, size),
                                        sw_batch_size=1,
                                        predictor=net,
                                        mode='gaussian',
                                        overlap=0.25)

    for i, net in enumerate(model):
        with torch.no_grad():
            net.eval()
            if CFG['TTA']:
                # Eight views: the image and its horizontal mirror, each at 0/90/180/270 degrees.
                mirrored = img.flip(-1)
                views = [img] + [img.rot90(turns, [2, 3]) for turns in (1, 2, 3)]
                views += [mirrored] + [mirrored.rot90(turns, [2, 3]) for turns in (1, 2, 3)]
                out = run_windows(torch.cat(views, dim=0), net)

                # Undo each view's transform (rotation first, then the mirror)
                # and accumulate in view order before averaging.
                pred = out[0]
                for turns in (1, 2, 3):
                    pred = pred + out[turns].rot90(-turns, [1, 2])
                pred = pred + out[4].flip(-1)
                for turns in (1, 2, 3):
                    pred = pred + out[4 + turns].rot90(-turns, [1, 2]).flip(-1)
                pred = pred / 8
            else:
                pred = run_windows(img, net)[0]

        # Logits -> probabilities, summed over the models.
        prob = pred.softmax(dim=0) if pred.shape[0] != 1 else pred.sigmoid()
        if i == 0:
            preds = prob
        else:
            preds += prob

    pred = preds / len(model)
    return pred.cpu().permute(1, 2, 0).numpy()

# Predict the four non-lung organs

In [7]:
## organ order of the multi-class heads: the organ at position i is read from
## output channel i + 1 (presumably channel 0 is background -- TODO confirm)
organs= [
    'lung',
    'spleen',
    'prostate',
    'kidney',
    'largeintestine',
]

## load the checkpoints of every ensemble member for the four non-lung organs
## (skipped on re-run, when CFG['model'] already holds the loaded models)
if all(isinstance(entry, str) for entry in CFG['model']):
    cv_model= []
    for model_dir in CFG['model']:
        # sorted() keeps the checkpoint order stable across runs
        models= [torch.load(path, map_location='cuda:0')
                 for path in sorted(glob.glob(model_dir+'/**.pth'))]
        if not models:
            raise FileNotFoundError(f'no .pth checkpoint found in {model_dir}')
        cv_model.append(models)
    CFG['model']= cv_model
print(f"length of model: {len(CFG['model'])}")

## the three loaders are iterated in lockstep so each step yields the same
## image under every preprocessing variant; indx is the row of that image in df
indx= 0
for s4_w640_data, s4_w768_data, s3_w1024_data in zip(test_loader_s4_w640,
                                                     test_loader_s4_w768,
                                                     test_loader_s3_w1024):
    dataset={
        's4_w640': s4_w640_data,
        's4_w768': s4_w768_data,
        's3_w1024': s3_w1024_data,
    }

    for j in range(len(s4_w768_data['image'])):

        organ= s4_w768_data['organ'][j]
        d_source= s4_w768_data['data_source'][j]

        ## lung is predicted by its own ensemble; also honour the CFG filters
        if organ=='lung' or organ not in CFG['organ'] or d_source not in CFG['data_source']:
            indx+= 1
            continue

        models= CFG['model']
        model_weight= CFG['model_weight']
        preprocessing= CFG['preprocessing']

        ## weighted sum of the ensemble members' probability maps
        for k, m in enumerate(models):

            ## get the batch produced with this member's preprocessing
            data= dataset[ preprocessing[k] ]
            window_size= int(preprocessing[k].split('_')[-1][1:])
            img= torch.unsqueeze(data['image'][j], dim= 0)
            ori_shape= data['ori_shape'][j].numpy()
            # the DataLoader collates 'pad' into a tensor; hand a plain int to albumentations
            pad= int(data['pad'][j])

            mask= inference(m, img, window_size)
            if mask.shape[2]!=1:
                mask= mask[..., organs.index(organ)+1]

            ## if the input was padded, crop the padding away before resizing back
            if pad!=0:
                mask= get_crop_transform(pad)(image= mask)['image']
            mask= cv2.resize(mask, tuple(ori_shape))

            weighted= mask * model_weight[k]
            masks= weighted if k==0 else masks + weighted

        ## binarise: fixed 0.5 for HPA images, per-organ threshold otherwise
        thr= 0.5 if d_source=='HPA' else CFG['mask_thr'][organ]
        mask= (masks>=thr).astype(np.uint8)

        ## fill mask hole
        if organ in ['spleen']:
            mask= fill_hole(mask)

        df.loc[indx, 'rle']= rle_encode(mask)
        indx+= 1

length of model: 4




# Predict the lung images

In [8]:
## release the non-lung ensemble before loading the lung models: drop every
## reference the previous cell may have left behind, then return the memory
CFG.pop('model', None)  # pop instead of del so the cell can be re-run
cv_model, models, m= [], [], None
gc.collect()
torch.cuda.empty_cache()

## organ order of the multi-class heads: the organ at position i is read from
## output channel i + 1 (presumably channel 0 is background -- TODO confirm)
organs= [
    'lung',
    'spleen',
    'prostate',
    'kidney',
    'largeintestine',
]

## load the checkpoints of every lung ensemble member
## (skipped on re-run, when CFG['lung_model'] already holds the loaded models)
if all(isinstance(entry, str) for entry in CFG['lung_model']):
    cv_model= []
    for model_dir in CFG['lung_model']:
        # sorted() keeps the checkpoint order stable across runs
        models= [torch.load(path, map_location='cuda:0')
                 for path in sorted(glob.glob(model_dir+'/**.pth'))]
        if not models:
            raise FileNotFoundError(f'no .pth checkpoint found in {model_dir}')
        cv_model.append(models)
    CFG['lung_model']= cv_model
print(f"length of lung_model: {len(CFG['lung_model'])}")

## the three loaders are iterated in lockstep so each step yields the same
## image under every preprocessing variant; indx is the row of that image in df
indx= 0
for s4_w640_data, s4_w768_data, s3_w1024_data in zip(test_loader_s4_w640,
                                                     test_loader_s4_w768,
                                                     test_loader_s3_w1024):
    dataset={
        's4_w640': s4_w640_data,
        's4_w768': s4_w768_data,
        's3_w1024': s3_w1024_data,
    }

    for j in range(len(s4_w768_data['image'])):

        organ= s4_w768_data['organ'][j]
        d_source= s4_w768_data['data_source'][j]

        ## only lung images are handled here; also honour the CFG filters
        if organ!='lung' or organ not in CFG['organ'] or d_source not in CFG['data_source']:
            indx+= 1
            continue

        models= CFG['lung_model']
        model_weight= CFG['lung_model_weight']
        preprocessing= CFG['lung_preprocessing']

        ## weighted sum of the ensemble members' probability maps
        for k, m in enumerate(models):

            ## get the batch produced with this member's preprocessing
            data= dataset[ preprocessing[k] ]
            window_size= int(preprocessing[k].split('_')[-1][1:])
            img= torch.unsqueeze(data['image'][j], dim= 0)
            ori_shape= data['ori_shape'][j].numpy()
            # the DataLoader collates 'pad' into a tensor; hand a plain int to albumentations
            pad= int(data['pad'][j])

            mask= inference(m, img, window_size)
            if mask.shape[2]!=1:
                mask= mask[..., organs.index(organ)+1]

            ## if the input was padded, crop the padding away before resizing back
            if pad!=0:
                mask= get_crop_transform(pad)(image= mask)['image']
            mask= cv2.resize(mask, tuple(ori_shape))

            weighted= mask * model_weight[k]
            masks= weighted if k==0 else masks + weighted

        ## binarise: lung always uses its per-organ threshold, whatever the data source
        mask= (masks>=CFG['mask_thr'][organ]).astype(np.uint8)

        ## fill mask hole (every image reaching this point is lung)
        mask= fill_hole(mask)

        df.loc[indx, 'rle']= rle_encode(mask)
        indx+= 1

length of lung_model: 2




# Submission

In [9]:
## drop the helper columns added for inference, then write the submission file
df = df.drop(columns=['image_path', 'organ', 'pixel_size', 'data_source'])
df.to_csv('submission.csv', index=False)
df

Unnamed: 0,id,rle
0,10078,4476 71 6480 99 8496 115 10516 120 12537 123 1...
