In [1]:
import numpy as np
import pandas as pd
import math
import cv2
import random
import seaborn as sns
import matplotlib.pyplot as plt
from PIL import Image
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import gc
gc.enable()
import warnings
warnings.simplefilter('ignore')
from tqdm.auto import tqdm

In [2]:
# Shared settings for the tiling cells below.
cfg = {}
# Folder holding the training images, and the CSV describing them.
cfg['img_train'] = '../input/hubmap-organ-segmentation/train_images/'
cfg['train_df'] = '../input/hubmap-organ-segmentation/train.csv'
# Side length every image/mask is resized to before it is cut into tiles.
cfg['img_size'] = 2304
# Default number of tiles per side used by generate_tile.
cfg['patches'] = 4
# Side length each tile is resized to before it is written to disk.
cfg['patch_size'] = 256
# Cut-off used to binarise mask tiles before saving.
cfg['mask_threshold'] = 0.5

# Show how many entries the training image folder contains.
len(os.listdir(cfg['img_train']))

351

In [3]:
# Load the training metadata, then rewrite the id column so that it holds the
# image file name (id followed by the .tiff extension).
df = pd.read_csv(cfg['train_df'])
tiff_names = [str(image_id) + '.tiff' for image_id in df['id']]
df['id'] = tiff_names
df.head()

Unnamed: 0,id,organ,data_source,img_height,img_width,pixel_size,tissue_thickness,rle,age,sex
0,10044.tiff,prostate,HPA,3000,3000,0.4,4,1459676 77 1462675 82 1465674 87 1468673 92 14...,37.0,Male
1,10274.tiff,prostate,HPA,3000,3000,0.4,4,715707 2 718705 8 721703 11 724701 18 727692 3...,76.0,Male
2,10392.tiff,spleen,HPA,3000,3000,0.4,4,1228631 20 1231629 24 1234624 40 1237623 47 12...,82.0,Male
3,10488.tiff,lung,HPA,3000,3000,0.4,4,3446519 15 3449517 17 3452514 20 3455510 24 34...,78.0,Male
4,10610.tiff,spleen,HPA,3000,3000,0.4,4,478925 68 481909 87 484893 105 487863 154 4908...,21.0,Female


In [4]:
def generate_tile(img, slices=cfg['patches']):
    '''
    Cut an image (or mask) array into a slices x slices grid of tiles.

    img: array whose first axis is rows and second axis is columns; any
         further axes (e.g. colour channels) are kept whole in every tile.
    slices: number of tiles along each of the two axes.
    Returns: a single list of slices**2 tiles, ordered row band by row band
             (all tiles of the top band left-to-right, then the next band).
             Tiles are plain slices of img, so for NumPy input they share
             memory with it rather than copying.

    Tile height and width are computed separately from img.shape[0] and
    img.shape[1], so non-square inputs are covered too. Rows/columns left
    over when a side is not divisible by slices are not included.
    '''
    tile_h = img.shape[0] // slices
    tile_w = img.shape[1] // slices
    # No per-tile gc.collect(): slicing allocates nothing worth collecting,
    # and a full collection for every tile only slows the loop down.
    return [
        img[tile_h*r : tile_h*(r+1), tile_w*c : tile_w*(c+1), ...]
        for r in range(slices)
        for c in range(slices)
    ]

def rle2mask(mask_rle, shape:tuple):
    '''
    Decode a run-length encoded mask into a binary array.

    mask_rle: run-length as string formatted "start length start length ..."
              with 1-based start positions. None, NaN or an empty string
              is treated as "no mask" and gives an all-background array.
    shape: (width,height) of the mask; the flat buffer is reshaped to this
           and then transposed.
    Returns numpy uint8 array of shape (shape[1], shape[0]),
            1 - mask, 0 - background
    Raises ValueError if the string does not consist of complete
           (start, length) pairs.
    '''
    # pandas represents an empty CSV cell as float NaN, which has no .split()
    if mask_rle is None or (isinstance(mask_rle, float) and np.isnan(mask_rle)):
        tokens = []
    else:
        tokens = mask_rle.split()
    if len(tokens) % 2:
        raise ValueError(
            f'RLE must contain (start, length) pairs, got {len(tokens)} values'
        )

    # Starts are 1-based in the encoding; shift to 0-based indices.
    starts = np.asarray(tokens[0::2], dtype=int) - 1
    lengths = np.asarray(tokens[1::2], dtype=int)
    ends = starts + lengths

    img = np.zeros(shape[0]*shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(shape).T

In [5]:
# Create the output folders for the image tiles and the mask tiles, relative
# to the current working directory. exist_ok=True keeps the cell re-runnable
# (a plain mkdir complains once the folders exist).
for out_dir in ('images', 'masks'):
    os.makedirs(out_dir, exist_ok=True)

In [6]:
# Tile every training image: resize it to img_size x img_size, cut it into
# tiles with generate_tile, shrink each tile to patch_size x patch_size and
# save it as <image id>_<tile number>.jpg in /kaggle/working/images/.
img_dir = '../input/hubmap-organ-segmentation/train_images/'

# Tiles are written with relative names, so switch to the output folder once
# instead of twice per image. This also fails immediately if the folder is
# missing, and leaves the working directory where later cells may expect it.
os.chdir('/kaggle/working/images/')

for _, row in tqdm(df.iterrows(), total=len(df)):
    # img_dir is a relative path that is meant to be resolved from
    # /kaggle/input/; anchor it there explicitly.
    path = os.path.join('/kaggle/input/', img_dir, row['id'])
    img_id = row['id'].split('.')[0]

    # Close the file handle as soon as the pixels are in memory.
    with Image.open(path) as pil_img:
        is_rgb = pil_img.mode == 'RGB'
        img = np.array(pil_img)

    if img.shape[0] != cfg['img_size']:
        # INTER_AREA when shrinking, INTER_CUBIC when enlarging.
        if img.shape[0] > cfg['img_size']:
            interp = cv2.INTER_AREA
        else:
            interp = cv2.INTER_CUBIC
        img = cv2.resize(img, (cfg['img_size'], cfg['img_size']), interpolation=interp)

    img_tiles = generate_tile(img)
    for i, tile in enumerate(img_tiles, start=1):
        tile = cv2.resize(tile, (cfg['patch_size'], cfg['patch_size']), interpolation=cv2.INTER_AREA)
        if is_rgb:
            # PIL delivers RGB but cv2.imwrite expects BGR; without this swap
            # the saved JPEG has its red and blue channels exchanged.
            tile = cv2.cvtColor(tile, cv2.COLOR_RGB2BGR)
        tile_name = f'{img_id}_{i}.jpg'
        # cv2.imwrite reports failure through its return value only.
        if not cv2.imwrite(tile_name, tile):
            raise IOError(f'Could not write image tile {tile_name}')
    del img_tiles
gc.collect()

0it [00:00, ?it/s]

20

In [7]:
# Tile every training mask the same way as the images: decode the RLE, resize
# to img_size x img_size, cut into tiles with generate_tile, shrink each tile
# to patch_size x patch_size, binarise it and save it as
# <image id>_<tile number>.jpg in /kaggle/working/masks/.
# NOTE(review): the saved mask holds the values 0/1 in a lossy JPEG; a lossless
# format would be safer, but the .jpg names are kept so existing consumers of
# these files keep working - confirm before changing.

# Nothing is read from disk here, so only the output folder matters: switch to
# it once (this also fails immediately if the folder is missing).
os.chdir('/kaggle/working/masks/')

for _, row in tqdm(df.iterrows(), total=len(df)):
    img_id = row['id'].split('.')[0]
    # rle2mask takes shape as (width, height) and returns the transposed
    # array, i.e. (height, width).
    mask = rle2mask(row['rle'], shape=(int(row['img_width']), int(row['img_height'])))
    # Work in float so that interpolated values stay fractional; on a uint8
    # mask every resize rounds back to 0/1 and mask_threshold has no effect.
    mask = mask.astype(np.float32)

    if mask.shape[0] != cfg['img_size']:
        # INTER_AREA when shrinking, INTER_CUBIC when enlarging.
        if mask.shape[0] > cfg['img_size']:
            interp = cv2.INTER_AREA
        else:
            interp = cv2.INTER_CUBIC
        mask = cv2.resize(mask, (cfg['img_size'], cfg['img_size']), interpolation=interp)

    img_tiles = generate_tile(mask)
    for i, tile in enumerate(img_tiles, start=1):
        tile = cv2.resize(tile, (cfg['patch_size'], cfg['patch_size']), interpolation=cv2.INTER_AREA)
        # Explicit uint8 so the encoder gets an 8-bit image rather than
        # relying on an implicit conversion from a 64-bit integer array.
        binary_tile = (tile >= cfg['mask_threshold']).astype(np.uint8)
        tile_name = f'{img_id}_{i}.jpg'
        # cv2.imwrite reports failure through its return value only.
        if not cv2.imwrite(tile_name, binary_tile):
            raise IOError(f'Could not write mask tile {tile_name}')
    del img_tiles
gc.collect()

0it [00:00, ?it/s]

20