In [1]:
import numpy as np
import pandas as pd
import json
import cv2
import tifffile
import matplotlib.pyplot as plt

from collections import OrderedDict


In [2]:
def rle2mask(rle, shape):
    '''
    Decode a run-length-encoded string into a binary mask.

    :param rle: whitespace-separated "start length start length ..." string;
        each start is a 1-based position in the flattened mask
    :param shape: two-element shape; a flat buffer of shape[0] * shape[1]
        pixels is reshaped to it and then transposed
    :return: uint8 array of shape (shape[1], shape[0]) holding 255 inside
        the encoded runs and 0 everywhere else

    '''
    tokens = rle.split()
    # Even positions are run starts, odd positions are run lengths.
    run_starts = np.asarray(tokens[0::2], dtype=int) - 1  # 1-based -> 0-based
    run_lengths = np.asarray(tokens[1::2], dtype=int)
    run_stops = run_starts + run_lengths

    flat_mask = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, stop in zip(run_starts, run_stops):
        flat_mask[begin:stop] = 255

    # Reshape the flat buffer to `shape`, then transpose the result.
    return flat_mask.reshape(shape).T

In [3]:
# Relative path to the dataset root and to its train/ subfolder.
# Both keep a trailing slash so file names can be appended directly.
dataset_folder = '../dataset/'
dataset_train = f'{dataset_folder}train/'

In [4]:
# Load the dataset information CSV from the dataset folder; its image_file,
# height_pixels and width_pixels columns are read in the next cell.
data_info = pd.read_csv(dataset_folder + 'HuBMAP-20-dataset_information.csv')

In [5]:
# Build a lookup from image file name to its (width, height) pair, ordered by
# file name. The CSV columns are read as (height, width) and swapped here;
# rle2mask later receives this pair as its `shape` argument.
image_names = data_info['image_file'].values
shapes = data_info[['height_pixels', 'width_pixels']].values
swapped_shapes = ((width, height) for height, width in shapes)
image2shape = OrderedDict(
    sorted(dict(zip(image_names, swapped_shapes)).items())
)
# Drop the temporaries so only image2shape stays in the notebook namespace.
del image_names, shapes, swapped_shapes

In [6]:
# Load train.csv from the dataset folder; its 'id' and 'encoding' columns are
# used below to look up the RLE string for each training image.
train_rle = pd.read_csv(dataset_folder + 'train.csv')

In [8]:
# Decode the RLE annotation of every training image into a binary mask and
# write it next to the image as <id>_mask.tiff in the train folder.
train_lst = train_rle['id'].values
for item in image2shape:
    # Image file names carry a '.tiff' suffix; train.csv ids do not.
    sample_item = item.replace('.tiff', '')
    if sample_item not in train_lst:
        # Image listed in the dataset information but has no row in train.csv.
        continue
    # .loc with a boolean mask avoids chained indexing; take the first match.
    rle = train_rle.loc[train_rle['id'] == sample_item, 'encoding'].values[0]
    mask = rle2mask(rle, image2shape[item])
    mask_path = f'{dataset_train}{sample_item}_mask.tiff'
    print(f'Saving {sample_item}_mask.tiff')
    # cv2.imwrite reports failure through its boolean return value, so an
    # unchecked call can leave no file on disk without any visible error.
    if not cv2.imwrite(mask_path, mask):
        raise IOError(f'Failed to write mask to {mask_path}')

Saving 0486052bb_mask.tiff
Saving 095bf7a1f_mask.tiff
Saving 1e2425f28_mask.tiff
Saving 2f6ecfcdf_mask.tiff
Saving 54f2eec69_mask.tiff
Saving aaa6a05cc_mask.tiff
Saving cb2d976f4_mask.tiff
Saving e79de561c_mask.tiff
