In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import os
import glob
from tqdm import tqdm
import numpy as np
import math
import matplotlib.pyplot as plt
import cv2
from skimage.color import rgb2grey, rgb2hed
from skimage.exposure import rescale_intensity, equalize_adapthist

# Root directory of the dataset; the preprocessing loop reads the sample
# folders found under `<data_dir>/stage1_train/`.
data_dir = 'dsb_2018_data'

In [None]:
def load_image_labels(folder):
    """Load one sample's image and build a labelled instance mask.

    Reads the first file matching ``<folder>/images/*`` and every file
    matching ``<folder>/masks/*``. Each mask is binarised, eroded once with a
    3x3 kernel, and painted into a single label image in which the pixels of
    the k-th mask (sorted filename order, 1-based) hold the value ``k``.

    Args:
        folder: Path of a sample directory containing ``images/`` and
            ``masks/`` sub-directories.

    Returns:
        ``(image, labels)`` where ``image`` is the loaded picture with its
        channel axis reversed (OpenCV loads BGR, so this yields RGB) and
        ``labels`` is a 2-D ``uint16`` array with 0 as background.
        When the folder contains no mask files, only ``image`` is returned
        (not a tuple), so callers must handle both shapes of result.

    Raises:
        IndexError: If ``<folder>/images/`` contains no file.
    """
    image_path = glob.glob(folder + '/images/*')[0]
    # Reverse the channel axis: cv2.imread yields BGR, the notebook works in RGB.
    image = cv2.imread(image_path)[:, :, ::-1]

    # Sorted so the mask -> label id mapping is identical on every run and
    # platform; plain glob order is arbitrary.
    mask_paths = sorted(glob.glob(folder + '/masks/*'))
    if len(mask_paths) == 0:
        return image

    kernel = np.ones((3, 3), np.uint8)
    labels = None
    for label_id, mask_path in enumerate(mask_paths, start=1):
        # A pixel is foreground when any channel is non-zero. Testing with
        # np.any avoids summing the channels and casting to uint8, which
        # wraps sums that are multiples of 256 down to 0.
        foreground = np.any(cv2.imread(mask_path) > 0, axis=-1).astype(np.uint8)
        # One erosion pass shrinks each instance by a pixel so that touching
        # instances end up separated by background.
        foreground = cv2.erode(foreground, kernel, iterations=1)
        if labels is None:
            labels = np.zeros(foreground.shape, dtype=np.uint16)
        # Only paint still-unlabelled pixels: where masks overlap the first
        # one keeps the pixel, instead of the ids being added together into a
        # label that belongs to neither (or to a different) instance.
        labels[(foreground > 0) & (labels == 0)] = label_id
    return image, labels

In [None]:
def is_stained(img):
    """Tell whether an image carries colour information.

    The mean of each channel is taken over the first two axes; the image is
    considered unstained only when all three channel means are exactly equal.

    Args:
        img: Array whose mean over axes (0, 1) yields exactly three values
            (named red, green and blue here; the result does not depend on
            their order).

    Returns:
        ``False`` when the three channel means are identical, ``True``
        otherwise.
    """
    r_avg, g_avg, b_avg = img.mean(axis=(0, 1))
    channels_match = (r_avg == g_avg == b_avg)
    return not channels_match

In [None]:
def stain_deconvolve(img, mode='hematoxylin_eosin_sum'):
    """Reduce a colour image to a single normalised stain-intensity map.

    The image is passed through ``rgb2hed``; channels 0 and 1 of the result
    are each rescaled to [0, 1], added together, and the sum is rescaled to
    [0, 1] once more.

    Args:
        img: Image array with at least two leading spatial axes, in a form
            accepted by ``rgb2hed``.
        mode: Not read by the implementation; kept so existing calls that
            pass it continue to work.

    Returns:
        The rescaled sum of the two stain channels, one value per pixel.
    """
    rows, cols = img.shape[:2]
    hed = rgb2hed(img)
    # Holds the two individually normalised stain channels side by side.
    stain_pair = np.zeros((rows, cols, 2))
    for channel in (0, 1):
        stain_pair[:, :, channel] = rescale_intensity(hed[:, :, channel], out_range=(0, 1))
    combined = stain_pair.sum(axis=2)
    return rescale_intensity(combined, out_range=(0, 1))

In [None]:
# Output root: one sub-directory per sample, holding image.png and masks.png.
dest_folder = 'dataset_test'

# Sorted so samples are processed in a reproducible order.
for folder in tqdm(sorted(glob.glob('{}/stage1_train/*'.format(data_dir)))):
    sample = load_image_labels(folder)
    if not isinstance(sample, tuple):
        # load_image_labels returns the bare image when a sample has no mask
        # files; fail with a clear message instead of a cryptic unpack error.
        raise ValueError('no mask files found in {}'.format(folder))
    img, masks = sample

    # Colour images are reduced to a single stain-intensity channel;
    # images with identical channel means get adaptive histogram equalisation
    # on their first channel instead.
    if is_stained(img):
        img = stain_deconvolve(img)
    else:
        img = equalize_adapthist(img[:, :, 0])
    # Invert predominantly bright images (presumably so the background ends
    # up dark in every output - confirm against downstream use).
    if img.mean() > 0.5:
        img = 1 - img
    # Scale to 8-bit for writing; assumes both branches above produce values
    # in [0, 1] - TODO confirm.
    img = (img * 255).astype(np.uint8)

    image_id = os.path.basename(folder)
    out_dir = os.path.join(dest_folder, image_id)
    os.makedirs(out_dir, exist_ok=True)
    # cv2.imwrite signals failure through its return value rather than by
    # raising, so check it to avoid silently producing an incomplete dataset.
    if not cv2.imwrite(os.path.join(out_dir, 'image.png'), img):
        raise IOError('failed to write image.png for {}'.format(image_id))
    if not cv2.imwrite(os.path.join(out_dir, 'masks.png'), masks):
        raise IOError('failed to write masks.png for {}'.format(image_id))