paper reference: https://arxiv.org/abs/1802.07465

**EXPLORE THE IDEA ON A DUMMY IMAGE**

In [None]:
from utility.utils import *

In [None]:
from skimage.morphology import *
import cv2
import numpy as np
import collections
import itertools

import torch.nn as nn
%matplotlib inline

In [None]:
def list_directory(path):
    """
    Return every entry found under `path`, each one prefixed with `path`.

    A '/' is appended to `path` first when it does not already end with one.
    """
    base = path if path[-1] == '/' else path + '/'
    return [''.join((base, entry)) for entry in os.listdir(base)]

In [None]:
# path for a single sample
path = '../data/ds_bowl_2018/dummy/0a7d30b252359a10fd298b638b90cb9ada3acced4e0c0e5a3692013f432ee4e9/'

In [None]:
# sample image
sample_image = list_files(path+'images')

# sample masks
sample_masks = list_files(path+'masks')

In [None]:
# read 2d - single channel (grayscale) image
def read2d(path):
    """Load the image stored at `path` with cv2 in grayscale mode."""
    image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    return image
# read 3d - color image
def read3d(path):
    """Load the image stored at `path` with cv2 in color mode."""
    image = cv2.imread(path, cv2.IMREAD_COLOR)
    return image

In [None]:
plt.imshow(read2d(sample_image[0]))

Note: if we are going to treat each nucleus separately, problematic cases have to be dealt with, e.g. mask images whose pixels are all 0, two nuclei inside a single mask image, and so on.

In [None]:
# read each individual mask and accumulate them into a single image
# the running sum is kept in a wide integer dtype: adding the masks in their
# own 8-bit dtype would wrap around wherever two masks overlap
mask_sum = 0
for mask_path in sample_masks:
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    plt.imshow(mask)
    #plt.show()  # uncomment to display every mask one by one
    mask_sum = mask_sum + mask.astype(np.int64)
plt.close()

## Part 1: Custom Mapping from binary to 3 class (back, fore, boundary)

Can we leverage the fact that we know the mask of each nucleus independently?

In [None]:
def get_contour_pos(mask):
    """
    Given a binary 2d mask this function will find contour positions of a mask

    A pixel is a contour pixel when it is foreground (equal to 1 once the mask
    is scaled by its maximum) and the smallest of its N/S/E/W neighbours that
    lie inside the image is 0. Neighbours outside the image are ignored, so a
    foreground pixel is never a contour pixel only because it touches the
    image border.

    Inputs:
        mask (np.array): binary 2d numpy array
    Returns:
        contour_pos (list): list which has (i, j) positions for contour pixels as elements,
        in row-major order
    Raises:
        ValueError: if mask is not 2 dimensional
    """
    mask = np.asarray(mask)
    n_rows, n_cols = mask.shape  # fails with ValueError for non 2d input
    if n_rows == 0 or n_cols == 0:
        return []

    peak = mask.max()
    # scale to {0, 1}; an all-zero mask is left as is to avoid dividing by zero
    if peak != 1 and peak != 0:
        mask = mask / peak

    # pad with +inf so that neighbours outside the image can never be the
    # minimum; this also covers 1x1 masks, which have no neighbour at all
    padded = np.pad(mask.astype(float), 1, mode='constant', constant_values=np.inf)
    neighbour_min = np.minimum.reduce([
        padded[:-2, 1:-1],  # N
        padded[2:, 1:-1],   # S
        padded[1:-1, 2:],   # E
        padded[1:-1, :-2],  # W
    ])

    on_contour = (mask == 1) & (neighbour_min == 0)
    # np.argwhere walks the array in row-major order, like a nested i/j loop
    return [(int(i), int(j)) for i, j in np.argwhere(on_contour)]

In [None]:
def get_contour_arr(mask):
    """
    This function will take a binary mask array and will return the contour of the mask

    Inputs:
        mask (np.array): binary 2d numpy array
    Returns:
        contour (np.array): binary 2d numpy array, 1 on contour pixels and 0 elsewhere
    """
    peak = mask.max()
    # always build the scaled copy, also when the maximum is already 1;
    # an all-zero mask is only converted to float so nothing is divided by zero
    mask_copy = mask / peak if peak != 0 else mask.astype(float)

    contour = np.zeros_like(mask_copy)
    for i, j in get_contour_pos(mask_copy):
        contour[i, j] = 1

    return contour

In [None]:
def show_with_sz(image, sz=10):
    """Display `image` with matplotlib on a square figure of side `sz`, then close it."""
    figure_size = (sz,) * 2
    plt.figure(figsize=figure_size)
    plt.imshow(image)
    plt.show()
    plt.close()

In [None]:
mask_images = [read2d(mpath) for mpath in sample_masks]

In [None]:
show_with_sz(mask_images[0])

In [None]:
contour_images = [get_contour_arr(mask) for mask in mask_images]

In [None]:
# overlay all contour images by adding them up pixel-wise
sum_contours = sum(contour_images)

In [None]:
show_with_sz(sum_contours)

In [None]:
# get expanded contour at every directions
# this will be helpful for overlaps
# all 8 directions of movement in 2d, plus the (0, 0) "stay in place" offset
all_directions = set(itertools.product((-1, 0, 1), repeat=2))

def expand_contour(contour, directions=all_directions):
    """
    This function will expand a contour in a given set of directions by turning pixels into 1s

    Inputs:
        contour (np.array): Binary 2d numpy array which is the binary contour array
        directions (list): A list of tuples which has the movement addition in ith and jth position in tuples, e.g
        for moving left and right [(0, 1), (0, -1)], default is all 8 directions
    Returns:
        expanded_contour (np.array): Binary 2d numpy array which is the expanded contour array
    """
    expanded_contour = contour.copy()
    height, width = contour.shape
    # offsets are always applied to the pixels of the input contour,
    # never to pixels that were switched on during the expansion
    rows, cols = np.nonzero(contour == 1)
    for i_plus, j_plus in directions:
        moved_rows = rows + i_plus
        moved_cols = cols + j_plus
        # drop targets outside the image: a negative index would otherwise
        # wrap around and paint pixels on the opposite edge
        inside = (
            (moved_rows >= 0) & (moved_rows < height)
            & (moved_cols >= 0) & (moved_cols < width)
        )
        expanded_contour[moved_rows[inside], moved_cols[inside]] = 1
    return expanded_contour

In [None]:
expanded_contour_images = [expand_contour(contour) for contour in contour_images]

In [None]:
# add up the expanded contours pixel-wise; every pixel covered by more than
# one expanded contour is set to 2
sum_expanded_contours = sum(expanded_contour_images)
covered_more_than_once = sum_expanded_contours > 1
sum_expanded_contours[covered_more_than_once] = 2

In [None]:
show_with_sz(sum_expanded_contours)

In [None]:
# add up all masks pixel-wise after dividing each one by 255 (make it binary)
sum_mask_images = sum(single_mask / 255 for single_mask in mask_images)

In [None]:
show_with_sz(sum_mask_images)

**COMBINE INFORMATION FROM SUM EXPANDED CONTOURS AND SUM MASK IMAGES**

This definitely does a better job than trying to follow the paper's methodology, which is a bit complex and unclear to implement.

Now we have 3 classes as follows: background:0, foreground:1, boundary:2

In [None]:
sum_mask_images[sum_expanded_contours == 2] = 2

In [None]:
show_with_sz(sum_mask_images)

Here, if we ignore the background and the 4 yellow boundaries, we perfectly match the number of nuclei provided in the training set. Previously, when we applied the label function to a single mask image, overlapping nuclei gave us very bad results, and this was hurting our score badly.

In [None]:
# labelled regions without the background and the 4 boundary regions, next to
# the number of provided masks; written as one tuple so both values are displayed
len(np.unique(label(sum_mask_images))) - 1 - 4, len(sample_masks)

#### Multiclass One Mask Pipeline

- Get sample mask paths
- Read sample mask images
- Get contour images 
- Get expanded contour images
- Calc sum expanded contour images
    - Clip pixels greater than 1 to 2
- Calc sum mask images
    - Filter sum mask images by sum expanded contour images ==2 and assign 2 

In [None]:
def list_files(path):
    """
    List files under a path

    Inputs:
        path (str): directory to list, a trailing '/' is added when it is missing
    Returns:
        (list): `path` followed by the name of every entry of the directory, sorted by name
    """
    if path[-1] != '/':
        path = path + '/'
    # os.listdir gives no ordering guarantee; sort so that the result, and any
    # positional indexing done on it, is reproducible
    return [path + p for p in sorted(os.listdir(path))]

def list_directory(path):
    """
    List directory under a path

    Inputs:
        path (str): directory to list, a trailing '/' is added when it is missing
    Returns:
        (list): `path` followed by the name of every sub directory and a trailing '/',
        sorted by name; entries which are not directories are left out
    """
    if path[-1] != '/':
        path = path + '/'
    # os.listdir gives no ordering guarantee, hence the sort; plain files are
    # skipped because appending '/' to them would not give a usable path
    return [path + p + '/' for p in sorted(os.listdir(path)) if os.path.isdir(path + p)]


def multiclass_onemask(sample_masks):
    """
    This function will return a multiclass mask having the labels:
        0: background
        1: foreground
        2: overlap boundary - 2 nuclei stuck together

    Inputs:
        sample_masks (list): a list of path of individual masks belonging to the same image
    Returns:
        (np.array): 2d array holding the labels listed above
    Raises:
        ValueError: if sample_masks is empty or one of the masks can not be read
    """
    sample_masks = list(sample_masks)
    if not sample_masks:
        raise ValueError('sample_masks must contain at least one mask path')

    # get mask images
    mask_images = [read2d(mpath) for mpath in sample_masks]
    for mpath, mask_image in zip(sample_masks, mask_images):
        # fail with the offending path instead of an attribute error further down
        if mask_image is None:
            raise ValueError('could not read mask image: ' + mpath)

    # get contour images
    contour_images = [get_contour_arr(mask) for mask in mask_images]
    # create expanded contour images
    expanded_contour_images = [expand_contour(contour) for contour in contour_images]
    # pixels covered by more than one expanded contour separate touching nuclei
    sum_expanded_contours = np.sum(expanded_contour_images, axis=0)
    # sum mask images and draw overlap boundaries
    # NOTE(review): dividing by 255 assumes masks are stored as 0/255 images - confirm
    sum_mask_images = np.sum([mask_image / 255 for mask_image in mask_images], axis=0)
    sum_mask_images[sum_expanded_contours > 1] = 2

    return sum_mask_images

In [None]:
valid_dirs = list_directory('../data/ds_bowl_2018/valid/')

In [None]:
i = 5 # i th sample
sample_image = list_files(valid_dirs[i] + 'images/') # sample image
sample_masks = list_files(valid_dirs[i] + 'masks/') # sample masks

In [None]:
show_with_sz(read3d(sample_image[0]))

In [None]:
mclass_mask = multiclass_onemask(sample_masks)

In [None]:
show_with_sz(mclass_mask)

In [None]:
# keep only foreground and background for label evaluation:
# every pixel labelled 2 is turned into 0 in a fresh array
final_mask_eval = np.where(mclass_mask == 2, 0, mclass_mask)

In [None]:
show_with_sz(final_mask_eval)

In [None]:
# exclude background and check if it matches
len(np.unique(label(final_mask_eval))) - 1, len(sample_masks)

### Create Multiclass One Mask for every image

We will create a multiclass one mask image for each sample under the full train directory, like we did before for the one mask. When we later create new train and valid sets from the full data, we want to copy everything, including one_mask as well as mclass_one_mask.

In [None]:
MAIN_PATH = '../data/ds_bowl_2018/full_data/'

In [None]:
data_dirs = list_directory(MAIN_PATH)

In [None]:
data_dirs[1], len(data_dirs) 

In [None]:
# function for creating multiclass masks given a directory path
def create_mclass_masks(dir_):
    """
    Build the multiclass mask of one sample and save it inside the sample directory

    Inputs:
        dir_ (str): sample directory path ending with '/', the individual masks are
        listed from dir_ + 'masks/'
    Returns:
        None, the mask is written to dir_ + 'mclass_one_mask.png'
    """
    sample_masks = list_files(dir_ + 'masks/') # list of sample masks paths
    mclass_mask = multiclass_onemask(sample_masks)
    # NOTE(review): plt.imsave is called without vmin / vmax / cmap, so the stored
    # intensities presumably depend on the value range of each mask - confirm
    # before relying on fixed pixel values downstream
    plt.imsave(dir_ + 'mclass_one_mask.png', mclass_mask)

In [None]:
# run on multiple cpus for speed
from multiprocessing import Pool
# the context manager shuts the worker processes down once the (blocking) map is done
with Pool(4) as p:
    _ = p.map(create_mclass_masks, data_dirs)

In [None]:
mclass_mask = read2d(data_dirs[-1] + 'mclass_one_mask.png')

In [None]:
show_with_sz(mclass_mask)

In [None]:
# Pixel Intensity Mapping

#If there are 3 unique pixel intensities
# 30 : Background
# 110 : Nuclei
# 215 : Overlap Boundary

#If there are 2 unique pixel intensities
# 30 : Background
# 215 : Nuclei

np.unique(mclass_mask)

In [None]:
show_with_sz(mclass_mask == 215)

### Sanity check - masks

In [None]:
def show_side_to_side(im1, im2, figsize=(13, 13)):
    """Show `im1` (left) and `im2` (right) next to each other on one figure, then close it."""
    plt.figure(figsize=figsize)
    for position, picture in enumerate((im1, im2), start=1):
        plt.subplot(1, 2, position)
        plt.imshow(picture)
    plt.show()
    plt.close()

In [None]:
# show the saved multiclass mask of every sample next to its image
for dir_ in data_dirs:
    # first entry listed under the images folder of the sample
    image_path = list_files(dir_ + '/images')[0]
    mask_path = dir_ + 'mclass_one_mask.png'
    mask = read2d(mask_path)
    image = read3d(image_path)
    show_side_to_side(mask, image)

## Part 2: Defining Weights

#### 1- Distance Transform Based Weight Map (DWM)

## Pixel Intensity Mapping

If there are 3 unique pixel intensities

- 30 : Background
- 110 : Nuclei
- 215 : Overlap Boundary

If there are 2 unique pixel intensities

- 30 : Background
- 215 : Nuclei

#### weights

$$w_{DWM}(p, \beta) = w_0(p)\left(1 - \min\left(\frac{\phi_g(p)}{\beta},\, 1\right)\right)$$

- Find $w_0$: the class weight $1 / |g|$, where $|g|$ is the number of pixels belonging to class $g$, with $|g_0| > |g_1| > |g_2|$

### 1 -  Using just class weights

In [None]:
# for non-background w = w0
# w = w0*(1 - min(phi / beta, 1))

In [None]:
MAIN_PATH = '../data/ds_bowl_2018/full_data/'

In [None]:
data_dirs = list_directory(MAIN_PATH)

In [None]:
data_dirs[:5]

In [None]:
# count the pixels of every class over all multiclass masks
tot_background = 0
tot_nuclei = 0
tot_overlap = 0

for data_dir in data_dirs:
    # address the mask by its file name: the position of a file inside a
    # directory listing is not guaranteed
    mclass_mask = read2d(data_dir + 'mclass_one_mask.png')
    tot_background += np.sum(mclass_mask == 30) # background
    if len(np.unique(mclass_mask)) == 2:
        # without an overlap class the nuclei are stored with intensity 215
        tot_nuclei += np.sum(mclass_mask == 215) # nuclei
    else:
        tot_nuclei += np.sum(mclass_mask == 110) # nuclei
        tot_overlap += np.sum(mclass_mask == 215) # overlap

In [None]:
# inverse class frequency: 1 / (pixels of the class / pixels of all classes)
# an explicit array keeps this working whatever the type of the counters is;
# a class without any pixel ends up with an infinite weight
class_totals = np.array([tot_background, tot_nuclei, tot_overlap], dtype=float)
w_background, w_nuclei, w_overlap = 1 / (class_totals / class_totals.sum())

In [None]:
w_background, w_nuclei, w_overlap = (1.1551767249306626, 7.544967099214484, 57.753645718466)

### 2 -  Using normalized regional weights

In [None]:
# NOTE(review): dir_ is not assigned in this cell, it presumably still holds a
# sample directory left behind by an earlier cell - confirm before running
image_path = list_files(dir_ + '/images')[0]
mask_path = dir_ + 'mclass_one_mask.png'
mask = read2d(mask_path)
image = read3d(image_path)
show_side_to_side(mask, image)

In [None]:
mask = mask.astype(float)

In [None]:
# replace every class intensity by the weight of its class
# comparisons run on an untouched copy so that a weight which has just been
# written can never be matched again by a later comparison
labels = mask.copy()
uniq_pixels = np.unique(labels)
mask[labels == 30] = w_background
if len(uniq_pixels) == 3:
    mask[labels == 110] = w_nuclei
    mask[labels == 215] = w_overlap
else:
    # without an overlap class the nuclei are stored with intensity 215
    mask[labels == 215] = w_nuclei

In [None]:
# import the submodule explicitly: a bare `import scipy` is not guaranteed to
# make scipy.signal available
import scipy.signal
kernel = np.ones((3,3))
# every output pixel is the sum of the weights inside a 3x3 window
# NOTE(review): with mode='full' the output is larger than mask - confirm that
# 'same' is not what is wanted if SAW has to line up with the mask pixel by pixel
SAW = scipy.signal.convolve2d(mask, kernel, mode='full', boundary='fill', fillvalue=0)

In [None]:
mmin, mmax = SAW.min(), SAW.max()

In [None]:
# NOTE(review): SAW is divided by its value range but mmin is not subtracted,
# so this is not a [0, 1] min-max normalisation - confirm this is intended
SAW_norm = (SAW) / (mmax - mmin)

In [None]:
plt.hist(SAW_norm.flatten())

In [None]:
plt.figure(figsize=(13, 13))
plt.imshow(SAW_norm, cmap='inferno')