In [None]:
# %load_ext nb_black
%load_ext autoreload
%autoreload 2

## Initialization

### Imports

In [None]:
import os
import gc
import sys
import cv2
import json
import glob
import torch
import tifffile
import numpy as np
import pandas as pd
import seaborn as sns
import plotly.express as px

from tqdm.notebook import tqdm
from collections import Counter
from matplotlib import pyplot as plt
from scipy.ndimage import gaussian_filter

sys.path.append("../code/")

In [None]:
from params import *
from utils.rle import *

from data.dataset import load_image

from utils.metrics import dice_scores_img
from utils.plots import plot_heatmap_preds, plot_contours_preds

In [None]:
# path = "../logs/2021-05-06/1/oofs/"
# for p in tqdm(os.listdir(path)):
#     if p.endswith('.npy'):
#         oof = np.load(path + p)
#         print(oof.shape)
#         oof = oof.mean(0)
#         np.save(path + p[:-4] + "_avg.npy", oof)

### Load

In [None]:
# Read the dataset information sheet and two mask tables from DATA_PATH.
# Note: `df_mask` is reassigned by the evaluation cells further down.
df_info = pd.read_csv(DATA_PATH + "HuBMAP-20-dataset_information.csv")
df_mask = pd.read_csv(DATA_PATH + "train_2_onlyfc.csv")
df_mask_new = pd.read_csv(DATA_PATH + "train_2_fix.csv")

In [None]:
# Experiment folder from which the `pred_{mask_name}.npy` prediction files are loaded
# by the double-threshold evaluation loop below.
log_folder = "../logs/2021-05-06/1/"  # b1 512 2 fix debug

In [None]:
# Cut-off used to binarize probability maps before scoring.
# Reassigned by the configuration cells of the later sections.
THRESHOLD = 0.5

## Double threshold + min_size

In [None]:
def post_process_mask(probs, threshold_max=0.7, threshold_comp=0.5, min_size=0):
    """
    Filters the connected components of a probability map with a double threshold.

    Components are extracted from `probs > threshold_comp` using 8-connectivity.
    A component is kept only if its area is strictly greater than `min_size` and its
    highest probability is strictly greater than `threshold_max`.
    Pixels that do not belong to a kept component are set to 0.

    Args:
        probs (np array [H x W]): Probability map.
        threshold_max (float, optional): Minimum peak probability of a kept component. Defaults to 0.7.
        threshold_comp (float, optional): Threshold used to extract components. Defaults to 0.5.
        min_size (int, optional): Components of this area or smaller are removed. Defaults to 0.

    Returns:
        np array [H x W]: Filtered probabilities. If no component is found, `probs` itself
            is returned unchanged (not a copy).
    """
    mask = (probs > threshold_comp).astype(np.uint8)
    num_component, components, boxes, _ = cv2.connectedComponentsWithStats(mask, connectivity=8)

    if num_component <= 1:
        return probs

    processed_mask = np.zeros_like(probs)
    removed = 0

    # Row 0 of the stats describes the background, so real components are labelled from 1.
    for label, (x, y, w, h, area) in enumerate(boxes[1:], start=1):
        # Bounding box enlarged by 10 pixels and clipped to the image.
        x0, y0 = max(x - 10, 0), max(y - 10, 0)
        x1 = min(x + w + 10, probs.shape[1])
        y1 = min(y + h + 10, probs.shape[0])
        window = (slice(y0, y1), slice(x0, x1))

        # Crop the label map *before* comparing: comparing the full image for every
        # component costs O(image size) per component for the exact same result.
        component_prob = (components[window] == label) * probs[window]
        max_prob = component_prob.max()

        if area > min_size and max_prob > threshold_max:
            processed_mask[window] += component_prob
        else:
            removed += 1

    print(f'Removed {removed} components')
    return processed_mask

In [None]:
# Arguments forwarded to post_process_mask by the evaluation loop below
# (threshold_max, threshold_comp and min_size respectively).
THRESHOLD_MAX = 0.7
THRESHOLD_COMP = 0.5
MIN_SIZE = 0
# Cut-off used to binarize both the raw and the post-processed probabilities before scoring.
THRESHOLD = 0.5

# When True, the loop below reads train_4_fix.csv and resizes the predictions to half their size.
DOWN = True
# Not read by the evaluation loop of this section.
PLOT = False

In [None]:
scores = []
scores_pp = []

# The ground truth table depends on whether predictions are evaluated at half size.
mask_csv = "train_4_fix.csv" if DOWN else "train_2_fix.csv"
df_mask = pd.read_csv(DATA_PATH + mask_csv)

for idx, mask_name in enumerate(tqdm(df_mask['id'])):
    print(f'\n  -> Mask {mask_name}')

    rle = df_mask.loc[df_mask['id'] == mask_name, 'encoding']

    # Average the stacked predictions along the first axis.
    probs = np.load(log_folder + f'pred_{mask_name}.npy').astype(np.float32)
    probs = probs.mean(0)

    if DOWN:
        half_size = (probs.shape[1] // 2, probs.shape[0] // 2)
        probs = cv2.resize(probs, half_size, interpolation=cv2.INTER_AREA)

    mask = enc2mask(rle, (probs.shape[1], probs.shape[0]))

    pred_pp = post_process_mask(
        probs,
        threshold_max=THRESHOLD_MAX,
        threshold_comp=THRESHOLD_COMP,
        min_size=MIN_SIZE,
    )

    # Score the raw and the post-processed maps with the same binarization threshold.
    score_raw = dice_scores_img(probs > THRESHOLD, mask)
    scores.append(score_raw)
    score_pp = dice_scores_img(pred_pp > THRESHOLD, mask)
    scores_pp.append(score_pp)

    print(f'Score without pp : {scores[-1] :.4f}')
    print(f'Score with pp    : {scores_pp[-1] :.4f}')

#     break

In [None]:
# Mean score over all evaluated masks, before and after post-processing.
print(
    f'CV before PP : {np.mean(scores) :.4f}',
    f'CV after PP :  {np.mean(scores_pp) :.4f}',
    sep='\n',
)

## Glomeruli vote blend

In [None]:
def glomeruli_vote(probs, img, mask=None, threshold_vote=0.5, threshold_max=0.7, threshold_comp=0.5, plot=True):
    """
    Blends several probability maps with a per-component vote.

    Components are extracted (8-connectivity) from the union of the thresholded maps.
    A map votes for a component when its highest probability inside the component is
    strictly greater than `threshold_max`. A component is kept when at least
    `threshold_vote * len(probs)` maps voted for it, and it is then filled with the mean
    of the voting maps only. Rejected components stay at 0.

    Args:
        probs (np array [N x H x W]): Stacked probability maps to blend.
        img (np array [H x W (x C)] or None): Image, only used when `plot` is True.
        mask (np array [H x W] or None, optional): Ground truth, only used for plotting. Defaults to None.
        threshold_vote (float, optional): Fraction of positive votes required. Defaults to 0.5.
        threshold_max (float, optional): Peak probability a map needs to vote. Defaults to 0.7.
        threshold_comp (float, optional): Threshold used to extract components. Defaults to 0.5.
        plot (bool, optional): Whether to plot the components on which not every map voted
            positively (requires `img`). Defaults to True.

    Returns:
        np float32 array [H x W]: Voted probabilities.
    """
    # Union of the thresholded maps: every component detected by at least one map.
    mask_union = ((probs > threshold_comp).sum(0) > 0).astype(np.uint8)
    # The plain average is only needed for the comparison plots.
    mask_blend = probs.mean(0) if plot else None

    num_components, components, boxes, _ = cv2.connectedComponentsWithStats(mask_union, connectivity=8)

    processed_mask = np.zeros(mask_union.shape, np.float32)
    has_truth = mask is not None

    # Row 0 of the stats describes the background, so real components are labelled from 1.
    for label, b in enumerate(boxes[1:], start=1):
        # Bounding box enlarged by 10 pixels and clipped to the image.
        x0, y0 = max(b[0] - 10, 0), max(b[1] - 10, 0)
        x1 = min(b[0] + b[2] + 10, probs.shape[2])
        y1 = min(b[1] + b[3] + 10, probs.shape[1])

        comp = components[y0: y1, x0: x1] == label
        component_probs = probs[:, y0: y1, x0: x1] * comp[None, :]

        votes = component_probs.max((1, 2)) > threshold_max
        vote = np.sum(votes) >= (threshold_vote * len(probs))
        if vote:
            # Average only the maps that voted for the component.
            probas = component_probs[votes].mean(0)
            processed_mask[y0: y1, x0: x1] += probas

        if plot and not np.all(votes):
            prob_blend = mask_blend[y0: y1, x0: x1] * comp
            prob_vote = processed_mask[y0: y1, x0: x1] * comp

            # Nothing to show if neither strategy predicts anything here.
            if ((prob_blend > 0.5).max() == 0 and (prob_vote > 0.5).max() == 0):
                continue

            glom = img[y0: y1, x0: x1]
            plot_mask = int(has_truth)

            print(votes, '->', vote)
            plt.figure(figsize=(15, 5 - plot_mask))

            plt.subplot(1, 3 + plot_mask, 1)
            plt.imshow(glom)
            plt.axis(False)

            plt.subplot(1, 3 + plot_mask, 2)
            plt.imshow(prob_blend > 0.5)
            plt.axis(False)
            plt.title('Blend')

            plt.subplot(1, 3 + plot_mask, 3)
            plt.imshow(prob_vote > 0.5)
            plt.axis(False)
            plt.title('Vote')

            if has_truth:
                # Slice the ground truth only when provided: `mask` defaults to None.
                gt = mask[y0: y1, x0: x1]
                plt.subplot(1, 3 + plot_mask, 4)
                plt.imshow(gt)
                plt.axis(False)
                plt.title('Truth')
            plt.show()

    return processed_mask

In [None]:
# Arguments forwarded to glomeruli_vote by the loops below
# (threshold_vote, threshold_comp and threshold_max respectively).
THRESHOLD_VOTE = 0.7
THRESHOLD_COMP = 0.5
THRESHOLD_MAX = 0.7

# Only referenced by the commented-out post_process_mask call in the loops below.
THRESHOLD_MAX_PP = 0.8
MIN_SIZE = 0

# Cut-off applied to the averaged predictions to obtain the blended mask.
THRESHOLD = 0.45

# When True, the loops load the image and glomeruli_vote plots the components
# on which not every prediction voted positively.
PLOT = False

In [None]:
# Experiment folders whose `pred_{mask_name}.npy` files are concatenated by the blending loops below.
log_folders = [
    '../logs/2021-05-09/2/',
    '../logs/2021-05-06/1/',
]

In [None]:
scores_blend = []
scores_vote = []
scores_vote_pp = []
df_mask = pd.read_csv(DATA_PATH + "train_2_fix.csv")

for idx, mask_name in enumerate(tqdm(df_mask['id'])):
    print(f'\n  -> Mask {mask_name}\n')

    rle = df_mask[df_mask['id'] == mask_name]['encoding']

    # Stack the predictions of every experiment; skip masks that lack a prediction file.
    try:
        probs = np.concatenate([
            np.load(folder + f'pred_{mask_name}.npy').astype(np.float32) for folder in log_folders
        ])
    except FileNotFoundError:
        continue

    # Check the rank *before* reading probs.shape[2], otherwise a badly shaped array
    # raises an IndexError instead of being skipped.
    if probs.ndim != 3:
        print('Wrong dim')
        continue

    # The image is only needed for plotting, so it is loaded once the predictions are known to be usable.
    if PLOT:
        img = load_image(os.path.join(TIFF_PATH_2, mask_name + ".tiff"), full_size=False, reduce_factor=2)
    else:
        img = None

    mask = enc2mask(rle, (probs.shape[2], probs.shape[1]))

    pred_blend = (np.mean(probs, 0) > THRESHOLD).astype(np.uint8)

    pred_vote = glomeruli_vote(
        probs,
        img,
        mask=mask,
        threshold_vote=THRESHOLD_VOTE,
        threshold_max=THRESHOLD_MAX,
        threshold_comp=THRESHOLD_COMP,
        plot=PLOT,
    )

#     pred_vote_pp = post_process_mask(
#         pred_vote,
#         min_size=MIN_SIZE,
#         threshold_max=THRESHOLD_MAX_PP,
#         threshold_comp=THRESHOLD_COMP,
#     )

    # NOTE(review): the vote scores are currently disabled, so `scores_vote` and
    # `scores_vote_pp` stay empty and their mean is nan.
    scores_blend.append(dice_scores_img(pred_blend, mask))
#     scores_vote.append(dice_scores_img(pred_vote > THRESHOLD, mask))
#     scores_vote_pp.append(dice_scores_img(pred_vote_pp > THRESHOLD, mask))

    print(f'Score with blending    : {scores_blend[-1] :.4f}')
#     print(f'Score with voting      : {scores_vote[-1] :.4f}')
#     print(f'Score with voting + PP : {scores_vote_pp[-1] :.4f}')

#     break

In [None]:
# Mean score of each strategy over the evaluated masks.
# A list that was never filled (scoring lines commented out in the loop) averages to nan.
print(
    f'CV with blending   : {np.mean(scores_blend) :.4f}',
    f'CV with voting     :  {np.mean(scores_vote) :.4f}',
    f'CV with voting + PP:  {np.mean(scores_vote_pp) :.4f}',
    sep='\n',
)

In [None]:
# NOTE(review): this cell repeats the previous blending loop; consider keeping a single copy.
scores_blend = []
scores_vote = []
scores_vote_pp = []
df_mask = pd.read_csv(DATA_PATH + "train_2_fix.csv")

for idx, mask_name in enumerate(tqdm(df_mask['id'])):
    print(f'\n  -> Mask {mask_name}\n')

    rle = df_mask[df_mask['id'] == mask_name]['encoding']

    # Stack the predictions of every experiment; skip masks that lack a prediction file.
    try:
        probs = np.concatenate([
            np.load(folder + f'pred_{mask_name}.npy').astype(np.float32) for folder in log_folders
        ])
    except FileNotFoundError:
        continue

    # Check the rank *before* reading probs.shape[2], otherwise a badly shaped array
    # raises an IndexError instead of being skipped.
    if probs.ndim != 3:
        print('Wrong dim')
        continue

    # The image is only needed for plotting, so it is loaded once the predictions are known to be usable.
    if PLOT:
        img = load_image(os.path.join(TIFF_PATH_2, mask_name + ".tiff"), full_size=False, reduce_factor=2)
    else:
        img = None

    mask = enc2mask(rle, (probs.shape[2], probs.shape[1]))

    pred_blend = (np.mean(probs, 0) > THRESHOLD).astype(np.uint8)

    pred_vote = glomeruli_vote(
        probs,
        img,
        mask=mask,
        threshold_vote=THRESHOLD_VOTE,
        threshold_max=THRESHOLD_MAX,
        threshold_comp=THRESHOLD_COMP,
        plot=PLOT,
    )

#     pred_vote_pp = post_process_mask(
#         pred_vote,
#         min_size=MIN_SIZE,
#         threshold_max=THRESHOLD_MAX_PP,
#         threshold_comp=THRESHOLD_COMP,
#     )

    # NOTE(review): the vote scores are currently disabled, so `scores_vote` and
    # `scores_vote_pp` stay empty and their mean is nan.
    scores_blend.append(dice_scores_img(pred_blend, mask))
#     scores_vote.append(dice_scores_img(pred_vote > THRESHOLD, mask))
#     scores_vote_pp.append(dice_scores_img(pred_vote_pp > THRESHOLD, mask))

    print(f'Score with blending    : {scores_blend[-1] :.4f}')
#     print(f'Score with voting      : {scores_vote[-1] :.4f}')
#     print(f'Score with voting + PP : {scores_vote_pp[-1] :.4f}')

#     break

In [None]:
# Mean score of each strategy over the evaluated masks.
# A list that was never filled (scoring lines commented out in the loop) averages to nan.
print(
    f'CV with blending   : {np.mean(scores_blend) :.4f}',
    f'CV with voting     :  {np.mean(scores_vote) :.4f}',
    f'CV with voting + PP:  {np.mean(scores_vote_pp) :.4f}',
    sep='\n',
)

## Glomeruli CRF

In [None]:
# post proc function
from pydensecrf.utils import unary_from_softmax
import pydensecrf.densecrf as dcrf

def crf_on_preds(preds, img, binary=False):
    """
    Refines a foreground probability map with a dense CRF using a Gaussian pairwise term.

    Args:
        preds (np array [H x W]): Foreground probabilities.
        img (np array [H x W (x C)]): Image; with the bilateral term disabled it is only
            used to check that its spatial size matches `preds`.
        binary (bool, optional): Currently unused. Defaults to False.

    Returns:
        np float16 array [H x W]: Foreground values after 10 inference steps.
    """
    assert preds.shape == img.shape[:2]
    height, width = preds.shape
    n_labels = 2

    # The CRF model takes the width first, then the height.
    crf = dcrf.DenseCRF2D(width, height, n_labels)

    # Unary potentials (neg log probability) built from the [background, foreground] stack.
    two_class_probs = np.array([1 - preds, preds])
    crf.setUnaryEnergy(unary_from_softmax(two_class_probs))

    crf.addPairwiseGaussian(
        sxy=(3, 3),
        compat=3,
        kernel=dcrf.DIAG_KERNEL,
        normalization=dcrf.NORMALIZE_SYMMETRIC,
    )

    # Disabled appearance (bilateral) term, kept for reference:
#     crf.addPairwiseBilateral(
#         sxy=(3, 3),
#         srgb=(5, 5, 5),
#         rgbim=img,
#         compat=3,
#         kernel=dcrf.DIAG_KERNEL,
#         normalization=dcrf.NORMALIZE_SYMMETRIC
#     )

    # Run inference and keep the foreground row, reshaped back to the input size.
    marginals = np.array(crf.inference(10), dtype=np.float16)
    return marginals[1, :].reshape((height, width))

In [None]:
def post_process_crf(probs, img, mask_truth, threshold=0.5, threshold_comp=0.5, plot=False):
    """
    Applies crf_on_preds on a window around each connected component of a probability map.

    Components are extracted from `probs > threshold_comp` (8-connectivity). For each one,
    the thresholded mask inside the enlarged bounding box is dilated, multiplied with the
    probabilities, refined by the CRF and accumulated into the output.

    Args:
        probs (np array [H x W]): Probability map.
        img (np array [H x W (x C)]): Image, cropped around each component and passed to crf_on_preds.
        mask_truth (np array [H x W] or None): Ground truth, only used for plotting.
        threshold (float, optional): Threshold used to binarize the plotted maps. Defaults to 0.5.
        threshold_comp (float, optional): Threshold used to extract components. Defaults to 0.5.
        plot (bool, optional): Whether to plot each component before / after the CRF. Defaults to False.

    Returns:
        np array [H x W]: Refined probabilities. If no component is found, `probs` itself
            is returned unchanged (not a copy).
    """
    mask = (probs > threshold_comp).astype(np.uint8)
    num_component, components, boxes, _ = cv2.connectedComponentsWithStats(mask, connectivity=8)
    if num_component <= 1:
        return probs

    kernel = np.ones((3, 3), np.uint8)

    processed_mask = np.zeros_like(probs)
    # The truth panel depends on `mask_truth`; the local `mask` is never None.
    has_truth = mask_truth is not None

    for b in tqdm(boxes[1:]):
        # Bounding box enlarged by 10 pixels and clipped to the image.
        x0, y0 = max(b[0] - 10, 0), max(b[1] - 10, 0)
        x1 = min(b[0] + b[2] + 10, probs.shape[1])
        y1 = min(b[1] + b[3] + 10, probs.shape[0])

        # NOTE(review): the window keeps *every* thresholded pixel it contains, not only the
        # current component, so nearby components may be accumulated more than once - confirm
        # this is intended.
        comp = mask[y0: y1, x0: x1]
        comp = cv2.dilate(comp, kernel, iterations=5)

        component_prob = probs[y0: y1, x0: x1] * comp
        glom = img[y0: y1, x0: x1].copy(order='C')

        component_prob_crf = crf_on_preds(component_prob, glom)
#         component_prob_crf = cv2.dilate((component_prob_crf > THRESHOLD).astype(np.uint8), kernel, iterations=1)

        processed_mask[y0: y1, x0: x1] += component_prob_crf

        if plot:
            plot_mask = int(has_truth)
            plt.figure(figsize=(15, 5 - plot_mask))

            plt.subplot(1, 3 + plot_mask, 1)
            plt.imshow(glom)
            plt.axis(False)

            plt.subplot(1, 3 + plot_mask, 2)
            plt.imshow(component_prob > threshold)
            plt.axis(False)
            plt.title('No crf')

            plt.subplot(1, 3 + plot_mask, 3)
            plt.imshow(component_prob_crf > threshold)
            plt.axis(False)
            plt.title('crf')

            if has_truth:
                gt = mask_truth[y0: y1, x0: x1]
                plt.subplot(1, 3 + plot_mask, 4)
                plt.imshow(gt)
                plt.axis(False)
                plt.title('Truth')
            plt.show()

    return processed_mask

In [None]:
# Settings for the CRF evaluation loop, which is currently commented out in the next cell.
PLOT = False
THRESHOLD = 0.5
THRESHOLD_COMP = 0.5

# Single experiment folder providing the `pred_{mask_name}.npy` files.
log_folders = ["../logs/2021-05-06/1/"]

In [None]:
# scores = []
# scores_crf = []
# scores_vote_pp = []
# df_mask = pd.read_csv(DATA_PATH + "train_2_fix.csv")


# for idx, mask_name in enumerate(tqdm(df_mask['id'])):
#     print(f'\n  -> Mask {mask_name}\n')
    
#     rle = df_mask[df_mask['id'] == mask_name]['encoding']
#     img = load_image(os.path.join(TIFF_PATH_2, mask_name + ".tiff"), full_size=False, reduce_factor=2)
    
#     try:
#         probs = np.concatenate([
#             np.load(log_folder + f'pred_{mask_name}.npy').astype(np.float32) for log_folder in log_folders
#         ])
#     except FileNotFoundError:
#         continue
#     probs = probs.mean(0)
    
#     mask = enc2mask(rle, (probs.shape[1], probs.shape[0]))

#     pred_crf = post_process_crf(
#         probs,
#         img,
#         mask,
#         threshold=THRESHOLD,
#         threshold_comp=THRESHOLD_COMP,
#         plot=PLOT,
#     )
        
#     scores.append(dice_scores_img(probs > THRESHOLD, mask))
#     scores_crf.append(dice_scores_img(pred_crf > THRESHOLD, mask))
# #     scores_vote_pp.append(dice_scores_img(pred_vote_pp > THRESHOLD, mask))
    
#     print(f'Score without       : {scores[-1] :.4f}')
#     print(f'Score with crf      : {scores_crf[-1] :.4f}')
# #     print(f'Score with voting + PP : {scores_vote_pp[-1] :.4f}')
    
# #     break

In [None]:
# print(f'CV without crf : {np.mean(scores) :.4f}')
# print(f'CV with crf    :  {np.mean(scores_crf) :.4f}')

## Glomeruli convex hull

In [None]:
from skimage.morphology import convex_hull_image

In [None]:
def post_process_cvxhull(probs, img, mask_truth, threshold=0.5, threshold_max=0.5, plot=False):
    """
    Replaces connected components of a thresholded probability map with their convex hull.

    Components are extracted from `probs > threshold` (8-connectivity). A component is
    replaced with its convex hull when its highest probability is strictly greater than
    `threshold_max` and the hull adds fewer than 1000 pixels; otherwise it is kept as is.

    Args:
        probs (np array [H x W]): Probability map.
        img (np array [H x W (x C)] or None): Image, only used for plotting.
        mask_truth (np array [H x W] or None): Ground truth, only used for plotting.
        threshold (float, optional): Threshold used to extract components. Defaults to 0.5.
        threshold_max (float, optional): Peak probability required to use the hull. Defaults to 0.5.
        plot (bool, optional): Whether to plot the components for which the hull is used and
            adds more than 250 pixels. Defaults to False.

    Returns:
        np array [H x W]: Binary-valued mask with the dtype of `probs`. If no component is
            found, `probs` itself is returned unchanged (not a copy).
    """
    mask = (probs > threshold).astype(np.uint8)
    num_component, components, boxes, _ = cv2.connectedComponentsWithStats(mask, connectivity=8)
    if num_component <= 1:
        return probs

    processed_mask = np.zeros_like(probs)
    # The truth panel depends on `mask_truth`; the local `mask` is never None.
    has_truth = mask_truth is not None

    # Row 0 of the stats describes the background, so real components are labelled from 1.
    for label, b in enumerate(boxes[1:], start=1):
        # Bounding box enlarged by 10 pixels and clipped to the image.
        x0, y0 = max(b[0] - 10, 0), max(b[1] - 10, 0)
        x1 = min(b[0] + b[2] + 10, probs.shape[1])
        y1 = min(b[1] + b[3] + 10, probs.shape[0])

        # Crop the label map before comparing instead of comparing the full image every time.
        comp = components[y0: y1, x0: x1] == label
        prob = probs[y0: y1, x0: x1] * comp

        conv_hull = convex_hull_image(comp)
        # Number of pixels the hull adds to the component.
        hull_gain = (conv_hull.astype(int) - comp.astype(int)).sum()
        confident = prob.max() > threshold_max

        if confident and hull_gain < 1000:
            processed_mask[y0: y1, x0: x1] += conv_hull
        else:
            processed_mask[y0: y1, x0: x1] += comp

        if plot and confident and 250 < hull_gain < 1000:
            glom = img[y0: y1, x0: x1]

            plot_mask = int(has_truth)
            plt.figure(figsize=(15, 5 - plot_mask))

            plt.subplot(1, 3 + plot_mask, 1)
            plt.imshow(glom)
            plt.axis(False)

            plt.subplot(1, 3 + plot_mask, 2)
            plt.imshow(comp)
            plt.axis(False)
            plt.title('Initial')

            plt.subplot(1, 3 + plot_mask, 3)
            plt.imshow(conv_hull)
            plt.axis(False)
            plt.title('Convex hull')

            if has_truth:
                gt = mask_truth[y0: y1, x0: x1]
                plt.subplot(1, 3 + plot_mask, 4)
                plt.imshow(gt)
                plt.axis(False)
                plt.title('Truth')
            plt.show()

    return processed_mask

In [None]:
# Settings for the convex hull evaluation loop below.
PLOT = False
# Used both to extract the components and to binarize the maps before scoring.
THRESHOLD = 0.5
# Equal to THRESHOLD: every extracted component contains a pixel above it, so the
# peak probability check of post_process_cvxhull always passes with these values.
THRESHOLD_MAX = 0.5

# Single experiment folder providing the `pred_{mask_name}.npy` files.
log_folders = ["../logs/2021-05-06/1/"]

In [None]:
# NOTE: `scores_crf` / `pred_crf` hold the convex hull results; the names are kept because
# the summary cell below reads `scores_crf`.
scores = []
scores_crf = []
scores_vote_pp = []
df_mask = pd.read_csv(DATA_PATH + "train_2_fix.csv")


for idx, mask_name in enumerate(tqdm(df_mask['id'])):
    print(f'\n  -> Mask {mask_name}\n')

    rle = df_mask[df_mask['id'] == mask_name]['encoding']

    # Stack the predictions of every experiment; skip masks that lack a prediction file.
    try:
        probs = np.concatenate([
            np.load(folder + f'pred_{mask_name}.npy').astype(np.float32) for folder in log_folders
        ])
    except FileNotFoundError:
        continue
    probs = probs.mean(0)

    # post_process_cvxhull only reads the image when plotting, so skip the costly load
    # otherwise (same convention as the vote blending loops).
    if PLOT:
        img = load_image(os.path.join(TIFF_PATH_2, mask_name + ".tiff"), full_size=False, reduce_factor=2)
    else:
        img = None

    mask = enc2mask(rle, (probs.shape[1], probs.shape[0]))

    pred_crf = post_process_cvxhull(
        probs,
        img,
        mask,
        threshold=THRESHOLD,
        threshold_max=THRESHOLD_MAX,
        plot=PLOT,
    )

    scores.append(dice_scores_img(probs > THRESHOLD, mask))
    scores_crf.append(dice_scores_img(pred_crf > THRESHOLD, mask))
#     scores_vote_pp.append(dice_scores_img(pred_vote_pp > THRESHOLD, mask))

    print(f'Score without convhull : {scores[-1] :.4f}')
    print(f'Score with convhull : {scores_crf[-1] :.4f}')
#     print(f'Score with voting + PP : {scores_vote_pp[-1] :.4f}')

#     break

In [None]:
# Mean score over the evaluated masks; `scores_crf` holds the convex hull scores.
print(
    f'CV without conv hull: {np.mean(scores) :.4f}',
    f'CV with conv hull   :  {np.mean(scores_crf) :.4f}',
    sep='\n',
)