In [None]:
# %load_ext nb_black
%load_ext autoreload
%autoreload 2

## Initialization

### Imports

In [None]:
import os
import sys
import cv2
import json
import glob
import torch
import numpy as np
import pandas as pd
import seaborn as sns
import plotly.express as px

from tqdm.notebook import tqdm
from matplotlib import pyplot as plt

sys.path.append("../code/")

In [None]:
from params import *
from utils.rle import *
from inference.main import k_fold_inf

from data.dataset import InferenceDataset, load_image
from data.transforms import HE_preprocess

from utils.metrics import dice_scores_img

### Load

In [None]:
# Dataset information table shipped with the data.
df_info = pd.read_csv(DATA_PATH + "HuBMAP-20-dataset_information.csv")
# Training masks; the 'id' and 'encoding' columns are read further down.
df_mask = pd.read_csv(DATA_PATH + "train_4.csv")
# NOTE(review): df is not referenced in the visible cells.
df = pd.read_csv(OUT_PATH + "df_images.csv")

### Data

In [None]:
# NOTE(review): root, reduce_factor and rles are not referenced in the visible cells,
# which use TIFF_PATH_4 and df_mask directly.
root = TIFF_PATH_4
rle_path = DATA_PATH + "train_4.csv"  # same file as the one loaded into df_mask
reduce_factor = 1
rles = pd.read_csv(rle_path)

### Experiment

In [None]:
# Folder of the experiment to analyse: config.json and the pred_*.npy files are read from it.
log_folder = "../logs/2021-03-18/0/"  # b5 new

In [None]:
class Config:
    """
    Lightweight container exposing its keyword arguments as instance attributes.
    """

    def __init__(self, **entries):
        # Every keyword becomes an attribute of the same name.
        for key, value in entries.items():
            setattr(self, key, value)

# Read the experiment's config.json and expose its keys as attributes.
# The context manager closes the file handle once the JSON is parsed.
with open(log_folder + 'config.json', 'r') as f:
    config = Config(**json.load(f))

In [None]:
# NOTE(review): not referenced in the visible cells; THRESHOLD is the value applied to the predictions.
global_threshold = 0.4

In [None]:
# Saved prediction files of the experiment. glob returns matches in arbitrary order,
# so they are sorted to keep the per-image results in a reproducible order.
preds = sorted(glob.glob(log_folder + "pred_*.npy"))

In [None]:
# Show the prediction files found in log_folder.
preds

### Image, truth & pred

In [None]:
# Cut-off applied to the saved prediction array for the single-image visualisation below.
# NOTE(review): THRESHOLD is rebound to 0.35 in the post-processing section.
THRESHOLD = 0.4

In [None]:
# Image to inspect, identified by the name embedded in its prediction file path.
mask_name = "aaa6a05cc"

matching = [i for i, path in enumerate(preds) if mask_name in path]
if not matching:
    # Same exception type as indexing an empty list, but with an actionable message.
    raise IndexError(
        f"No prediction file matching {mask_name!r} among {len(preds)} files"
    )
idx = matching[0]

In [None]:
# Binarise the saved prediction: values strictly above THRESHOLD become 1, the rest 0.
pred = (np.load(preds[idx]) > THRESHOLD).astype(np.uint8)

In [None]:
# Load the matching TIFF image.
# NOTE(review): full_size=False presumably loads a reduced-size image - confirm in data.dataset.load_image.
img = load_image(os.path.join(TIFF_PATH_4, mask_name + ".tiff"), full_size=False)

In [None]:
# Encoding of the selected image, kept as a pandas Series (not a scalar).
rle = df_mask[df_mask['id'] == mask_name]['encoding']
# The shape is passed with the image's second axis first: (img.shape[1], img.shape[0]).
mask = enc2mask(rle, (img.shape[1], img.shape[0]))

In [None]:
# Display the three shapes side by side to check that mask, image and prediction line up.
mask.shape, img.shape, pred.shape

## Plot

In [None]:
def plot_contours_preds(img, mask, preds, w=1, downsize=1):
    """
    Plots the contours of a ground-truth mask and of a predicted mask on an image.

    Ground-truth contours are drawn with colour (1, 0, 0) and predicted contours with
    colour (0, 1, 0), each on its own copy of the image. The two copies are then averaged.

    Args:
        img (numpy array [H x W x C]): Image. Divided by 255 if its max is above 1.
        mask (numpy array [H x W]): Ground-truth mask, with values in [0, 1] or [0, 255].
        preds (numpy array [H x W]): Predicted mask, with values in [0, 1] or [0, 255].
        w (int, optional): Contour width. Defaults to 1.
        downsize (int, optional): Factor by which the image and both masks are downsized
            before the contours are extracted. Defaults to 1 (no resizing).

    Returns:
        plotly figure: Output of px.imshow on the image with both sets of contours.
    """
    img = img.copy()
    if img.max() > 1:
        img = (img / 255).astype(float)

    # Each mask is rescaled based on its OWN range (the prediction used to be
    # rescaled based on the ground-truth mask's range).
    mask = _to_uint8_mask(mask)
    preds = _to_uint8_mask(preds)

    if downsize > 1:
        # cv2.resize takes the target size as (width, height).
        new_shape = (mask.shape[1] // downsize, mask.shape[0] // downsize)
        mask = cv2.resize(
            mask, new_shape, interpolation=cv2.INTER_NEAREST,
        )
        img = cv2.resize(
            img, new_shape, interpolation=cv2.INTER_LINEAR,
        )
        preds = cv2.resize(
            preds, new_shape, interpolation=cv2.INTER_NEAREST,
        )

    contours, _ = cv2.findContours(mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
    contours_preds, _ = cv2.findContours(preds, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)

    # Draw each set of contours on its own copy, then blend the two copies.
    img_pred = img.copy()
    cv2.polylines(img, contours, True, (1., 0., 0.), w)
    cv2.polylines(img_pred, contours_preds, True, (0., 1., 0.), w)

    img = (img + img_pred) / 2

    return px.imshow(img)


def _to_uint8_mask(arr):
    """Rescales a mask with values in [0, 1] or [0, 255] to a uint8 array in [0, 255]."""
    if arr.max() > 1:
        arr = (arr / 255).astype(float)
    return (arr * 255).astype(np.uint8)

In [None]:
# Contours of the ground truth and of the prediction on the image downsized by 4, with a contour width of 2.
fig = plot_contours_preds(img, mask, pred, w=2, downsize=4)
fig.show()

### Post-processing

In [None]:
def post_process_mask(mask, min_size=1000, plot=True):
    """
    Removes the small connected components of a binary mask.

    Components are extracted with 8-connectivity, and only the ones strictly larger
    than min_size pixels are kept.

    Args:
        mask (numpy array [H x W]): Binary mask, passed as is to cv2.connectedComponents.
        min_size (int, optional): Components of at most this many pixels are removed.
            Defaults to 1000.
        plot (bool, optional): Whether to plot the histogram of component sizes.
            Defaults to True.

    Returns:
        numpy uint8 array [H x W]: Mask with 1 on the kept components and 0 elsewhere.
    """
    num_component, components = cv2.connectedComponents(mask, connectivity=8)

    # Pixel count of every label in a single pass over the label image,
    # instead of one full-image scan per component. Label 0 is the background.
    sizes = np.bincount(components.ravel(), minlength=num_component)[1:]

    # Lookup table label -> keep flag; indexing it with the label image builds the mask.
    keep = np.zeros(num_component, dtype=bool)
    keep[1:] = sizes > min_size
    processed_mask = keep[components].astype(np.uint8)

    removed = int(num_component - 1 - keep.sum())
    print(f'Removed {removed} components of size <= {min_size}')

    if plot:
        plt.figure(figsize=(15, 5))
        sns.histplot(sizes, bins=50)
        plt.axvline(min_size, color="salmon")
        plt.show()

    return processed_mask

In [None]:
# Passed as min_size to post_process_mask: components of at most this many pixels are removed.
MIN_SIZE = 500
# Cut-off applied to the saved predictions in the evaluation loop below.
# Rebinds the 0.4 value set for the single-image visualisation.
THRESHOLD = 0.35

In [None]:
# Per-image scores of the thresholded predictions, before and after small-component removal.
scores_before = []
scores_after = []

for pred_path in preds:
    # File names follow the "pred_<mask name>.npy" pattern matched by the glob.
    # os.path keeps the parsing independent of the path separator, and splitting
    # only on the first underscore keeps mask names that contain underscores intact.
    mask_name = os.path.splitext(os.path.basename(pred_path))[0].split('_', 1)[1]
    print(f'\n  -> Mask {mask_name}')

    rle = df_mask[df_mask['id'] == mask_name]['encoding']
    img = load_image(os.path.join(TIFF_PATH_4, mask_name + ".tiff"), full_size=False)
    mask = enc2mask(rle, (img.shape[1], img.shape[0]))

    pred = (np.load(pred_path) > THRESHOLD).astype(np.uint8)

    pred_pp = post_process_mask(pred, min_size=MIN_SIZE)

    scores_before.append(dice_scores_img(pred, mask))
    scores_after.append(dice_scores_img(pred_pp, mask))

    print(f'Score before PP : {scores_before[-1] :.4f}')
    print(f'Score after PP :  {scores_after[-1] :.4f}')

In [None]:
# Mean of the per-image scores over all evaluated images, before and after post-processing.
print(f'CV before PP : {np.mean(scores_before) :.4f}')
print(f'CV after PP :  {np.mean(scores_after) :.4f}')