**About**: This notebook loads the predictions saved for the validation data, tweaks the post-processing thresholds, and evaluates the results.

In [None]:
# %load_ext nb_black
# Enable IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell executes, so edits to the project sources are picked up live.
%load_ext autoreload
%autoreload 2

In [None]:
cd ../src/

## Initialization

### Imports

In [None]:
import gc
import os
import ast
import sys
import cv2
import glob
import json
import torch
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm
from collections import Counter
warnings.simplefilter("ignore", UserWarning)

In [None]:
from params import *

from utils.plots import *
from utils.metrics import *
from utils.logger import Config, save_to_folder
from utils.rle import rle_encode, rle_decode

from inference.tweaking import *
from inference.validation import *
from inference.post_process import *

from data.preparation import prepare_data
from data.dataset import SartoriusDataset
from data.transforms import define_pipelines
from inference.validation import inference_val

## Single xp

### Prepare

In [None]:
# Build the dataframe and the dataset used for visualisation / evaluation.
df = prepare_data(fix=False, remove_anomalies=True)

pipelines = define_pipelines("configs/config_aug.py")
dataset = SartoriusDataset(
    df,
    transforms=pipelines['val_viz'],
    precompute_masks=False,
)

In [None]:
# Folder holding the saved `masks_{id}.npy` / `boxes_{id}.npy` predictions to evaluate.
EXP_FOLDER = f"{OUT_PATH}ens_8/"

### Tweak

In [None]:
# Search grid for the post-processing parameters.
# NOTE: the next cell overwrites these names with a single operating point;
# skip that cell to actually run the grid search.
thresholds_mask = [0.45]
thresholds_nms = [0.05, 0.1, 0.15]
thresholds_conf = [np.round(0.05 * step, 2) for step in range(4, 17)]  # 0.20, 0.25, ..., 0.80

min_sizes = list(range(0, 151, 25))  # 0, 25, ..., 150

In [None]:
# Single operating point: each list holds exactly one candidate, so the
# tweaking step below evaluates only this configuration.
thresholds_mask, thresholds_nms, thresholds_conf = [0.45], [0.15], [0.65]
min_sizes = [75]

In [None]:
# Load the saved predictions of every image as (boxes, masks) pairs.
# Ids are read positionally (`.values`) rather than by index label, so the k-th
# entry of `results` matches the k-th row of `df` even if `df` does not carry a
# default 0..n-1 index; the Eval cell pairs predictions with
# `dataset.df['id'].values` in the same positional way.
results = []
for id_ in tqdm(df['id'].values):
    preds_mask = np.load(EXP_FOLDER + f"masks_{id_}.npy")
    preds_boxes = np.load(EXP_FOLDER + f"boxes_{id_}.npy")

    results.append((preds_boxes, preds_mask))

# Kept for reference: swap to enable if a folder stored boxes / masks under swapped names.
# if EXP_FOLDER == OUT_PATH + "ens_8/":
#     results = [(r[1], r[0]) for r in results]

In [None]:
# Score every combination of the candidate thresholds on the loaded predictions.
# `scores_tweak` is indexed by cell type in the next cell and `all_cell_types`
# is used there to weight the per-class scores.
scores_tweak, all_cell_types = tweak_thresholds(
    results,
    dataset,
    thresholds_mask,
    thresholds_nms,
    thresholds_conf,
    min_sizes=min_sizes,
    remove_overlap=True,
    corrupt=True,
)

In [None]:
# For each cell type, pick the threshold combination with the best mean score,
# then report the class-frequency-weighted average as the CV score.
best_thresholds_mask, best_thresholds_nms = [], []
best_thresholds_conf, best_min_sizes = [], []
best_scores = []

for c in range(len(CELL_TYPES)):
    print(f' -> Cell type {CELL_TYPES[c]} : ')

    # Average over the second-to-last axis, then locate the best remaining cell.
    scores_class = scores_tweak[c].mean(-2)
    idx = np.unravel_index(scores_class.argmax(), scores_class.shape)
    best_score = scores_class[idx]
    best_scores.append(best_score)

    # Axis order after the mean is read as (mask, nms, min_size, conf).
    best_thresholds_c = (
        thresholds_mask[idx[0]],
        thresholds_nms[idx[1]],
        thresholds_conf[idx[3]],
        min_sizes[idx[2]],
    )
    for store, value in zip(
        (best_thresholds_mask, best_thresholds_nms, best_thresholds_conf, best_min_sizes),
        best_thresholds_c,
    ):
        store.append(value)

    print(
        f"Best score {best_score:.4f} for thresholds (mask, nms, conf, min_size): {best_thresholds_c}\n"
    )

# Weight each class by the number of images predicted as that class.
cell_counts = Counter(all_cell_types)
weights = [cell_counts[c] for c in range(len(CELL_TYPES))]

best_score = np.average(best_scores, weights=weights)

print(f'CV score : {best_score:.4f}')

### Eval

In [None]:
# Apply the selected thresholds, score every image and gather the results in a dataframe.
masks_pred, boxes_pred, cell_types = process_results(
    results,
    best_thresholds_mask,
    best_thresholds_nms,
    best_thresholds_conf,
    best_min_sizes,
    remove_overlap=True,
    corrupt=True
)

scores, scores_per_class = evaluate(masks_pred, dataset, cell_types)

# One record per image: ground-truth / predicted cell type, encoded masks, boxes and score.
metadata = [
    {
        'id': img_id,
        'cell_type': cell_type,
        'cell_type_pred': cell_type_pred,
        'rles': [rle_encode(mask) for mask in masks],
        'boxes': boxes.tolist(),
        'score': score
    }
    for masks, boxes, cell_type_pred, img_id, score, cell_type in zip(
        masks_pred,
        boxes_pred,
        cell_types,
        dataset.df['id'].values,
        scores,
        dataset.df['cell_type'].values,
    )
]

# Per-class score lists (three classes).
all_scores = [[], [], []]
for class_idx, class_scores in enumerate(scores_per_class):
    all_scores[class_idx] += class_scores

df_preds_oof = pd.DataFrame(metadata)


In [None]:
# Overall mean score, then the mean score per ground-truth cell type.
print(f' -> IoU mAP : {df_preds_oof.score.mean():.4f}\n')  # [1000, 8000, 1000]
df_preds_oof.groupby('cell_type')[['score']].mean()

## Several Exps (not used)

In [None]:
# Experiment folders whose saved predictions are processed in this section.
# Disabled alternatives: "ens_7/" (listed before "ens_8/") and "ens_12/" (listed after).
FOLDERS = [f"{OUT_PATH}{name}" for name in ("ens_8/",)]

In [None]:
# Post-processing thresholds for the multi-experiment section.
# THRESHOLDS_CONF / THRESHOLDS_NMS hold one row per entry of FOLDERS (same order);
# the commented rows line up with the commented folders. Inside a row there is
# one value per cell-type index, since `process_masks` indexes list thresholds
# by the inferred cell type.
THRESHOLDS_CONF = [
#     [0.35, 0.45, 0.65],
    [0.3, 0.4, 0.65],
#     [0.35, 0.4, 0.7]
]
THRESHOLDS_NMS = [
#     [0.1, 0.1, 0.05],
    [0.1, 0.1, 0.15],
#     [0.1, 0.05, 0.05],
]
# Shared by all experiments: one mask-binarization threshold per cell-type index.
THRESHOLDS_MASK = [0.45, 0.45, 0.45]

In [None]:
# Thresholds applied to the merged (ensembled) predictions, one value per cell-type index.
THRESHOLDS_CONF_ENS = [0.3, 0.4, 0.65]
THRESHOLDS_NMS_ENS = [0.1, 0.1, 0.05]

In [None]:
# Rebuild the dataframe and dataset so this section can run on its own
# (same settings as the "Single xp" preparation cell).
df = prepare_data(fix=False, remove_anomalies=True)

pipelines = define_pipelines("configs/config_aug.py")

dataset = SartoriusDataset(
    df,
    transforms=pipelines['val_viz'],
    precompute_masks=False,
)

### PP

In [None]:
def process_masks(boxes, masks, thresholds_mask, thresholds_nms, thresholds_conf, min_sizes=0, remove_overlap=True, corrupt=False):
    """
    Post-processes the predictions of a single image.

    Steps, in order: infer the cell type, binarize the masks, sort by decreasing
    confidence, drop low-confidence predictions, apply mask NMS, remove small
    masks, optionally corrupt the masks and optionally remove overlaps.

    Every threshold argument is either a scalar, used as-is, or a sequence
    indexed by the inferred cell type.

    Args:
        boxes (np array [n x 6]): Predicted boxes. Column 4 is read as the
            confidence and column 5 as the cell-type index.
        masks (np array [n x ...]): Predicted masks, one per box. They are
            compared against `threshold * 255`, so values are presumably in [0, 255].
        thresholds_mask (scalar, sequence or None): Binarization threshold(s).
            None skips the binarization.
        thresholds_nms (scalar or sequence): NMS threshold(s). NMS is skipped when <= 0.
        thresholds_conf (scalar or sequence): Confidence threshold(s).
        min_sizes (scalar or sequence, optional): Minimum mask size(s) passed to
            `remove_small_masks`. Defaults to 0.
        remove_overlap (bool, optional): Whether to call `remove_overlap_naive`. Defaults to True.
        corrupt (bool, optional): Whether to apply `corrupt_mask` to the masks
            when the cell type is 1 (astro). Defaults to False.

    Returns:
        masks: Processed masks.
        boxes: Boxes kept after filtering.
        cell (int): Inferred cell-type index.

    Raises:
        ValueError: If `boxes` is empty, since the cell type cannot be inferred.
    """
    if len(boxes) == 0:
        raise ValueError("process_masks needs at least one box to infer the cell type.")

    # Cell type: majority vote over the per-box classes
    cell = np.argmax(np.bincount(boxes[:, 5].astype(int)))

    def _select(values):
        # Scalars (python or numpy) and None are shared by all cell types
        if values is None or np.isscalar(values):
            return values
        return values[cell]

    # Thresholds
    thresh_mask = _select(thresholds_mask)
    thresh_nms = _select(thresholds_nms)
    thresh_conf = _select(thresholds_conf)
    min_size = _select(min_sizes)

    # Binarize
    if thresh_mask is not None:
        masks = masks > (thresh_mask * 255)

    # Sort by decreasing conf
    order = np.argsort(boxes[:, 4])[::-1]
    masks = masks[order]
    boxes = boxes[order]

    # Remove low confidence: cut at the first prediction below the threshold
    below = boxes[:, 4] < thresh_conf
    last = int(np.argmax(below)) if below.any() else len(boxes)
    masks = masks[:last]
    boxes = boxes[:last]

    # NMS
    if thresh_nms > 0:
        masks, boxes, _ = mask_nms(masks, boxes, thresh_nms)

    # Remove small masks
    masks, boxes = remove_small_masks(masks, boxes, min_size=min_size)

    # Corrupt
    if corrupt and cell == 1:  # astro
        masks = np.array([corrupt_mask(mask)[0] for mask in masks])

    # Remove overlap
    if remove_overlap:
        masks = remove_overlap_naive(masks)

    return masks, boxes, cell

In [None]:
def merge_preds_union(boxes, masks):
    """
    Merges the predictions of several models by taking their union.

    Args:
        boxes (list of np arrays [n_i x 6]): Boxes of each model, confidence in column 4.
        masks (list of np arrays [n_i x ...]): Masks of each model, aligned with the boxes.

    Returns:
        np array: All boxes, sorted by decreasing confidence.
        np array: All masks, in the same order as the boxes.
    """
    all_boxes = np.concatenate(boxes)
    all_masks = np.concatenate(masks)

    # Most confident predictions first
    ranking = np.argsort(all_boxes[:, 4])[::-1]

    return all_boxes[ranking], all_masks[ranking]

In [None]:
def merge_preds_vote(boxes, masks, iou_threshold=0.75):
    """
    Merges the predictions of exactly three models with a voting rule:
    a prediction is kept when its best IoU with the predictions of at least
    one of the two other models exceeds `iou_threshold`.

    Args:
        boxes (list of 3 np arrays [n_i x 6]): Boxes of each model.
        masks (list of 3 np arrays [n_i x h x w]): Masks of each model, aligned with the boxes.
        iou_threshold (float, optional): IoU above which two masks are matched. Defaults to 0.75.

    Returns:
        np array: Kept boxes, concatenated in model order.
        np array: Kept masks, in the same order as the boxes.
    """
    assert len(boxes) == 3

    rles = [[pycocotools.mask.encode(np.asarray(p, order='F')) for p in masks_] for masks_ in masks]

    def _pairwise_iou(rles_a, rles_b):
        # pycocotools returns an empty list when either side is empty:
        # use an explicitly shaped matrix instead so the reductions below stay valid.
        if len(rles_a) == 0 or len(rles_b) == 0:
            return np.zeros((len(rles_a), len(rles_b)))
        # One iscrowd flag per element of the second list
        return np.asarray(pycocotools.mask.iou(rles_a, rles_b, [0] * len(rles_b)))

    def _has_match(ious, axis):
        # Reducing over an empty axis is an error in numpy: nothing can match then.
        if ious.shape[axis] == 0:
            return np.zeros(ious.shape[1 - axis], dtype=bool)
        return ious.max(axis) > iou_threshold

    ious_01 = _pairwise_iou(rles[0], rles[1])
    ious_12 = _pairwise_iou(rles[1], rles[2])
    ious_02 = _pairwise_iou(rles[0], rles[2])

    # Keep a prediction matched by either of the two other models
    to_keep_0 = _has_match(ious_01, 1) | _has_match(ious_02, 1)
    to_keep_1 = _has_match(ious_01, 0) | _has_match(ious_12, 1)
    to_keep_2 = _has_match(ious_02, 0) | _has_match(ious_12, 0)

    new_boxes = np.concatenate([
        boxes[0][to_keep_0],
        boxes[1][to_keep_1],
        boxes[2][to_keep_2],
    ])

    new_masks = np.concatenate([
        masks[0][to_keep_0],
        masks[1][to_keep_1],
        masks[2][to_keep_2],
    ])

    return new_boxes, new_masks

In [None]:
# Debug view on a single sample: score each experiment on its own,
# then merge their predictions and score / plot the ensemble.
sample_idx = 2
id_ = df['id'].values[sample_idx]

preds_mask = [np.load(f + f"masks_{id_}.npy") for f in FOLDERS]
preds_boxes = [np.load(f + f"boxes_{id_}.npy") for f in FOLDERS]

data = dataset[sample_idx]
img = data['img']
truth = data['gt_masks'].masks.copy().astype(int)
rle_truth = dataset.encodings[sample_idx].tolist()

bs, ms, rles = [], [], []
# Kept local to this cell so that the `scores` of the Eval section are not modified.
scores_single = []

for model_idx, (m, b) in enumerate(zip(preds_mask, preds_boxes)):
    if m.shape[-1] == 6:
        b, m = m, b  # swap: the array with 6 columns is the boxes one

    masks, boxes, cell = process_masks(
        b.copy(),
        m.copy(),
        THRESHOLDS_MASK,
        THRESHOLDS_NMS[model_idx],
        THRESHOLDS_CONF[model_idx],
        min_sizes=0,
        remove_overlap=True,
        corrupt=False,
    )

    rle_pred = [pycocotools.mask.encode(np.asarray(p, order='F')) for p in masks]
    iou = pycocotools.mask.iou(rle_truth, rle_pred, [0] * len(rle_pred))
    score = iou_map(ious=[iou])

    bs.append(boxes)
    ms.append(masks)
    rles.append(rle_pred)
    scores_single.append(score)

    print(f' -> IoU mAP : {score:.4f}\n')

# Voting needs exactly three experiments; otherwise fall back to the plain union.
if len(bs) == 3:
    boxes_m, masks_m = merge_preds_vote(bs, ms)
else:
    boxes_m, masks_m = merge_preds_union(bs, ms)

print(boxes_m.shape, masks_m.shape)

# Masks are already binary at this point, hence the 0. mask threshold.
masks_m, boxes_m, cell = process_masks(
    boxes_m,
    masks_m,
    0.,
    THRESHOLDS_NMS_ENS,
    THRESHOLDS_CONF_ENS,
    min_sizes=0,
    remove_overlap=False,
    corrupt=False,
)
print(masks_m.shape)

rle_pred = [pycocotools.mask.encode(np.asarray(p, order='F')) for p in masks_m]
iou = pycocotools.mask.iou(rle_truth, rle_pred, [0] * len(rle_pred))
score = iou_map(ious=[iou])

print(f' -> IoU mAP : {score:.4f}\n')

plt.figure(figsize=(15, 10))
plot_sample(img, mask=masks_m.astype(int), boxes=boxes_m)
plt.axis(False)
plt.show()

In [None]:
# Interactive comparison of the merged predictions with the ground truth
# for the sample processed in the previous cell.
fig = plot_preds_iou(img, masks_m.astype(int), truth, plot_tp=True)
fig.update_layout(autosize=False, width=900, height=700)
fig.show()

In [None]:
# Minimum mask size intended for the multi-experiment scoring below.
# NOTE(review): looks like a pixel count - confirm.
MIN_SIZE = 0

In [None]:
# Same values as the thresholds cell at the top of this section, repeated here
# so they can be edited right before the scoring loop.
# One row per entry of FOLDERS (same order), one value per cell-type index within a row.
THRESHOLDS_CONF = [
#     [0.35, 0.45, 0.65],
    [0.3, 0.4, 0.65],
#     [0.35, 0.4, 0.7]
]
THRESHOLDS_NMS = [
#     [0.1, 0.1, 0.05],
    [0.1, 0.1, 0.15],
#     [0.1, 0.05, 0.05],
]
# Shared by all experiments: one mask-binarization threshold per cell-type index.
THRESHOLDS_MASK = [0.45, 0.45, 0.45]

In [None]:
# Score each experiment of FOLDERS on the first 30 images.
# `scores` receives one entry per (image, experiment) pair, in that order.
scores = []
for sample_idx, id_ in enumerate(tqdm(df['id'].values[:30])):
    preds_mask = [np.load(f + f"masks_{id_}.npy") for f in FOLDERS]
    preds_boxes = [np.load(f + f"boxes_{id_}.npy") for f in FOLDERS]

    rle_truth = dataset.encodings[sample_idx].tolist()

    # Post-processed predictions of each experiment for the current image.
    # To score an ensemble instead, merge `bs` / `ms` after the inner loop
    # (e.g. with `merge_preds_vote`), post-process the result with the *_ENS
    # thresholds and append a single score per image.
    bs = []
    ms = []

    for model_idx, (m, b) in enumerate(zip(preds_mask, preds_boxes)):
        if m.shape[-1] == 6:
            b, m = m, b  # swap: the array with 6 columns is the boxes one

        masks, boxes, cell = process_masks(
            b,
            m,
            THRESHOLDS_MASK,
            THRESHOLDS_NMS[model_idx],
            THRESHOLDS_CONF[model_idx],
            min_sizes=MIN_SIZE,
            remove_overlap=True,
            corrupt=True,
        )

        rle_pred = [pycocotools.mask.encode(np.asarray(p, order='F')) for p in masks]
        iou = pycocotools.mask.iou(rle_truth, rle_pred, [0] * len(rle_pred))
        score = iou_map(ious=[iou])

        scores.append(score)
        bs.append(boxes)
        ms.append(masks)

In [None]:
# Attach the scores to the rows they were computed for, then summarise.
df_preds_oof = df.head(len(scores)).copy()  # 0
df_preds_oof['score'] = scores

print(f' -> IoU mAP : {df_preds_oof.score.mean():.4f}\n')
df_preds_oof.groupby('cell_type')[['score']].mean()

- cort : 75
- astro : 125
- shsy5y : 50