**About**: This notebook runs inference on the validation data, tunes the per-class confidence and mask thresholds, and visualizes the predictions.

In [None]:
# %load_ext nb_black
%load_ext autoreload
%autoreload 2

In [None]:
cd ../src/

## Initialization

### Imports

In [None]:
import gc
import os
import ast
import sys
import cv2
import glob
import json
import torch
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm
warnings.simplefilter("ignore", UserWarning)

In [None]:
# Project-local imports. Names used later without an explicit import here
# (e.g. LOG_PATH, CELL_TYPES, evaluate_results, plot_sample) presumably come
# from the star imports below - verify against the modules.
from params import *

from utils.plots import *
from utils.metrics import *
from utils.logger import Config

from data.preparation import prepare_data
from data.dataset import SartoriusDataset
from data.transforms import define_pipelines
from inference.post_process import post_process_preds
from inference.validation import inference_val_ens, inference_val

# NOTE(review): this rebinds `post_process_preds`, shadowing the import from
# inference.post_process a few lines up - confirm which implementation is
# the intended one.
from utils.metrics import post_process_preds

## Single models

In [None]:
# Experiment folder read by the single-model config/weights cell below.
EXP_FOLDER = LOG_PATH + "2021-11-10/21/"

In [None]:
# Load the experiment config; the context manager closes the file handle as
# soon as the JSON has been parsed.
with open(EXP_FOLDER + "config.json", 'r') as f:
    config = Config(json.load(f))

# Keep only the file name of each config path and re-root it onto EXP_FOLDER.
config.model_config = EXP_FOLDER + config.model_config.split('/')[-1]
config.data_config = EXP_FOLDER + config.data_config.split('/')[-1]

# Every *.pt file found in the experiment folder, sorted by path.
weights = sorted(glob.glob(EXP_FOLDER + "*.pt"))

In [None]:
# df = prepare_data(fix=config.fix)
# results, df_oof = inference_val(df, config, weights)

## Ensembles

In [None]:
# NOTE(review): on a top-to-bottom run this value is never read - EXP_FOLDERS
# is reassigned by the next two cells before the configs are loaded.
EXP_FOLDERS = [  # single models - LB 0.321
    LOG_PATH + "2021-11-10/10/",  # 0.3119 / 0.3081  \
    LOG_PATH + "2021-11-10/11/",  # 0.3109 / 0.3079  |-> 0.3153 / 0.3112
    LOG_PATH + "2021-11-10/12/",  # 0.3122 / 0.3091 /
]

# EXP_FOLDERS = [  # single models
#     LOG_PATH + "2021-11-10/16/",  # 0.3108 / 0.3075   \
#     LOG_PATH + "2021-11-10/15/",  # 0.3107 / 0.3077   |-> 0.3165 / 0.3133
#     LOG_PATH + "2021-11-10/19/",  # 0.3101 / 0.3074   |
#     LOG_PATH + "2021-11-10/20/",  # 0.3151 / 0.3116  /
# ]

In [None]:
# NOTE(review): on a top-to-bottom run this value is never read - EXP_FOLDERS
# is reassigned by the next cell before the configs are loaded.
EXP_FOLDERS = [  # single models - livecell (r50)
#     LOG_PATH + "2021-11-12/2/",  # 0.3151 / 0.3118   - pretrain
#     LOG_PATH + "2021-11-13/0/",  # 0.3130 / 0.3093   - 700 ext
    LOG_PATH + "2021-11-13/1/",  # 0.3141 / 0.3112   - schedule
#     LOG_PATH + "2021-11-13/3/",  # 0.3149 / 0.3119   - schedule + pretrain
#     LOG_PATH + "2021-11-11/7/",  # 0.3111 / 0.3084
#     LOG_PATH + "2021-11-10/21/",  # 0.3118 / 0.3102

#     LOG_PATH + "2021-11-13/5/",  # 0.3130 / 0.3100   - schedule + single
#     LOG_PATH + "2021-11-15/1/",   # 0.3139 / 0.3097  - schedule + pretrain r101
]

In [None]:
# Last assignment of EXP_FOLDERS before the config/weights loading cell, i.e.
# the ensemble actually used on a top-to-bottom run.
EXP_FOLDERS = [
    LOG_PATH + "2021-11-10/21/",  # 0.3078 / 0.3042  \
    LOG_PATH + "2021-11-11/0/",   # 0.3068 / 0.3040  |-> 0.3121x / 0.309x
    LOG_PATH + "2021-11-11/1/",   # 0.3088 / 0.3045 /
#     LOG_PATH + "2021-11-11/3/",  # 0.3084 / 0.3046
#     LOG_PATH + "2021-11-11/7/",  # 0.3045 / 0.3012
#     LOG_PATH + "2021-11-12/0/",  # 0.3077 / 0.3044
    LOG_PATH + "2021-11-15/3/",  # 0.3077 / 0.3044
]

In [None]:
# Forwarded as `use_tta` to inference_val_ens in the inference cell below.
USE_TTA = False

In [None]:
# One config and one list of checkpoint paths per experiment folder, in
# EXP_FOLDERS order.
configs, weights = [], []

for exp_folder in EXP_FOLDERS:
    # Context manager so the config file handle is closed right after parsing.
    with open(exp_folder + "config.json", 'r') as f:
        config = Config(json.load(f))

    # Keep only the file name of each config path and re-root it onto the
    # experiment folder.
    config.model_config = exp_folder + config.model_config.split('/')[-1]
    config.data_config = exp_folder + config.data_config.split('/')[-1]
    configs.append(config)

    # Every *.pt file of this experiment, sorted by path.
    weights.append(sorted(glob.glob(exp_folder + "*.pt")))

In [None]:
%%time
# Build the dataframe, then run validation inference with every loaded
# config / checkpoint list. `results` and `df_oof` feed the evaluation cells.
df = prepare_data(fix=False)
results, df_oof = inference_val_ens(df, configs, weights, use_tta=USE_TTA)

## Evaluation

In [None]:
# `config` is whatever the config-loading loop left bound, i.e. the config of
# the last folder in EXP_FOLDERS; its data config defines the pipelines.
pipelines = define_pipelines(config.data_config)
# Dataset over the out-of-fold dataframe, using the 'val_viz' pipeline.
dataset = SartoriusDataset(df_oof, transforms=pipelines['val_viz'])

In [None]:
# Candidate confidence thresholds swept in the "Tweak thresholds" section.
thresholds_conf_tweak = [0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9]
# NOTE(review): reassigned (with an extra 0.6) at the top of the
# mask-threshold sweep cell, so this value is not the one actually swept.
thresholds_mask_tweak = [0.35, 0.4, 0.45, 0.5, 0.55]

# Default thresholds (three entries, presumably one per cell type - confirm
# against CELL_TYPES); overwritten by the sweep cells when those are run.
best_thresholds_conf = [0.5, 0.5, 0.5]
best_thresholds_mask = [0.5, 0.5, 0.5]

### Tweak thresholds

In [None]:
# Sweep the confidence threshold (mask threshold fixed at 0.5) and store, for
# each cell type, the per-class score obtained at every candidate value.
scores = [dict() for _ in CELL_TYPES]
for th_conf in tqdm(thresholds_conf_tweak):
    score, scores_classes = evaluate_results(dataset, results, th_conf, 0.5, remove_overlap=False)

    for class_scores, class_score in zip(scores, scores_classes):
        class_scores[th_conf] = class_score

# Per cell type, keep the candidate with the highest score (first one on ties).
best_thresholds_conf = []
for class_scores in scores:
    candidates = list(class_scores.keys())
    best_idx = np.argmax(list(class_scores.values()))
    best_thresholds_conf.append(candidates[best_idx])

for cell_type, class_scores, th in zip(CELL_TYPES, scores, best_thresholds_conf):
    print(f'Best score for {cell_type} :\t{class_scores[th] :.4f}   (th_conf={th})')

In [None]:
# Candidate mask thresholds for this sweep.
thresholds_mask_tweak = [0.35, 0.4, 0.45, 0.5, 0.55, 0.6]

# Sweep the mask threshold with the confidence thresholds fixed to
# best_thresholds_conf, storing the per-class score at every candidate value.
scores = [dict() for _ in CELL_TYPES]
for th_mask in tqdm(thresholds_mask_tweak):
    score, scores_classes = evaluate_results(dataset, results, best_thresholds_conf, th_mask, remove_overlap=False)

    for class_scores, class_score in zip(scores, scores_classes):
        class_scores[th_mask] = class_score

# Per cell type, keep the candidate with the highest score (first one on ties).
best_thresholds_mask = []
for class_scores in scores:
    candidates = list(class_scores.keys())
    best_idx = np.argmax(list(class_scores.values()))
    best_thresholds_mask.append(candidates[best_idx])

for cell_type, class_scores, th in zip(CELL_TYPES, scores, best_thresholds_mask):
    print(f'Best score for {cell_type} :\t{class_scores[th] :.4f}   (th_mask={th})')

In [None]:
# Print the selected thresholds, formatted like the THRESHOLDS_CONF /
# THRESHOLDS_MASK constants assigned later in this notebook.
print(f'THRESHOLDS_CONF = {best_thresholds_conf}\nTHRESHOLDS_MASK = {best_thresholds_mask}')

### Score

In [None]:
# Score with the current best thresholds and remove_overlap=False.
# If the sweep cells were skipped, the thresholds are the 0.5 defaults.
score, scores_classes = evaluate_results(
    dataset,
    results,
    best_thresholds_conf,
    best_thresholds_mask,
    remove_overlap=False
)

print(f'IoU mAP : {score :.4f}')

In [None]:
# Same evaluation as the previous cell, but with remove_overlap=True, so the
# two printed scores can be compared.
score, scores_classes = evaluate_results(
    dataset,
    results,
    best_thresholds_conf,
    best_thresholds_mask,
    remove_overlap=True
)

print(f'IoU mAP : {score :.4f}')

### Dice

In [None]:
# One merged prediction mask per image: post-process the raw result, then take
# the maximum over the first axis of the returned masks.
masks_preds = []
for result in results:
    masks, _boxes, _cell_type = post_process_preds(
        result,
        thresholds_conf=best_thresholds_conf,
        thresholds_mask=best_thresholds_mask,
        remove_overlap=False,
    )
    merged_pred = masks.max(0)
    masks_preds.append(merged_pred)

# Matching merged ground-truth mask for every entry of dataset.masks.
masks_truth = [gt.masks.max(0) for gt in dataset.masks]

In [None]:
# Dice score between the stacked predicted and ground-truth merged masks.
# NOTE(review): np.array(...) on the lists assumes every image has the same
# shape - confirm.
dice_score(np.array(masks_preds), np.array(masks_truth))

## Viz

In [None]:
# NOTE(review): only referenced by the commented-out size filter in the
# visualization loop below, so it currently has no effect.
max_size = 1500

In [None]:
# Plot predictions and ground truth for a sample. The trailing `break` stops
# after the first iteration, so only idx == 0 is shown; img / pred / truth stay
# bound afterwards and are read by the next cell.
for idx in range(10):
    data = dataset[idx]

    img = data['img']
    truth = data['gt_masks'].masks.copy().astype(int)
    boxes_truth = data['gt_bboxes']

    # Post-processed predictions for this sample.
    masks, boxes, c = post_process_preds(
        results[idx], best_thresholds_conf, best_thresholds_mask, remove_overlap=True
    )

    # Disabled size filter:
    # sizes = np.max([boxes[:, 2] - boxes[:, 0], boxes[:, 3] - boxes[:, 1]], 0)
    # masks = masks[sizes < max_size]
    # boxes = boxes[sizes < max_size]

    # Ground-truth label map: mask k is scaled by k + 1 in place (each item
    # yielded by enumerate is a view into `truth`), then the stack is
    # collapsed with a max over the first axis.
    for label, instance_mask in enumerate(truth, start=1):
        instance_mask *= label
    truth = truth.max(0)

    # Same construction for the predicted masks.
    pred = masks.copy().astype(int)
    for label, instance_mask in enumerate(pred, start=1):
        instance_mask *= label
    pred = pred.max(0)

    score = iou_map([truth], [pred])

    # First figure: predictions; second figure: ground truth. Same title on both.
    for label_map, label_boxes in ((pred, boxes), (truth, boxes_truth)):
        plt.figure(figsize=(15, 15))
        plot_sample(img, label_map, label_boxes, plotly=False)
        plt.axis(False)
        plt.title(f'{CELL_TYPES[c]} - iou_map={score:.3f}')
        plt.show()

    break

In [None]:
# Uses img / pred / truth left bound by the visualization loop above (the
# sample it stopped on).
fig = plot_preds_iou(img, pred, truth, plot_tp=True)

# Fixed figure size instead of automatic sizing.
fig.update_layout(
    autosize=False,
    width=900,
    height=700,
)

fig.show()

## Single image exploration

In [None]:
# Experiments used for the single-image section. This rebinds EXP_FOLDERS and
# USE_TTA, replacing the values used for the ensemble evaluation above.
EXP_FOLDERS = [
#     LOG_PATH + "2021-11-12/2/",  # 0.3151 / 0.3118   - pretrain
    LOG_PATH + "2021-11-13/1/",  # 0.3141 / 0.3112   - schedule
    LOG_PATH + "2021-11-13/3/",  # 0.3149 / 0.3119   - schedule + pretrain
#     LOG_PATH + "2021-11-15/1/",  # 0.3139 / 0.3097  - schedule + pretrain r101
#     LOG_PATH + "2021-11-10/21/"
#     LOG_PATH + "2021-11-10/20/",
]

USE_TTA = False

In [None]:
# One config and one list of checkpoint paths per experiment folder, in
# EXP_FOLDERS order.
configs, weights = [], []

for exp_folder in EXP_FOLDERS:
    # Context manager so the config file handle is closed right after parsing.
    with open(exp_folder + "config.json", 'r') as f:
        config = Config(json.load(f))

    # Keep only the file name of each config path and re-root it onto the
    # experiment folder.
    config.model_config = exp_folder + config.model_config.split('/')[-1]
    config.data_config = exp_folder + config.data_config.split('/')[-1]
    configs.append(config)

    # Every *.pt file of this experiment, sorted by path.
    weights.append(sorted(glob.glob(exp_folder + "*.pt")))

## Inference

In [None]:
from inference.validation import inference_single

In [None]:
# Rebuild the dataframe and run inference_single with idx=0 (presumably the
# index of the image to process - confirm in inference.validation).
# `all_stuff` holds the intermediate outputs unpacked in a later cell.
df = prepare_data(fix=False)
results, all_stuff, df_oof = inference_single(df, configs, weights, idx=0, use_tta=USE_TTA)

In [None]:
# `config` is the last one bound by the config-loading loop above; its data
# config defines the pipelines.
pipelines = define_pipelines(config.data_config)
# Dataset over the dataframe returned by inference_single, 'val_viz' pipeline.
dataset = SartoriusDataset(df_oof, transforms=pipelines['val_viz'])

In [None]:
# Fixed thresholds used in this section instead of the swept
# best_thresholds_conf / best_thresholds_mask.
THRESHOLDS_CONF = [0.35, 0.45, 0.8]
THRESHOLDS_MASK = [0.45, 0.45, 0.45]

In [None]:
# Score the single-image results with the fixed thresholds and
# remove_overlap=True.
score, scores_classes = evaluate_results(
    dataset,
    results,
    THRESHOLDS_CONF,
    THRESHOLDS_MASK,
    remove_overlap=True
)

print(f'IoU mAP : {score :.4f}')

## Viz stuff

In [None]:
def bbox_iou(bb1, bb2):
    """
    Computes the intersection over union (IoU) of two axis-aligned boxes.

    Only indices 0 to 3 of each box are read, as [x_left, y_top, x_right, y_bottom];
    any extra entries are ignored.

    Args:
        bb1 (sequence of numbers): First box.
        bb2 (sequence of numbers): Second box.

    Returns:
        float: Intersection area divided by union area. 0. when the boxes do
            not overlap or when the union has no area (degenerate boxes).
    """
    # Coordinates of the intersection rectangle.
    x_left = max(bb1[0], bb2[0])
    y_top = max(bb1[1], bb2[1])
    x_right = min(bb1[2], bb2[2])
    y_bottom = min(bb1[3], bb2[3])

    # Disjoint boxes: no intersection.
    if x_right < x_left or y_bottom < y_top:
        return 0.

    # The intersection of two axis-aligned boxes is itself an axis-aligned box.
    intersection_area = (x_right - x_left) * (y_bottom - y_top)

    bb1_area = (bb1[2] - bb1[0]) * (bb1[3] - bb1[1])
    bb2_area = (bb2[2] - bb2[0]) * (bb2[3] - bb2[1])
    union_area = bb1_area + bb2_area - intersection_area

    # Two coinciding zero-area boxes pass the disjointness test above but have
    # an empty union; return 0. instead of dividing by zero.
    if union_area <= 0:
        return 0.

    return intersection_area / float(union_area)

In [None]:
# First sample of the dataset: image, ground-truth masks and boxes.
data = dataset[0]

img = data['img']
truth = data['gt_masks'].masks.copy().astype(int)
boxes_truth = data['gt_bboxes']

# Post-processed predictions for the same sample, using the fixed thresholds.
masks, boxes, c = post_process_preds(
    results[0], THRESHOLDS_CONF, THRESHOLDS_MASK, remove_overlap=True
)

# Disabled size filter:
# sizes = np.max([boxes[:, 2] - boxes[:, 0], boxes[:, 3] - boxes[:, 1]], 0)
# masks = masks[sizes < max_size]
# boxes = boxes[sizes < max_size]

# Ground-truth label map: mask k is scaled by k + 1 in place (each item yielded
# by enumerate is a view into `truth`), then the stack is collapsed with a max
# over the first axis.
for label, instance_mask in enumerate(truth, start=1):
    instance_mask *= label
truth = truth.max(0)

# Same construction for the predicted masks.
pred = masks.copy().astype(int)
for label, instance_mask in enumerate(pred, start=1):
    instance_mask *= label
pred = pred.max(0)

In [None]:
# Unpack the ten intermediate outputs returned by inference_single as
# `all_stuff`; unpacking fails if the number of items differs.
(
    proposal_list, aug_proposals,
    bbox_result, det_bboxes, det_labels, merged_bboxes, aug_bboxes,
    segm_result, merged_masks, aug_masks
) = all_stuff 

In [None]:
# Display the size reported by merged_bboxes.
merged_bboxes.size()

In [None]:
# Display the size reported by det_bboxes.
# NOTE(review): a later cell rebinds det_bboxes to a NumPy array, where
# `.size` is an attribute rather than a method, so re-running this cell after
# that one fails.
det_bboxes.size()

In [None]:
# First entry of proposal_list, converted to a NumPy array.
proposals = proposal_list[0].cpu().numpy()
# proposals = aug_proposals[0][0].cpu().numpy()

In [None]:
# Lengths of the first two entries of aug_proposals[0].
len(aug_proposals[0][0].cpu().numpy()), len(aug_proposals[0][1].cpu().numpy())

In [None]:
# NOTE(review): same expression as an earlier inspection cell.
merged_bboxes.size()

In [None]:
# Display the size reported by the first entry of aug_bboxes.
aug_bboxes[0].size()

In [None]:
# Convert the detections to a NumPy array. The guard makes the cell safe to
# re-run: once converted, det_bboxes is an ndarray and has no .cpu().
if not isinstance(det_bboxes, np.ndarray):
    det_bboxes = det_bboxes.cpu().numpy()

In [None]:
# Number of proposals, and lengths of the first two entries of aug_bboxes.
len(proposals), len(aug_bboxes[0]), len(aug_bboxes[1])

In [None]:
# Length of the first entry of bbox_result.
len(bbox_result[0])

In [None]:
# Number of rows in det_bboxes.
len(det_bboxes)

In [None]:
# Draw the proposal boxes over the image, without any mask.
plt.figure(figsize=(15, 10))
plot_sample(img, mask=None, boxes=proposals)
plt.axis(False)
plt.show()

In [None]:
# A ground-truth box counts as "hit" when some predicted box reaches at least
# this IoU with it.
threshold_hit = 0.4

plt.figure(figsize=(15, 5))

# missed[0] / missed[1]: ground-truth boxes whose best IoU with the proposals /
# with det_bboxes is below threshold_hit.
missed = []
for i, preds in enumerate((proposals, det_bboxes)):
    # Best IoU reached by any predicted box, for each ground-truth box.
    # `bbox_iou` is the helper defined earlier in this notebook (it only reads
    # indices 0-3 of each box); `default=0.` covers an empty set of predicted
    # boxes, in which case every ground-truth box is missed.
    max_ious = np.array([
        max((bbox_iou(b, prop) for prop in preds), default=0.)
        for b in boxes_truth
    ])
    missed.append(boxes_truth[max_ious < threshold_hit])

    # Left panel: proposals, right panel: det_bboxes.
    plt.subplot(1, 2, i + 1)
    sns.histplot(max_ious, bins=20)
    plt.axvline(threshold_hit, c="salmon")
    plt.title('proposals' if i == 0 else "det_bboxes")

plt.show()

In [None]:
# Show the image alone through plot_sample's plotly=True mode.
plot_sample(img, plotly=True)

In [None]:
# Predictions vs. ground truth, with the missed ground-truth boxes overlaid:
# boxes   = missed[1], the boxes missed by det_bboxes
# boxes_2 = missed[0], the boxes missed by the proposals
fig = plot_preds_iou(
    img,
    pred,
    truth,
    boxes=missed[1],
    boxes_2=missed[0],
    plot_tp=True)

# Fixed figure size instead of automatic sizing.
fig.update_layout(
    autosize=False,
    width=900,
    height=700,
)

fig.show()