**About**: This notebook runs ensemble inference, either on the test data or on the unlabeled `train_semi_supervised` images to generate pseudo-labels (PLs).

In [None]:
# %load_ext nb_black
%load_ext autoreload
%autoreload 2

In [None]:
cd ../src/

## Initialization

In [None]:
import gc
import os
import ast
import sys
import cv2
import glob
import json
import warnings
import pycocotools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm

warnings.simplefilter("ignore", UserWarning)

In [None]:
from params import *

from data.preparation import sub_to_mmdet
from data.transforms import define_pipelines
from data.dataset import SartoriusInferenceDataset

from utils.logger import Config
from utils.rle import rle_decode
from utils.plots import plot_sample

from inference.test import inference

In [None]:
# Run-mode switch:
#   True  -> predict on the "train_semi_supervised" images to build pseudo-labels (PLs)
#   False -> predict on the "test" images listed in the sample submission
IS_PL = True

# Fold selector interpolated into the "*_{FOLD}.pt" weight glob: fold 0 only in
# PL mode, otherwise the "*" wildcard so that every fold's weights are matched.
FOLD = "*" if not IS_PL else 0

## Config

In [None]:
# Experiment folders whose models form the ensemble (ENS_6), each given relative
# to LOG_PATH. Trailing comments: experiment number, architecture, and a number
# that is presumably the experiment's recorded score.
EXP_FOLDERS = [
    LOG_PATH + sub_folder
    for sub_folder in (
        "2021-12-02/7/",  # 7. Cascade b5 - 0.3179
        "2021-12-03/0/",  # 8. Cascade rx101 - 0.3189
        "seb/mrcnn_resnext101_aug_2021-12-06/",  # 18.  maskrcnn rx101 - 0.3197
        "seb/mrcnn_resnet50_aug_2021-12-07/",  # 21. maskrcnn r50 - 0.3175
        # "seb/maskrcnn_resnet50_2021-12-01/",  # 6. maskrcnn r50 - 0.3173 (disabled)
    )
]

In [None]:
# Post-processing thresholds, passed positionally to inference() right after
# ENSEMBLE_CONFIG in the order mask, NMS, confidence.
# NOTE(review): the 3-element lists presumably hold one value per class
# (ENSEMBLE_CONFIG["num_classes"] is 3) — confirm against inference.test.
THRESHOLDS_MASK = 0.45
THRESHOLDS_NMS = [0.1, 0.05, 0.05]
THRESHOLDS_CONF = [0.35, 0.4, 0.7]

In [None]:
# Ensemble settings handed to inference().
# NOTE(review): the 3-element lists presumably hold one value per class
# (num_classes is 3) — confirm against the inference code.
ENSEMBLE_CONFIG = dict(
    use_tta=True,
    num_classes=3,
    # Keys prefixed "rpn_"
    rpn_nms_pre=[3000, 2000, 1000],
    rpn_iou_threshold=[0.75, 0.75, 0.6],
    rpn_score_threshold=[0.95, 0.9, 0.95],
    rpn_max_per_img=[None, None, None],  # [1500, 1000, 500],
    # Box NMS switch and keys prefixed "rcnn_"
    bbox_nms=True,
    rcnn_iou_threshold=[0.75, 0.9, 0.6],
    rcnn_score_threshold=[0.2, 0.3, 0.5],
)

In [None]:
# Build the table of images to predict on: one row per image with the columns
# 'id', 'predicted' and 'img_path'.
if IS_PL:
    # Pseudo-label mode: one row per entry of the unlabeled image folder.
    # Here 'id' is the full file name (extension included), since the path is
    # built as folder + id.
    pl_dir = DATA_PATH + "train_semi_supervised/"
    # os.listdir() returns entries in arbitrary order; sort them so the row order
    # (and therefore which images get visualised) is reproducible across runs.
    df = pd.DataFrame({'id': sorted(os.listdir(pl_dir))})
    df['predicted'] = ""
    df['img_path'] = pl_dir + df['id']
    # df = df.head(5)  # uncomment for a quick debugging run on a few images
else:
    # Test mode: ids come from the sample submission and carry no extension,
    # so ".png" is appended to build the image path.
    df = pd.read_csv(DATA_PATH + "sample_submission.csv")
    df['img_path'] = DATA_PATH + "test/" + df['id'] + ".png"

In [None]:
# Load each experiment's config and collect its weight files.
# configs[i] and weights[i] both describe EXP_FOLDERS[i].
configs, weights = [], []

for exp_folder in EXP_FOLDERS:
    # Use a context manager so the config file handle is closed right away
    # instead of being left open for the garbage collector.
    with open(exp_folder + "config.json", 'r') as config_file:
        config = Config(json.load(config_file))

    # The stored config paths point to where the experiment was run; keep only
    # the file names and resolve them inside the experiment folder.
    config.model_config = exp_folder + config.model_config.split('/')[-1]
    config.data_config = exp_folder + config.data_config.split('/')[-1]
    configs.append(config)

    # FOLD is either a fold number or "*", in which case all folds are matched.
    weights.append(sorted(glob.glob(exp_folder + f"*_{FOLD}.pt")))

## Single fold

### Inference

In [None]:
%%time
# Run ensemble inference over the images listed in df, using every loaded
# experiment config together with its weight files.
# NOTE(review): presumably returns, per row of df, a collection of RLE-encoded
# instance masks (later cells zip it with df['id'] and rle_decode each entry) —
# confirm in inference.test.
rles = inference(
    df,
    configs,
    weights,
    ENSEMBLE_CONFIG,
    THRESHOLDS_MASK,
    THRESHOLDS_NMS,
    THRESHOLDS_CONF,
    corrupt=not IS_PL,  # False in pseudo-label mode, True otherwise
    remove_overlap=not IS_PL,  # driven by the same switch as `corrupt`
)

### Visualization

In [None]:
# Plot the predictions of the first few images and flatten all predictions into
# a submission-style table with one (id, predicted) row per instance.
submission = []

# `config` is the last experiment config left over from the loading loop; its
# data config defines the 'test_viz' pipeline used for display.
pipelines = define_pipelines(config.data_config)
dataset = SartoriusInferenceDataset(df, transforms=pipelines['test_viz'], precompute_masks=False)

for idx, (rle, img_id) in enumerate(zip(rles, df['id'].values)):
    if idx < 3:  # Only visualise the first three images
        # Channel-first tensor -> channel-last array, min-max scaled for display
        # and cropped to the original image size.
        img = dataset[idx]['img'][0].numpy().transpose(1, 2, 0)
        img = (img - img.min()) / (img.max() - img.min())
        img = img[:ORIG_SIZE[0], :ORIG_SIZE[1]]

        masks = np.array([rle_decode(enc, ORIG_SIZE) for enc in rle])

        plt.figure(figsize=(15, 15))
        plot_sample(img, masks.astype(int))
        plt.axis(False)
        plt.title(img_id)
        plt.show()

    if len(rle):
        submission.extend((img_id, enc) for enc in rle)
    else:
        # No prediction for this image: keep it in the table with an empty
        # encoding. (The original referenced the undefined name `image_id`
        # here, which raised a NameError on the first empty prediction.)
        submission.append((img_id, ""))

df_sub = pd.DataFrame(submission, columns=['id', 'predicted'])

### Generate pseudo-labels (PLs)

In [None]:
# Collapse the flat (id, predicted) rows into one row per image id, with that
# image's encodings gathered into a list.
df_g = df_sub.groupby('id').agg(list).reset_index()
# Convert the first grouped row as a quick check; sub_to_mmdet returns a pair,
# unpacked here as (masks, meta).
# NOTE(review): the exact contents of both are defined in data.preparation — confirm there.
masks, meta = sub_to_mmdet(df_g, 0)

# Optional visual check of the converted masks and boxes (disabled):
# plt.figure(figsize=(15, 10))
# plot_sample(255 * np.ones(ORIG_SIZE, dtype=np.uint8), masks.astype(int), meta['ann']['bboxes'], width=1)
# plt.axis(False)
# plt.show()

In [None]:
# Convert every grouped row and keep only the second element of what
# sub_to_mmdet returns (the per-image meta record), then dump them to one CSV
# named after the current FOLD.
n_images = len(df_g)
metas = [sub_to_mmdet(df_g, row)[1] for row in range(n_images)]
meta_df = pd.DataFrame.from_dict(metas)

pl_csv_path = OUT_PATH + f"pl_ens15_{FOLD}.csv"
meta_df.to_csv(pl_csv_path, index=False)
print(f' -> Saved pls to "{OUT_PATH}pl_ens15_{FOLD}.csv"')

## All folds

In [None]:
# Generate one pseudo-label CSV per fold: for each fold, reload the experiment
# configs with that fold's weights, run ensemble inference on df and save the
# resulting meta records.
assert IS_PL, "The all-folds loop only generates pseudo-labels; set IS_PL = True."

# NOTE: the loop variable reuses the notebook-level FOLD name, so FOLD is left
# at 4 once this cell finishes.
for FOLD in range(0, 5):
    # Reload every experiment's config and collect its weights for this fold.
    configs, weights = [], []
    for exp_folder in EXP_FOLDERS:
        # Context manager so the config file handle is closed immediately.
        with open(exp_folder + "config.json", 'r') as config_file:
            config = Config(json.load(config_file))

        # Resolve the stored config file names inside the experiment folder.
        config.model_config = exp_folder + config.model_config.split('/')[-1]
        config.data_config = exp_folder + config.data_config.split('/')[-1]
        configs.append(config)

        weights.append(sorted(glob.glob(exp_folder + f"*_{FOLD}.pt")))

    rles = inference(
        df,
        configs,
        weights,
        ENSEMBLE_CONFIG,
        THRESHOLDS_MASK,
        THRESHOLDS_NMS,
        THRESHOLDS_CONF,
        corrupt=not IS_PL,
        remove_overlap=not IS_PL,
    )

    # Flatten predictions into (id, predicted) rows, one per instance.
    submission = []
    for rle, img_id in zip(rles, df['id'].values):
        if len(rle):
            submission.extend((img_id, enc) for enc in rle)
        else:
            # Image without predictions: keep it with an empty encoding.
            # (The original referenced the undefined name `image_id` here,
            # which raised a NameError on the first empty prediction.)
            submission.append((img_id, ""))
    df_sub = pd.DataFrame(submission, columns=['id', 'predicted'])

    # One row per image, encodings gathered into a list.
    df_g = df_sub.groupby('id').agg(list).reset_index()

    # Keep only the second element returned by sub_to_mmdet for each image.
    metas = [sub_to_mmdet(df_g, i)[1] for i in range(len(df_g))]
    meta_df = pd.DataFrame.from_dict(metas)
    meta_df.to_csv(OUT_PATH + f"pl_ens15_{FOLD}.csv", index=False)
    print(f' -> Saved pls to "{OUT_PATH}pl_ens15_{FOLD}.csv"\n\n')