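**About**: This notebook runs inference with trained YOLOX extravasation detection models, evaluates them on the validation images, and saves per-fold predicted boxes and confidences.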

In [None]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell runs, so edits to the project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
cd ../src/

## Initialization

### Imports

In [None]:
import os
import re
import cv2
import sys
import glob
import yaml
import shutil
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm import tqdm

# Pandas display settings: wider console output, more visible columns and
# longer cell contents before truncation.
for option, value in (
    ('display.width', 1000),
    ('display.max_columns', 30),
    ('max_colwidth', 100),
):
    pd.set_option(option, value)

In [None]:
from params import *
from util.plots import *
from data.preparation import *
from util.metrics import compute_metrics
from inference.det import *

## Data

In [None]:
# Disabled exploration code: loads the prepared metadata and the
# active-extravasation bounding-box CSV, renames its id columns and merges
# it with the image table. Uncomment to inspect the raw annotations.

# df_patient, df_img = prepare_data(DATA_PATH)

# df = pd.read_csv('../input/active_extravasation_bounding_boxes.csv')
# df = df.rename(columns={"pid": "patient_id", "series_id": "series", "instance_number": "instance"})
# df = df.merge(df_img)

In [None]:
# Disabled exploration code: draws 4 randomly chosen annotated images side by
# side with their pascal_voc boxes. It relies on the `df` built by the
# (also disabled) annotation-loading cell.

# plt.figure(figsize=(20, 5))

# for i in range(1, 5):
#     plt.subplot(1, 4, i)

#     idx = np.random.choice(len(df))
#     img = cv2.imread(df['path'].values[idx])
#     boxes = df[["x1", "y1", "x2", "y2"]].values[idx]

#     plot_boxes(img, boxes, bbox_format="pascal_voc")

# plt.show()

## Inference

In [None]:
class Config:
    """
    Settings shared by the inference cells of this notebook.

    All values are plain class attributes; the paths (`data_dir`, `cfg`,
    `ckpt`) are derived from `fold`, `version` and `exp` when the class body
    is executed. The full-inference loop overwrites some of them per fold.
    """
    # Model / box conventions
    selected_model = "yolo"
    bbox_format = "yolo"
    pred_format = "pascal_voc"

    # Experiment identification. Use fold = "fullfit" to select the model
    # trained on all data instead of a single fold.
    fold = 0
    version = "v1"
    exp = 2

    name = (
        f"rsna_{version}_fullfit_{exp}"
        if fold == "fullfit"
        else f"rsna_{version}_fold{fold}_{exp}"
    )

    # Locations of the data, the experiment file and the weights.
    # Swap "best_ckpt.pth" for "last_epoch_ckpt.pth" to use the last epoch.
    data_dir = f"../input/yolo/v1/{fold}_train/"
    cfg = f"../yolox/exps/{name}.py"
    ckpt = f"../yolox/YOLOX_outputs/{name}/best_ckpt.pth"

    labels = ["extravasation"]

    size = (384, 384)

    # Non-maximum suppression settings
    conf_thresh = 0.01
    iou_thresh = 0.5
    max_per_img = 1

    # Data loading / runtime
    num_workers = 8
    val_bs = 64
    device = "cuda"

In [None]:
# Build the YOLOX model from the experiment file and checkpoint named in
# Config, then wrap it together with the Config for prediction.
model_marker = YoloXWrapper(
    retrieve_yolox_model(Config.cfg, Config.ckpt, size=Config.size),
    Config,
)

In [None]:
# List the validation images of the configured fold. glob returns files in an
# arbitrary, filesystem-dependent order, so the paths are sorted to keep row
# indices (and therefore the samples plotted later) reproducible.
df = pd.DataFrame({"path": sorted(glob.glob(Config.data_dir + "images/valid/*"))})

# The ground-truth label file mirrors the image path: "images" -> "labels"
# and the ".png" extension -> ".txt".
df['gt_path'] = df['path'].apply(lambda x: re.sub("images", "labels", x))
# The dot is escaped and the pattern anchored to the end of the path; the
# previous pattern ".png" matched any character followed by "png", anywhere.
df['gt_path'] = df['gt_path'].apply(lambda x: re.sub(r"\.png$", ".txt", x))

# df = df.head(100)
df.head()

In [None]:
# Step 1: run the detector on the resized / transformed validation images.
print('- Predict')
transforms = get_transfos(size=Config.size)
meter = predict(
    model_marker,
    InferenceDataset(df, transforms),
    Config,
    disable_tqdm=False,
)

# Step 2: re-open the data without transforms and hand every prediction the
# shape returned as the third element of the raw sample.
# NOTE(review): presumably update_shape maps the boxes back to the original
# image resolution - confirm in the Boxes implementation.
print('\n- Update shapes')
dataset = InferenceDataset(df, None)
for sample_idx in range(len(dataset)):
    meter.preds[sample_idx].update_shape(dataset[sample_idx][2])

In [None]:
# When True, the evaluation and full-inference cells plot every sample;
# when False they only plot one sample every 500 (evaluation) or
# 10000 (full inference) images.
PLOT = False

In [None]:
# Per-image evaluation of the predictions stored in `meter` against the
# ground-truth boxes. `dataset` is the transform-free InferenceDataset, so
# `img` is the raw image and `gt` holds the boxes in yolo format.
print('- Evaluate')

n_classes = len(Config.labels)
f1s = {c: [] for c in Config.labels}
recalls = {c: [] for c in Config.labels}

for idx in range(len(dataset)):
    img, gt, shape = dataset[idx]

    # Convert the ground truth to pascal_voc and split it per class.
    gt = Boxes(gt, (shape[0], shape[1]), bbox_format="yolo")['pascal_voc']
    gt = [gt[dataset.classes[idx] == i] for i in range(n_classes)]

    # Same per-class split for the predicted boxes and their confidences.
    preds = [meter.preds[idx]['pascal_voc'][meter.labels[idx] == i] for i in range(n_classes)]
    scores = [meter.confidences[idx][meter.labels[idx] == i] for i in range(n_classes)]

    for i, (t, pm) in enumerate(zip(gt, preds)):
        metrics = compute_metrics(pm, t)

        f1s[Config.labels[i]].append(metrics['f1_score'])
        recalls[Config.labels[i]].append(metrics['recall'])

    # Visual sanity check: every sample if PLOT, otherwise one in 500.
    if PLOT or not (idx % 500):
        plt.figure(figsize=(10, 5))
        plt.subplot(1, 2, 1)
        plot_boxes(img, gt[0], "pascal_voc")
        plt.title('Truth')

        plt.subplot(1, 2, 2)
        plot_boxes(img, preds[0], "pascal_voc")
        # An image may have no detection for the class; indexing the empty
        # score array would raise an IndexError.
        if len(scores[0]):
            plt.title(f'Pred - conf={scores[0][0]:.3f}')
        else:
            plt.title('Pred - no detection')
        plt.show()

# Report the mean of both collected metrics for every class.
print('\n')
for k in Config.labels:
    print(f'{k} Recall@1: {np.mean(recalls[k]):.3f}')
    print(f'{k} F1: {np.mean(f1s[k]):.3f}')

### Full Inference

In [None]:
# Load the two metadata tables returned by prepare_data
# (presumably patient-level and image-level - confirm in data.preparation).
df_patient, df_img = prepare_data(DATA_PATH)

# If the patient table carries no fold column, read the fold assignments
# from folds_4.csv and merge them into both tables.
if "fold" not in df_patient:
    folds = pd.read_csv(DATA_PATH + "folds_4.csv")
    df_img, df_patient = df_img.merge(folds), df_patient.merge(folds)

In [None]:
# Out-of-fold inference: for each fold, load that fold's model, predict on the
# images assigned to the fold and save the top box and confidence per image.

# Make sure the destination of the np.save calls exists before spending time
# on inference.
os.makedirs('../output', exist_ok=True)

for fold in [0, 1, 2, 3]:
    print(f'\n- Fold {fold}\n')

    # Point the shared Config at this fold's experiment.
    Config.fold = fold
    Config.name = f"rsna_{Config.version}_fold{fold}_{Config.exp}"
    Config.data_dir = f"../input/yolo/v1/{fold}_train/"
    Config.cfg = f"../yolox/exps/{Config.name}.py"
    # This must be stored on Config: the loader below reads Config.ckpt.
    # Assigning a bare `ckpt` variable left the fold-0 checkpoint in place,
    # so every fold was predicted with the same weights.
    Config.ckpt = f"../yolox/YOLOX_outputs/{Config.name}/best_ckpt.pth"

    model_marker = retrieve_yolox_model(Config.cfg, Config.ckpt, size=Config.size)
    model_marker = YoloXWrapper(model_marker, Config)

    print('\n- Predict')
    df_val = df_img[df_img['fold'] == fold].reset_index(drop=True)

    transforms = get_transfos(size=Config.size)
    dataset = InferenceDataset(df_val, transforms)
    meter = predict(model_marker, dataset, Config, disable_tqdm=False)

    print('\n- Save & viz')

    boxes = []
    scores = []
    for idx in range(len(dataset)):
        # Keep only the first box / confidence of each image.
        # NOTE(review): this assumes every image has at least one prediction;
        # an empty prediction would raise an IndexError here - confirm that
        # the wrapper always returns one.
        pred = meter.preds[idx]['pascal_voc'][0]
        score = meter.confidences[idx][0]

        boxes.append(pred)
        scores.append(score)

        # Visual sanity check: every sample if PLOT, otherwise one in 10000.
        if PLOT or not (idx % 10000):
            img, gt, shape = dataset[idx]
            if isinstance(img, torch.Tensor):
                # Channel-first tensor -> channel-last array for plotting.
                img = img.cpu().numpy().transpose(1, 2, 0)
            plt.figure(figsize=(5, 5))
            plot_boxes(img, pred[None], "pascal_voc")
            plt.title(f'Pred - conf={score:.3f}')
            plt.show()

    np.save(f'../output/boxes_{Config.name}.npy', np.array(boxes))
    np.save(f'../output/confs_{Config.name}.npy', np.array(scores))

Done!