#### Code to train models

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
cd ../src

/home/theo/Documents/kaggle_siim_covid/src


## Imports

In [3]:
import os
import cv2
import glob
import json
import torch
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from collections import Counter
from tqdm.notebook import tqdm
from matplotlib.patches import Rectangle

In [4]:
from data.preparation import prepare_dataframe, handle_duplicates, add_additional_boxes
from data.dataset import CovidDetDataset, CovidClsDataset
from data.transforms import get_transfos_det, get_transfos_cls

from model_zoo.models import get_model
from model_zoo.encoders import get_encoder

from utils.plot import plot_sample
from utils.boxes import treat_boxes
from utils.logger import prepare_log_folder, save_config, create_logger, update_overall_logs

from training.main import k_fold

In [66]:
from params import *
from data.extraction import *
from utils.boxes import Boxes, expand_boxes

# PP

In [56]:
import ast

from map_boxes import mean_average_precision_for_boxes

In [6]:
df = prepare_dataframe().copy()

In [7]:
clusts = np.load("../output/clusts.npy", allow_pickle=True)
found = np.load("../output/found.npy")
transpositions = np.load("../output/transpositions.npy", allow_pickle=True)

df = handle_duplicates(df, clusts, transpositions, plot=False)

In [8]:
df = add_additional_boxes(df)

In [9]:
boxes = pd.read_csv('../output/lungs_train')
df = df.merge(boxes, left_on="image_id", right_on="img_name", suffixes=('', '_lung'))

In [10]:
df['boxes_lung'] = df['boxes_lung'].apply(lambda x: np.array(ast.literal_eval(x)))
df['confidences'] = df['confidences'].apply(lambda x: np.array(ast.literal_eval(x)))

In [38]:
pred_oof = np.load("/home/theo/Documents/kaggle_siim_covid/logs/2021-07-30/0/pred_oof_img.npy")

In [135]:
from sklearn.metrics import *
roc_auc_score(df['img_target'], pred_oof)

0.9018662130719447

In [174]:
PLOT = False

In [188]:
boxes_pred, boxes_truth = [], []
tps, fns, fps = 0, 0, 0

for i in range(len(df)):
    pred = np.array(df['boxes_lung'][i])
    truth = np.array(df['boxes'][i])
    starts = np.array(df['crop_starts'][i])

    shape_crop = df['shape_crop'][i]
    shape = df['shape'][i]
    
    truth = Boxes(truth, shape, bbox_format="coco")["albu"]
    
    pred = Boxes(pred, shape_crop, bbox_format="albu")["coco"]
    
    if len(pred):
        pred[:, 0] += starts[1]
        pred[:, 1] += starts[0]
    
    pred = Boxes(pred, shape, bbox_format="coco")
    pred = expand_boxes(pred, 0.75)["albu"]
    
    confidence = pred_oof[i]
#     confidence = df['img_target'][i]
    
#     if len(truth):
    bp = []
    for b in pred:
        bp.append([
            df["study_id"][i], "opacity", confidence, b[0], b[2], b[1], b[3]
        ])
    boxes_pred += bp

    bt = []
    for b in truth:
        bt.append([
            df["study_id"][i], "opacity", b[0], b[2], b[1], b[3]
        ])
        
    boxes_truth += bt

    if len(bt):
        ap = mean_average_precision_for_boxes(bt, bp, verbose=0)[0]
    else:
        ap = 0
    
    if PLOT:
        print(f' -> mAP : {ap:.3f}')

    if confidence > 0.5:
        if ap == 1:
            tps += 2
        elif 0 < ap < 1:  # 1 correct, rest wrong
            tps += 1
            fns += len(bt) - 1
            fps += 1
        else:  # 2 wrong
            fps += 2
            fns += len(bt)
    else:
        fns += len(bt)

    if PLOT:  # or 0 < ap < 1:
        if np.max(starts) >= -1: # 100:
            xray_path = DATA_PATH + "train/" + df["study_id"][i] + "/" + df['series_id'][i] + "/" + df["image_id"][i] + ".dcm"
            image, _ = read_xray(xray_path)

            plt.figure(figsize=(15, 7))
            plt.subplot(1, 2, 1)
            plot_sample(image, pred, bbox_format="albu")
            plt.title(f'pred - {confidence :.3f}')
            plt.subplot(1, 2, 2)
            plot_sample(image, truth, bbox_format="albu")
            plt.title('truth')

            plt.show()

#         if i > 10:
#             break

In [189]:
tps, fns, fps

(2907, 5397, 6209)

In [190]:
mean_average_precision_for_boxes(boxes_truth, boxes_pred)

Number of files in annotations: 4294
Number of files in predictions: 6054
Unique classes: 1
Detections length: 6054
Annotations length: 4294
opacity                        | 0.183543 |    8257
mAP: 0.183543


(0.1835433195341496, {'opacity': (0.1835433195341496, 8257.0)})