**About**: This notebook runs inference with trained models and evaluates their validation predictions.

In [None]:
# %load_ext nb_black
# Enable IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell execution, so edits to project code are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Switch the working directory to ../src/ — presumably so that the project-local
# imports in the following cells (params, data, utils, ...) resolve; confirm.
cd ../src/

## Initialization

### Imports

In [None]:
import os

# Select the GPU *before* importing torch: CUDA_VISIBLE_DEVICES is read when the
# CUDA runtime initialises, so setting it first guarantees that nothing executed
# at import time can initialise CUDA with a different set of visible devices.
os.environ['CUDA_VISIBLE_DEVICES'] = "1"

import torch  # noqa: E402  (deliberately imported after the environment is set)

print(torch.__version__)
# Device indices are relative to the visible devices, so index 0 is the GPU
# selected through CUDA_VISIBLE_DEVICES above.
torch.cuda.get_device_name(0)

In [None]:
import os
import re
import cv2
import sys
import glob
import json
import torch
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm import tqdm
from sklearn.metrics import *

warnings.simplefilter(action="ignore", category=FutureWarning)
warnings.simplefilter(action="ignore", category=UserWarning)

In [None]:
from utils.logger import Config, upload_to_kaggle

from params import *
from data.dataset import BreastDataset
from data.preparation import *
from data.transforms import get_transfos

from model_zoo.models import define_model
from utils.metrics import *
from utils.torch import load_model_weights
from utils.plots import plot_confusion_matrix

from inference.predict import predict, predict_tta
from inference.main import *

### Upload

In [None]:
# TO_UPLOAD = [
# #     "../logs/2023-02-12/1/",   # nfl2 - 1536x1024  LR=3e-4  5ep CBIS CMMD PASM pl1  0.5030  -  th=0.23  -  auc 0.9043
# #     "../logs/2023-02-12/2/",   # v2-m - 1536x1024  LR=3e-4  5ep CBIS CMMD PASM pl1  0.5099  -  th=0.31  -  auc 0.9115
# #     "../logs/2023-02-10/6/",   # v2-s - 1536x1024  LR=4e-4  5ep gem CBIS            0.4771  -  th=0.27  -  auc 0.9190
# #     "../logs/2023-02-10/16/",  # v2-m - 1536x1024  LR=3e-4  5ep bs6 CBIS CMMD PASM  0.4988  -  th=0.32  -  auc 0.9163
# #     "../logs/2023-02-11/1/",   # nfl2 - 1536x1024  LR=4e-4  5ep bs8 CBIS            0.4881  -  th=0.21  -  auc 0.8994
# #     "../logs/2023-02-11/3/",   # nfl2 - 1536x1024  LR=3e-4  5ep bs8 CBIS CMMD PASM  0.4837  -  th=0.25  -  auc 0.9106
# #     "../logs/2023-02-19/2/",   # nfl2 - 1536x1024  LR=3e-4  5ep aux0 CBIS CMMD PASM    0.5066  -  th=0.23  -  auc 0.9038
# #     "../logs/2023-02-13/15/",   # nfl1 - 1536x1024  LR=3e-4  4ep CBIS CMMD PASM pl1 0.4906  -  th=0.22  -  auc 0.9112
#     "../logs/2023-02-12/2/",   # v2-m - 1536x1024  LR=3e-4  5ep CBIS CMMD PASM pl1  0.5099  -  th=0.31  -  auc 0.9115
# ]

# upload_to_kaggle(TO_UPLOAD, "/workspace/datasets/rsna_weights_4/", "RSNA Breast Weights 4", update_folders=True)

## Expes

In [None]:
# Experiment log. Each line points at one run folder; the trailing comment holds
# the author's notes for that run (settings and recorded scores).
# Every uncommented line rebinds EXP_FOLDER, so only the LAST uncommented
# assignment of this cell is in effect once the cell has run.
EXP_FOLDER = "../logs/2023-02-09/4/"   # nfl1 - 1536x1024  LR=3e-4  5ep     CBIS            0.4639  -  th=0.24  -  auc 0.9141
EXP_FOLDER = "../logs/2023-02-09/11/"  # nfl1 - 1536x1024  LR=3e-4  5ep     CBIS CMMD       0.4651  -  th=0.27  -  auc 0.9131
EXP_FOLDER = "../logs/2023-02-09/13/"  # nfl1 - 1536x1024  LR=3e-4  5ep gem CBIS            0.4734  -  th=0.23  -  auc 0.9141
EXP_FOLDER = "../logs/2023-02-09/14/"  # nfl1 - 1536x1024  LR=3e-4  5ep gem CBIS CMMD       0.4588  -  th=0.19  -  auc 0.9112
EXP_FOLDER = "../logs/2023-02-10/0/"   # nfl1 - 1536x1024  LR=2e-4  5ep gem CBIS CMMD       0.4663  -  th=0.20  -  auc 0.9108
EXP_FOLDER = "../logs/2023-02-10/1/"   # nfl1 - 1536x1024  LR=3e-4  6ep gem CBIS            0.4756  -  th=0.20  -  auc 0.9127    <- nfl1
EXP_FOLDER = "../logs/2023-02-10/2/"   # nfl1 - 1536x1024  LR=4e-4  5ep gem CBIS CMMD       0.4703  -  th=0.19  -  auc 0.9125
EXP_FOLDER = "../logs/2023-02-10/3/"   # nfl1 - 1536x1024  LR=4e-4  5ep gem CBIS            0.4686  -  th=0.28  -  auc 0.9156
EXP_FOLDER = "../logs/2023-02-10/4/"   # nfl1 - 1536x1024  LR=3e-4  5ep gem CBIS CMMD s     0.4686  -  th=0.19  -  auc 0.9122
EXP_FOLDER = "../logs/2023-02-10/5/"   # v2-s - 1536x1024  LR=3e-4  5ep gem CBIS            0.4701  -  th=0.24  -  auc 0.9182
EXP_FOLDER = "../logs/2023-02-10/6/"   # v2-s - 1536x1024  LR=4e-4  5ep gem CBIS            0.4771  -  th=0.27  -  auc 0.9190    <- v2-s   0.4837  -  th=0.20  -  auc 0.9186


EXP_FOLDER = "../logs/2023-02-10/9/"   # nfl1 - 1536x1024  LR=3e-4  5ep bs8 CBIS PASM       0.4783  -  th=0.24  -  auc 0.9089
EXP_FOLDER = "../logs/2023-02-10/10/"  # v2-s - 1536x1024  LR=4e-4  5ep bs8 CBIS CMMD PASM  0.4487  -  th=0.19  -  auc 0.9125
EXP_FOLDER = "../logs/2023-02-10/12/"  # nfl2 - 1536x1024  LR=3e-4  5ep bs8 CBIS            0.4759  -  th=0.21  -  auc 0.9007
EXP_FOLDER = "../logs/2023-02-10/13/"  # v2-m - 1536x1024  LR=3e-4  5ep bs6 CBIS            0.4705  -  th=0.24  -  auc 0.9101
EXP_FOLDER = "../logs/2023-02-10/16/"  # v2-m - 1536x1024  LR=3e-4  5ep bs6 CBIS CMMD PASM  0.4858  -  th=0.23  -  auc 0.9152    <- v2-m   0.4988  -  th=0.32  -  auc 0.9163
EXP_FOLDER = "../logs/2023-02-11/0/"   # v2-m - 1536x1024  LR=4e-4  5ep bs6 CBIS            0.4702  -  th=0.22  -  auc 0.9149
EXP_FOLDER = "../logs/2023-02-11/1/"   # nfl2 - 1536x1024  LR=4e-4  5ep bs8 CBIS            0.4881  -  th=0.21  -  auc 0.8994    <- nfl2
EXP_FOLDER = "../logs/2023-02-11/3/"   # nfl2 - 1536x1024  LR=3e-4  5ep bs8 CBIS CMMD PASM  0.4837  -  th=0.25  -  auc 0.9106    <- nfl2

EXP_FOLDER = "../logs/2023-02-11/4/"   # nfl2 - 1536x1024  LR=4e-4  5ep CBIS CMMD PASM      0.4919  -  th=0.16  -  auc 0.9093
# EXP_FOLDER = "../logs/2023-02-11/8/"   # nfl2 - 1536x1024  LR=3e-4 10ep CBIS CMMD PASM      0.4800  -  th=0.26  -  auc 0.8789

# EXP_FOLDER = "../logs/2023-02-12/1/"   # nfl2 - 1536x1024  LR=3e-4  5ep CBIS CMMD PASM pl1  0.5030  -  th=0.23  -  auc 0.9043
# EXP_FOLDER = "../logs/2023-02-12/2/"   # v2-m - 1536x1024  LR=3e-4  5ep CBIS CMMD PASM pl1  0.5099  -  th=0.31  -  auc 0.9115     <- v2-m         CV pf1 : 0.4946  -  th=0.31  -  auc 0.9140
# EXP_FOLDER = "../logs/2023-02-13/0/"   # v2-s - 1536x1024  LR=4e-4  5ep CBIS CMMD PASM pl1  0.4714  -  th=0.33  -  auc 0.9116
# EXP_FOLDER = "../logs/2023-02-13/0/"   # v2-s - 1536x1024  LR=4e-4  5ep CBIS CMMD PASM pl1  0.4714  -  th=0.33  -  auc 0.9116
# EXP_FOLDER = "../logs/2023-02-13/15/"   # nfl1 - 1536x1024  LR=3e-4  4ep CBIS CMMD PASM pl1 0.4906  -  th=0.22  -  auc 0.9112
# EXP_FOLDER = "../logs/2023-02-13/16/"   # nfl2 - 1536x1024  LR=3e-4  5ep CBIS CMMD PASM pl1 0.5057  -  th=0.21  -  auc 0.9125


EXP_FOLDER = "../logs/2023-02-14/3/"   # nfl1 - 1536x1024  LR=3e-4 5ep CBIS CMMD PASM         0.4780  -  th=0.20  -  auc 0.9164
EXP_FOLDER = "../logs/2023-02-14/4/"   # v2-m - 1536x1024  LR=3e-4 5ep CBIS                   0.4690  -  th=0.29  -  auc 0.9090
EXP_FOLDER = "../logs/2023-02-14/5/"   # nfl2 - 1536x1024  LR=3e-4 10ep CBIS                  0.4706  -  th=0.23  -  auc 0.8664
EXP_FOLDER = "../logs/2023-02-15/5/"   # v2-l - 1536x1024  LR=3e-4 5ep CBIS CMMD PASM         0.4717  -  th=0.31  -  auc 0.9117


EXP_FOLDER = "../logs/2023-02-16/5/"  # nfl2 - 1536x1024  LR=4e-4  drop.2 bs8 CBIS  CMMD PASM 0.4948  -  th=0.22  -  auc 0.9091
EXP_FOLDER = "../logs/2023-02-17/0/"  # v2-m - 1536x1024  LR=3e-4  drop.2 bs6 CBIS CMMD PASM  0.4851  -  th=0.30  -  auc 0.9161

EXP_FOLDER = "../logs/2023-02-19/2/"  # nfl2 - 1536x1024  LR=3e-4  5ep aux0 CBIS CMMD PASM    0.5066  -  th=0.23  -  auc 0.9038

### Blends

In [None]:
# Run folders making up the blend: the Results section loads the per-fold
# validation predictions of each folder listed here and averages them.
# Trailing comments are the author's recorded scores for each run.
EXP_FOLDERS = [  # 1536x1024   0.5440  -  th=0.21  -  auc 0.9332  -  pct=98.485      RERUN -> TensorRT friendly
    "../logs/2023-02-10/6/",   # v2-s - 1536x1024  LR=4e-4  5ep gem CBIS              0.4837  -  th=0.20  -  auc 0.9186
    "../logs/2023-02-10/16/",  # v2-m - 1536x1024  LR=3e-4  5ep bs6 CBIS CMMD PASM    0.4988  -  th=0.32  -  auc 0.9163
    "../logs/2023-02-11/1/",   # nfl2 - 1536x1024  LR=4e-4  5ep bs8 CBIS              0.4881  -  th=0.21  -  auc 0.8994
    "../logs/2023-02-11/3/",   # nfl2 - 1536x1024  LR=3e-4  5ep bs8 CBIS CMMD PASM    0.4837  -  th=0.25  -  auc 0.9106
]

## Results

In [None]:
# Load the configuration stored in the selected experiment folder.
# The context manager closes the file handle deterministically instead of
# leaving it open until garbage collection.
with open(EXP_FOLDER + "config.json", "r") as f:
    config = Config(json.load(f))

In [None]:
# Build the dataframe for the configured image folder and replace every
# missing value with the -1 sentinel, in a single chained expression.
df = prepare_data(DATA_PATH, config.img_folder).fillna(-1)

In [None]:
# Attach fold assignments from the folds file when the prepared dataframe
# does not already carry a "fold" column.
needs_folds = "fold" not in df.columns
if needs_folds:
    folds = pd.read_csv(config.folds_file)
    df = pd.merge(df, folds, on=["patient_id", "image_id"], how="left")

In [None]:
# Out-of-fold evaluation of the blend: for every fold, average the validation
# predictions of each run listed in EXP_FOLDERS, write them into df["pred"]
# and print the fold score returned by tweak_thresholds.
# Only EXP_FOLDERS is read here; EXP_FOLDER plays no part in the blend, so a
# missing single-run file can no longer cause a fold to be skipped.

# Float sentinel: rows whose fold has no predictions keep -1 and are dropped at
# the end. A float column avoids writing float predictions into an int column.
df['pred'] = -1.0

# Folds to leave out of the evaluation (edit manually when needed).
SKIPPED_FOLDS = ()

for i in range(4):
    if i in SKIPPED_FOLDS:
        continue

    # Index labels of the validation rows of fold i; label-based selection is
    # used for both the read (df.loc) and the write-back (df.loc) below.
    val_idx = list(df[df['fold'] == i].index)
    df_val = df.loc[val_idx].copy().reset_index(drop=True)

    try:
        pred_val = []
        for exp_folder in EXP_FOLDERS:
            # Prefer the "pred_val_inf" file when it exists (presumably written
            # by the inference step — confirm), else fall back to "pred_val".
            file = exp_folder + f'pred_val_{i}.npy'
            if os.path.exists(exp_folder + f'pred_val_inf_{i}.npy'):
                file = exp_folder + f'pred_val_inf_{i}.npy'
            pred_val.append(np.load(file))
    except FileNotFoundError:
        # Only the file loading is guarded, so unrelated errors are not hidden.
        print(f'Fold {i} not found')
        continue

    # Equal-weight average over the runs of the blend.
    pred_val = np.average(pred_val, 0)

    df_val["pred"] = pred_val
    df.loc[val_idx, "pred"] = pred_val

    # numeric_only=True keeps the aggregation working on pandas >= 2.0, where
    # non-numeric columns are no longer silently dropped by mean().
    df_group = (
        df_val.groupby(["site_id", 'machine_id', 'patient_id', 'laterality'])
        .mean(numeric_only=True)
        .reset_index()
    )
    th, scores, score = tweak_thresholds(
        df_group["cancer"].values,
        df_group["pred"].values
    )
    print(f"Fold {i} score : {score:.4f}  -  th={th:.2f}")

# Keep only the rows that actually received a prediction.
df = df[df['pred'] > -1].reset_index(drop=True)

In [None]:
# df_group = df[["site_id", 'machine_id', 'patient_id', 'laterality', 'pred', 'cancer', 'BIRADS', 'invasive']].groupby(["site_id", 'machine_id', 'patient_id', 'laterality']).max().reset_index()
# Average the rows of each (site, machine, patient, laterality) group.
# numeric_only=True is required on pandas >= 2.0, where mean() raises on
# non-numeric columns instead of silently dropping them.
df_group = (
    df.groupby(["site_id", 'machine_id', 'patient_id', 'laterality'])
    .mean(numeric_only=True)
    .reset_index()
)

In [None]:
# Tune the decision threshold on the grouped targets / predictions. The three
# returned values are unpacked as: threshold, per-threshold scores, best score.
th, scores, score = tweak_thresholds(
    *(df_group[col].values for col in ("cancer", "pred"))
)

In [None]:
# Binarise the grouped predictions at the tuned threshold and store them.
df_group['pred_pp'] = pred_pp = (df_group['pred'].values > th).astype(int)

# Score the binarised predictions (pfbeta) and the raw ones (ROC AUC).
targets = df_group["cancer"].values
score = pfbeta(targets, pred_pp)
auc = roc_auc_score(targets, df_group["pred"].values)

print(f"-> CV pf1 : {score:.4f}  -  th={th:.2f}  -  auc {auc:.4f}")

In [None]:
from scipy.stats import percentileofscore, scoreatpercentile

# Express the tuned threshold as a percentile of the grouped predictions
# (rounded to 3 decimals), map that percentile back to a threshold and re-score.
# Note that `score` is rebound here, so later cells see this value.
grouped_preds = df_group["pred"].values
pct = np.round(percentileofscore(grouped_preds, th, kind='mean'), 3)
th_pct = scoreatpercentile(grouped_preds, pct)

pred_pct = (grouped_preds > th_pct).astype(int)
score = pfbeta(df_group["cancer"].values, pred_pct)
print(f"-> CV pf1 : {score:.4f}  -  th={th_pct:.2f}  -  auc {auc:.4f}  -  pct={pct}")

In [None]:
# Scores as a function of the threshold (grid 0.00 .. 0.99), zoomed around the
# selected threshold, which is marked by a vertical line and annotated.
fig, ax = plt.subplots()
ax.axvline(th, c="salmon")
ax.plot(np.round(np.arange(0, 1, 0.01), 2), scores)
ax.text(th, score + 0.001, s=f' pf1={score :.4f}', c='salmon', size=9)
ax.set_xlim(th - 0.2, th + 0.2)
ax.set_ylim(score - 0.07, score + 0.01)
ax.grid()
plt.show()

### Errors

In [None]:
# Candidate errors: positives ordered by increasing prediction and negatives
# ordered by decreasing prediction; the first n of each are kept for inspection.
fns = df.loc[df['cancer'].eq(1)].sort_values(by='pred', ascending=True)
fps = df.loc[df['cancer'].eq(0)].sort_values(by='pred', ascending=False)

n = 5
df_error = pd.concat([fps.iloc[:n], fns.iloc[:n]], ignore_index=True)

In [None]:
# Transforms requested with augmentation turned off.
transforms = get_transfos(augment=False)
# Dataset built from the selected error rows only.
dataset = BreastDataset(df_error, transforms=transforms)

In [None]:
# Display every selected error case with its identifiers, label and prediction.
for idx in range(len(dataset)):
    img, y, w = dataset[idx]
    fig, ax = plt.subplots(figsize=(5, 5))
    # Average over the first axis (presumably channels) to get a 2-D image.
    ax.imshow(img.mean(0), cmap="gray")
    ax.axis(False)
    ax.set_title(f"{df_error['patient_id'][idx]} {df_error['image_id'][idx]} - y={int(y)} - pred={df_error['pred'][idx] :.5f}")
    plt.show()

### Inference

In [None]:
# Run folder used by the inference cells below. Every uncommented line rebinds
# EXP_FOLDER, so only the last uncommented assignment of this cell is in effect.
EXP_FOLDER = "../logs/2023-02-10/2/"   # nfl1 - 1536x1024  LR=4e-4  5ep gem CBIS CMMD       0.4703  -  th=0.19  -  auc 0.9125
EXP_FOLDER = "../logs/2023-02-10/6/"   # v2-s - 1536x1024  LR=4e-4  5ep gem CBIS            0.4771  -  th=0.27  -  auc 0.9190

# EXP_FOLDER = "../logs/2023-02-10/16/"  # v2-m - 1536x1024  LR=3e-4  5ep bs6 CBIS CMMD PASM  0.4858  -  th=0.23  -  auc 0.9152
# EXP_FOLDER = "../logs/2023-02-12/2/"   # v2-m - 1536x1024  LR=3e-4  5ep CBIS CMMD PASM pl1  0.5099  -  th=0.31  -  auc 0.9115
# EXP_FOLDER = "../logs/2023-02-12/1/"   # nfl2 - 1536x1024  LR=3e-4  5ep CBIS CMMD PASM pl1  0.5030  -  th=0.23  -  auc 0.9043

# NOTE(review): the scores recorded on the next line (0.4706 / th=0.23 / auc 0.8664) differ from the ones
# recorded for the same folder in the experiment list earlier in the notebook (0.4717 / th=0.31 / auc 0.9117)
# and match the 2023-02-14/5 entry there — looks like a copy-paste slip; confirm.
EXP_FOLDER = "../logs/2023-02-15/5/"   # v2-l - 1536x1024  LR=3e-4 5ep CBIS CMMD PASM       0.4706  -  th=0.23  -  auc 0.8664

# 1536x1024   0.5440  -  th=0.21  -  auc 0.9332  -  pct=98.485      RERUN -> TensorRT friendly
# EXP_FOLDER = "../logs/2023-02-10/6/"   # v2-s - 1536x1024  LR=4e-4  5ep gem CBIS            0.4837  -  th=0.20  -  auc 0.9186
# EXP_FOLDER = "../logs/2023-02-10/16/"  # v2-m - 1536x1024  LR=3e-4  5ep bs6 CBIS CMMD PASM  0.4988  -  th=0.32  -  auc 0.9163
# EXP_FOLDER = "../logs/2023-02-11/1/"   # nfl2 - 1536x1024  LR=4e-4  5ep bs8 CBIS            0.4881  -  th=0.21  -  auc 0.8994
# EXP_FOLDER = "../logs/2023-02-11/3/"   # nfl2 - 1536x1024  LR=3e-4  5ep bs8 CBIS CMMD PASM  0.4837  -  th=0.25  -  auc 0.9106
EXP_FOLDER = "../logs/2023-02-22/1/"

In [None]:
# Switches forwarded as keyword arguments to kfold_inference_val in the
# inference cell; their exact effect is defined in inference.main.
DEBUG = False  # passed as debug=; when True the post-processing cell also keeps only fold 0 rows, truncated to len(pred_oof)
SAVE = True  # passed as save=
USE_TTA = False  # passed as use_tta=
USE_FP16 = True  # passed as use_fp16=
USE_TENSOR_RT = False  # passed as use_tensor_rt=
EXTRACT_FTS = False  # passed as extract_fts=

In [None]:
# Load the configuration of the run selected for inference.
# The context manager closes the file handle deterministically instead of
# leaving it open until garbage collection.
with open(EXP_FOLDER + "config.json", "r") as f:
    config = Config(json.load(f))

In [None]:
# Rebuild the dataframe for the image folder of the selected run. Unlike the
# Results section, missing values are not replaced with -1 here.
df = prepare_data(DATA_PATH, config.img_folder)

In [None]:
# Add the fold assignments from the configured folds file if the dataframe
# has no "fold" column yet (left join on patient and image identifiers).
fold_missing = "fold" not in df.columns
if fold_missing:
    folds = pd.read_csv(config.folds_file)
    df = pd.merge(df, folds, on=["patient_id", "image_id"], how="left")

In [None]:
%%time
# Run k-fold validation inference for the run stored in EXP_FOLDER, driven by
# the switches set in the flags cell. Two values are returned and unpacked as
# the main predictions and the auxiliary predictions (naming per the variables;
# see inference.main.kfold_inference_val for the exact contract).
pred_oof, pred_oof_aux = kfold_inference_val(
    df,
    EXP_FOLDER,
    config=config,
    debug=DEBUG,
    save=SAVE,
    use_tta=USE_TTA,
    use_fp16=USE_FP16,
    use_tensor_rt=USE_TENSOR_RT,
    extract_fts=EXTRACT_FTS,
)

- Fold 0 score : 0.5575  -  th=0.32
- Fold 1 score : 0.4663  -  th=0.27
- Fold 2 score : 0.5304  -  th=0.20
- Fold 3 score : 0.4343  -  th=0.24

In [None]:
# In debug mode only the first len(pred_oof) rows of fold 0 are kept, so the
# dataframe has as many rows as there are predictions.
if DEBUG:
    df = df.loc[df['fold'].eq(0)].reset_index(drop=True).iloc[:len(pred_oof)]

df['pred'] = pred_oof

# Mean prediction and mean target per (patient, laterality) group.
df_group = df.groupby(['patient_id', 'laterality'])[['pred', 'cancer']].mean()

In [None]:
# Tune the threshold on the grouped out-of-fold predictions, then report the
# best score, the chosen threshold, and pfbeta / ROC AUC on the same groups.
th, scores, score = tweak_thresholds(
    *(df_group[col].values for col in ("cancer", "pred"))
)

print(f"Best score : {score:.4f}")
print(f"th = {th:.2f}")

print(f'\n-> CV pf1 : {pfbeta(df_group["cancer"].values, df_group["pred"].values > th) :.4f}')
print(f'-> CV AUC : {roc_auc_score(df_group["cancer"].values, df_group["pred"].values) :.4f}')

In [None]:
# Scores as a function of the threshold (grid 0.00 .. 0.99); the selected
# threshold is marked by a vertical line and annotated with its score.
fig, ax = plt.subplots()
ax.axvline(th, c="salmon")
ax.plot(np.round(np.arange(0, 1, 0.01), 2), scores)
ax.text(th, score + 0.001, s=f' pf1={score :.4f}', c='salmon', size=8)
ax.set_ylim(0.3, score + 0.025)
plt.show()

Done!