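**About** : This notebook trains level-2 (patient-level) models on features extracted by first-level models, evaluates their out-of-fold predictions, and tunes a per-target rescaling of those predictions.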

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
cd ../src/

## Initialization

### Imports

In [None]:
import os

# Select the GPU *before* torch is imported: CUDA_VISIBLE_DEVICES is only
# honoured if it is set before CUDA gets initialised, so setting it first
# removes any dependency on import/initialisation order.
os.environ['CUDA_VISIBLE_DEVICES'] = "0"

import torch

print(torch.__version__)
# `device` holds the human-readable name of GPU 0. Fall back to "cpu" rather
# than raising, so the notebook does not abort here on a machine without a GPU.
device = torch.cuda.get_device_name(0) if torch.cuda.is_available() else "cpu"
print(device)

In [None]:
import os
import sys
import glob
import json
import torch
import operator
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm import tqdm
from sklearn.metrics import *

warnings.simplefilter(action="ignore", category=UserWarning)

In [None]:
from util.logger import (
    prepare_log_folder,
    save_config,
    create_logger,
#     init_neptune,
)

from params import *
from data.dataset import *
from data.preparation import *
from util.metrics import rsna_loss
from util.plots import plot_confusion_matrix
from model_zoo.models_lvl2 import define_model
from inference.extract_features import Config as ConfigInf
from training.main_lvl2 import k_fold, retrieve_preds

## Data

In [None]:
# Load the two metadata tables returned by prepare_data for DATA_PATH
# (one per patient and one per image, judging by their names).
df_patient, df_img = prepare_data(DATA_PATH)

In [None]:
# Feature sources handed to the dataset below, as (experiment folder, feature kind) pairs.
EXP_FOLDERS = [
    ("../logs/2023-09-20/14/", "seg"),
    ("../logs/2023-09-20/36/", "probas_2d"),  # convnext-tiny best
    ("../logs/2023-10-02/41/", "crop"),
    # ("../logs/2023-09-30/20/", "crop"),
]
# Folder of the first source; its config.json is the one loaded in the next cell.
EXP_FOLDER = next(folder for folder, _ in EXP_FOLDERS)

In [None]:
# Load the saved configuration of the reference experiment.
# A context manager is used so the file handle is closed deterministically
# instead of being left open until garbage collection.
with open(EXP_FOLDER + "config.json", "r") as config_file:
    config = ConfigInf(json.load(config_file))

In [None]:
# Attach the fold assignment to both tables. The guard makes the cell safe to
# re-run: once df_patient has a "fold" column nothing is merged again.
if not ("fold" in df_patient.columns):
    folds = pd.read_csv(config.folds_file)
    # merge() with no `on=` joins on the columns the two frames share.
    df_patient, df_img = df_patient.merge(folds), df_img.merge(folds)

In [None]:
# Dataset restricted to fold 0, used for the quick inspection cells below.
dataset = PatientFeatureDataset(
    df_patient.loc[df_patient["fold"] == 0],
    df_img.loc[df_img["fold"] == 0],
    EXP_FOLDERS,
    # max_len=600,
    # resize=200,
)

In [None]:
# for i, k in enumerate(dataset.fts.keys()):
#     if k[0] == 10004:
#         print(i, k)
        
# ft, y, y_aux = dataset[178]
# ft.mean(0)

In [None]:
# lens = []
# for i in tqdm(range(len(dataset))):
#     x = dataset[i][0]
#     lens.append(len(x))
# #     break

# sns.histplot(lens)
# plt.show()

In [None]:
# lens = []
# for i in tqdm(range(len(dataset))):
#     fts = dataset[i][0]
#     x = fts.numpy()
#     lens.append(len(x))
    
# #     start, end = detect_start_end(x)
    
#     if len(x) > 1000:
# #         plt.subplot(1, 2, 1)
#         plt.plot(x[:, :5])
# #         plt.axvline(start, c="salmon")
# #         plt.axvline(end, c="salmon")
        
# #         plt.subplot(1, 2, 2)
# #         plt.plot(kept)
#         plt.show()
    
#     break

In [None]:
# sns.histplot(lens)

## Model

In [None]:
# First element of the first dataset item, used as a sample model input;
# the cells below index it with the keys 'x' and 'ft'.
x = dataset[0][0]

In [None]:
# Instantiate an "rnn_att" model whose input sizes are read off the sample `x`.
model = define_model(
    "rnn_att",
    ft_dim=x["x"].shape[-1],
    layer_dim=512,
    n_layers=1,
    dense_dim=256,
    num_classes=11,
    num_classes_aux=0,
    # Product of the last two dimensions of x["ft"].
    n_fts=x["ft"].shape[-2] * x["ft"].shape[-1],
)

In [None]:
# Smoke test: run the model once on the sample, with a leading dimension of
# size 1 added to both inputs via unsqueeze(0).
model(x['x'].unsqueeze(0), x['ft'].unsqueeze(0))

## Training
- Handle variable sequence length more cleverly
- Tweak CNN
- Tweak fancier archs

In [None]:
class Config:
    """
    Parameters used for training (variant with name = "transfo").

    NOTE: a second `Config` class is defined in the following cell; when both
    cells are run, that later definition replaces this one.
    """
    # General
    seed = 42
    verbose = 1
    device = "cuda"
    save_weights = True

    # Data: (experiment folder, feature kind) pairs
    exp_folders = [
        # ("../logs/2023-09-06/4/", "seg"),  # v2-s
        # ("../logs/2023-09-19/10/", "seg"),  # v2-rw-t stride+
        ("../logs/2023-09-20/14/", "seg"),  # v2-rw-t stride+ 384
        # ("../logs/2023-09-25/26/", "seg"),  # v2-rw-t stride+ 384 4 classes
        # ("../logs/2023-09-24/20/", "seg3d"),  # resnet18d 3D

        # ("../logs/2023-09-22/9/", "probas"),  # v2m
        # ("../logs/2023-09-21/32/", "probas"),  # v2m
        # ("../logs/2023-09-22/24/", "probas"),  # convnext-nano
        # ("../logs/2023-09-25/22/", "probas"),  # convnext-tiny
        # ("../logs/2023-09-25/15/", "probas"),  # convnext-tiny
        # ("../logs/2023-09-26/8/", "probas_3d"),  # convnext-tiny rnn 3fs5
        ("../logs/2023-09-27/20/", "probas_3d"),  # convnext-tiny rnn 3fs3
        # ("../logs/2023-09-26/32/", "probas_3d"),  # convnext-tiny rnn 2.5D
        ("../logs/2023-09-26/39/", "probas_3d"),  # convnext-tiny rnn 2.5D 5fs5
        # ("../logs/2023-09-27/19/", "probas_3d"),  # convnext-nano rnn 2.5D 5fs5

        # ("../logs/2023-09-25/37/", "probas_2d"),  # convnext-tiny bs8
        ("../logs/2023-09-20/36/", "probas_2d"),  # convnext-tiny best
        # ("../output/confs_rsna_v1_fold*_2", "yolox"),
    ]

    restrict = True
    max_len = 1000 if not restrict else 600
    resize = 200
    n_fts = 0  # already pooled features, not supported yet

    # k-fold
    k = 4
    folds_file = f"../input/folds_{k}.csv"
    selected_folds = [0, 1, 2, 3]

    # Model
    # name = "rnn"
    name = "transfo"
    # (11 + 11) features per "probas*" source, plus 5 extra features.
    ft_dim = (11 + 11) * sum("probas" in mode for _, mode in exp_folders) + 5  # + 4

    dense_dim = 256  # 384
    layer_dim = 128
    n_layers = 1

    p = 0.1
    use_msd = False
    num_classes = 11
    num_classes_aux = 0

    # Training
    loss_config = {
        "name": "patient",
        "weighted": True,
        "use_any": True,
        "smoothing": 0,
        "activation": "patient",
        "aux_loss_weight": 0,
        "name_aux": "patient",
        "smoothing_aux": 0,
        "activation_aux": "",
    }

    data_config = {
        "batch_size": 64,
        "val_bs": 256,
        "mix": "mixup",
        "mix_proba": 0.,
        "sched": False,
        "mix_alpha": 4.,
        "additive_mix": False,
        "num_classes": num_classes,
        "num_workers": 8,
    }

    optimizer_config = {
        "name": "AdamW",
        "lr": 5e-4,  # 7e-4, 9e-4
        "warmup_prop": 0.,
        "betas": (0.9, 0.999),
        "max_grad_norm": 10.,
        "weight_decay": 0.2,
    }

    epochs = 8

    use_fp16 = True
    # `verbose` is already set in the General section above.
    verbose_eval = 50

    fullfit = False
    n_fullfit = 1

    local_rank = 0
    distributed = False
    world_size = 1

In [None]:
class Config:
    """
    Parameters used for training (variant with name = "rnn_att").

    The sizes `ft_dim` and `n_fts` are derived from `exp_folders`, so they
    follow automatically when feature sources are toggled.
    """
    # General
    seed = 42
    verbose = 1
    device = "cuda"
    save_weights = True

    # Data: (experiment folder, feature kind) pairs
    exp_folders = [
        ("../logs/2023-09-20/14/", "seg"),  # v2-rw-t stride+ 384
        # ("../logs/2023-09-24/20/", "seg3d"),  # resnet18d 3D

        ("../logs/2023-09-20/36_r/", "probas_2d"),  # 0.358 - convnext-tiny best  <-
        ("../logs/2023-09-27/20_r/", "probas_3d"),  # 0.349 with 36_r - convnext-tiny rnn 3fs3  <-
        # ("../logs/2023-09-26/39_r/", "probas_3d"),  # convnext-tiny rnn 2.5D 5fs5  <- REMOVE ?

        # ("../logs/2023-09-25/39/", "probas_2d"),  # 0.362 convnext-tiny bs8

        # ("../logs/2023-10-02/66/", "probas_2d"),  # coat_lite_medium_384
        # ("../logs/2023-10-02/66/", "probas_2d"),  # coat_lite_medium_384  3s3f
        # ("../logs/2023-10-03/4/", "probas_2d"),  # coat_lite_medium_384  bs+

        # ("../logs/2023-10-03/21/",  "probas_2d"),  # convnext-tiny crop
        # ("../logs/2023-10-03/33/",  "probas_2d"),  # convnext-tiny rerepro
        # ("../logs/2023-10-04/19/", "probas_2d"),  # 0.359 convnext-tiny repro new - -0.001 resize longest / +0.002 crop
        # ("../logs/2023-10-04/20/", "probas_2d"),  # convnext-tiny 512 new
        # ("../logs/2023-10-04/31/", "probas_2d"),  # resize 512
        # ("../logs/2023-10-04/29/", "probas_2d"),  # crop 448

        ("../logs/2023-10-05/13/", "probas_2d"),  # 0.353 - maxvit_tiny_tf_384   (+36_r - 0.347 / +20_ 0.342) <-
        # ("../logs/2023-10-05/14/", "probas_2d"),  # 0.414 coatnet_1_rw_224
        # ("../logs/2023-10-05/15/", "probas_2d"),  # 0.386 maxxvitv2_nano_rw_256
        # ("../logs/2023-10-05/23/", "probas_2d"),  # 0.384 maxxvitv2_rmlp_base_rw_384

        # ("../output/confs_rsna_v1_fold*_2", "yolox"),
        ### CROP MODELS with 3x convnext
        # Convnext
        # ("../logs/2023-09-30/46/", "crop"),  # -> 0.332 +
        # ("../logs/2023-10-03/1/", "crop"),   # -> 0.332 +

        # CoAt
        # ("../logs/2023-10-02/11/", "crop"),  # -> 0.328  <-
        # ("../logs/2023-10-02/36/", "crop"),  # -> 0.329  <-
        # ("../logs/2023-10-02/41/", "crop"),  # -> 0.327  <-
        # ("../logs/2023-10-02/51/", "crop"),  # -> 0.329  <-
        # ("../logs/2023-10-02/70/", "crop"),  # -> 0.328 5x5 (<-)

        # ("../logs/2023-10-05/6/", "crop"),  # -> 0.325
        # ("../logs/2023-10-05/8/", "crop"),  # -> 0.328


        # Scores with 2x Convnext & maxvit
        ("../logs/2023-10-02/11/", "crop"),  # -> 0.326
        ("../logs/2023-10-02/36/", "crop"),   # -> 0.326
        ("../logs/2023-10-02/41/", "crop"),  # -> 0.325
        # ("../logs/2023-10-02/51/", "crop"),  # -> 0.327
        # ("../logs/2023-10-02/70/", "crop"),  # -> 0.326
        ("../logs/2023-10-05/6/", "crop"),  # -> 0.322
        ("../logs/2023-10-05/8/", "crop"),  # -> 0.326
        ("../logs/2023-10-05/20/", "crop"),  # -> 0.325
        ("../logs/2023-10-05/21/", "crop"),  # -> 0.325
    ]

    restrict = True
    max_len = 600
    resize = 200
    # 9 pooled features per "crop" source.
    n_fts = 9 * sum("crop" in mode for _, mode in exp_folders)

    # k-fold
    k = 4
    folds_file = f"../input/folds_{k}.csv"
    selected_folds = [0, 1, 2, 3]

    # Model
    # name = "rnn"
    name = "rnn_att"
    # (11 + 11) features per "probas*" source, plus 5 extra features.
    ft_dim = (11 + 11) * sum("probas" in mode for _, mode in exp_folders) + 5  # + 4      # 11 + 8 ??
    # One more feature when a "yolox" source is enabled.
    if any("yolox" in mode for _, mode in exp_folders):
        ft_dim += 1

    dense_dim = 384  # 384
    layer_dim = 256
    n_layers = 1

    p = 0.
    use_msd = False
    num_classes = 11
    num_classes_aux = 0

    # Training
    loss_config = {
        "name": "patient",
        "weighted": True,
        "use_any": True,
        "smoothing": 0,
        "activation": "patient",
        "aux_loss_weight": 0,
        "name_aux": "patient",
        "smoothing_aux": 0,
        "activation_aux": "",
    }

    data_config = {
        "batch_size": 64,
        "val_bs": 256,
        "mix": "mixup",
        "mix_proba": 0.,
        "sched": False,
        "mix_alpha": 4.,
        "additive_mix": False,
        "num_classes": num_classes,
        "num_workers": 8,
    }

    optimizer_config = {
        "name": "AdamW",
        "lr": 4e-4,  # 4e-4, 5e-4
        "warmup_prop": 0.,
        "betas": (0.9, 0.999),
        "max_grad_norm": 10.,
        "weight_decay": 0.2,
    }

    epochs = 10

    use_fp16 = True
    # `verbose` is already set in the General section above.
    verbose_eval = 50

    fullfit = False
    n_fullfit = 1

    local_rank = 0
    distributed = False
    world_size = 1

In [None]:
# DEBUG is read by the (currently commented-out) training cell below, which
# only creates a log folder when it is False.
DEBUG = False
# No log folder yet; the training cell assigns one when it runs.
log_folder = None

In [None]:
# if not DEBUG:
#     log_folder = prepare_log_folder(LOG_PATH)
#     print(f"Logging results to {log_folder}")
#     config_df = save_config(Config, log_folder + "config.json")
#     create_logger(directory=log_folder, name="logs.txt")

# preds, preds_aux = k_fold(Config, df_patient, df_img, log_folder=log_folder, run=None)

### Eval

In [None]:
# !cat ../logs/2023-10-02/26/config.json
# !cat ../logs/2023-10-02/26/logs.txt

In [None]:
# Experiment folders whose out-of-fold predictions are loaded and averaged in
# the cells below. Each comment gives: CV score - model - feature sources.
# NOTE: this rebinds EXP_FOLDERS, which earlier in the notebook held
# (folder, kind) tuples, to a plain list of folder strings.
# Commented-out entries all end with a comma so that uncommenting several of
# them cannot silently concatenate adjacent string literals.
EXP_FOLDERS = [
#     "../logs/2023-09-21/27/",  # 0.358 - rnn_att - convnext-tiny best 384 x seg 384       <--- best 2D
#     "../logs/2023-09-25/28/",  # 0.361 - transfo - convnext-tiny best 384 x seg 384       <--- best 2D + transfo
#     "../logs/2023-09-25/17/",  # 0.357 - rnn_att  - convnext-tiny best 384 x seg 384 + 3d  <--- best 2D + 3D

#     # 3 frames + best 2D
#     "../logs/2023-09-26/36/",  # 0.352 - rnn_att - convnext-tiny best 2D & RNN 3 frames   ---       8
#     "../logs/2023-09-26/37/",  # 0.351 - rnn_att - convnext-tiny best 2D & RNN 3 frames   ---       8
#     "../logs/2023-09-26/38/",  # 0.357 - transfo - convnext-tiny best 2D & RNN 3 frames   ---       8
#     "../logs/2023-09-27/16/",  # 0.351 - rnn_att - convnext-tiny best 2D & RNN 3 frames   ---       8
#     "../logs/2023-09-27/2/",   # 0.352 - rnn_att - convnext-tiny best 2D & RNN 3 frames   ---       32

#     # 5 frames + best 2D
#     "../logs/2023-09-27/11/",   # 0.350 - rnn_att - convnext-tiny best 2D & RNN 5 frames   ---      39
#     "../logs/2023-09-27/13/",   # 0.351 - rnn_att - convnext-tiny best 2D & RNN 5 frames   ---      40

#     # 5 frames + 3 frames + best 2D
#     "../logs/2023-09-27/10/",   # 0.346 - rnn_att - convnext-tiny best 2D & RNN 3 & 5 frames    ---     8 39
#     "../logs/2023-09-27/17/",   # 0.348 - rnn_att - convnext-tiny best 2D & RNN 3 & 5 frames    ---     8 40
#     "../logs/2023-09-27/14/",   # 0.348 - rnn_att - convnext-tiny best 2D & RNN 3 & 5 frames    ---     32 40
#     "../logs/2023-09-27/15/",   # 0.347 - rnn_att - convnext-tiny best 2D & RNN 3 & 5 frames    ---     32 39

#     "../logs/2023-09-27/28/",   # 0.346 - rnn_att - convnext-tiny best 2D & RNN 3 & 5 frames    ---     20 39
#     "../logs/2023-09-27/29/",   # 0.346 - rnn_att - convnext-tiny best 2D & RNN 3 3 & 5 frames  ---   8 20 39
#     "../logs/2023-10-02/27/",     # 0.344 - rnn_att p - convnext-tiny best 2D & RNN 3 & 5 frames  ---     20 39     <---   SUBMIT

#     "../logs/2023-10-02/60/",

#     "../logs/2023-09-28/15/",     # 0.359 - rnn_att p - convnext-tiny best 2D   ---

#     "../logs/2023-09-27/31/",   # 0.344 - rnn_att - convnext-tiny best 2D & RNN 3 & 5 frames & yolox   0     20 39 yoloX      0.343 r


#     "../logs/2023-09-30/31/",   # 0.334 - rnn_att - convnext-tiny best 2D & RNN 3 & 5 frames & crop   20 39

#     "../logs/2023-10-01/28/",   # 0.329 - rnn_att - convnext-tiny best 2D & RNN 3 & 5 frames & crop   20 39 - 20 46 0
#     "../logs/2023-10-01/31/",   # 0.329 - rnn_att - convnext-tiny best 2D & RNN 3 & 5 frames & crop   20 39 - 20 46 0
#     "../logs/2023-10-01/39/",   # 0.329 - rnn_att - convnext-tiny best 2D & RNN 3 & 5 frames & crop   20 39 - 20 46 50 0
#     "../logs/2023-10-01/41/",   # 0.329 - rnn_att - convnext-tiny best 2D & RNN 3 & 5 frames & crop   20 39 - 20 29 46 50 0

#     "../logs/2023-10-02/26/",   # 0.326 - rnn_att - convnext-tiny best 2D & RNN 3 & 5 frames & crop   20 39 - 46 11
#     "../logs/2023-10-02/25/",   # 0.325 - rnn_att - convnext-tiny best 2D & RNN 3 & 5 frames & crop   20 39 - 46 11 41
#     "../logs/2023-10-02/25/",   # 0.324 - rnn_att - convnext-tiny best 2D & RNN 3 & 5 frames & crop   20 39 - 46 11 41 51

#     "../logs/2023-10-02/63/",   # 0.326 - rnn_att - convnext-tiny best 2D & RNN 3 & crop   20 - 46 11 36 41 51
#     "../logs/2023-10-02/64/",   # 0.325 - rnn_att - convnext-tiny best 2D & RNN 3 & crop   20 - 11 36 41 51
#     "../logs/2023-10-02/68/",   # 0.327 - rnn_att - convnext-tiny best 2D & RNN 3 & crop   20 - 11 41
#     "../logs/2023-10-03/13/",   # 0.325 - rnn_att - convnext-tiny best 2D & RNN 3 & crop   20 - 11 36 41 51 70

#     "../logs/2023-10-04/1/",
#     "../logs/2023-10-03/22/",  # 0.331 - 20 36 - 41                              <---  LB 0.38
#     "../logs/2023-10-05/11/",   # 0.324 - rnn_att - 36_r 20_r - 11 36 41 51
#     "../logs/2023-10-05/19/",  # 0.322  -  rnn_att - 36_r 20_r - 11 36 41 51 6
#     "../logs/2023-10-05/19/",  # 0.321  -  rnn_att - 36_r 20_r - 11 36 41 51 6 8
#     "../logs/2023-10-05/49/",  # 0.319 - rnn_att - 36_r 20_r 13 - 41 6 20 21
#     "../logs/2023-10-05/53/",  # 0.321 - rnn_att - 36_r 20_r  - 41 6 20 21
    "../logs/2023-10-05/54/",  # 0.316 - rnn_att - 36_r 20_r 13 - 11 36 41 6 8 20 21    <---   BEST
#     "../logs/2023-10-05/55/",  # 0.318 - rnn_att - 36_r 13 - 11 36 41 6 8 20 21
#     "../logs/2023-10-05/56/",  # 0.316 - rnn_att - 36_r 20_r 13 - 11 36 41 51 70 6 8 20 21
]

In [None]:
# Load the out-of-fold predictions of every experiment in EXP_FOLDERS and
# average them into a single ensemble prediction.
preds_oof = []
for exp_folder in EXP_FOLDERS:
    # Context manager so each config file handle is closed as soon as it has
    # been parsed, instead of being leaked once per experiment.
    with open(exp_folder + "config.json", "r") as config_file:
        cfg = ConfigInf(json.load(config_file))

    # Summary of the feature sources this experiment was trained on.
    # e[8:] drops the first 8 characters of each folder path
    # (presumably the "../logs/" prefix - verify if other roots are used).
    print(
        exp_folder, " --> ",
        "proba :",
        " - ".join(e[8:] for e, m in cfg.exp_folders if "proba" in m),
        "\t crop : ",
        " - ".join(e[8:] for e, m in cfg.exp_folders if "crop" in m),
    )

    df_oof, pred_oof = retrieve_preds(
        df_patient,
        df_img,
        cfg,
        exp_folder,
        custom_agg=False,
#         folds=[0]
    )
    preds_oof.append(pred_oof)
#     df_oof.to_csv(exp_folder + 'df_oof.csv', index=False)

# Element-wise mean over experiments. `df_oof` is the frame returned for the
# last experiment in the loop.
pred_oof = np.mean(preds_oof, 0)

In [None]:
# Score the ensembled out-of-fold predictions: one line per entry of `losses`,
# followed by the average returned by rsna_loss.
losses, avg_loss = rsna_loss(pred_oof, df_oof)

for target, value in losses.items():
    # Label = text before the first underscore, truncated to 8 characters.
    short_name = target.split('_')[0][:8]
    print(f"- {short_name} loss\t: {value:.3f}")

print(f'\n -> CV Score : {avg_loss :.3f}')

In [None]:
# # Check kaggle score

# df_oof = df_oof.sort_values('patient_id').head(200)
# pred_oof = df_oof[df_oof.columns[-11:]].values

# losses, avg_loss = rsna_loss(pred_oof, df_oof)

# for k, v in losses.items():
#     print(f"- {k.split('_')[0][:8]} loss\t: {v:.3f}")

# print(f'\n -> CV Score : {avg_loss :.4f}')

In [None]:
# from inference.lvl2 import to_sub_format

# preds = pred_oof.copy()
# df = df_oof.copy()

# for i in range(preds.shape[1]):
#     df[f'pred_{i}'] = preds[:, i]

# # dfg = df.drop(['series', 'path', 'frame', 'patient_id'], axis=1).groupby('patient').mean().reset_index()
# sub = to_sub_format(df.rename(columns={"patient_id": "patient"}))
# sub.to_csv(f'../output/submission_{int(avg_loss * 1000)}.csv', index=False)

# print('-> Saved to', f'../output/submission_{int(avg_loss * 1000)}.csv')

# sub.head()

In [None]:
# plt.figure(figsize=(22, 4))

# plt.subplot(1, 5, 1)
# plot_confusion_matrix(pred_oof[:, 0] > 0.5, df_oof[PATIENT_TARGETS[0]], display_labels=["ok", "injury"], normalize=None, show_label=True)
# plt.title(PATIENT_TARGETS[0])

# plt.subplot(1, 5, 2)
# plot_confusion_matrix(pred_oof[:, 1] > 0.5, df_oof[PATIENT_TARGETS[1]], display_labels=["ok", "injury"], normalize=None)
# plt.title(PATIENT_TARGETS[1])

# plt.subplot(1, 5, 3)
# plot_confusion_matrix(pred_oof[:, 2:5].argmax(-1), df_oof[PATIENT_TARGETS[2]], display_labels=["ok", "low", "high"], normalize=None)
# plt.title(PATIENT_TARGETS[2])

# plt.subplot(1, 5, 4)
# plot_confusion_matrix(pred_oof[:, 5:8].argmax(-1), df_oof[PATIENT_TARGETS[3]], display_labels=["ok", "low", "high"], normalize=None)
# plt.title(PATIENT_TARGETS[3])

# plt.subplot(1, 5, 5)
# plot_confusion_matrix(pred_oof[:, 9:].argmax(-1), df_oof[PATIENT_TARGETS[4]], display_labels=["ok", "low", "high"], normalize=None)
# plt.title(PATIENT_TARGETS[4])

# plt.show()

### Crop models

In [None]:
# from data.preparation import get_df_series

# dfs = []
# for i in tqdm(range(4)):
#     preds = np.load(f'../logs/2023-09-30/20/pred_val_{i}.npy')
#     df_series = get_df_series(df_patient[df_patient['fold'] == i], df_img[df_img['fold'] == i])
    
#     cs = ['pred_healthy', 'pred_low', 'pred_high']
#     for i, c in enumerate(cs):
#         df_series[c] = preds[:, i]
    
#     df_series = df_series.groupby(["patient_id", "series"]).agg(list).reset_index()
    
#     i = 2
#     for idx, c in enumerate(["kidney", "liver", "spleen"]):
#         for p in ['pred_healthy', 'pred_low', 'pred_high']:
#             df_series[f"pred_{i}"] = np.array(df_series[p].values.tolist())[:, idx]
#             i += 1

#     df_series = df_series[["patient_id"] + [f"pred_{i}" for i in range(2, 11)]]
#     df = df_series.groupby("patient_id").mean().reset_index()
#     dfs.append(df)
    
# df_crop = pd.concat(dfs, ignore_index=True)

In [None]:
# df_oof_c = df_oof.merge(df_crop, on="patient_id")

# pred_oof_1 = df_oof_c[["pred_0", "pred_1"] + [f"pred_{i}_x" for i in range(2, 11)]].values
# pred_oof_2 = df_oof_c[["pred_0", "pred_1"] + [f"pred_{i}_y" for i in range(2, 11)]].values

In [None]:
# for i, c in enumerate(['bowel_injury', 'extravasation_injury', 'kidney_healthy', 'kidney_low', 'kidney_high', 'liver_healthy', 'liver_low', 'liver_high', 'spleen_healthy', 'spleen_low', 'spleen_high']):
#     if i > 1:
#         auc_1 = roc_auc_score(df_oof[c], pred_oof_1[:, i])
#         auc_2 = roc_auc_score(df_oof[c], pred_oof_2[:, i])
#         print(f"{c[:10]} AUC:\t {auc_1 :.3f} - {auc_2 :.3f}  ({'+' if auc_2 > auc_1 else ''}{auc_2 - auc_1 :.3f})")

### Rescaling

In [None]:
# Cross-validated, per-column affine rescaling of the ensembled predictions.
#
# For each held-out fold (plus a final "fullfit" pass that uses all folds):
#   1. average the predictions of all EXP_FOLDERS on the training folds,
#   2. greedily search, one output column at a time, for the (factor, shift)
#      pair minimising rsna_loss when applied as clip(p * factor + shift),
#      doing two passes over the columns,
#   3. apply the accumulated coefficients to the held-out fold and report the
#      score before / after rescaling.
#
# Outputs: `rescaled_preds` and `df_oofs`, one entry per held-out fold.
rescaled_preds = []
df_oofs = []

print("Exp folders :\t", "\t".join(EXP_FOLDERS))

for fold in [0, 1, 2, 3, "fullfit"]:
    print(f'\n ->  Fold {fold}')
    # For "fullfit" no integer matches, so all four folds are used for fitting.
    train_folds = [f for f in range(4) if f != fold]

    # NOTE(review): the notebook-level `Config` class is passed to
    # retrieve_preds here, whereas the Eval cell above passes the config loaded
    # from each experiment folder - confirm both give the same predictions.
    preds_oof = []
    for exp_folder in EXP_FOLDERS:
        df_oof, pred_oof = retrieve_preds(df_patient, df_img, Config, exp_folder, folds=train_folds)
        preds_oof.append(pred_oof)
    pred_oof = np.mean(preds_oof, 0)

    losses, avg_loss = rsna_loss(pred_oof, df_oof)
    print(f'\n- Train Score : {avg_loss :.3f}')

    print(f'\n- Optimize coeffs on folds {train_folds}:\n')
    pred_oof_ = pred_oof.copy()  # running, already-rescaled predictions
    best_score = avg_loss
    # factors[i] = [scale, shift] of the accumulated transform for column i,
    # starting from the identity.
    factors = {i: [1, 0] for i in range(pred_oof.shape[1])}
    for _ in range(2):

        for i in range(pred_oof.shape[1]):
            # Grid search over (factor, shift) for column i, other columns fixed.
            scores = {}
            for factor in np.round(np.arange(0.5, 1.5, 0.1), 2):
                for shift in np.round(np.arange(-0.1, 0.11, 0.1), 2):
                    pred_oof_r = pred_oof_.copy()
                    pred_oof_r[:, i] = pred_oof_r[:, i] * factor + shift
                    pred_oof_r[:, i] = np.clip(pred_oof_r[:, i], 0.00001, 0.99999)

                    losses, avg_loss = rsna_loss(pred_oof_r, df_oof)
                    scores[(factor, shift)] = avg_loss

            best_coefs, best_loss = min(scores.items(), key=operator.itemgetter(1))
            delta = best_score - best_loss
            # Only accept a change that improves the loss by a meaningful margin.
            if delta > 0.0001:
                pred_oof_[:, i] = np.clip(pred_oof_[:, i] * best_coefs[0] + best_coefs[1], 0.00001, 0.99999)
                best_score = best_loss
                print(f'{i} - {best_coefs}  -  {best_loss :.3f}  (-{delta:.4f})')
                # Compose the new transform with the accumulated one:
                #   a2 * (a1 * x + b1) + b2 = (a1 * a2) * x + (a2 * b1 + b2)
                # The shift must therefore be scaled by the new factor before
                # the new shift is added (summing the shifts is only correct
                # when a2 == 1). The shift is updated first because it needs
                # no knowledge of the old scale. Intermediate clipping is
                # ignored by this closed form.
                factors[i][1] = np.round(best_coefs[0] * factors[i][1] + best_coefs[1], 4)
                factors[i][0] = np.round(best_coefs[0] * factors[i][0], 2)

    if fold != "fullfit":
        # Validate the fitted coefficients on the held-out fold.
        preds_oof = []
        for exp_folder in EXP_FOLDERS:
            df_oof, pred_oof = retrieve_preds(df_patient, df_img, Config, exp_folder, folds=[fold])
            preds_oof.append(pred_oof)

        pred_oof = np.mean(preds_oof, 0)

        losses, avg_loss = rsna_loss(pred_oof, df_oof)
        print(f'\n- Val Score : {avg_loss :.3f}')

        pred_oof_ = pred_oof.copy()
        for i in range(pred_oof.shape[1]):
            pred_oof_[:, i] *= factors[i][0]
            pred_oof_[:, i] += factors[i][1]
            pred_oof_[:, i] = np.clip(pred_oof_[:, i], 0.00001, 0.99999)

        losses, avg_loss = rsna_loss(pred_oof_, df_oof)
        print(f'- Rescaled val Score : {avg_loss :.3f}')

        rescaled_preds.append(pred_oof_)
        df_oofs.append(df_oof)

#     break

In [None]:
# losses, avg_loss = rsna_loss(np.concatenate(rescaled_preds), pd.concat(df_oofs))

# for k, v in losses.items():
#     print(f"- {k.split('_')[0][:8]} loss\t: {v:.3f}")
    
# print(f'\n -> CV Score : {avg_loss :.3f}')

In [None]:
# from inference.lvl2 import to_sub_format

# preds = np.concatenate(rescaled_preds).astype(np.float64)
# df = pd.concat(df_oofs)

# for i in range(preds.shape[1]):
#     df[f'pred_{i}'] = preds[:, i]

# # dfg = df.drop(['series', 'path', 'frame', 'patient_id'], axis=1).groupby('patient').mean().reset_index()
# sub = to_sub_format(df.rename(columns={"patient_id": "patient"}))
# sub.to_csv(f'../output/submission_{int(avg_loss * 1000)}.csv', index=False)

# print('-> Saved to', f'../output/submission_{int(avg_loss * 1000)}.csv')

# sub.head()

Done ! 