**About**: This notebook is used to train the level-2 RNN models.

In [None]:
# Enable IPython's autoreload extension in mode 2, so edited project modules
# are re-imported before each cell execution without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
cd ../src/

### Imports

In [None]:
import os
import sys
import glob
import json
import torch
import operator
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm
from sklearn.metrics import *
from collections import Counter
from scipy.stats import spearmanr

# Globally ignore UserWarning and FutureWarning for the rest of the session
# (presumably to keep library notices out of the cell outputs).
warnings.simplefilter(action="ignore", category=UserWarning)
warnings.simplefilter("ignore", FutureWarning)

In [None]:
from util.logger import (
    prepare_log_folder,
    save_config,
    create_logger,
)

from data.dataset import FeatureDataset
from params import *
from data.preparation import *
from util.logger import Config as ConfigInf
from training.main_lvl2 import k_fold
from util.metrics import *

In [None]:
from model_zoo.models_lvl2 import define_model
from training.losses import StudyLoss
from util.metrics import rsna_loss

### Data

In [None]:
# Build the level-2 dataframe used to inspect the features and the model below.
df = prepare_data_lvl2()

# When the prepared frame carries no "fold" column, left-join the fold
# assignments from the folds file (pandas picks the shared columns as keys).
if "fold" not in df.columns:
    folds = pd.read_csv("../input/train_folded_v1.csv")
    df = pd.merge(df, folds, how="left")

In [None]:
# df = df[df['fold'] == 0].reset_index(drop=True)

In [None]:
# Feature sources handed to FeatureDataset: key -> experiment log folder or
# saved prediction file. Commented-out entries are earlier alternatives kept
# for reference together with their scores.
exp_folders = {
    # --- scs crop coords ---
    # "scs_crop_coords": "../logs/2024-09-12/1/",  # 5f -0.005 scs
    # "scs_crop_coords_2": "../logs/2024-10-02/2/",  # 3f -0.005 scs
    "scs_crop_coords": "../logs/2024-10-04/34/",  # 5f -0.005 scs
    "scs_crop_coords_2": "../logs/2024-10-04/37/",  # 3f

    # --- external predictions ---
    # "dh": "../output/oof____cfg_dh_15c.pth",  # Darragh preds
    "dh": "../output/oof____cfg_dh_12y8.pth",  # Darragh preds
    "dh_2": "../output/oof____cfg_dh_29a2.pth",  # Darragh preds
    "dh_3": "../output/oof____cfg_dh_29g_seed1.pth",  # Darragh preds ax scs
    # "dh_4": "../output/oof____cfg_dh_29k_seed0.pth",  # Darragh preds ax nfn
    "ch": "../output/oof_cfg_ch_35.pth",  # Dieter preds all

    # --- crop models ---
    # "crop_2": "../logs/2024-09-13/7/",  # coatnet side
    "crop_2": "../logs/2024-10-04/9/",  # coatnet frameflip  - 0.3842 tta / 0.3843

    # "crop": "../logs/2024-09-14/5/",  # coatnet side  - 0.3848
    "crop": "../logs/2024-10-04/1/",  # coatnet side fix  - 0.3836 <---- best
    # "crop": "../logs/2024-10-04/31/",  # coatnet side fix frameflip - 0.3843

    # "crop": "../logs/2024-10-04/10/",  # coatnet vflip  - 0.3846 tta / 0.3849
    # "crop_ax": "../logs/2024-09-30/6/",
    # "crop_2": "../logs/2024-09-30/1/",
}

In [None]:
# Load the config saved alongside the "crop" experiment. The file is opened in
# a `with` block so the handle is closed as soon as the JSON has been parsed.
with open(exp_folders["crop"] + "config.json", "r") as config_file:
    config = ConfigInf(json.load(config_file))

In [None]:
# Peek at the first row to check which columns are available.
df.head(1)

In [None]:
# Dataset built from the dataframe and the sources listed in exp_folders, with CLASSES as targets.
dataset = FeatureDataset(df, exp_folders, targets=CLASSES)

In [None]:
# Fetch the first sample only: one item is enough to inspect the feature
# tensors and smoke-test the model in the cells below. Indexing directly also
# fails right here on an empty dataset instead of leaving `fts` undefined.
fts, y, _ = dataset[0]

In [None]:
# List every feature key of the sample together with the size of its tensor.
for name in fts:
    feature = fts[name]
    print(name, feature.size())

In [None]:
# plt.figure(figsize=(20, 5))
# plt.subplot(1, 3, 1)
# for i in range(5):
#     plt.plot(fts['ss_aux'].softmax(1)[:, i], label=LEVELS[i])
# plt.legend()

# plt.subplot(1, 3, 2)
# for i in [1, 2]:
#     plt.plot(fts['ss'][:, i], label=f'left_{i}')
# for i in [4, 5]:
#     plt.plot(fts['ss'][:, i], label=f'right_{i - 3}')
# plt.legend()

# # plt.subplot(1, 3, 3)
# # for i in range(5):
# #     plt.plot(fts['ss'][:, 1, i], label=LEVELS[i])
# # plt.legend()

# plt.show()

In [None]:
# for k in fts:
#     print(k, fts[k].shape)

### Model

In [None]:
# Instantiate the "simple" level-2 model for a quick smoke test.
# NOTE: ft_dim is hard-coded here; [18, 12, 12] is what Config.ft_dim in the
# Training section evaluates to for the current exp_folders (order: scs, nfn, ss
# per the comment there) - keep the two in sync when feature sources change.
model = define_model(
    name="simple",
    num_classes=len(CLASSES) * 3,
    ft_dim=[18, 12, 12],
)

In [None]:
# Add a leading dimension of size 1 to every feature tensor to form a one-sample batch.
x = {k: fts[k].unsqueeze(0) for k in fts}

In [None]:
# Forward pass on the one-sample batch; the model's second output is discarded.
pred, _ = model(x)
# Display the output size as a sanity check.
pred.size()

In [None]:
# l = StudyLoss()
# l(pred, y.unsqueeze(0))

In [None]:
# rsna_loss(y.unsqueeze(0).numpy(), pred.softmax(2).detach().numpy())

### Training


In [None]:
class Config:
    """
    Parameters used for training.

    Plain class-attribute namespace: the class itself is passed to
    `save_config` and `k_fold`, and `targets` / `folds_file` are also read
    directly by the data-loading and evaluation cells.
    """
    # General
    seed = 42
    verbose = 1
    device = "cuda"
    save_weights = True
    targets = CLASSES

    # Data
    # Feature sources: key -> experiment log folder or saved prediction file.
    # Commented-out entries are earlier alternatives kept with their scores.
    exp_folders = {
        # "scs_crop_coords": "../logs/2024-09-12/1/",  # 5f -0.005 scs
        # "scs_crop_coords_2": "../logs/2024-10-02/2/",  # 3f -0.005 scs

        "scs_crop_coords": "../logs/2024-10-04/34/",  # 5f -0.005 scs
        "scs_crop_coords_2": "../logs/2024-10-04/37/",  # 3f

        # "dh": '../output/oof____cfg_dh_15c.pth',  # Darragh preds
        "dh": '../output/oof____cfg_dh_12y8.pth',  # Darragh preds
        "dh_2": "../output/oof____cfg_dh_29a2.pth",  # Darragh preds
        "dh_3": "../output/oof____cfg_dh_29g_seed1.pth",  # Darragh preds ax scs
        # "dh_4": "../output/oof____cfg_dh_29k_seed0.pth",  # Darragh preds ax nfn
        "ch": '../output/oof_cfg_ch_35.pth',  # Dieter preds all

        # "crop_2": "../logs/2024-09-13/7/",  # coatnet side
        "crop_2": "../logs/2024-10-04/9/",   # coatnet frameflip  - 0.3842 tta / 0.3843

        # "crop": "../logs/2024-09-14/5/",   # coatnet side  - 0.3848
        "crop": "../logs/2024-10-04/1/",   # coatnet side fix  - 0.3836 <---- best
        # "crop": "../logs/2024-10-04/31/",  # coatnet side fix frameflip - 0.3843

        # "crop": "../logs/2024-10-04/10/",   # coatnet vflip  - 0.3846 tta / 0.3849
        # "crop_ax": "../logs/2024-09-30/6/",
        # "crop_2": "../logs/2024-09-30/1/",
    }
    n_fts = 0
    resize = 0
    remove_noisy = False

    # k-fold
    k = 4
    folds_file = "../input/train_folded_v1.csv"  # f"../input/folds_{k}.csv"
    selected_folds = [0, 1, 2, 3]

    # Model
    name = "simple"
    dense_dim = 4096
    layer_dim = 0
    # Shared feature count: 6 if a "crop" source is present, plus 3 each for
    # the "dh" and "ch" sources (presence of the exact key, not a count).
    ft = 6 * ("crop" in exp_folders) + 3 * ("dh" in exp_folders) + 3 * ("ch" in exp_folders) # + 3
    # Per-branch input size: the shared count plus 3 for every source whose key
    # contains the branch tag. With the folders above this is [18, 12, 12].
    ft_dim = [
        ft + 3 * len([k for k in exp_folders if "scs" in k]),
        ft + 3 * len([k for k in exp_folders if "nfn" in k]),
        ft + 3 * len([k for k in exp_folders if "ss" in k]),
    ]  # scs, nfn, ss

    p = 0.
    num_classes = len(CLASSES) * 3
    num_classes_aux = 0

    # Training
    loss_config = {
        "name": "study",
        "weighted": True,
        "use_any": True,
        "smoothing": 0,
        "activation": "study",
        "aux_loss_weight": 0,
        "name_aux": "",
        "smoothing_aux": 0,
        "activation_aux": "",
    }

    data_config = {
        "batch_size": 128,
        "val_bs": 512,
        "mix": "mixup",
        "mix_proba": 0.,
        "sched": False,
        "mix_alpha": 4.,
        "additive_mix": False,
        "num_classes": num_classes,
        "num_classes_aux": num_classes_aux,
        "num_workers": 8,
    }

    optimizer_config = {
        "name": "AdamW",
        "lr": 7e-5,  # 5e-5
        "warmup_prop": 0.,
        "betas": (0.9, 0.999),
        "max_grad_norm": 1.,
        "weight_decay": 1,
    }

    epochs = 15

    use_fp16 = True
    # `verbose` is defined once, in the General section above.
    verbose_eval = 20

    # The training cell sets fullfit to True for non-debug runs.
    fullfit = False
    n_fullfit = 1

    # Distributed settings (single process by default)
    local_rank = 0
    distributed = False
    world_size = 1

In [None]:
# When DEBUG is True the training cell skips log-folder creation, config saving and logger setup.
DEBUG = True
# Replaced by a real folder in the training cell only when DEBUG is False.
log_folder = None

In [None]:
# Rebuild the level-2 dataframe from scratch for training.
df = prepare_data_lvl2()

# When the prepared frame carries no "fold" column, left-join the fold
# assignments from the file configured in Config.folds_file.
if "fold" not in df.columns:
    folds = pd.read_csv(Config.folds_file)
    df = pd.merge(df, folds, how="left")

# df = df[~df['study_id'].isin([1215498865, 1647904243, 2570933394, 2761048584, 3284652867, 3941522676])].reset_index(drop=True)
# df['any_severe'] = (df[df.columns[3:8]] == 2).max(1)
# df = pd.concat([df, df[df['any_severe']]], ignore_index=True)

In [None]:
# Non-debug run: create a log folder, save the config there and start a file logger.
if not DEBUG:
    log_folder = prepare_log_folder(LOG_PATH)
    print(f"Logging results to {log_folder}")
    config_df = save_config(Config, log_folder + "config.json")
    create_logger(directory=log_folder, name="logs.txt")

    # NOTE(review): fullfit is switched on after save_config, so the saved
    # config.json presumably still records fullfit=False. This also mutates the
    # Config class for the rest of the session - confirm both are intended.
    Config.fullfit = True

# Launch k-fold training; log_folder is only replaced by the branch above.
preds = k_fold(Config, df, log_folder=log_folder, run=None)

### Eval

In [None]:
# Load predictions saved by an earlier run (presumably out-of-fold, given the file name).
# NOTE(review): this overwrites the `preds` returned by k_fold with the content
# of a hard-coded log folder - confirm this is the run you want to score.
preds = np.load('../logs/2024-10-07/0/pred_oof.npy')
# preds_2 = np.load('../logs/2024-10-08/1/pred_oof.npy')
# preds = (preds + preds_2) / 2

In [None]:
# Score `preds` against the target columns of df; rsna_loss returns the
# averaged loss together with a per-key breakdown.
avg_loss, losses = rsna_loss(df[Config.targets].values, preds, verbose=1)

# Per-key losses first, then the overall CV score.
for k, v in losses.items():
    print(f"- {k}_loss\t: {v:.3f}")

print(f'\n -> CV Score : {avg_loss :.4f}')

In [None]:
# Score a second set of saved predictions for comparison.
# NOTE(review): this path is the same file loaded into `preds` in the Eval
# section, so as written this cell reproduces the same score - point it at
# another run to compare.
preds_2 = np.load('../logs/2024-10-07/0/pred_oof.npy')

avg_loss, losses = rsna_loss(df[Config.targets].values, preds_2, verbose=1)

# Per-key losses first, then the overall CV score.
for k, v in losses.items():
    print(f"- {k}_loss\t: {v:.3f}")

print(f'\n -> CV Score : {avg_loss :.4f}')

In [None]:
# Quick look at the dataframe the scores above were computed against.
df.head()

In [None]:
# for i in range(5):
#     plt.figure(figsize=(14, 5))
#     for tgt in [1, 2]:
#         plt.subplot(1, 2, tgt)
#         sns.histplot(np.round(preds[:, i, tgt], 1))
#         plt.title(f'{LEVELS[i]} - {tgt} - {(df[Config.targets].values[:, i] == tgt).mean() :.3f}')
#         plt.xlim(-0.01, 1.01)
#     plt.show()

In [None]:
# df['any_severe'] = (df[df.columns[3:8]] == 2).max(1)
# ids = df[df['any_severe']].index.values

In [None]:
# avg_loss, losses = rsna_loss(
#     np.concatenate([df[Config.targets].values, df[Config.targets].values[ids]]),
#     np.concatenate([preds, preds[ids]]),
# )

# for k, v in losses.items():
#     print(f"- {k}_loss\t: {v:.3f}")

# print(f'\n -> CV Score : {avg_loss :.4f}')

Done!