**About**: This notebook trains RNN models and evaluates their out-of-fold predictions.

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
cd ../src/

### Imports

In [None]:
import os
import sys
import glob
import json
import torch
import operator
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm import tqdm
from sklearn.metrics import *

warnings.simplefilter(action="ignore", category=UserWarning)

In [None]:
from util.logger import (
    prepare_log_folder,
    save_config,
    create_logger,
)

from params import *
from data.preparation import prepare_data
from inference.extract_features import Config as ConfigInf
from training.main_lvl2 import k_fold, retrieve_preds

### Data

In [None]:
# Build the two tables used throughout the notebook from DATA_PATH.
# NOTE(review): DATA_PATH is not defined in this notebook; presumably it comes from
# `from params import *` — confirm.
df_patient, df_img = prepare_data(DATA_PATH)

In [None]:
# Upstream experiments as (log folder, feature kind) pairs.
# The numbers in the trailing comments are the author's recorded scores for each run.
EXP_FOLDERS = [
    ("../logs/2023-09-20/14/", "seg"),
    ("../logs/2023-09-20/36_r/", "probas_2d"),  # 0.358 - convnext-tiny (best)
    ("../logs/2023-10-05/13/", "probas_2d"),  # 0.353 - maxvit_tiny_tf_384 (0.347 when combined with 36_r)
]
# Folder of the first experiment; its config.json is loaded in the next cell.
EXP_FOLDER = EXP_FOLDERS[0][0]

In [None]:
# Load the reference experiment's saved configuration.
# The context manager closes the file handle deterministically instead of leaving
# it open until garbage collection.
with open(EXP_FOLDER + "config.json", "r") as config_file:
    config = ConfigInf(json.load(config_file))

In [None]:
# Attach the fold assignment to both tables, unless df_patient already carries a
# "fold" column (which keeps this cell safe to re-run).
if "fold" not in df_patient.columns:
    folds = pd.read_csv(config.folds_file)
    # merge() without `on` joins on the columns the two frames have in common.
    df_patient, df_img = df_patient.merge(folds), df_img.merge(folds)

### Training

In [None]:
class Config:
    """
    Parameters used for training.

    Plain namespace of class attributes. The class itself (not an instance) is
    passed to `save_config` and `k_fold` in the training cell.
    """
    # General
    seed = 42
    verbose = 1
    device = "cuda"
    save_weights = True

    # Data
    # (log folder, feature kind) pairs. The kind strings drive `n_fts` and `ft_dim` below.
    exp_folders = [
        ("../logs/2023-09-20/14/", "seg"),  # v2-rw-t stride+ 384
        ("../logs/2023-10-10/27/", "probas_2d"),  # 0.346  - maxvit_tiny_tf_512   <-
        ("../logs/2023-10-05/31/", "crop"),  # coatnet_1_rw_224 -1 11       -> 0.320  +0.0016
    ]

    restrict = True
    max_len = 600
    resize = 200
    # 9 per experiment whose kind contains "crop" (9 with the list above).
    n_fts = 9 * len([exp for exp in exp_folders if "crop" in exp[1]])

    # k-fold
    k = 4
    folds_file = f"../input/folds_{k}.csv"
    selected_folds = [0, 1, 2, 3]

    # Model
    name = "rnn_att"
    # 22 per "probas" experiment + 4 per "bowel_extrav" experiment + a constant 5
    # (27 with the list above).
    ft_dim = (
        22 * len([exp for exp in exp_folders if "probas" in exp[1]]) +
        4 * len([exp for exp in exp_folders if "bowel_extrav" in exp[1]]) +
        5
    )

    dense_dim = 384  # 384
    layer_dim = 256
    n_layers = 1

    p = 0.
    num_classes = 11
    num_classes_aux = 0

    # Training
    loss_config = {
        "name": "patient",
        "weighted": True,
        "use_any": True,
        "accentuate": False,
        "smoothing": 0,
        "activation": "patient",
        "aux_loss_weight": 0,
        "name_aux": "patient",
        "smoothing_aux": 0,
        "activation_aux": "",
    }

    data_config = {
        "batch_size": 64,
        "val_bs": 256,
        "mix": "mixup",
        "mix_proba": 0.,
        "sched": False,
        "mix_alpha": 4.,
        "additive_mix": False,
        "num_classes": num_classes,
        "num_workers": 8,
    }

    optimizer_config = {
        "name": "AdamW",
        "lr": 4e-4,  # 4e-4, 5e-4
        "warmup_prop": 0.,
        "betas": (0.9, 0.999),
        "max_grad_norm": 10.,
        "weight_decay": 0.,
    }

    epochs = 10

    use_fp16 = True
    # `verbose` is defined once, in the General section above; the second identical
    # assignment that used to sit here was redundant.
    verbose_eval = 50

    fullfit = False
    n_fullfit = 1

    local_rank = 0
    distributed = False
    world_size = 1

In [None]:
# With DEBUG = True the training cell skips its `if not DEBUG:` setup (log folder,
# saved config, logger), so log_folder stays None when passed to k_fold.
DEBUG = True
log_folder = None

In [None]:
# Outside debug mode: create a log folder under LOG_PATH, save Config next to it as
# config.json and set up logging (create_logger with name="logs.txt").
if not DEBUG:
    log_folder = prepare_log_folder(LOG_PATH)
    print(f"Logging results to {log_folder}")
    # NOTE(review): config_df is not used again in this notebook.
    config_df = save_config(Config, log_folder + "config.json")
    create_logger(directory=log_folder, name="logs.txt")

# Runs in both modes; log_folder is None when DEBUG is True.
# NOTE(review): presumably k_fold writes nothing to disk in that case — confirm in training.main_lvl2.
preds, preds_aux = k_fold(Config, df_patient, df_img, log_folder=log_folder, run=None)

### Eval

In [None]:
# Log folders of the trained runs to evaluate and average.
# NOTE: this rebinds EXP_FOLDERS. In the Data section the same name held
# (folder, kind) tuples; here each entry is a plain folder path.
# Trailing comments are the author's run notes (score - model - upstream experiment ids).
EXP_FOLDERS = [
    # SUB 1 - 0.3096
    "../logs/2023-10-10/45/",   # 0.3111  - rnn_att - 36_r 25 27   - 11 36 41 6 20 21 31 31    <------ BEST LB 0.35
    "../logs/2023-10-10/42/",   # 0.3120  - rnn_att - 25 27   - 11 36 41 6 20 21 31 31
    "../logs/2023-10-10/46/",   # 0.3126  - rnn_att - 36_r 27   - 11 36 41 6 20 21 31 31
]

In [None]:
# Collect the out-of-fold predictions of every run in EXP_FOLDERS, print each run's
# score, then average the predictions across runs.
preds_oof = []
for exp_folder in EXP_FOLDERS:
    # Close each config file deterministically rather than leaking one handle per run.
    with open(exp_folder + "config.json", "r") as config_file:
        cfg = ConfigInf(json.load(config_file))

    # e[8:] drops the first 8 characters of each folder path, i.e. the "../logs/"
    # prefix (assumes every entry of cfg.exp_folders starts with it).
    print(
        exp_folder, " --> ",
        "proba :",
        " - ".join(e[8:] for e, m in cfg.exp_folders if "proba" in m),
        "\t crop : ",
        " - ".join(e[8:] for e, m in cfg.exp_folders if "crop" in m),
    )

    df_oof, pred_oof = retrieve_preds(
        df_patient,
        df_img,
        cfg,
        exp_folder,
        custom_agg=False,
#         folds=[0]
    )
    preds_oof.append(pred_oof)

    losses, avg_loss = rsna_loss(pred_oof, df_oof)
    print(f'-> CV Score : {avg_loss :.4f}\n')

#     df_oof.to_csv(exp_folder + 'df_oof.csv', index=False)

# Element-wise mean over runs. df_oof from the last iteration is reused by the next
# cell as the reference table for the averaged predictions.
pred_oof = np.mean(preds_oof, 0)

In [None]:
# Score the run-averaged predictions and print one line per entry of `losses`,
# followed by the overall score.
losses, avg_loss = rsna_loss(pred_oof, df_oof)

for target, loss_value in losses.items():
    # Label: the part of the key before the first underscore, cut to 8 characters.
    short_name = target.split('_')[0][:8]
    print(f"- {short_name} loss\t: {loss_value:.3f}")

print(f'\n -> CV Score : {avg_loss :.4f}')

Done!