**About**: This notebook trains the second-level (lvl2) RNN models on the features saved by earlier experiments.

In [6]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
cd ../src/

/home/tviel/work/kaggle_rsna_lumbar_spine/src


### Imports

In [8]:
import os
import sys
import glob
import json
import torch
import operator
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm
from sklearn.metrics import *
from collections import Counter

warnings.simplefilter(action="ignore", category=UserWarning)

In [9]:
from util.logger import (
    prepare_log_folder,
    save_config,
    create_logger,
)

from data.dataset import FeatureDataset
from params import *
from data.preparation import *
from util.logger import Config as ConfigInf
from training.main_lvl2 import k_fold

### Data

In [10]:
# Load the dataframe used by the level-2 pipeline (the preview below shows one
# row per study_id, with its series lists and the per-level label columns).
df = prepare_data_lvl2()

In [11]:
# Attach the cross-validation fold ids when the prepared frame does not carry
# them yet. The left merge keeps every row of df.
if "fold" not in df:
    folds = pd.read_csv("../input/folds_4.csv")
    df = pd.merge(df, folds, how="left")

In [12]:
# Preview a few rows to check the label columns and the fold assignment.
df.head(3)

Unnamed: 0,study_id,series_id,series_description,spinal_canal_stenosis_l1_l2,spinal_canal_stenosis_l2_l3,spinal_canal_stenosis_l3_l4,spinal_canal_stenosis_l4_l5,spinal_canal_stenosis_l5_s1,left_neural_foraminal_narrowing_l1_l2,left_neural_foraminal_narrowing_l2_l3,...,left_subarticular_stenosis_l2_l3,left_subarticular_stenosis_l3_l4,left_subarticular_stenosis_l4_l5,left_subarticular_stenosis_l5_s1,right_subarticular_stenosis_l1_l2,right_subarticular_stenosis_l2_l3,right_subarticular_stenosis_l3_l4,right_subarticular_stenosis_l4_l5,right_subarticular_stenosis_l5_s1,fold
0,4003253,"[702807833, 1054713880, 2448190387]","[Sagittal T2/STIR, Sagittal T1, Axial T2]",0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
1,4646740,"[3201256954, 3486248476, 3666319702]","[Axial T2, Sagittal T1, Sagittal T2/STIR]",0,0,1,2,0,0,0,...,0,0,2,0,0,1,1,1,0,0
2,7143189,"[132939515, 1951927562, 3219733239]","[Sagittal T2/STIR, Axial T2, Sagittal T1]",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,3


In [13]:
# Log folder of the experiment that provides each feature group.
# "ss" and "ss_aux" point at the same experiment folder.
EXP_FOLDERS = dict(
    nfn="../logs/2024-08-05/27/",
    scs="../logs/2024-08-04/33/",
    ss="../logs/2024-08-06/17/",  # NEEDS IMPROVEMENT
    ss_aux="../logs/2024-08-06/17/",
    scs_crop="../logs/2024-08-07/19/",
    nfn_crop="../logs/2024-08-07/32/",
)

In [14]:
# Load the configuration saved with the "nfn" experiment.
# The context manager closes the file handle once the JSON is parsed; the
# original bare open() call never closed it.
with open(EXP_FOLDERS["nfn"] + "config.json", "r") as config_file:
    config = ConfigInf(json.load(config_file))

In [15]:
# Fallback: attach the folds listed in the loaded experiment config.
# NOTE(review): if the earlier cell already added a "fold" column this branch
# never runs; also, this is a default (inner) merge whereas the earlier cell
# uses how="left" - confirm which behaviour is intended.
if "fold" not in df:
    folds = pd.read_csv(config.folds_file)
    df = pd.merge(df, folds)

In [16]:
# Check that the frame still has its fold column.
df.head(1)

Unnamed: 0,study_id,series_id,series_description,spinal_canal_stenosis_l1_l2,spinal_canal_stenosis_l2_l3,spinal_canal_stenosis_l3_l4,spinal_canal_stenosis_l4_l5,spinal_canal_stenosis_l5_s1,left_neural_foraminal_narrowing_l1_l2,left_neural_foraminal_narrowing_l2_l3,...,left_subarticular_stenosis_l2_l3,left_subarticular_stenosis_l3_l4,left_subarticular_stenosis_l4_l5,left_subarticular_stenosis_l5_s1,right_subarticular_stenosis_l1_l2,right_subarticular_stenosis_l2_l3,right_subarticular_stenosis_l3_l4,right_subarticular_stenosis_l4_l5,right_subarticular_stenosis_l5_s1,fold
0,4003253,"[702807833, 1054713880, 2448190387]","[Sagittal T2/STIR, Sagittal T1, Axial T2]",0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0


In [17]:
# Build the feature dataset from the dataframe and the per-model experiment
# folders, using every class as a target.
dataset = FeatureDataset(
    df,
    EXP_FOLDERS,
    resize=15,
    targets=CLASSES,
)

In [18]:
# Smoke test: fetch one sample and stop (the `break` ends the loop after the
# first item). Remove the `break` to iterate over the whole dataset.
for i in tqdm(range(len(dataset))):
    fts, y, _ = dataset[i]
    break

  0%|          | 0/1975 [00:00<?, ?it/s]

In [19]:
# Keep one sample (features dict + targets) for the shape and model checks below.
idx = 0
fts, y, _ = dataset[idx]

In [20]:
# Show the tensor shape of every feature group in the sample.
for k in fts.keys():
    print(k, fts[k].shape)

nfn torch.Size([15, 15])
scs torch.Size([15, 15])
ss torch.Size([30, 6])
ss_aux torch.Size([30, 5])
scs_crop torch.Size([15])
nfn_crop torch.Size([30])


In [21]:
# plt.figure(figsize=(20, 5))
# plt.subplot(1, 3, 1)
# for i in range(5):
#     plt.plot(fts['ss_aux'].softmax(1)[:, i], label=LEVELS[i])
# plt.legend()

# plt.subplot(1, 3, 2)
# for i in [1, 2]:
#     plt.plot(fts['ss'][:, i], label=f'left_{i}')
# for i in [4, 5]:
#     plt.plot(fts['ss'][:, i], label=f'right_{i - 3}')
# plt.legend()

# # plt.subplot(1, 3, 3)
# # for i in range(5):
# #     plt.plot(fts['ss'][:, 1, i], label=LEVELS[i])
# # plt.legend()

# plt.show()

In [22]:
# for k in fts:
#     print(k, fts[k].shape)

### Model

In [23]:
from model_zoo.models_lvl2 import define_model
from training.losses import StudyLoss
from util.metrics import rsna_loss

In [24]:
# Instantiate the level-2 model. num_classes is 3 outputs per target column;
# the forward pass below returns a (1, 25, 3) tensor for one sample.
model = define_model(
    name="baseline",
    num_classes=len(CLASSES) * 3
)

In [25]:
# Add a leading batch dimension to every feature tensor of the sample.
x = {name: tensor.unsqueeze(0) for name, tensor in fts.items()}

In [26]:
# Forward pass on the one-sample batch; the second return value is unused here.
pred, _ = model(x)
pred.size()

torch.Size([1, 25, 3])

In [27]:
# Sanity-check the training loss on the one-sample batch.
l = StudyLoss()
l(pred, y.unsqueeze(0))

tensor(0.9375, grad_fn=<DivBackward0>)

In [28]:
# Cross-check with the evaluation metric, called on numpy arrays: batched
# targets and class probabilities (softmax over dim 2 of the model output).
rsna_loss(y.unsqueeze(0).numpy(), pred.softmax(2).detach().numpy())

(0.9375256299972534,
 {'scs': 1.110085,
  'nfn': 1.0883645,
  'ss': 1.1093023,
  'any': 0.44235074520111084})

### Training

In [86]:
class Config:
    """
    Parameters used for training.

    Plain class-attribute namespace: the training cell serializes it with
    ``save_config`` and passes the class itself to ``k_fold``.
    """
    # General
    seed = 42
    verbose = 1
    device = "cuda"
    save_weights = True
    targets = CLASSES

    # Data
    # Log folder of the experiment that provides each feature group.
    exp_folders = {
        "nfn": "../logs/2024-08-05/27/",
        "scs": "../logs/2024-08-04/33/",
        "ss": "../logs/2024-08-06/17/",  # NEEDS IMPROVEMENT
        "ss_aux": "../logs/2024-08-06/17/",
        # Previous crop runs, kept for reference:
        # "scs_crop": "../logs/2024-08-07/19/",
        # "nfn_crop": "../logs/2024-08-07/32/",
        "scs_crop": "../logs/2024-08-08/7/",
        "nfn_crop": "../logs/2024-08-08/6/",
    }
    n_fts = 1
    # NOTE(review): the exploratory FeatureDataset cell uses resize=15 while
    # training uses 10 - confirm the difference is intentional.
    resize = 10

    # k-fold
    k = 4
    folds_file = f"../input/folds_{k}.csv"  # resolves to ../input/folds_4.csv
    selected_folds = [0, 1, 2, 3]

    # Model
    name = "baseline"
    dense_dim = 1024
    layer_dim = 8
    ft_dim = 0

    p = 0.1
    num_classes = len(CLASSES) * 3  # three outputs per target column
    num_classes_aux = 0

    # Training
    loss_config = {
        "name": "study",
        "weighted": True,
        "use_any": True,
        "smoothing": 0,
        "activation": "study",
        "aux_loss_weight": 0,
        "name_aux": "",
        "smoothing_aux": 0,
        "activation_aux": "",
    }

    data_config = {
        "batch_size": 64,
        "val_bs": 512,
        "mix": "mixup",
        "mix_proba": 0.,
        "sched": False,
        "mix_alpha": 4.,
        "additive_mix": False,
        "num_classes": num_classes,
        "num_classes_aux": num_classes_aux,
        "num_workers": 8,
    }

    optimizer_config = {
        "name": "AdamW",
        "lr": 1e-4,
        "warmup_prop": 0.,
        "betas": (0.9, 0.999),
        "max_grad_norm": 1.,
        # NOTE(review): 1 is an unusually large weight decay - confirm intended.
        "weight_decay": 1,
    }

    epochs = 20

    use_fp16 = True
    # `verbose` is set once in the "General" section; the duplicate assignment
    # that used to live here was removed (same value, so behaviour is unchanged).
    verbose_eval = 20

    fullfit = False
    n_fullfit = 1

    # Single-process defaults (no distributed training).
    local_rank = 0
    distributed = False
    world_size = 1

In [87]:
# DEBUG = True makes the training cell skip log-folder creation, config saving
# and logger setup.
DEBUG = True
log_folder = None  # passed unchanged to k_fold when DEBUG is True

In [88]:
# Only persist artifacts for real runs: with DEBUG set, no log folder is
# created and `log_folder` keeps the value assigned in the previous cell.
if not DEBUG:
    log_folder = prepare_log_folder(LOG_PATH)
    print(f"Logging results to {log_folder}")
    config_df = save_config(Config, log_folder + "config.json")
    create_logger(directory=log_folder, name="logs.txt")

# Run the k-fold training; `preds` is scored against df[Config.targets] in the
# Eval section.
# NOTE(review): presumably out-of-fold predictions aligned with df's rows -
# confirm in training.main_lvl2.k_fold.
preds = k_fold(Config, df, log_folder=log_folder, run=None)


-------------   Fold 1 / 4  -------------

    -> 1481 training studies
    -> 494 validation studies
    -> 5354443 trainable parameters

Epoch 01/20 (step 0020) 	lr=9.6e-05 	 t=1s 	 loss=0.743    scs_loss=0.594    nfn_loss=0.668    ss_loss=0.810    any_loss=0.427	 val_loss=0.625
Epoch 02/20 (step 0040) 	lr=9.2e-05 	 t=1s 	 loss=0.577    scs_loss=0.510    nfn_loss=0.613    ss_loss=0.722    any_loss=0.386	 val_loss=0.557
Epoch 03/20 (step 0060) 	lr=8.7e-05 	 t=1s 	 loss=0.514    scs_loss=0.477    nfn_loss=0.596    ss_loss=0.698    any_loss=0.392	 val_loss=0.541
Epoch 04/20 (step 0080) 	lr=8.3e-05 	 t=1s 	 loss=0.506    scs_loss=0.450    nfn_loss=0.585    ss_loss=0.680    any_loss=0.379	 val_loss=0.523
Epoch 05/20 (step 0100) 	lr=7.8e-05 	 t=1s 	 loss=0.469    scs_loss=0.417    nfn_loss=0.570    ss_loss=0.677    any_loss=0.368	 val_loss=0.508
Epoch 06/20 (step 0120) 	lr=7.4e-05 	 t=1s 	 loss=0.460    scs_loss=0.390    nfn_loss=0.562    ss_loss=0.667    any_loss=0.373	 val_loss=0.498
Ep

### Eval

In [84]:
# Score the predictions against the ground-truth target columns.
# rsna_loss returns the overall loss and a dict of named component losses.
avg_loss, losses = rsna_loss(df[Config.targets].values, preds)

# Report each component loss, then the overall CV score.
for k, v in losses.items():
    print(f"- {k}_loss\t: {v:.3f}")

print(f'\n -> CV Score : {avg_loss :.3f}')

- scs_loss	: 0.322
- nfn_loss	: 0.516
- ss_loss	: 0.631
- any_loss	: 0.299

 -> CV Score : 0.442


- scs_loss	: 0.325
- nfn_loss	: 0.517
- ss_loss	: 0.634
- any_loss	: 0.297

 -> CV Score : 0.443

Done ! 