**About** : This notebook is used to train models.

In [None]:
# Reload imported modules automatically before each cell runs, so edits to the
# project's .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
cd ../src/

## Initialization

### Imports

In [None]:
import os

# Restrict this process to GPU 0. Done before importing torch so the variable
# is guaranteed to be set before CUDA gets initialised.
os.environ['CUDA_VISIBLE_DEVICES'] = "0"

import torch

print(torch.__version__)
# Name of the visible GPU (a string, not a torch.device); this call fails when
# no CUDA device is available, which makes the cell an early sanity check.
device = torch.cuda.get_device_name(0)
print(device)

In [None]:
import os
import sys
import glob
import torch
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm import tqdm
from sklearn.metrics import *

# Widen pandas' rendering for wide tables. Full option keys are used
# throughout: shorthand keys are resolved by pattern matching and can become
# ambiguous when pandas adds similarly named options.
pd.set_option('display.width', 1000)
pd.set_option('display.max_columns', 30)
pd.set_option('display.max_colwidth', 100)

In [None]:
from training.main import k_fold

from util.logger import (
    prepare_log_folder,
    save_config,
    create_logger,
    init_neptune
)
from util.torch import init_distributed

from params import *
from data.dataset import *
from data.preparation import *
from data.transforms import get_transfos

from model_zoo.models import define_model
from training.losses import *

## Data
- Frame level label :
    - Ok for extravasation & bowel
    - Use seg for other classes

In [None]:
# Build the two tables used throughout the notebook from DATA_PATH
# (presumably one row per patient and one row per frame - confirm in
# data.preparation.prepare_data).
df_patient, df_img = prepare_data(DATA_PATH)

In [None]:
# df_seg = pd.read_csv('../input/df_seg.csv')
# df_img = df_img.merge(df_seg.drop('img_path', axis=1), how="left", on=["patient_id", "series", "frame"])

In [None]:
# dfg = df_img[["series", "pred_spleen", "spleen", "pred_liver", "liver", "pred_kidney", "kidney"]].groupby('series').max()  # , "pixel_count_liver"
# dfg[(dfg[["pred_spleen", "pred_liver", "pred_kidney"]].min(1) < 0.9) & (dfg[["spleen", "liver", "kidney"]].max(1) > 0)]

In [None]:
# dfg = df_img[["series", "kidney", "liver", "spleen", "kidney_injury", "liver_injury", "spleen_injury"]].groupby('series').max()

# for c in ["kidney", "liver", "spleen"]:
#     print(c)
#     display(dfg[dfg[c] != dfg[f"{c}_injury"]])

In [None]:
# plt.figure(figsize=(20, 4))
# for i, c in enumerate(["extravasation_injury", "bowel_injury", "kidney_injury", "liver_injury", "spleen_injury"]):
#     plt.subplot(1, 5, i + 1)
#     sns.countplot(x=df_img[c])
#     plt.yscale('log')
    
#     pos_prop = (df_img[c] > 0).mean()
#     plt.title(f'{c.split("_")[0]} positives: {pos_prop*100 :.2f}%')
# plt.show()

In [None]:
# (df_img.drop_duplicates(subset="patient_id", keep="first")['patient_id'].values == df_patient['patient_id'].values).all()

In [None]:
# plt.figure(figsize=(30, 4))
# n = 5

# for idx, series in enumerate(np.random.choice(df_img[df_img['any_injury'] > 0]['series'], n)):
#     plt.subplot(1, n, idx + 1)

#     df_series = df_img[df_img['series'] == series].reset_index(drop=True)
#     plt.plot([0] * len(df_series), c="black")

#     for i, c in enumerate([col for col in df_series.columns if "injury" in col and "any" not in col]):
#         plt.scatter(
#             np.arange(len(df_series))[df_series[c] > 0],
#             [0.5 + i * 0.1] * (df_series[c] > 0).sum(),
#             label=c,
#             marker="."
#         )
#     plt.ylim(0, 1)
#     plt.title(f'Series {series}')
#     if idx == 0:
#         plt.legend(loc=3)
# plt.show()

- Loop over patients x 5 classes
 - If positive, sample in the middle of the frames where img_target is positive
 - Else, sample in the middle of the organ (for extravasation, use the bowel or the whole image - still undecided)
 - During training, sample somewhere around the middle
- Move to 2.5D soon

In [None]:
# Preprocessing pipeline with augmentation switched off, resizing to 512x512.
transforms = get_transfos(
    augment=False,
    resize=(512, 512),
    strength=3,
)

# Inspection dataset: the first 100 patients flagged with any injury,
# re-indexed from 0.
dataset = AbdominalDataset(
    df_patient.loc[df_patient["any_injury"] == 1].head(100).reset_index(drop=True),
    df_img,
    transforms=transforms,
    train=False,
    frames_chanel=1,
    use_soft_target=True,
)

In [None]:
# Draw one random item from the inspection dataset (unseeded, so the index
# changes on every run).
i = np.random.choice(len(dataset))
#     i = 0
# NOTE(review): this cell unpacks dataset[i] as (img, y_img, y_patient), while
# the preview loop in the next cell unpacks (img, y_patient, y_img). One of the
# two is mislabeled - confirm against AbdominalDataset.__getitem__.
img, y_img, y_patient = dataset[i]
# print()
# print(y_patient)
# print(y_img)

In [None]:
# Preview dataset items with their targets in the title. The trailing `break`
# stops after the first item; remove it to browse the first 10.
for i in range(10):
#     i = 0
    # NOTE(review): unpack order here is (img, y_patient, y_img), the opposite
    # of the sampling cell above, and the title pairs PATIENT_TARGETS with
    # y_img / IMG_TARGETS_EXTENDED with y_patient. The names bound here are
    # reused by the loss smoke test further down - confirm which order
    # AbdominalDataset.__getitem__ actually returns.
    img, y_patient, y_img = dataset[i]

    plt.figure(figsize=(8, 8))
    # Move axis 0 to the end for imshow (presumably (C, H, W) -> (H, W, C)).
    plt.imshow(img.numpy().transpose(1, 2, 0))
    
    plt.title(f"#{i}\n{PATIENT_TARGETS} - {y_img}\n{IMG_TARGETS_EXTENDED} - {y_patient}")
    plt.show()

    break

## Model

In [None]:
# Build a model for the smoke tests below. Note that this setup
# (tf_efficientnet_b5_ns, 11 auxiliary classes) differs from the training
# Config further down (tf_efficientnet_b0, num_classes_aux = 0).
model = define_model(
    'tf_efficientnet_b5_ns',
    num_classes=2,
    num_classes_aux=11,
    n_channels=3,
    reduce_stride=False,
    drop_path_rate=0.,
    use_gem=True
)

In [None]:
# Batch of two identical copies of the last loaded image, stacked along a new
# leading dimension.
x = torch.stack([img, img])

# The model returns the main prediction and the auxiliary prediction.
pred, pred_aux = model(x)

In [None]:
# Shapes of the main and auxiliary outputs.
pred.shape, pred_aux.shape
# y_patient.size(), y_img.size()

In [None]:
# Duplicate the targets of the last loaded item to match the 2-item batch
# built for the forward pass.
y_p = torch.stack([y_patient, y_patient])
y_i = torch.stack([y_img, y_img])

In [None]:
# Loss used for the smoke test: main "image" loss plus an auxiliary "patient"
# loss weighted by 0.5, both without label smoothing.
# NOTE(review): Config.loss_config further down also sets "activation",
# "activation_aux" and "ousm_k"; confirm AbdomenLoss does not require them.
loss = AbdomenLoss({
    "name": "image",
    "smoothing": 0,
    "aux_loss_weight": 0.5,
    "name_aux": "patient",
    "smoothing_aux": 0,
})

In [None]:
# Compare the main prediction shape with the shape of the target passed with it.
pred.shape, y_i.shape

In [None]:
# Evaluate the loss on the 2-item batch: main prediction, auxiliary prediction,
# then the y_i and y_p targets (argument order as AbdomenLoss is called here;
# which target is image-level depends on the unpack order used when loading
# the item - see the NOTE(review) comments if present, or check the dataset).
loss(pred, pred_aux, y_i, y_p)

In [None]:
# loss = PatientLoss()
# loss(pred_aux, y_p)

## Training

In [None]:
class Config:
    """
    Parameters used for training.

    Plain namespace of class attributes. The class itself (not an instance) is
    passed to `save_config`, `init_distributed` and `k_fold` in the launch cell
    of this notebook.
    """
    # General
    seed = 42
    verbose = 1
    device = "cuda"
    save_weights = True

    # Data
    resize = (512, 512)
    aug_strength = 1
    pos_prop = 0.1

    # k-fold
    k = 4
    folds_file = f"../input/folds_{k}.csv"  # evaluated once, from k above
    selected_folds = [0]  # , 1, 2, 3]

    # Model
    name = "tf_efficientnet_b0"
    pretrained_weights = None  # PRETRAINED_WEIGHTS[name]  # None

    num_classes = 2
    num_classes_aux = 0
    drop_rate = 0
    drop_path_rate = 0
    n_channels = 3
    reduce_stride = False
    replace_pad_conv = False
    use_gem = True

    # Training
    # aux_loss_weight is 0 and num_classes_aux is 0 above - presumably the
    # auxiliary head/loss is disabled for this run; confirm in training.losses.
    loss_config = {
        "name": "image",
        "smoothing": 0,
        "activation": "sigmoid",
        "aux_loss_weight": 0,
        "name_aux": "patient",
        "smoothing_aux": 0,
        "activation_aux": "",
        "ousm_k": 0,  # todo ?
    }

    # mix_proba is 0., so the mix settings below are presumably inactive.
    data_config = {
        "batch_size": 16,
        "val_bs": 16,
        "mix": "mixup",
        "mix_proba": 0.,
        "mix_alpha": 4.,
        "additive_mix": False,
        "num_classes": num_classes,  # copied from num_classes above
        "num_workers": 8,
    }

    optimizer_config = {
        "name": "AdamW",
        "lr": 5e-4,
        "warmup_prop": 0.,
        "betas": (0.9, 0.999),
        "max_grad_norm": 10.,
        "weight_decay": 0.,
    }

    epochs = 1

    use_fp16 = True

    verbose_eval = 50

    fullfit = False
    n_fullfit = 1

In [None]:
# When DEBUG is True, the launch cell skips log-folder creation, config saving
# and logger setup.
DEBUG = True
# Defaults passed to k_fold when nothing is logged or tracked.
log_folder = None
run = None

In [None]:
if not DEBUG:
    # Persist the run: create the log folder, save the config and set up the
    # text logger.
    log_folder = prepare_log_folder(LOG_PATH)
    print(f"Logging results to {log_folder}")
    # os.path.join gives the same path as plain concatenation when log_folder
    # ends with a separator, and a correct one when it does not.
    config_df = save_config(Config, os.path.join(log_folder, "config.json"))
    create_logger(directory=log_folder, name="logs.txt")
#     run = init_neptune(Config, log_folder)

# Reload the tables so this cell does not depend on what earlier cells did to
# df_patient / df_img.
df_patient, df_img = prepare_data(DATA_PATH)
init_distributed(Config)

# Run the k-fold training; log_folder and run stay None in DEBUG mode.
preds = k_fold(Config, df_patient, df_img, log_folder=log_folder, run=run)

Done ! 