**TODO:**
- Recheck augmentations
- Recheck LAB normalization
- Try a sampler for faster convergence?

In [1]:
# %load_ext nb_black
%load_ext autoreload
%autoreload 2

## Initialization

### Imports

In [2]:
import os
import sys
import torch
import warnings
import numpy as np
import pandas as pd

from tqdm.notebook import tqdm
from matplotlib import pyplot as plt

sys.path.append("../code/")
# os.environ['CUDA_VISIBLE_DEVICES'] = "1,0"
warnings.simplefilter("ignore", UserWarning)

In [3]:
from training.main import k_fold

from utils.logger import (
    prepare_log_folder,
    save_config,
    create_logger,
    update_overall_logs,
)

from params import *

### Load

In [4]:
# Read the two CSV tables shipped in DATA_PATH: the dataset information sheet
# and train.csv. DATA_PATH is concatenated as-is, so it is expected to end with
# a path separator.
df_info = pd.read_csv(DATA_PATH + "HuBMAP-20-dataset_information.csv")
df_mask = pd.read_csv(DATA_PATH + "train.csv")

## Training

In [5]:
# Training batch size to use for each supported encoder name.
BATCH_SIZES = {
    "resnet18": 64,
    "resnet34": 32,
    "resnext50_32x4d": 32,
    "se_resnext50_32x4d": 32,
    # efficientnet-b0 through efficientnet-b4 all share the same batch size.
    **{f"efficientnet-b{variant}": 32 for variant in range(5)},
    "efficientnet-b5": 16,
    "efficientnet-b6": 8,
}

In [9]:
class Config:
    """
    Training configuration: every tunable value is a class attribute.

    Several attributes are derived from earlier ones at class-creation time
    (directories, activation, batch size, epochs, validation batch size), so
    the definition order below matters.
    """

    # ---- General ----
    seed = 42
    verbose = 1

    device = "cuda" if torch.cuda.is_available() else "cpu"
    save_weights = True
    # Choose between 'convhull', 'centered', 'random', 'visible'.
    sampling_mode = "convhull"

    # ---- Images ----
    tile_size = 256
    reduce_factor = 4
    on_spot_sampling = 0.95
    overlap_factor = 1.5

    # Image and mask folders are named after the tile size and reduction factor.
    img_dir = DATA_PATH + f"train_{tile_size}_red_{reduce_factor}"
    mask_dir = DATA_PATH + f"masks_{tile_size}_red_{reduce_factor}"

    # ---- k-fold ----
    cv_column = "5fold"
    random_state = 0
    selected_folds = list(range(5))  # folds 0..4

    # ---- Model ----
    # Other encoders tried: "resnet18", "resnext50_32x4d", "resnet34", "efficientnet-b5".
    encoder = "efficientnet-b1"
    # Either "Unet" or "DeepLabV3Plus".
    decoder = "Unet"
    use_bot = False
    use_fpn = False
    double_model = False
    encoder_weights = "imagenet"
    num_classes = 1

    # ---- Training ----
    # One of "SoftDiceLoss" / "BCEWithLogitsLoss" / "lovasz".
    loss = "BCEWithLogitsLoss"
    # Only the lovasz loss runs without a sigmoid activation.
    activation = "sigmoid" if loss != "lovasz" else "none"

    optimizer = "Adam"

    # Per-encoder batch size, halved when working on 512-pixel tiles.
    batch_size = (
        BATCH_SIZES[encoder] // 2 if tile_size == 512 else BATCH_SIZES[encoder]
    )

    # The smaller the batch size, the fewer epochs are scheduled.
    epochs = (
        50 if batch_size >= 32
        else 40 if batch_size >= 16
        else 30 if batch_size >= 6
        else 25
    )

    iter_per_epoch = 5000
    lr = 0.001
    # NOTE(review): this is never below `epochs` (at most 50); presumably SWA
    # is effectively disabled with these settings - confirm against the trainer.
    swa_first_epoch = 50

    warmup_prop = 0.05
    # Validation uses twice the training batch size.
    val_bs = 2 * batch_size

    first_epoch_eval = 0

    use_fp16 = True

In [10]:
# When True, the training cell skips creating a log folder, saving the config
# and setting up the file logger.
DEBUG = True
# Stays None in debug runs; the training cell replaces it with the folder
# returned by prepare_log_folder when DEBUG is False.
log_folder = None

In [11]:
# Outside of debug mode, set up a log folder before training starts.
if not DEBUG:
    # The returned value is concatenated directly with file names below, so it
    # is expected to be a string ending with a path separator.
    log_folder = prepare_log_folder(LOG_PATH)
    print(f"Logging results to {log_folder}")
    # presumably dumps the Config attributes to config.json - verify in utils.logger
    config_df = save_config(Config, log_folder + "config.json")
    # presumably routes log messages to logs.txt in that folder - verify in utils.logger
    create_logger(directory=log_folder, name="logs.txt")

# Launch training with the configuration above; log_folder is whatever the
# previous cell left it at (None there) when DEBUG is True.
metrics = k_fold(Config, log_folder=log_folder)

Creating in-memory dataset ...
Done in 126 seconds.

-------------   Fold 1 / 5  -------------

    -> 10046291 trainable parameters
    -> Validation images : ['2f6ecfcdf', 'b2dc8411c', '4ef6695ce'] 

[6, 860, 1244, 5390, 5774][6, 860, 1244, 5390, 5774]

[12, 5734, 6118, 6265, 6649][12, 5734, 6118, 6265, 6649]

[3, 4426, 4810, 5578, 5962]
[13, 769, 1153, 6949, 7333][3, 4426, 4810, 5578, 5962]

[1, 5051, 5435, 6420, 6804]
[13, 769, 1153, 6949, 7333]
[11, 3385, 3769, 6396, 6780]
[3, 4555, 4939, 3385, 3769]
[7, 1082, 1466, 2558, 2942]
[1, 2747, 3131, 9167, 9551]
[1, 2904, 3288, 8666, 9050]
[3, 2734, 3118, 3005, 3389]
[11, 3005, 3389, 4658, 5042]
[6, 3202, 3586, 3556, 3940]
[6, 3202, 3586, 3556, 3940]
[11, 5393, 5777, 8792, 9176]
[11, 5393, 5777, 8792, 9176]
[14, 2824, 3208, 7513, 7897][14, 2824, 3208, 7513, 7897]

[3, 6235, 6619, 5486, 5870][1, 1363, 1747, 6235, 6619]

[8, 3152, 3536, 1585, 1969]
[11, 1585, 1969, 3943, 4327]
[7, 2613, 2997, 5225, 5609]
[12, 1500, 1884, 6873, 7257]
[9, 49

RuntimeError: CUDA out of memory. Tried to allocate 24.00 MiB (GPU 0; 10.76 GiB total capacity; 4.18 GiB already allocated; 15.25 MiB free; 4.24 GiB reserved in total by PyTorch)