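**About**: This notebook trains the segmentation models: it loads the data, previews a sample, sanity-checks the model and loss, then runs k-fold training.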

In [None]:
# %load_ext nb_black
# Reload edited project modules automatically, so changes under src/ are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
cd ../src/

## Initialization

### Imports

In [None]:
import os

# Restrict the process to a single GPU. This is done before importing torch so
# the setting is in place no matter when CUDA gets initialised.
os.environ['CUDA_VISIBLE_DEVICES'] = "1"

import torch

print(torch.__version__)

# Report the visible GPU. Fall back to "cpu" instead of raising when no CUDA
# device is visible (e.g. CPU-only machine, or GPU index 1 does not exist).
if torch.cuda.is_available():
    device = torch.cuda.get_device_name(0)
else:
    device = "cpu"
print(device)

In [None]:
import os
import sys
import glob
import json
import torch
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm import tqdm
from sklearn.metrics import *
from numerize.numerize import numerize

# Display settings for DataFrame output. Fully qualified option names are used
# because abbreviated patterns only work while they match a single option.
pd.set_option("display.width", 500)
pd.set_option("display.max_colwidth", 100)

In [None]:
from util.logger import (
    prepare_log_folder,
    save_config,
    create_logger,
    init_neptune
)

from params import *
from util.plots import *
from data.dataset import *
from data.transforms import get_transfos
from data.preparation import *
from util.torch import init_distributed, count_parameters, load_model_weights, count_parameters
from util.plots import plot_sample
from model_zoo.models import define_model


from training.main import k_fold

## Data

In [None]:
# df_split = prepare_folds()

In [None]:
# Build the working dataframe from the "false_color/" processed folder.
df = prepare_data(
    DATA_PATH,
    processed_folder="false_color/",
)

In [None]:
# Attach fold assignments when the dataframe does not already carry them.
# No join key is given, so pandas joins on the columns shared by both frames.
if "fold" not in df.columns:
    folds = pd.read_csv(DATA_PATH + "folds_4.csv")
    df = pd.merge(df, folds)

In [None]:
# Number of rows assigned to each fold.
df['fold'].value_counts()

### Dataset

In [None]:
# Keep only the rows flagged `has_contrail` and renumber the index from 0.
# NOTE: this overwrites `df`; re-run the data-loading cell to get all rows back.
df = (
    df[df["has_contrail"]]
    .reset_index(drop=True)
)

In [None]:
# Transforms at strength 2, wrapped with the dataframe into a dataset.
transfos = get_transfos(strength=2)
dataset = ContrailDataset(df, transfos)

# Show the transform pipeline as the cell output.
transfos

In [None]:
# Preview one fixed sample so the figure is the same on every run.
# The previous loop drew random indices but immediately overwrote them with 4
# and stopped after one iteration, so this is the same preview without the dead
# code. To browse random samples, draw `idx` with np.random.choice(len(dataset)).
idx = 4
image, mask, y = dataset[idx]

# transpose(1, 2, 0) moves axis 0 to the end
# (presumably channels-first -> channels-last for plotting; confirm in plot_sample).
plot_sample(image.numpy().transpose(1, 2, 0), mask.numpy().transpose(1, 2, 0))

In [None]:
# Number of rows currently in `df`.
len(df)

## Model

In [None]:
# Build a model for the sanity checks below.
# NOTE(review): reduce_stride=True here, whereas Config.reduce_stride is False.
model = define_model(
    "Unet",
    "tf_efficientnetv2_s",
    use_cls=False,
    use_hypercolumns=True,
    reduce_stride=True,
    center="none",
)

In [None]:
# Display the result of count_parameters for the model built above.
count_parameters(model)

In [None]:
# model.model.encoder.model.blocks[1][0].conv_exp.stride = (1, 1)
# model.model.decoder.blocks[-2].upscale = False

In [None]:
# Take the first sample and duplicate its image into a batch of two.
image, mask, y = dataset[0]
x = image[None].repeat(2, 1, 1, 1)

In [None]:
# Forward pass on the dummy batch; display the shapes of both outputs.
pred_mask, pred_cls = model(x)
pred_mask.shape, pred_cls.shape

In [None]:
# Import only the loss exercised below. A wildcard import at this point could
# silently shadow names already defined earlier in the notebook.
from training.losses import LovaszFocalLoss

loss = LovaszFocalLoss()

In [None]:
# Duplicate the mask to match the two-image dummy batch, then evaluate the loss.
batched_mask = mask.unsqueeze(0).repeat(2, 1, 1, 1)
loss(pred_mask, batched_mask)

In [None]:
# Size of a single (unbatched) mask.
mask.size()

In [None]:
# y, y_aux = model(x)
# y.size(), y.max()

In [None]:
# opt = torch.optim.Adam(model.parameters())
# loss = y.mean()
# loss.backward()
# opt.step()
# for name, param in model.named_parameters():
#     if param.grad is None:
#         print(name)
    
# opt.zero_grad()

## Training

In [None]:
class Config:
    """
    Parameters used for training.

    Plain namespace of class attributes: in this notebook it is passed as-is
    (never instantiated) to `save_config`, `init_distributed` and `k_fold`.
    """

    # General
    seed = 42
    verbose = 1
    device = "cuda"
    save_weights = True

    # Data
    processed_folder = "false_color/"
    size = 256
    aug_strength = 0

    # k-fold
    k = 4
    folds_file = f"../input/folds_{k}.csv"  # file name is derived from k
    selected_folds = [0]  # , 1, 2, 3]

    # Model
    encoder_name = "tf_efficientnetv2_s"
    decoder_name = "Unet"

    pretrained_weights = None
    reduce_stride = False
    n_channels = 3
    num_classes = 1

    # Training
    loss_config = {
        "name": "bce",  # ce
        "smoothing": 0.,
        "activation": "sigmoid",  # None for lovasz
        "aux_loss_weight": 0.,
        "activation_aux": "sigmoid",
        "ousm_k": 0,
    }

    data_config = {
        "batch_size": 16,
        "val_bs": 32,
        "mix": "cutmix",
        "mix_proba": 0.,
        "mix_alpha": 0.5,
        "additive_mix": False,
        "num_classes": num_classes,  # kept in sync with the model setting above
    }

    optimizer_config = {
        "name": "Adam",
        "lr": 1e-3,
        "warmup_prop": 0.05,
        "betas": (0.9, 0.999),
        "max_grad_norm": 10.0,
        "weight_decay": 0,
    }

    epochs = 100

    use_fp16 = True
    model_soup = False

    # `verbose` is defined once, in the General section; the duplicate
    # definition that used to live here was removed.
    verbose_eval = 200

    fullfit = False  # len(selected_folds) == 4
    n_fullfit = 1

In [None]:
# With DEBUG on, the launch cell skips all logging setup, so the log folder and
# the experiment-tracker handle both stay unset.
DEBUG = True
log_folder = run = None

In [None]:
# Outside DEBUG mode, set up logging first: create the log folder, save the
# configuration next to it and start a text logger. In DEBUG mode `log_folder`
# and `run` keep the None values assigned in the previous cell.
if not DEBUG:
    log_folder = prepare_log_folder(LOG_PATH)
    print(f"Logging results to {log_folder}")
    config_df = save_config(Config, log_folder + "config.json")
    create_logger(directory=log_folder, name="logs.txt")
#     run = init_neptune(Config, log_folder)

# Reload the dataframe from the folder named in Config. This replaces the
# `has_contrail`-filtered `df` built in the Dataset section.
df = prepare_data(DATA_PATH, Config.processed_folder)
init_distributed(Config)

# Launch k-fold training with the configuration above.
preds = k_fold(Config, df, log_folder=log_folder, run=run)

Done!