# 🏋️ Model Training

## ⚙️ Setup 

### 📚 Importing Libraries

Importing standard-library and third-party packages

In [None]:
import os
import gc
import pandas as pd
import time
import numpy as np
import torch
from torch.optim import AdamW
from torch import nn
from transformers import (
    get_linear_schedule_with_warmup,
    get_cosine_schedule_with_warmup,
)

In [None]:
# Move the working directory one level up from wherever the notebook started.
# Presumably this makes the project root the cwd so that the `lib` package
# imported in a later cell (and any relative paths) resolve — TODO confirm.
# NOTE(review): the path is relative, so re-running this cell moves up one
# more directory each time.
os.chdir("../")

Importing user-defined packages

In [None]:
from lib.config import config
from lib.paths import Paths
from lib.model.deberta import CustomModel
from lib.model.epoch_functions import train_epoch, valid_epoch
from lib.model.utils import get_score
from lib.utils.utils import get_logger, seed_everything
from lib.data import read_data_loader_from_disk

In [None]:
# Call the project's seeding helper with its default arguments before any
# model or data work happens. Presumably this fixes the random seeds for
# reproducibility — see lib.utils.utils.seed_everything to confirm which
# generators it covers.
seed_everything()

## 📖 Definitions

### 🌎 Global Variables

In [None]:
# Device shared by the training functions below: the GPU when CUDA is
# available, the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
# Notebook-wide logger used by the training functions below. It is built by
# the project's get_logger helper from the model output path; presumably the
# log file is written under that directory — TODO confirm in lib.utils.utils.
LOGGER = get_logger(Paths.MODEL_OUTPUT_PATH)

### 🛠️ Functions

In [None]:
def get_optimizer_params(model, encoder_lr, decoder_lr, weight_decay=0.0):
    """Build the parameter-group dicts handed to the optimizer.

    Three groups are returned, in this order:

    1. Parameters of ``model.model`` whose name contains none of the
       no-decay patterns: ``encoder_lr`` with ``weight_decay``.
    2. Parameters of ``model.model`` whose name contains a no-decay pattern
       (biases and LayerNorm weights): ``encoder_lr`` with no weight decay.
    3. Every parameter of ``model`` that lives outside the ``model``
       submodule: ``decoder_lr`` with no weight decay.

    Args:
        model: Module exposing a ``model`` submodule plus, optionally, other
            parameters registered next to it.
        encoder_lr: Learning rate for the parameters of ``model.model``.
        decoder_lr: Learning rate for all remaining parameters.
        weight_decay: Weight decay applied to group 1 only.

    Returns:
        A list of three dicts with the keys ``params``, ``lr`` and
        ``weight_decay``.
    """
    no_decay = ("bias", "LayerNorm.bias", "LayerNorm.weight")

    # Split the encoder parameters in a single pass instead of walking
    # named_parameters() once per group.
    decayed, undecayed = [], []
    for name, param in model.model.named_parameters():
        if any(pattern in name for pattern in no_decay):
            undecayed.append(param)
        else:
            decayed.append(param)

    # Pick the remaining parameters by prefix rather than by substring: a
    # substring test on "model" would also drop any head parameter whose name
    # merely contains that word, leaving it out of every group (and therefore
    # never updated by the optimizer).
    head = [
        param
        for name, param in model.named_parameters()
        if not name.startswith("model.")
    ]

    return [
        {"params": decayed, "lr": encoder_lr, "weight_decay": weight_decay},
        {"params": undecayed, "lr": encoder_lr, "weight_decay": 0.0},
        {"params": head, "lr": decoder_lr, "weight_decay": 0.0},
    ]

In [None]:
def get_scheduler(optimizer, num_train_steps):
    """Create the warmup learning-rate scheduler named by ``config.scheduler``.

    Args:
        optimizer: Optimizer whose learning rate will be scheduled.
        num_train_steps: Total number of training steps, forwarded as
            ``num_training_steps``.

    Returns:
        A linear-with-warmup scheduler for ``"linear"``, a cosine-with-warmup
        scheduler for ``"cosine"`` (which also receives ``config.num_cycles``),
        and ``None`` for any other value of ``config.scheduler``.
    """
    scheduler_name = config.scheduler
    if scheduler_name not in ("linear", "cosine"):
        # Unknown scheduler names yield no scheduler at all.
        return None

    warmup_kwargs = {
        "num_warmup_steps": config.num_warmup_steps,
        "num_training_steps": num_train_steps,
    }
    if scheduler_name == "cosine":
        return get_cosine_schedule_with_warmup(
            optimizer, num_cycles=config.num_cycles, **warmup_kwargs
        )
    return get_linear_schedule_with_warmup(optimizer, **warmup_kwargs)

In [None]:
def get_model_optimizer_and_scheduler(train_loader):
    """Create the model, its AdamW optimizer and the LR scheduler for one fold.

    The model is built from the global ``config``, its ``config`` attribute is
    saved to ``config.pth`` under ``Paths.MODEL_OUTPUT_PATH``, and the model is
    moved to the global ``device``.

    Args:
        train_loader: Training data loader; only its length is used here, to
            size the scheduler.

    Returns:
        A ``(model, optimizer, scheduler)`` tuple. ``scheduler`` is whatever
        ``get_scheduler`` returns for the configured scheduler name.
    """
    model = CustomModel(config, config_path=None, pretrained=True)
    # Store the model configuration alongside the checkpoints.
    torch.save(model.config, Paths.MODEL_OUTPUT_PATH + "/config.pth")
    model.to(device)

    optimizer = AdamW(
        get_optimizer_params(
            model,
            encoder_lr=config.encoder_lr,
            decoder_lr=config.decoder_lr,
            weight_decay=config.weight_decay,
        ),
        lr=config.encoder_lr,
        eps=config.eps,
        betas=config.betas,
    )

    # The length of a data loader is already the number of batches per epoch,
    # so it must not be divided by the batch size again: doing so makes the
    # schedule finish after roughly 1/batch_size of the actual training.
    # NOTE(review): assumes train_loader is a torch DataLoader and that
    # train_epoch steps the scheduler once per batch — confirm (and divide by
    # the accumulation factor if gradient accumulation is used).
    num_train_steps = int(len(train_loader) * config.epochs)
    scheduler = get_scheduler(optimizer, num_train_steps)
    return model, optimizer, scheduler

In [None]:
def train_loop(fold):
    """Train and validate one fold, keeping the best-scoring epoch.

    For every epoch the model is trained, evaluated on the fold's validation
    loader, and scored with ``get_score`` against the ``score`` column of
    ``valid_{fold}.csv``. Whenever the score improves (higher is better), the
    model weights and the predicted classes are saved to
    ``<model>_fold_<fold>_best.pth`` under ``Paths.MODEL_OUTPUT_PATH``.

    Args:
        fold: Index of the fold to train; selects the data loaders and the
            validation CSV.

    Returns:
        The fold's validation DataFrame with an added ``pred_score`` column
        holding the predictions of the best-scoring epoch.

    Raises:
        RuntimeError: If no epoch produced a score better than ``-inf`` (for
            example when ``config.epochs`` is 0), so there are no predictions
            to attach.
    """
    LOGGER.info(f"========== Fold: {fold} training ==========")

    # ======== DATA LOADER ==========
    train_loader, valid_loader = read_data_loader_from_disk(fold)
    valid_fold = pd.read_csv(os.path.join(Paths.DATA_LOADER_PATH, f"valid_{fold}.csv"))
    valid_labels = valid_fold["score"].values

    # ======== MODEL ==========
    model, optimizer, scheduler = get_model_optimizer_and_scheduler(train_loader)

    # ======= LOSS ==========
    criterion = nn.CrossEntropyLoss()
    softmax = nn.Softmax(dim=1)

    # The checkpoint path does not change between epochs; build it once.
    checkpoint_path = (
        Paths.MODEL_OUTPUT_PATH
        + f"/{config.model.replace('/', '_')}_fold_{fold}_best.pth"
    )

    best_score = -np.inf
    best_predictions = None
    # ====== ITERATE EPOCHS ========
    for epoch in range(config.epochs):
        start_time = time.time()

        # ======= TRAIN ==========
        avg_loss = train_epoch(
            train_loader, model, criterion, optimizer, epoch, scheduler, device
        )

        # ======= EVALUATION ==========
        avg_val_loss, prediction_dict = valid_epoch(
            valid_loader, model, criterion, device
        )
        # Reduce the raw outputs to one predicted class index per row
        # (argmax over dim 1 of the softmax-normalised outputs).
        raw_outputs = torch.tensor(prediction_dict["predictions"])
        _, predictions = torch.max(softmax(raw_outputs), dim=1)

        # ======= SCORING ==========
        score = get_score(valid_labels, predictions)

        elapsed = time.time() - start_time

        LOGGER.info(
            f"Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s"
        )
        LOGGER.info(f"Epoch {epoch+1} - Score: {score:.4f}")

        if score > best_score:
            best_score = score
            # Keep the best predictions in memory: re-reading them from the
            # checkpoint would load the full model weights just to get this
            # tensor, and could silently pick up a stale file from an earlier
            # run if no epoch of this run improved.
            best_predictions = predictions
            LOGGER.info(f"Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model")
            torch.save(
                {"model": model.state_dict(), "predictions": predictions},
                checkpoint_path,
            )

    # Drop the references first, then collect garbage *before* emptying the
    # CUDA cache so that memory released by the collector is returned too.
    del model, optimizer, scheduler, criterion, softmax
    gc.collect()
    torch.cuda.empty_cache()

    if best_predictions is None:
        raise RuntimeError(
            f"Fold {fold}: no epoch produced a valid score, "
            "so there are no predictions to attach."
        )
    valid_fold["pred_score"] = best_predictions

    return valid_fold

In [None]:
def get_result(oof_df):
    """Log the score of the predictions stored in ``oof_df``.

    Args:
        oof_df: DataFrame with a ``score`` column (labels) and a
            ``pred_score`` column (predictions).

    Returns:
        None. The score is only written to the notebook logger.
    """
    score = get_score(oof_df["score"].values, oof_df["pred_score"].values)
    LOGGER.info(f'Score: {score:<.4f}')

## 🏁 Start Training

In [None]:
if config.train:
    # Collect the out-of-fold frame of every trained fold and concatenate them
    # once at the end, instead of growing a DataFrame inside the loop starting
    # from an empty one (which re-copies the accumulated rows on every fold).
    fold_frames = []

    for fold in range(config.n_folds):
        if fold not in config.train_folds:
            continue
        _oof_df = train_loop(fold)
        fold_frames.append(_oof_df)
        LOGGER.info(f"========== Fold: {fold} result ==========")
        get_result(_oof_df)

    if fold_frames:
        # ignore_index=True renumbers the rows 0..n-1 across all folds.
        oof_df = pd.concat(fold_frames, ignore_index=True)
        LOGGER.info("========== CV ==========")
        get_result(oof_df)
        oof_df.to_csv(Paths.MODEL_OUTPUT_PATH + "/oof_df.csv", index=False)
    else:
        # No configured fold fell inside range(config.n_folds): there is
        # nothing to score or save, so say so instead of failing on a missing
        # "score" column.
        oof_df = pd.DataFrame()
        LOGGER.info("No folds were trained; skipping CV scoring.")