## Import Libs

In [None]:
import os
import random
import gc
import time
import copy
import sys
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2

from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn.functional as F
from torch import nn
from torch.optim import lr_scheduler
from torch.optim.lr_scheduler import _LRScheduler, CosineAnnealingLR
import timm

from PIL import Image
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm
from collections import defaultdict
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_curve, auc, roc_auc_score

import warnings # 避免一些可以忽略的报错
warnings.filterwarnings('ignore')
# For colored terminal text
from colorama import Fore, Back, Style
b_ = Fore.BLUE
sr_ = Style.RESET_ALL

## CONFIG

In [None]:
# Run-level switches. `is_debug` shortens CONFIG.epochs; the `use_*` flags are
# read further down to choose which CSV becomes `new_train` and to label the
# run in info.txt. The training-set flags are checked in a fixed order
# (1954, 1977, 10999, 401059), so the first True one wins.
is_debug = False
use_803098 = False
use_401059_val = True
# NOTE(review): the next two flags carry the same composition comment — confirm
# which description is actually correct for each dataset.
use_1977_train = False # 2024_0 + 2024_1 + 2020_1
use_1954_train = True # 2024_0 + 2024_1 + 2020_1
use_10999_train = False # 2024_0 + 2024_1 + 2020_1 + 2019_1
use_401059_train = False # 2024_0 + 2024_1

class CONFIG:
    """Static run configuration: hyper-parameters, model list and dataset paths.

    Used as a plain namespace of class attributes. ``formatted_time`` and
    ``ckpt_save_path`` start as ``None`` and are assigned later in the file.

    NOTE(review): other cells read ``CONFIG.train_csv``, ``CONFIG.train_1977_csv``,
    ``CONFIG.train_10999_csv`` and ``CONFIG.train_793_csv``, none of which is
    defined here; those branches raise AttributeError if they are ever taken.
    """
    seed = 308
    # Shorter run when the module-level `is_debug` flag is set.
    epochs = 8 if not is_debug else 2
    
    train_batch_size = 32
    valid_batch_size = 512
    # One size pair per entry; MyDataset picks an entry with its `n_models`
    # index and `transform` passes the pair's two values to A.Resize in order.
    img_size = [[160, 160],
                [160, 160]]
    # Initial "best score" that a validation pAUC must reach to be checkpointed.
    now_cv = 0
    n_classes = 1
    n_folds = 5
    
    # Gradient-accumulation factor; kept as a float, so expressions using it
    # (learning_rate, T_max, the step modulus) are floats as well.
    n_accumulate = 1.0
    n_workers = os.cpu_count()
    
    formatted_time = None
    ckpt_save_path = None

    # Base LR of 1e-3 scaled linearly with the effective batch size relative to 32.
    learning_rate = 1e-3 * train_batch_size * n_accumulate / 32
    # learning_rate = 1e-5 * train_batch_size / 32 # eva02
    total_sample = 1954
    # Scheduler horizon per fold, indexed as CONFIG.T_max[fold]. Values are
    # floats because of the true division. The leading integers are presumably
    # the per-fold training-set sizes — TODO confirm.
    T_max = [1586 * epochs / train_batch_size // n_accumulate,
             1522 * epochs / train_batch_size // n_accumulate,
             1582 * epochs / train_batch_size // n_accumulate,
             1574 * epochs / train_batch_size // n_accumulate,
             1552 * epochs / train_batch_size // n_accumulate] # 401059
    min_lr = 1e-6
    weight_decay = 1e-6
    # Name matched by fetch_scheduler; None disables scheduling.
    scheduler = "CosineAnnealingWithWarmupLR" # 'CosineAnnealingLR'
    DataParallel = False

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # The bare string below is only a reference list of candidate timm model
    # names; it is an expression statement with no runtime effect.
    """
    tf_efficientnet_b0_ns
    convnext_atto_ols.a2_in1k
    tiny_vit_21m_512.dist_in22k_ft_in1k
    eva02_base_patch14_224.mim_in22k
    tf_efficientnetv2_s.in21k_ft_in1k
    tf_efficientnetv2_l.in21k_ft_in1k
    tf_efficientnetv2_b3.in21k_ft_in1k
    tf_efficientnet_b3.ns_jft_in1k
    convnextv2_tiny.fcmae_ft_in22k_in1k_384
    convnext_tiny.fb_in22k_ft_in1k_384

    efficientvit_b0.r224_in1k
    efficientvit_b3.r256_in1k

    edgenext_base.in21k_ft_in1k
    eca_nfnet_l0.ra2_in1k
    """
    # Name of the ensemble head; becomes part of the checkpoint output folder.
    model_name = "ensemble_linear"
    # `model_names` and `ckpt_path` are parallel lists: entry i of ckpt_path is
    # the directory holding the per-fold checkpoints of model_names[i].
    model_names = ["tf_efficientnetv2_s.in21k_ft_in1k",
                   "edgenext_base.in21k_ft_in1k",
                   "convnext_atto_ols.a2_in1k",
                   "tf_efficientnet_b3.ns_jft_in1k"]
    ckpt_path = ["/kaggle/input/20240827-efficientnetv2s-train1954-cv15496",
                 "/kaggle/input/20240828-edgenext-train1954-cv0-1519",
                 "/kaggle/input/20240829-convnext-atto-train1954-cv149",
                 "/kaggle/input/20240829-efficientnet-b3-train1954-cv1504"]
    is_pretrained = False
    backbone_grad = True
    # Read by ISIC2024Model to swap the efficientnet global pool for GeMPool.
    use_gempool = False
    smooth_threshold = 0.05

    # CSV used for the validation frame (`valid`).
    old_my_train_csv = "/kaggle/input/my-train-with-sgkfold/my_train_with_sgkfold.csv"
    my_train_csv = "/kaggle/input/my-train-with-sgkfold/my_train_with_sgkfold.csv"
    # MyDataset looks for an image in train_img_dir first and falls back to img_dir.
    train_img_dir = "/kaggle/input/isic-2024-train-1954-imgs/train_1954_img"
    img_dir = "/kaggle/input/isic-2024-challenge/train-image/image"
    
    train_1954_csv = "/kaggle/input/isic-2024-train-1954/train_1954.csv"

# Set the CUDA-related environment variable that matches the DataParallel switch.
if not CONFIG.DataParallel:
    # Single-device run: ask CUDA for blocking kernel launches.
    os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
    print("NO IN DataParallel!")
else:
    # Multi-GPU run: expose devices 0 and 1.
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    print("IN DataParallel!")

## Set Random Seed

In [None]:
def set_seed(seed=308):
    """Seed every random number generator used in this file.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and CUDA), exports
    ``PYTHONHASHSEED`` and switches cuDNN to deterministic mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    )
    for seed_fn in seeders:
        seed_fn(seed)
    torch.backends.cudnn.deterministic = True
# Seed all generators once, before any data or model is created.
set_seed(CONFIG.seed)

## Data Processing

In [None]:
# root_dir = "/kaggle/input/isic-2024-challenge/train-image/image"
# img_ids = os.listdir(root_dir)

# min_size = 9999
# max_size = 0
# n_0_64 = 0
# n_64_96 = 0
# n_96_160 = 0
# n_160_224 = 0
# n_224_269 = 0
# for img_id in tqdm(img_ids):
#     path = os.path.join(root_dir, img_id)
#     a = Image.open(path)
#     s = np.array(a).shape[0]
#     if s > max_size:
#         max_size = s
#     if s < min_size:
#         min_size = s
        
#     if s >= 0 and s < 64:
#         n_0_64 += 1
#     elif s >= 64 and s < 96:
#         n_64_96 += 1
#     elif s >= 96 and s < 160:
#         n_96_160 += 1
#     elif s >= 160 and s < 224:
#         n_160_224 += 1
#     elif s >= 224:
#         n_224_269 += 1
        
# print(f"max_size : {max_size}") # max_size : 269
# print(f"min_size : {min_size}") # min_size : 41

# print(f"n_0_64    : {n_0_64}")    # 86
# print(f"n_64_96   : {n_64_96}")   # 3461
# print(f"n_96_160  : {n_96_160}")  # 368914
# print(f"n_160_224 : {n_160_224}") # 28305
# print(f"n_224_269 : {n_224_269}") # 293

# """
# The above code runs for : 37:20
# """

In [None]:
# Load the fold-annotated training table when it exists.
# NOTE(review): the fallback reads CONFIG.train_csv, which CONFIG does not
# define, so it raises AttributeError if my_train_csv is missing.
if os.path.exists(CONFIG.my_train_csv):
    train = pd.read_csv(CONFIG.my_train_csv)
else:
    train = pd.read_csv(CONFIG.train_csv)

# `valid` is the frame prepare_loaders filters by `kfold` to build validation folds.
valid = pd.read_csv(CONFIG.old_my_train_csv)
# Bare expression: shows the frame as the notebook cell output.
train

In [None]:
# Fold assignment is disabled: when the fold CSV is missing this cell only
# prints a message. The StratifiedKFold code below is kept for reference.
if not os.path.exists(CONFIG.my_train_csv):
    print("KFold....")
    # # Setting StratifiedKFold parameters
    # skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=308)

    # # Create a new column to hold the KFold labels
    # train['kfold'] = -1

    # # Iterate over each fold and assign labels
    # for fold, (train_idx, val_idx) in enumerate(skf.split(train, train['target'])):
    #     train.loc[val_idx, 'kfold'] = fold
# Bare expression: shows the frame as the notebook cell output.
train

In [None]:
# Choose the frame used for training (`new_train`) from the run flags.
# The flags are tested in order, so the first True one decides.
# NOTE(review): CONFIG defines only train_1954_csv and my_train_csv; the
# train_1977_csv, train_10999_csv and train_793_csv branches raise
# AttributeError if they are ever selected.
if use_803098:
    new_train = train
else:
    if use_1954_train:
        new_train = pd.read_csv(CONFIG.train_1954_csv)
    elif use_1977_train:
        new_train = pd.read_csv(CONFIG.train_1977_csv)
    elif use_10999_train:
        new_train = pd.read_csv(CONFIG.train_10999_csv)
    elif use_401059_train:
        new_train = pd.read_csv(CONFIG.my_train_csv)
    else:
        new_train = pd.read_csv(CONFIG.train_793_csv)
    # Disabled code below: the earlier in-notebook down-sampling of negatives.
    # train_0 = train[train["target"] != 1] # 400666 rows × 56 columns
    # train_1 = train[train["target"] == 1] # 393 rows × 56 columns

    # train_0 = train_0.reset_index(drop=True)
    # train_1 = train_1.reset_index(drop=True)

    # # train_1[train_1["kfold"] == 4] # [78, 79, 79, 79, 78] sum --> 393

    # # 0 : 1 ---> 1020 : 1

    # # Each fold takes 80 samples, a total of 400
    # some_train_0 = []
    # for fold in range(CONFIG.n_folds):
    #     _tmp = train_0[train_0["kfold"] == fold].reset_index(drop=True).iloc[:200, :]
    #     some_train_0.append(_tmp)

    # some_train_0 = pd.concat(some_train_0).reset_index(drop=True)
    # some_train_0

    # new_train = pd.concat([some_train_0, train_1]).reset_index(drop=True)
    # new_train = pd.concat([new_train, train_1_2020]).reset_index(drop=True)
    # new_train = new_train.sample(frac=1).reset_index(drop=True)
# Bare expression: shows the frame as the notebook cell output.
new_train

## Dataset and DataLoader

In [None]:
class HairAugmentation(A.ImageOnlyTransform):
    """Image-only augmentation that draws random straight "hair" lines.

    Args:
        num_hairs_range: Inclusive (min, max) number of lines per image.
        hair_color_range: Pair of 3-tuples giving the inclusive lower and
            upper bound of each colour channel; one colour is drawn per image.
        always_apply: Forwarded to ``A.ImageOnlyTransform``.
        p: Forwarded to ``A.ImageOnlyTransform``.
    """

    def __init__(self, num_hairs_range=(5, 15), hair_color_range=((0, 0, 0), (255, 255, 255)), always_apply=False, p=0.5):
        super().__init__(always_apply, p)
        self.num_hairs_range = num_hairs_range
        self.hair_color_range = hair_color_range

    def apply(self, img, **params):
        """Return a copy of ``img`` with the random lines drawn on it."""
        canvas = img.copy()
        height, width, _ = canvas.shape

        min_hairs, max_hairs = self.num_hairs_range[0], self.num_hairs_range[1]
        hair_count = random.randint(min_hairs, max_hairs)

        lower, upper = self.hair_color_range[0], self.hair_color_range[1]
        # Channels are sampled in index order 0, 1, 2.
        hair_color = tuple(
            random.randint(lower[channel], upper[channel]) for channel in range(3)
        )

        for _ in range(hair_count):
            # Both end points are sampled anywhere in [0, w] x [0, h].
            start = (random.randint(0, width), random.randint(0, height))
            end = (random.randint(0, width), random.randint(0, height))
            # Always 1 pixel; the draw is kept so the random stream is unchanged.
            thickness = random.randint(1, 1)
            canvas = cv2.line(canvas, start, end, hair_color, thickness)

        return canvas

    def get_params_dependent_on_targets(self, params):
        """No target-dependent parameters are needed."""
        return {}

    def get_transform_init_args_names(self):
        """Names of the constructor arguments that describe this transform."""
        return ("num_hairs_range", "hair_color_range")
    
# HairAugmentation(num_hairs_range=(5, 15), hair_color_range=((0, 0, 0), (255, 255, 255)), p=1.0)

In [None]:
def transform(img, img_size):
    """Resize, normalize and tensorize one image.

    Args:
        img: Image array handed to the albumentations pipeline as ``image``.
        img_size: Two-element sequence; its first and second values are passed
            to ``A.Resize`` in that order.

    Returns:
        The ``"image"`` entry produced by the pipeline (output of ToTensorV2).
    """
    first_dim, second_dim = img_size[0], img_size[1]
    steps = [
        A.Resize(first_dim, second_dim),
        A.Normalize(),  # default normalization arguments
        ToTensorV2(),
    ]
    pipeline = A.Compose(steps)
    result = pipeline(image=img)
    return result["image"]

In [None]:
# class MyDataset(Dataset):
#     def __init__(self, df, transform=None, mode="train", threshold=CONFIG.smooth_threshold):
#         super().__init__()
#         self.df = df
#         self.transform = transform
#         self.mode = mode
#         self.threshold = threshold

#     def __len__(self):
#         return len(self.df)
    
#     def __getitem__(self, idx):
#         row = self.df.iloc[idx, :]
#         img_id = row.isic_id + ".jpg"
#         label = torch.tensor(row.target, dtype=torch.float32)
        
#         if use_803098:
#             if label.item() == 1:
#                 img_path = os.path.join(CONFIG.new_train_img_dir, img_id)
#             else:
#                 img_path = os.path.join(CONFIG.train_img_dir, img_id)
#         else:
#             img_path = os.path.join(CONFIG.train_img_dir, img_id)
#         img = Image.open(img_path)
#         img = np.array(img)

#         if self.transform != None:
#             img = self.transform(img)
        
#         if self.mode == "train":
#             if label == 0:
#                 # label += (self.threshold / 2)
#                 label += self.threshold
#             elif label == 1:
#                 # label -= (self.threshold / 2)
#                 label -= self.threshold
#             else:
#                 raise("label is not 0 or 1")
#         elif self.mode == "valid":
#             pass
#         else:
#             raise("mode is not train or valid")

#         return img, label

class MyDataset(Dataset):
    """Dataset yielding ``(image, label)`` pairs from a metadata frame.

    Args:
        df: Frame with at least the ``isic_id`` and ``target`` columns.
        transform: Optional callable invoked as ``transform(img, img_size=...)``.
        n_models: Index into ``CONFIG.img_size`` selecting the resize target.
    """

    def __init__(self, df, transform=None, n_models=None):
        super().__init__()
        self.df = df
        self.transform = transform
        self.n_models = n_models

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        record = self.df.iloc[idx, :]
        file_name = record.isic_id + ".jpg"
        label = torch.tensor(record.target, dtype=torch.float32)

        # Look in the reduced training folder first, then the full image folder.
        img_path = os.path.join(CONFIG.train_img_dir, file_name)
        if not os.path.exists(img_path):
            img_path = os.path.join(CONFIG.img_dir, file_name)
        img = np.array(Image.open(img_path))

        if self.transform is None:
            return img, label

        target_size = CONFIG.img_size[self.n_models]
        return self.transform(img, img_size=target_size), label

In [None]:
def prepare_loaders(df, fold=0, n_models=0):
    """Build the train and validation DataLoaders for one fold.

    Training rows are those of ``df`` whose ``kfold`` differs from ``fold``.
    Validation rows always come from the module-level ``valid`` frame
    (rows with ``kfold == fold``), not from ``df``.

    Args:
        df: Training frame with a ``kfold`` column.
        fold: Fold index held out for validation.
        n_models: Forwarded to MyDataset to select the image size.

    Returns:
        Tuple ``(train_loader, valid_loader)``.
    """
    train_rows = df[df["kfold"] != fold]
    valid_rows = valid[valid["kfold"] == fold]

    shared = dict(num_workers=CONFIG.n_workers, pin_memory=True)

    train_loader = DataLoader(
        MyDataset(df=train_rows, transform=transform, n_models=n_models),
        batch_size=CONFIG.train_batch_size,
        shuffle=True,
        **shared,
    )
    valid_loader = DataLoader(
        MyDataset(df=valid_rows, transform=transform, n_models=n_models),
        batch_size=CONFIG.valid_batch_size,
        shuffle=False,
        **shared,
    )
    return train_loader, valid_loader

In [None]:
# train_loader, valid_loader = prepare_loaders(train)
# x, y = next(iter(train_loader))
# x.shape
# x, y = next(iter(valid_loader))
# y

## Evaluation

In [None]:
def compute_pAUC(y_true, y_scores, min_tpr=0.8):
    """Partial AUC above a minimum true-positive rate.

    Labels and scores are flipped (``|y - 1|`` and ``1 - score``) so the region
    ``TPR >= min_tpr`` becomes ``FPR <= 1 - min_tpr``. The standardized value
    returned by ``roc_auc_score(max_fpr=...)`` is then mapped back to an
    unscaled area.

    Args:
        y_true: NumPy array of binary labels.
        y_scores: Predicted scores, one per label.
        min_tpr: Lower TPR bound of the region of interest.

    Returns:
        The partial AUC, or 0.0 when ``y_true`` holds a single class.
    """
    if len(np.unique(y_true)) == 1:
        return 0.0

    max_fpr = abs(1 - min_tpr)

    flipped_truth = abs(y_true - 1)
    flipped_scores = 1.0 - np.asarray(y_scores)

    scaled = roc_auc_score(flipped_truth, flipped_scores, max_fpr=max_fpr)

    # Undo the standardization: 0.5 maps to the chance area, 1.0 to max_fpr.
    chance_area = 0.5 * max_fpr**2
    return chance_area + (max_fpr - chance_area) / (1.0 - 0.5) * (scaled - 0.5)

# Quick sanity check of compute_pAUC on a four-sample toy example.
y_true = np.array([0.0, 0.0, 1.0, 1.0])
y_scores = np.array([0.0, 0.0, 0.9, 0.1])

pAUC = compute_pAUC(y_true, y_scores)
print(f"pAUC: {pAUC:.4f}")

## DL Model

In [None]:
def updata_req_grad(models, requires_grad=True):
    """Set ``requires_grad`` on every parameter of every given model.

    Args:
        models: Iterable of objects exposing ``parameters()``.
        requires_grad: Value assigned to each parameter's ``requires_grad``.
    """
    all_params = (p for net in models for p in net.parameters())
    for p in all_params:
        p.requires_grad = requires_grad

In [None]:
class up2to4(nn.Module):
    """Reshape a tensor to ``(shape[0], shape[1], 1, 1)``."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return ``x`` reshaped with two trailing singleton dimensions."""
        dim0, dim1 = x.shape[0], x.shape[1]
        return x.reshape(dim0, dim1, 1, 1)

class GeMPool(nn.Module):
    """Generalized-mean pooling over the last two dimensions.

    Args:
        p: Initial value of the learnable exponent.
        eps: Lower clamp applied to the input before exponentiation.
    """

    def __init__(self, p=3, eps=1e-6):
        super().__init__()
        self.p = nn.Parameter(torch.ones(1) * p)
        self.eps = eps

    def forward(self, x):
        """Pool ``x`` with the learnable exponent and the stored ``eps``."""
        return self.gem(x, p=self.p, eps=self.eps)

    def gem(self, x, p=3, eps=1e-6):
        """Compute ``mean(clamp(x, eps) ** p) ** (1 / p)`` over dims (-2, -1)."""
        powered = x.clamp(min=eps).pow(p)
        pooled = torch.mean(powered, dim=(-2, -1))
        return pooled.pow(1.0 / p)

    def __repr__(self):
        exponent = self.p.data.tolist()[0]
        return f'{self.__class__.__name__}(p={exponent:.4f}, eps={self.eps})'

In [None]:
class ensemblelinear(nn.Module):
    """Learned softmax weighting over the base models' predictions.

    A bias-free linear layer maps the vector of base-model predictions to one
    logit per model. The softmax of those logits gives per-sample weights, and
    the output is the weighted sum of the original predictions.

    Args:
        in_features: Number of base models (columns of the input).
        out_features: Stored on the instance; not used in the computation.
    """

    def __init__(self, in_features, out_features=1) -> None:
        super().__init__()
        self.model = nn.Linear(in_features, in_features, bias=False)
        # Explicit dim: the weights are normalized across models (columns),
        # the same axis that forward() sums over. Leaving dim unset relies on
        # the deprecated implicit choice and warns on every call.
        self.softmax = nn.Softmax(dim=1)
        self.out_features = out_features

    def forward(self, x):
        """Blend predictions.

        Args:
            x: Tensor of shape ``(batch, in_features)``.

        Returns:
            Tensor of shape ``(batch, 1)`` with the weighted predictions.
        """
        weights = self.softmax(self.model(x))
        return (x * weights).sum(dim=1, keepdim=True)

In [None]:
class ISIC2024Model(nn.Module):
    """timm backbone with its classifier removed plus a fresh linear head.

    The backbone is created without pretrained weights; weights are expected to
    be loaded from a checkpoint afterwards.

    Args:
        model_name: timm model name. The family is detected by substring
            (efficientnet, convnext / tiny_vit, eva, efficientvit, edgenext).

    Raises:
        ValueError: If ``model_name`` matches none of the supported families.
    """

    def __init__(self, model_name=CONFIG.model_names[0]):
        super(ISIC2024Model, self).__init__()
        self.backbone = timm.create_model(model_name=model_name, 
                                          pretrained=False)
        
        if "efficientnet" in model_name:
            in_features = self.backbone.classifier.in_features
            self.backbone.classifier = nn.Identity()
            if CONFIG.use_gempool:
                self.backbone.global_pool = GeMPool()
        elif "convnext" in model_name or "tiny_vit" in model_name:
            in_features = self.backbone.head.fc.in_features
            self.backbone.head.fc = nn.Identity()
        elif "eva" in model_name:
            # Feature width is hard-coded and the backbone head is left as-is.
            in_features = 768
        elif "efficientvit" in model_name:
            in_features = self.backbone.head.classifier[4].in_features
            self.backbone.head.classifier[4] = nn.Identity()
        elif "edgenext" in model_name:
            in_features = self.backbone.head.fc.in_features
            self.backbone.head.fc = nn.Identity()
        else:
            # Without this branch `in_features` would be unbound below and the
            # failure would surface as an unrelated-looking UnboundLocalError.
            raise ValueError(
                f"Unsupported model_name {model_name!r}: cannot determine the "
                "backbone feature size"
            )

        self.head = nn.Sequential(
            nn.Linear(in_features, CONFIG.n_classes)
        )
        
    def forward(self, x):
        """Return the head output for the backbone features of ``x``."""
        features = self.backbone(x)
        return self.head(features)

## Load DL Models

In [None]:
from collections import OrderedDict

def remove_module(path):
    """Load a checkpoint and strip the ``module.`` prefix added by DataParallel.

    Only a *leading* ``module.`` is removed. Keys that merely contain that text
    (for example ``backbone.se_module.weight``) are left untouched, so they
    still match the unwrapped model's parameter names.

    Args:
        path: File path of a state dict saved with ``torch.save``.

    Returns:
        OrderedDict mapping the cleaned parameter names to tensors, in the
        original order. Tensors are loaded onto the CPU; ``load_state_dict``
        copies them to the model's device.
    """
    prefix = 'module.'

    # Load the saved state dict; map to CPU so the checkpoint can be read
    # regardless of the device it was saved from.
    state_dict = torch.load(path, map_location="cpu")

    # Rebuild the mapping with the leading 'module.' prefix removed.
    new_state_dict = OrderedDict()
    for key, value in state_dict.items():
        clean_key = key[len(prefix):] if key.startswith(prefix) else key
        new_state_dict[clean_key] = value
    return new_state_dict

In [None]:
# Load the per-fold checkpoints of every base model.
# Result: more_models[m][f] is base model m trained on fold f, in eval mode.
# NOTE: the top-level name `model` assigned here stays defined after this cell;
# the training cell starts with `del model` and relies on that.
more_models = []

for n_models in range(len(CONFIG.model_names)):
    models = []
    all_paths = sorted(os.listdir(CONFIG.ckpt_path[n_models]))
    paths = []
    for i in range(CONFIG.n_folds):
        # Collect the files whose first character is the 1-based fold number.
        _tmp_paths = []
        for path in all_paths:
            if path[0] == str(i+1):
                _tmp_paths.append(path)
        # Keep the last one in sorted order. With names like the examples
        # below this is presumably the highest-CV checkpoint — TODO confirm.
        # Raises IndexError when a fold has no matching file.
        paths.append(_tmp_paths[-1])
    # paths = ["1_CV0.1547_Loss0.6784_epoch2.bin",
    #          "2_CV0.1708_Loss0.5088_epoch5.bin",
    #          "3_CV0.1791_Loss0.3739_epoch7.bin",
    #          "4_CV0.1654_Loss0.5382_epoch3.bin",
    #          "5_CV0.1812_Loss0.4236_epoch8.bin"]

    if CONFIG.DataParallel:
        device_ids = [0, 1]
        for i in range(CONFIG.n_folds):
            model = ISIC2024Model(model_name=CONFIG.model_names[n_models])
            # Wrapped model: checkpoint keys are loaded as saved (no prefix stripping).
            model = torch.nn.DataParallel(model, device_ids=device_ids)
            model = model.cuda()
            model.load_state_dict(torch.load(os.path.join(CONFIG.ckpt_path[n_models], paths[i])))
            print(f"fold : {i} --------- path : {paths[i]}")
            model.eval()
            models.append(model)
    else:
        for i in range(CONFIG.n_folds):
            model = ISIC2024Model(model_name=CONFIG.model_names[n_models])
            # NOTE(review): .cuda() is used here rather than CONFIG.device, so
            # this branch needs a GPU even though CONFIG.device can be the CPU.
            model = model.cuda()
            # Plain model: remove_module adapts DataParallel-style key names.
            model.load_state_dict(remove_module(os.path.join(CONFIG.ckpt_path[n_models], paths[i])))
            print(f"fold : {i} --------- path : {paths[i]}")
            model.eval()
            models.append(model)
    more_models.append(models)
    print(f"{CONFIG.model_names[n_models]} load success.")

## Train and Valid Function

In [None]:
# Binary cross-entropy on probabilities; it is applied to the output of the
# ensemble head, which blends sigmoid outputs of the base models.
criterion = nn.BCELoss()

# The bare string below is a scratch usage example with no runtime effect.
"""
a = torch.tensor([0, 0, 1, 1]).float()
b = torch.tensor([0.7, 0.2, 0.5, 0.3]).float()
c = torch.tensor([[0.7], [0.2], [0.5], [0.3]]).float()
criterion(a, b)
"""

In [None]:
def train_one_epoch(model, dl_models, optimizer, scheduler, train_loader, epoch):
    """Train the ensemble head for one epoch on frozen base-model predictions.

    For every batch the base models are run without gradient tracking, their
    sigmoid outputs are concatenated column-wise, and the ensemble head is
    trained on that matrix with the module-level ``criterion``.

    Gradients are accumulated over ``CONFIG.n_accumulate`` batches. They are
    cleared once before the loop and after each optimizer step, never at the
    start of a batch, because that would discard the accumulated gradients.

    Args:
        model: Ensemble head being trained.
        dl_models: Base models, one output column each; not updated here.
        optimizer: Optimizer over ``model``'s parameters.
        scheduler: LR scheduler stepped after every optimizer step, or None.
        train_loader: Iterable of ``(images, labels)`` batches.
        epoch: Epoch number, only shown in the progress bar.

    Returns:
        Tuple ``(epoch_loss, train_cv)``: the running mean of the per-batch
        loss (already divided by ``CONFIG.n_accumulate``) and the pAUC over all
        predictions of the epoch, with labels rounded to 0/1.
    """
    model.train()
    
    y_preds = []
    y_trues = []
    
    dataset_size = 0
    running_loss = 0.0
    # Start the epoch with clean gradients.
    optimizer.zero_grad()
    bar = tqdm(enumerate(train_loader), total=len(train_loader))
    for step, (images, labels) in bar:
        batch_size = images.size(0)
        if CONFIG.DataParallel:
            images = images.cuda().float()
            labels = labels.cuda().float()
        else:
            images = images.to(CONFIG.device, dtype=torch.float)
            labels = labels.to(CONFIG.device, dtype=torch.float)
            
        # Base-model predictions are inputs only, so no graph is built for them.
        ensemble_input = []
        with torch.no_grad():
            for dl_model in dl_models:
                outputs = dl_model(images)
                outputs = F.sigmoid(outputs)
                ensemble_input.append(outputs)
            ensemble_input = torch.cat(ensemble_input, axis=1)

        outputs = model(ensemble_input)
        # Scale so the accumulated gradient is the mean over the cycle.
        loss = criterion(outputs.flatten(), labels) / CONFIG.n_accumulate
        loss.backward()
        
        if (step + 1) % CONFIG.n_accumulate == 0:
            optimizer.step()

            # zero the parameter gradients
            optimizer.zero_grad()

            if scheduler is not None:
                scheduler.step()
        y_preds.append(outputs.flatten().detach().cpu().numpy())
        y_trues.append(labels.detach().cpu().numpy())

        # Running metric for the progress bar, over everything seen so far.
        train_cv = compute_pAUC(np.concatenate(y_trues).round(), np.concatenate(y_preds))

        running_loss += (loss.item() * batch_size)

        dataset_size += batch_size
        
        epoch_loss = running_loss / dataset_size
        
        bar.set_postfix(Epoch=epoch,
                        Train_Loss=epoch_loss,
                        Train_CV_pAUC=train_cv,
                        LR=optimizer.param_groups[0]['lr'])
    # Ensure that a parameter update is performed after the last accumulation cycle
    if (step + 1) % CONFIG.n_accumulate != 0:
        optimizer.step()
        optimizer.zero_grad()
        if scheduler is not None:
            scheduler.step()
        
    return epoch_loss, train_cv

In [None]:
@torch.inference_mode()
def valid_one_epoch(model, dl_models, optimizer, valid_loader, epoch):
    """Evaluate the ensemble head on the validation loader.

    Args:
        model: Ensemble head; switched to eval mode.
        dl_models: Base models providing one sigmoid column each.
        optimizer: Only read to show the current learning rate.
        valid_loader: Iterable of ``(images, labels)`` batches.
        epoch: Epoch number, only shown in the progress bar.

    Returns:
        Tuple ``(epoch_loss, cv)``: the sample-weighted mean of the per-batch
        loss (divided by ``CONFIG.n_accumulate``, as in training) and the pAUC
        over all validation predictions.
    """
    model.eval()

    pred_chunks = []
    true_chunks = []
    seen = 0
    loss_total = 0.0
    progress = tqdm(enumerate(valid_loader), total=len(valid_loader))
    with torch.no_grad():
        for step, (images, labels) in progress:
            batch_size = images.size(0)
            if not CONFIG.DataParallel:
                images = images.to(CONFIG.device, dtype=torch.float)
                labels = labels.to(CONFIG.device, dtype=torch.float)
            else:
                images = images.cuda().float()
                labels = labels.cuda().float()

            # One sigmoid column per base model, in the order of dl_models.
            member_probs = [F.sigmoid(dl_model(images)) for dl_model in dl_models]
            ensemble_input = torch.cat(member_probs, axis=1)

            outputs = model(ensemble_input)
            loss = criterion(outputs.flatten(), labels) / CONFIG.n_accumulate

            pred_chunks.append(outputs.flatten().detach().cpu().numpy())
            true_chunks.append(labels.detach().cpu().numpy())
            # Running metric for the progress bar.
            valid_cv = compute_pAUC(np.concatenate(true_chunks), np.concatenate(pred_chunks))

            loss_total += (loss.item() * batch_size)
            seen += batch_size
            epoch_loss = loss_total / seen

            progress.set_postfix(Epoch=epoch,
                                 Valid_Loss=epoch_loss,
                                 Valid_CV_pAUC=valid_cv,
                                 LR=optimizer.param_groups[0]['lr'])

        all_preds = np.concatenate(pred_chunks)
        all_trues = np.concatenate(true_chunks)
        cv = compute_pAUC(all_trues, all_preds)

    return epoch_loss, cv

In [None]:
# Get the current time stamp
current_time = time.time()
print("Current timestamp:", current_time)

# Convert a timestamp to a local time structure
local_time = time.localtime(current_time)

# Formatting local time
CONFIG.formatted_time = time.strftime('%Y-%m-%d_%H:%M:%S', local_time)
print("Current time:", CONFIG.formatted_time)

CONFIG.ckpt_save_path = f"output/{CONFIG.formatted_time}_{CONFIG.model_name}_output"
if os.path.exists(CONFIG.ckpt_save_path) is False:
    os.makedirs(CONFIG.ckpt_save_path)

In [None]:
def run_training(fold, model, dl_models, optimizer, scheduler, train_loader, valid_loader, num_epochs=CONFIG.epochs, now_cv=CONFIG.now_cv):
    """Train one fold and keep the weights with the best validation pAUC.

    After each epoch the model is validated. Whenever the validation pAUC is
    at least the best seen so far, the weights are copied in memory and saved
    under ``CONFIG.ckpt_save_path``.

    Args:
        fold: Fold label used in the checkpoint file name.
        model: Ensemble head to train.
        dl_models: Base models feeding the ensemble head.
        optimizer: Optimizer over ``model``'s parameters.
        scheduler: LR scheduler, or None.
        train_loader: Training DataLoader.
        valid_loader: Validation DataLoader.
        num_epochs: Number of epochs to run.
        now_cv: Initial score a validation pAUC must reach to be saved.

    Returns:
        Tuple ``(model, history, best_model_path)``: the model with the best
        weights loaded, a dict of per-epoch lists ('Train Loss', 'Valid Loss',
        'Train CV', 'Valid CV', 'lr'), and the path of the last saved
        checkpoint (None if nothing was saved).
    """
    if torch.cuda.is_available():
        print("[INFO] Using GPU: {} x {}\n".format(torch.cuda.get_device_name(), torch.cuda.device_count()))
    
    start = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_epoch_cv = now_cv
    best_model_path = None
    history = defaultdict(list)
    
    for epoch in range(1, num_epochs + 1):
        gc.collect()
        train_epoch_loss, train_epoch_cv = train_one_epoch(model, dl_models, optimizer, scheduler, train_loader, epoch)
        valid_epoch_loss, valid_epoch_cv = valid_one_epoch(model, dl_models, optimizer, valid_loader, epoch)
        print(f"epoch: {epoch}, LOSS = {valid_epoch_loss}, CV = {valid_epoch_cv}")
        
        history['Train Loss'].append(train_epoch_loss)
        history['Valid Loss'].append(valid_epoch_loss)
        history['Train CV'].append(train_epoch_cv)
        history['Valid CV'].append(valid_epoch_cv)
        # Read the LR from the optimizer: it is the value the scheduler last
        # applied, works when no scheduler is used, and avoids calling
        # get_lr() outside of scheduler.step().
        history['lr'].append(optimizer.param_groups[0]['lr'])
        
        # deep copy the model
        if valid_epoch_cv >= best_epoch_cv:
            print(f"{b_}epoch: {epoch}, Validation CV Improved ({best_epoch_cv} ---> {valid_epoch_cv}))")
            best_epoch_cv = valid_epoch_cv
            best_model_wts = copy.deepcopy(model.state_dict())
            PATH = "./{}/{}_CV_{:.4f}_Loss{:.4f}_epoch{:.0f}.bin".format(CONFIG.ckpt_save_path, fold, best_epoch_cv, valid_epoch_loss, epoch)
            best_model_path = PATH
            torch.save(model.state_dict(), PATH)
            print(f"Model Saved{sr_}")
            
        print()
    
    end = time.time()
    time_elapsed = end - start
    print('Training complete in {:.0f}h {:.0f}m {:.0f}s'.format(
        time_elapsed // 3600, (time_elapsed % 3600) // 60, (time_elapsed % 3600) % 60))
    print("Best CV: {:.4f}".format(best_epoch_cv))

    # load best model weights
    model.load_state_dict(best_model_wts)

    return model, history, best_model_path

## Optimizer

In [None]:
class CosineAnnealingWithWarmupLR(_LRScheduler):
    """Linear warmup followed by cosine annealing.

    Args:
        optimizer: Wrapped optimizer.
        T_max: Total number of scheduler steps (warmup plus cosine phase).
        eta_min: Learning rate reached at the end of the cosine phase.
        warmup_epochs: Number of initial steps with linearly increasing LR.
        last_epoch: Forwarded to the base scheduler.
    """

    def __init__(self, optimizer, T_max, eta_min=0, warmup_epochs=10, last_epoch=-1):
        # These must exist before the base constructor runs: it performs an
        # initial step, which calls get_lr().
        self.T_max = T_max
        self.eta_min = eta_min
        self.warmup_epochs = warmup_epochs
        self.cosine_epochs = T_max - warmup_epochs
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return one learning rate per param group for the current step."""
        step = self.last_epoch

        if step >= self.warmup_epochs:
            # Cosine phase: decay from base_lr towards eta_min.
            elapsed = step - self.warmup_epochs
            swing = 1 + math.cos(math.pi * elapsed / self.cosine_epochs)
            return [
                self.eta_min + (base_lr - self.eta_min) * swing / 2
                for base_lr in self.base_lrs
            ]

        # Warmup phase: step k uses (k + 1) / warmup_epochs of base_lr.
        return [
            (base_lr * (step + 1) / self.warmup_epochs)
            for base_lr in self.base_lrs
        ]


In [None]:
# The learning rate decreases with training
def fetch_scheduler(optimizer, T_max, min_lr):
    """Build the LR scheduler named by ``CONFIG.scheduler``.

    Args:
        optimizer: Optimizer whose learning rate is scheduled.
        T_max: Total number of scheduler steps.
        min_lr: Minimum learning rate (``eta_min``).

    Returns:
        The scheduler instance, or None when ``CONFIG.scheduler`` is None.

    Raises:
        ValueError: If ``CONFIG.scheduler`` is not a recognized name.
    """
    name = CONFIG.scheduler
    if name is None:
        return None

    if name == 'CosineAnnealingLR':
        return lr_scheduler.CosineAnnealingLR(optimizer, T_max=T_max,
                                              eta_min=min_lr)

    if name == "CosineAnnealingWithWarmupLR":
        # Warmup length is T_max floor-divided by the training batch size.
        return CosineAnnealingWithWarmupLR(optimizer, T_max=T_max, eta_min=min_lr,
                                           warmup_epochs=T_max//CONFIG.train_batch_size)

    # Previously an unknown name fell through and hit an unbound local.
    raise ValueError(f"Unknown CONFIG.scheduler: {name!r}")

In [None]:
# optimizer = torch.optim.AdamW(model.parameters(), lr=CONFIG.learning_rate, 
#                               weight_decay=CONFIG.weight_decay)
# scheduler = fetch_scheduler(optimizer, T_max=CONFIG.T_max, min_lr=CONFIG.min_lr)

## Start Training

In [None]:
# Write the run configuration to info.txt in the checkpoint folder
# (mode 'w', so an existing file is overwritten).
if use_1954_train:
    train_line = 'train on 1954\n'
elif use_1977_train:
    train_line = 'train on 1977\n'
elif use_10999_train:
    train_line = 'train on 10999\n'
elif use_401059_train:
    train_line = 'train on 401059\n'
else:
    train_line = 'train on 793\n'

info_lines = [train_line]
if use_401059_val:
    info_lines.append('valid on 401059\n')

info_lines.extend([
    f'seed: {CONFIG.seed}\n',
    f'epochs: {CONFIG.epochs}\n',
    f'train_batch_size: {CONFIG.train_batch_size}\n',
    f'valid_batch_size: {CONFIG.valid_batch_size}\n',
    f'img_size: {CONFIG.img_size}\n',
    f'n_classes: {CONFIG.n_classes}\n',
    f'n_folds: {CONFIG.n_folds}\n',
    f'learning_rate: {CONFIG.learning_rate}\n',
    f'model_name: {CONFIG.model_name}\n',
    f'use_gempool: {CONFIG.use_gempool}\n',
    f'smooth_threshold: {CONFIG.smooth_threshold}\n',
    f'model_names: {CONFIG.model_names}\n',
    f'ckpt_path: {CONFIG.ckpt_path}\n',
])

with open(f'{CONFIG.ckpt_save_path}/info.txt', 'w') as file:
    file.writelines(info_lines)

In [None]:
# Train one ensemble head per fold and collect out-of-fold predictions.
# `oof` / `true` gather the per-batch predictions and labels of every fold's
# validation set; they are concatenated into flat arrays at the end.
oof = []
true = []
historys = []

for fold in range(0, CONFIG.n_folds):
    print(f"==================== Train on Fold {fold+1} ====================")
    # NOTE(review): on the first iteration this relies on `model` already
    # existing at module level (left over from the checkpoint-loading cell);
    # otherwise `del model` raises NameError.
    del model
    torch.cuda.empty_cache()
    # Fresh ensemble head with one input per base model.
    model = ensemblelinear(in_features=len(CONFIG.model_names))
    if CONFIG.DataParallel:
        device_ids = [0, 1] # Two graphics cards with IDs 0 and 1
        model = torch.nn.DataParallel(model, device_ids=device_ids)
        model = model.cuda()
    else:
        model = model.to(CONFIG.device)
    # Use, for each base architecture, the checkpoint trained on this fold.
    dl_models = []
    for models in more_models:
        dl_models.append(models[fold])
        
    optimizer = torch.optim.AdamW(model.parameters(), lr=CONFIG.learning_rate, 
                                  weight_decay=CONFIG.weight_decay)
    scheduler = fetch_scheduler(optimizer, T_max=CONFIG.T_max[fold], min_lr=CONFIG.min_lr)
    
    # The last argument (0) selects CONFIG.img_size[0] for every base model.
    train_loader, valid_loader = prepare_loaders(new_train, fold, 0)
    # run_training returns the model with its best validation weights loaded.
    model, history, best_model_path = run_training(fold+1, model, dl_models, optimizer, scheduler, 
                                                   train_loader, valid_loader, 
                                                   num_epochs=CONFIG.epochs, now_cv=CONFIG.now_cv)
    historys.append(history)
    
    # Out-of-fold inference on this fold's validation set with the best weights.
    bar = tqdm(enumerate(valid_loader), total=len(valid_loader))
    with torch.no_grad():
        for step, (images, labels) in bar:
            batch_size = images.size(0)
            if CONFIG.DataParallel:
                images = images.cuda().float()
                labels = labels.cuda().float()
            else:
                images = images.to(CONFIG.device, dtype=torch.float)
                labels = labels.to(CONFIG.device, dtype=torch.float)

            # One sigmoid column per base model, concatenated column-wise.
            ensemble_input = []
            for dl_model in dl_models:
                outputs = dl_model(images)
                outputs = F.sigmoid(outputs)
                ensemble_input.append(outputs)
            ensemble_input = torch.cat(ensemble_input, axis=1)

            outputs = model(ensemble_input)

            oof.append(outputs.flatten().detach().cpu().numpy())
            true.append(labels.detach().cpu().numpy())
        print()

oof = np.concatenate(oof)
true = np.concatenate(true)

## Local CV

In [None]:
# Overall pAUC across the out-of-fold predictions of all folds.
local_cv = compute_pAUC(true, oof)
print("Local CV : ", local_cv)

In [None]:
# np.save(f"result_analysis/{CONFIG.formatted_time}_{CONFIG.model_name}.npy", oof)

In [None]:
# Append the final out-of-fold score to this run's info.txt (mode 'a').
with open(f'{CONFIG.ckpt_save_path}/info.txt', 'a') as file:
    # Append a string to a file
    file.write(f'cv: {local_cv}\n')

## Logs

In [None]:
# Choose which fold's training history the plots below show (0-based index).
fold = 0
history = historys[fold]

In [None]:
# Per-epoch training and validation loss of the selected fold.
train_losses = history["Train Loss"]
valid_losses = history["Valid Loss"]
plt.plot(range(len(train_losses)), train_losses, label="Train Loss")
plt.plot(range(len(valid_losses)), valid_losses, label="Valid Loss")
plt.xlabel("epochs")
plt.ylabel("Loss")
plt.grid()
plt.legend()
plt.show()

In [None]:
# Per-epoch training and validation pAUC of the selected fold.
train_scores = history["Train CV"]
valid_scores = history["Valid CV"]
plt.plot(range(len(train_scores)), train_scores, label="Train CV")
plt.plot(range(len(valid_scores)), valid_scores, label="Valid CV")
plt.xlabel("epochs")
plt.ylabel("CV or AUC")
plt.grid()
plt.legend()
plt.show()

In [None]:
# Learning rate recorded at the end of each epoch of the selected fold.
lr_values = history["lr"]
plt.plot(range(len(lr_values)), lr_values, label="lr")
plt.xlabel("epochs")
plt.ylabel("lr")
plt.grid()
plt.legend()
plt.show()