In [1]:
# !pip install timm==1.0.9
# !pip install albumentations==1.4.14
# !pip install torcheval==0.0.7
# !pip install pandas==2.2.2
# !pip install numpy==1.26.4

In [8]:
import sys, os, time, copy, gc
import torch
from torch import nn
from torch.utils.data import DataLoader
from pathlib import Path

import numpy as np
import pandas as pd
import albumentations as A
from albumentations.pytorch import ToTensorV2
import multiprocessing as mp

from torcheval.metrics.functional import binary_auroc, multiclass_auroc

from sklearn.model_selection import StratifiedGroupKFold

import hashlib
from joblib import Parallel, delayed
from sklearn.model_selection import train_test_split

from PIL import Image
import torch.optim as optim

from collections import defaultdict




sys.path.append('./src')
from utils import set_seed, visualize_augmentations_positive, print_trainable_parameters
from models import setup_model
from training import fetch_scheduler, train_one_epoch, valid_one_epoch, run_training, get_nth_test_step
from models import ISICModel, ISICModelEdgnet, setup_model
from datasets import ISICDatasetSamplerW, ISICDatasetSampler, ISICDatasetSimple, ISICDatasetSamplerMulticlass, prepare_loaders
from augmentations import get_augmentations

In [9]:
# Select the compute device: CUDA when a GPU is visible, otherwise the CPU.
# (No random seed is set in this cell.)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# Report GPU details only when the CUDA device was selected
if device.type == "cuda":
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Number of GPUs: {torch.cuda.device_count()}")

Using device: cuda
GPU: Tesla T4
Number of GPUs: 1


In [10]:
# Root of the raw data, kept both as a plain string and as a Path
original_data_path = "/data/original"
original_root = Path(original_data_path)

# Directory for generated artifacts; created if it does not exist yet
data_artifacts = "/data/artifacts"
Path(data_artifacts).mkdir(parents=True, exist_ok=True)

In [11]:
# Location of the HDF5 image archive under the raw-data root
TRAIN_HDF5_FILE_PATH = original_root / 'train-image.hdf5'

# Load the metadata table. low_memory=False makes pandas infer each column's
# dtype from the whole file instead of chunk by chunk.
# NOTE(review): the previous run emitted a warning on this read, presumably the
# mixed-dtype DtypeWarning -- confirm it is gone after this change.
train_path = original_root / 'train-metadata.csv'
df_train = pd.read_csv(train_path, low_memory=False)

# Build each JPEG path from original_root so relocating the data needs one edit
train_image_dir = original_root / 'train-image' / 'image'
df_train["path"] = f"{train_image_dir}/" + df_train['isic_id'] + ".jpg"

# Class balance of the full training table
original_positive_cases = df_train['target'].sum()
original_total_cases = len(df_train)
original_negative_cases = original_total_cases - original_positive_cases
original_positive_ratio = original_positive_cases / original_total_cases

print(f"Number of positive cases: {original_positive_cases}")
print(f"Number of negative cases: {original_negative_cases}")
print(f"Ratio of negative to positive cases: {original_negative_cases / original_positive_cases:.2f}:1")

  df_train = pd.read_csv(train_path)


Number of positive cases: 393
Number of negative cases: 400666
Ratio of negative to positive cases: 1019.51:1


In [None]:
MODEL_NAME = "EDGENEXT" # "EVA"

# Backbone-dependent choices: the checkpoint identifier later passed to
# setup_model, and the wrapper class later passed as model_maker.
# Any MODEL_NAME other than 'EVA' selects the EdgeNeXt settings.
if MODEL_NAME == 'EVA':
    model_name = "eva02_small_patch14_336.mim_in22k_ft_in1k"
    ISICModelPrep = ISICModel
else:
    model_name = "edgenext_base.in21k_ft_in1k"
    ISICModelPrep = ISICModelEdgnet

checkpoint_path = None

# Run configuration shared by the augmentation, loader, scheduler and training helpers
CONFIG = dict(
    seed=42 if MODEL_NAME == 'EVA' else 1997,
    epochs=500,
    img_size=336 if MODEL_NAME == 'EVA' else 256,
    train_batch_size=32,
    valid_batch_size=64,
    learning_rate=1e-4,
    scheduler='CosineAnnealingLR',
    min_lr=1e-6,
    T_max=2000,
    weight_decay=1e-6,
    fold=0,
    n_fold=5,
    n_accumulate=1,
    group_col='patient_id',
    device=device,
)

In [13]:
# Build the augmentation pipelines from CONFIG; the result is later handed to prepare_loaders
data_transforms = get_augmentations(CONFIG)

  A.GaussNoise(var_limit=(5.0, 30.0)),
  original_init(self, **validated_kwargs)
  A.CoarseDropout(


In [14]:
def criterion(outputs, targets):
    """Return the mean binary cross-entropy between `outputs` and `targets`.

    `outputs` must already be probabilities (not logits), as required by
    PyTorch's binary cross-entropy; `targets` must have the same shape.
    """
    # Functional form of nn.BCELoss with its defaults (no weight, mean reduction)
    return nn.functional.binary_cross_entropy(outputs, targets)

In [8]:
# synthetic_custom_data = f"../data/artifacts/syntetic_custom_base_{CONFIG['seed']}"
# os.makedirs(synthetic_custom_data, exist_ok=True)

# tsp = StratifiedGroupKFold(2, shuffle=True, random_state=CONFIG['seed'])
# metrics_ev_df = []
# test_forecast = []
# val_forecast = []
# for fold_n, (train_index, val_index) in enumerate(tsp.split(df_train, y=df_train.target, groups=df_train[CONFIG["group_col"]])):
#     fold_df_train = df_train.iloc[train_index].reset_index(drop=True)
#     fold_df_valid = df_train.iloc[val_index].reset_index(drop=True)
#     synthetic_custom_data_pr = os.path.join(synthetic_custom_data, str(fold_n))
#     os.makedirs(synthetic_custom_data_pr, exist_ok=True)

#     for fn in fold_df_train[fold_df_train.target==1].isic_id.values:
#         if fn not in images_to_include:
#             continue
#         img = Image.open(os.path.join('../data/original/train-image/image', fn + ".jpg"))
#         img.save(os.path.join(synthetic_custom_data_pr, fn + ".png"))
    

In [26]:
# Per-backbone directory that receives the fold checkpoints
folder_name = "./models/oof_{}_base".format(MODEL_NAME.lower())
Path(folder_name).mkdir(parents=True, exist_ok=True)

In [29]:
def get_metrics(drop_path_rate, drop_rate, models_folder, model_maker):
    """Train one model per cross-validation fold and collect out-of-fold predictions.

    Reads the notebook globals `df_train`, `CONFIG`, `model_name`,
    `data_transforms` and `criterion`.

    Args:
        drop_path_rate: forwarded to `setup_model`.
        drop_rate: forwarded to `setup_model`.
        models_folder: directory that receives one `model__<fold>` state dict
            per fold; created if missing.
        model_maker: forwarded to `setup_model` as `model_maker`.

    Returns:
        A tuple `(results_list, oof_df)`:
        - `results_list`: per fold, the maximum of `history['Valid Kaggle metric']`.
        - `oof_df`: every fold's validation rows concatenated, with the added
          columns `tmp_targets_all`, `tmp_predictions_all` and `fold_n`.
    """
    os.makedirs(models_folder, exist_ok=True)

    # Fold count comes from CONFIG so the split stays in sync with the config cell
    splitter = StratifiedGroupKFold(CONFIG['n_fold'], shuffle=True, random_state=CONFIG['seed'])
    fold_splits = splitter.split(df_train, y=df_train.target, groups=df_train[CONFIG["group_col"]])

    results_list = []
    fold_valid_frames = []
    for fold_n, (train_index, val_index) in enumerate(fold_splits):
        fold_df_train = df_train.iloc[train_index].reset_index(drop=True)
        fold_df_valid = df_train.iloc[val_index].reset_index(drop=True)

        # Re-seed before building the model so every fold starts from the same RNG state
        set_seed(CONFIG['seed'])
        model = setup_model(model_name, drop_path_rate=drop_path_rate, drop_rate=drop_rate, model_maker=model_maker)
        print_trainable_parameters(model)

        train_loader, valid_loader = prepare_loaders(fold_df_train, fold_df_valid, CONFIG, data_transforms)

        optimizer = optim.Adam(model.parameters(), lr=CONFIG['learning_rate'],
                               weight_decay=CONFIG['weight_decay'])
        scheduler = fetch_scheduler(optimizer, CONFIG)

        model, history = run_training(
            train_loader, valid_loader,
            model, optimizer, scheduler,
            device=CONFIG['device'],
            num_epochs=CONFIG['epochs'],
            CONFIG=CONFIG,
            tolerance_max=20,
            test_every_nth_step=lambda x: 5,
            seed=CONFIG['seed'])
        torch.save(model.state_dict(), os.path.join(models_folder, f"model__{fold_n}"))
        results_list.append(np.max(history['Valid Kaggle metric']))

        # One more validation pass with the returned model to get per-row predictions
        val_epoch_loss, val_epoch_auroc, val_epoch_custom_metric, tmp_predictions_all, tmp_targets_all = valid_one_epoch(
            model,
            valid_loader,
            device=CONFIG['device'],
            epoch=1,
            optimizer=optimizer,
            criterion=criterion,
            use_custom_score=True,
            metric_function=binary_auroc,
            num_classes=1,
            return_preds=True)

        fold_df_valid['tmp_targets_all'] = tmp_targets_all
        fold_df_valid['tmp_predictions_all'] = tmp_predictions_all
        fold_df_valid['fold_n'] = fold_n
        fold_valid_frames.append(fold_df_valid)

        # Release this fold's model, optimizer and loaders before the next fold
        # so memory does not accumulate across folds.
        del model, optimizer, scheduler, train_loader, valid_loader, history
        gc.collect()
        torch.cuda.empty_cache()

    oof_df = pd.concat(fold_valid_frames).reset_index(drop=True)
    return results_list, oof_df

In [None]:
# Train every fold with drop_path_rate and drop_rate set to 0; keep the per-fold scores and the OOF table
base_metrics, oof_forecasts = get_metrics(drop_path_rate=0, drop_rate=0, models_folder=folder_name, model_maker=ISICModelPrep)

# Write under data_artifacts instead of repeating the directory literal
oof_forecasts.to_parquet(os.path.join(data_artifacts, f'oof_forecasts_{MODEL_NAME.lower()}_base.parquet'))

model.safetensors:   0%|          | 0.00/88.5M [00:00<?, ?B/s]

trainable params: 21744385 || all params: 21744385 || trainable%: 100.00


100%|█████████████████████████████████████████████████| 19/19 [00:27<00:00,  1.43s/it, Epoch=1, LR=0.0001, Train_Auroc=0.494, Train_Loss=0.779]





100%|████████████████████████████████████████████████| 19/19 [00:23<00:00,  1.25s/it, Epoch=2, LR=9.99e-5, Train_Auroc=0.507, Train_Loss=0.708]





100%|████████████████████████████████████████████████| 19/19 [00:23<00:00,  1.26s/it, Epoch=3, LR=9.98e-5, Train_Auroc=0.575, Train_Loss=0.695]





100%|████████████████████████████████████████████████| 19/19 [00:24<00:00,  1.27s/it, Epoch=4, LR=9.96e-5, Train_Auroc=0.572, Train_Loss=0.704]





100%|████████████████████████████████████████████████| 19/19 [00:24<00:00,  1.28s/it, Epoch=5, LR=9.94e-5, Train_Auroc=0.556, Train_Loss=0.695]
100%|████████████████████████████████████████████| 1112/1112 [13:21<00:00,  1.39it/s, Epoch=5, LR=9.94e-5, Valid_Auroc=0.508, Valid_Loss=0.541]
  _warn_get_lr_called_within_step(self)


Validation AUROC Improved (-inf ---> 0.05113914670231129)



100%|████████████████████████████████████████████████| 19/19 [00:24<00:00,  1.28s/it, Epoch=6, LR=9.92e-5, Train_Auroc=0.634, Train_Loss=0.696]





100%|████████████████████████████████████████████████| 19/19 [00:24<00:00,  1.28s/it, Epoch=7, LR=9.89e-5, Train_Auroc=0.667, Train_Loss=0.672]





100%|████████████████████████████████████████████████| 19/19 [00:24<00:00,  1.27s/it, Epoch=8, LR=9.86e-5, Train_Auroc=0.721, Train_Loss=0.664]





100%|█████████████████████████████████████████████████| 19/19 [00:24<00:00,  1.27s/it, Epoch=9, LR=9.82e-5, Train_Auroc=0.75, Train_Loss=0.616]





100%|███████████████████████████████████████████████| 19/19 [00:24<00:00,  1.27s/it, Epoch=10, LR=9.78e-5, Train_Auroc=0.809, Train_Loss=0.562]
100%|███████████████████████████████████████████████| 19/19 [00:24<00:00,  1.27s/it, Epoch=12, LR=9.69e-5, Train_Auroc=0.829, Train_Loss=0.535]





100%|███████████████████████████████████████████████| 19/19 [00:24<00:00,  1.26s/it, Epoch=13, LR=9.63e-5, Train_Auroc=0.848, Train_Loss=0.507]





100%|███████████████████████████████████████████████| 19/19 [00:24<00:00,  1.27s/it, Epoch=14, LR=9.57e-5, Train_Auroc=0.855, Train_Loss=0.498]





100%|███████████████████████████████████████████████| 19/19 [00:24<00:00,  1.27s/it, Epoch=15, LR=9.51e-5, Train_Auroc=0.882, Train_Loss=0.455]
100%|███████████████████████████████████████████| 1112/1112 [13:10<00:00,  1.41it/s, Epoch=15, LR=9.51e-5, Valid_Auroc=0.528, Valid_Loss=0.386]
  _warn_get_lr_called_within_step(self)


Validation AUROC Improved (0.12893225665001556 ---> 0.13083583754694925)



100%|█████████████████████████████████████████████████| 19/19 [00:24<00:00,  1.28s/it, Epoch=16, LR=9.45e-5, Train_Auroc=0.9, Train_Loss=0.415]





100%|███████████████████████████████████████████████| 19/19 [00:24<00:00,  1.28s/it, Epoch=17, LR=9.38e-5, Train_Auroc=0.887, Train_Loss=0.448]





100%|█████████████████████████████████████████████████| 19/19 [00:24<00:00,  1.28s/it, Epoch=18, LR=9.3e-5, Train_Auroc=0.89, Train_Loss=0.431]





100%|███████████████████████████████████████████████| 19/19 [00:24<00:00,  1.28s/it, Epoch=19, LR=9.23e-5, Train_Auroc=0.901, Train_Loss=0.419]





100%|███████████████████████████████████████████████| 19/19 [00:24<00:00,  1.28s/it, Epoch=20, LR=9.14e-5, Train_Auroc=0.877, Train_Loss=0.485]
  1%|▏                                             | 6/1112 [00:04<13:30,  1.36it/s, Epoch=20, LR=9.14e-5, Valid_Auroc=0.583, Valid_Loss=0.549]