In [1]:
pip install torcheval

Collecting torcheval
  Downloading torcheval-0.0.7-py3-none-any.whl.metadata (8.6 kB)
Downloading torcheval-0.0.7-py3-none-any.whl (179 kB)
Installing collected packages: torcheval
Successfully installed torcheval-0.0.7
Note: you may need to restart the kernel to use updated packages.




In [1]:
import os
import gc
import cv2
import math
import copy
import time
import random
import glob
from matplotlib import pyplot as plt

# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp
import torchvision
from torcheval.metrics.functional import binary_auroc

# Utils
import joblib
from tqdm import tqdm
from collections import defaultdict

# Sklearn Imports
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold, StratifiedGroupKFold 

# For Image Models
import timm

# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

# For colored terminal text
from colorama import Fore, Back, Style
b_ = Fore.BLUE
sr_ = Style.RESET_ALL

import warnings
warnings.filterwarnings("ignore")

# For descriptive error messages
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

  from .autonotebook import tqdm as notebook_tqdm


### Training Configuration

In [2]:
CONFIG = {
    "seed": 42,
    "epochs": 50,
    "img_size": 384,
    "model_name": "tf_efficientnet_b0_ns",
    "checkpoint_path" : "../Kaggle-Skin-Cancer/tf_efficientnet_b0_aa-827b6e33.pth",
    "train_batch_size": 32,
    "valid_batch_size": 64,
    "learning_rate": 1e-4,
    "scheduler": 'CosineAnnealingLR',
    "min_lr": 1e-6,
    "T_max": 500,
    "weight_decay": 1e-6,
    "fold" : 0,
    "n_fold": 5,
    "n_accumulate": 1,
    "device": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
}

### Set Seed 
Reproducibility

In [3]:
def set_seed(seed=42):
    '''Sets the seed of the entire notebook so results are the same every time we run.
    This is for REPRODUCIBILITY.'''
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Set a fixed value for the hash seed
    os.environ['PYTHONHASHSEED'] = str(seed)
    
set_seed(CONFIG['seed'])

In [4]:
ROOT_DIR = "../isic-2024-challenge"
TRAIN_DIR = f'{ROOT_DIR}/train-image/image'

In [5]:
def get_train_file_path(image_id):
    return f"{TRAIN_DIR}/{image_id}.jpg"

### Read Image Data

In [31]:
train_images = sorted(glob.glob(f'{TRAIN_DIR}/*.jpg'))
train_images = ['../isic-2024-challenge/train-image/image/' + i.split('/')[-1][6:] for i in train_images]
train_images

['../isic-2024-challenge/train-image/image/ISIC_0015670.jpg',
 '../isic-2024-challenge/train-image/image/ISIC_0015845.jpg',
 '../isic-2024-challenge/train-image/image/ISIC_0015864.jpg',
 '../isic-2024-challenge/train-image/image/ISIC_0015902.jpg',
 '../isic-2024-challenge/train-image/image/ISIC_0024200.jpg',
 '../isic-2024-challenge/train-image/image/ISIC_0035502.jpg',
 '../isic-2024-challenge/train-image/image/ISIC_0051648.jpg',
 '../isic-2024-challenge/train-image/image/ISIC_0051665.jpg',
 '../isic-2024-challenge/train-image/image/ISIC_0051710.jpg',
 '../isic-2024-challenge/train-image/image/ISIC_0051758.jpg',
 '../isic-2024-challenge/train-image/image/ISIC_0051812.jpg',
 '../isic-2024-challenge/train-image/image/ISIC_0051822.jpg',
 '../isic-2024-challenge/train-image/image/ISIC_0051896.jpg',
 '../isic-2024-challenge/train-image/image/ISIC_0051897.jpg',
 '../isic-2024-challenge/train-image/image/ISIC_0051958.jpg',
 '../isic-2024-challenge/train-image/image/ISIC_0051983.jpg',
 '../isi

In [32]:
df = pd.read_csv('../isic-2024-challenge/train-metadata.csv')

print("        df.shape, # of positive cases, # of patients")
print("original>", df.shape, df.target.sum(), df["patient_id"].unique().shape)

df_positive = df[df["target"] == 1].reset_index(drop=True)
df_negative = df[df["target"] == 0].reset_index(drop=True)

df = pd.concat([df_positive, df_negative.iloc[:df_positive.shape[0]*20, :]])  # positive:negative = 1:20
print("filtered>", df.shape, df.target.sum(), df["patient_id"].unique().shape)

df['file_path'] = df['isic_id'].apply(get_train_file_path)
df = df[ df["file_path"].isin(train_images) ].reset_index(drop=True)
df

        df.shape, # of positive cases, # of patients
original> (401059, 55) 393 (1042,)
filtered> (8253, 55) 393 (950,)


Unnamed: 0,isic_id,target,patient_id,age_approx,sex,anatom_site_general,clin_size_long_diam_mm,image_type,tbp_tile_type,tbp_lv_A,...,iddx_full,iddx_1,iddx_2,iddx_3,iddx_4,iddx_5,mel_mitotic_index,mel_thick_mm,tbp_lv_dnn_lesion_confidence,file_path
0,ISIC_0082829,1,IP_3249371,80.0,female,anterior torso,9.27,TBP tile: close-up,3D: XP,18.093368,...,Malignant::Malignant epidermal proliferations:...,Malignant,Malignant epidermal proliferations,Squamous cell carcinoma in situ,,,,,72.211683,../isic-2024-challenge/train-image/image/ISIC_...
1,ISIC_0096034,1,IP_6723298,75.0,male,head/neck,3.88,TBP tile: close-up,3D: white,26.187010,...,Malignant::Malignant adnexal epithelial prolif...,Malignant,Malignant adnexal epithelial proliferations - ...,Basal cell carcinoma,"Basal cell carcinoma, Nodular",,,,94.559590,../isic-2024-challenge/train-image/image/ISIC_...
2,ISIC_0104229,1,IP_9057861,80.0,male,anterior torso,6.55,TBP tile: close-up,3D: white,33.480140,...,Malignant::Malignant adnexal epithelial prolif...,Malignant,Malignant adnexal epithelial proliferations - ...,Basal cell carcinoma,"Basal cell carcinoma, Nodular",,,,66.196170,../isic-2024-challenge/train-image/image/ISIC_...
3,ISIC_0119495,1,IP_6856511,60.0,male,upper extremity,5.27,TBP tile: close-up,3D: white,25.872050,...,Malignant::Malignant epidermal proliferations:...,Malignant,Malignant epidermal proliferations,"Squamous cell carcinoma, Invasive",,,,,99.991270,../isic-2024-challenge/train-image/image/ISIC_...
4,ISIC_0157834,1,IP_3927284,55.0,male,posterior torso,5.29,TBP tile: close-up,3D: white,30.966662,...,Malignant::Malignant adnexal epithelial prolif...,Malignant,Malignant adnexal epithelial proliferations - ...,Basal cell carcinoma,"Basal cell carcinoma, Superficial",,,,99.960846,../isic-2024-challenge/train-image/image/ISIC_...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8248,ISIC_0267522,0,IP_9577633,50.0,male,lower extremity,4.94,TBP tile: close-up,3D: XP,21.738880,...,Benign,Benign,,,,,,,96.325510,../isic-2024-challenge/train-image/image/ISIC_...
8249,ISIC_0267560,0,IP_7746572,80.0,male,anterior torso,9.32,TBP tile: close-up,3D: white,18.481750,...,Benign,Benign,,,,,,,97.022320,../isic-2024-challenge/train-image/image/ISIC_...
8250,ISIC_0267568,0,IP_0379091,60.0,male,anterior torso,3.45,TBP tile: close-up,3D: white,13.901465,...,Benign,Benign,,,,,,,72.961688,../isic-2024-challenge/train-image/image/ISIC_...
8251,ISIC_0267594,0,IP_1433033,70.0,,lower extremity,3.00,TBP tile: close-up,3D: XP,17.986882,...,Benign,Benign,,,,,,,99.899274,../isic-2024-challenge/train-image/image/ISIC_...


In [33]:
df.shape[0], df.target.sum()

(8253, 393)

In [34]:
CONFIG['T_max'] = df.shape[0] * (CONFIG["n_fold"]-1) * CONFIG['epochs'] // CONFIG['train_batch_size'] // CONFIG["n_fold"]
CONFIG['T_max']

10316