In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from PIL import Image
import torch
from torch import nn, optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm
import torchvision.models as models
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import torchvision.transforms as transforms
import cv2
import timm
import time
from tqdm import tqdm
from sklearn.metrics import roc_auc_score
from torch.optim.lr_scheduler import ReduceLROnPlateau

import warnings
warnings.filterwarnings("ignore")

  check_for_updates()


In [2]:
def seet_everything(seed=42):
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
seet_everything()

In [3]:
class MelanomaDataset(Dataset):
    def __init__(self, df: pd.DataFrame, imfolder: str, train: bool = True, transforms = None):
        """
        Class initialization
        Args:
            df (pd.DataFrame): DataFrame with data description
            imfolder (str): folder with images
            train (bool): flag of whether a training dataset is being initialized or testing one
            transforms: image transformation method to be applied            
        """
        self.df = df
        self.imfolder = imfolder
        self.transforms = transforms
        self.train = train
        
    def __getitem__(self, index):
        im_path = os.path.join(self.imfolder, self.df.iloc[index]['image_name'] + '.jpg')
        x = cv2.imread(im_path)

        if self.transforms:
            x = self.transforms(image = x)
            
        if self.train:
            y = self.df.iloc[index]['target']
            return x['image'], y
        else:
            return x['image']
    
    def __len__(self):
        return len(self.df)

In [4]:
LR    = 4e-5
H     = 224
W     = 224
INFERENCE   = False

In [5]:
train_df = pd.read_csv('/kaggle/input/siim-isic-melanoma-classification/train.csv')
test_df = pd.read_csv('/kaggle/input/siim-isic-melanoma-classification/test.csv')

In [6]:
from sklearn.model_selection import train_test_split
X_train, X_test, _, _ = train_test_split(train_df, train_df.target,
                                                    stratify=train_df.target, 
                                                    test_size=0.2)

In [7]:
normalize = A.Normalize(mean=[0.5, 0.5, 0.5],
            std=[0.5, 0.5, 0.5], max_pixel_value=255.0)  #imagenet values

train_transform = A.Compose(
    [
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.Resize(H,W),
        normalize,
        ToTensorV2(),
    ],
)

val_transforms = A.Compose(
    [
        A.Resize(H,W),
        normalize,
        ToTensorV2(),
    ],
)

In [8]:
X_train

Unnamed: 0,image_name,patient_id,sex,age_approx,anatom_site_general_challenge,diagnosis,benign_malignant,target
4410,ISIC_1429439,IP_8140841,male,75.0,upper extremity,unknown,benign,0
1861,ISIC_0658963,IP_1870306,male,75.0,torso,unknown,benign,0
24570,ISIC_7459410,IP_3722746,male,50.0,torso,nevus,benign,0
23914,ISIC_7261174,IP_8041141,female,15.0,head/neck,melanoma,malignant,1
3387,ISIC_1124744,IP_2610032,male,40.0,upper extremity,nevus,benign,0
...,...,...,...,...,...,...,...,...
6113,ISIC_1929320,IP_3782995,female,25.0,torso,nevus,benign,0
12486,ISIC_3844983,IP_0825081,male,80.0,lower extremity,unknown,benign,0
17599,ISIC_5366635,IP_7770083,female,45.0,,unknown,benign,0
16768,ISIC_5130558,IP_6078411,male,35.0,torso,unknown,benign,0


In [9]:
train = MelanomaDataset(df=X_train.reset_index(drop=True), 
                        imfolder='/kaggle/input/siim-isic-melanoma-classification/jpeg/train/', 
                        train=True, 
                        transforms=train_transform,)

val = MelanomaDataset(df=X_test.reset_index(drop=True), 
                        imfolder='/kaggle/input/siim-isic-melanoma-classification/jpeg/train/', 
                        train=True, 
                        transforms=val_transforms,)

test = MelanomaDataset(df=test_df,
                       imfolder='/kaggle/input/siim-isic-melanoma-classification/jpeg/test/', 
                       train=False,
                       transforms=val_transforms)

train_loader = DataLoader(dataset=train, batch_size=64, shuffle=True, num_workers=2)
val_loader = DataLoader(dataset=val, batch_size=16, shuffle=False, num_workers=2)
test_loader = DataLoader(dataset=test, batch_size=16, shuffle=False, num_workers=2)

In [10]:
class ViTNet(nn.Module):
    def __init__(self, num_classes=1):
        super(ViTNet, self).__init__()
        self.vit = timm.create_model("vit_base_patch16_224", pretrained=True, num_classes=num_classes)

    def forward(self, x):
        return self.vit(x)

In [11]:
from torchmetrics.classification import AUROC
criterion = nn.BCEWithLogitsLoss()#AUROC(task="binary")


In [12]:
model     = ViTNet().to('cuda')
device = 'cuda'
optim = torch.optim.Adam(model.parameters(), lr=0.001)
epochs = 20  # Number of epochs to run
es_patience = 3  # Early Stopping patience - for how many epochs with no improvements to wait
scheduler = ReduceLROnPlateau(optimizer=optim, mode='max', patience=1, verbose=True, factor=0.2)
best_val = 0
model_path = 'model_skin_safe.pth' 

for epoch in range(epochs):
    correct = 0
    epoch_loss = 0
    model.train()
    
    for x, y in tqdm(train_loader):
        x = torch.tensor(x, device=device, dtype=torch.float32)
        y = torch.tensor(y, device=device, dtype=torch.float32)
        optim.zero_grad()
        z = model(x)
        loss = criterion(z, y.unsqueeze(1))
        loss.backward()
        optim.step()
        pred = torch.round(torch.sigmoid(z))  # round off sigmoid to obtain predictions
        correct += (pred.cpu() == y.cpu().unsqueeze(1)).sum().item()  # tracking number of correctly predicted samples
        epoch_loss += loss.item()
        
    train_acc = correct / len(train)
    
    model.eval()  # switch model to the evaluation mode
    val_preds = torch.zeros((len(X_test), 1), dtype=torch.float32, device=device)
    with torch.no_grad():  # Do not calculate gradient since we are only predicting
        # Predicting on validation set
        for j, (x_val, y_val) in enumerate(val_loader):
            x_val = torch.tensor(x_val, device=device, dtype=torch.float32)
            y_val = torch.tensor(y_val, device=device, dtype=torch.float32)
            z_val = model(x_val)
            val_pred = torch.sigmoid(z_val)
            val_preds[j*val_loader.batch_size:j*val_loader.batch_size + x_val.shape[0]] = val_pred
        val_roc = roc_auc_score(X_test['target'].values, val_preds.cpu())
        
        print('Epoch {:03}: | Loss: {:.3f} | Train acc: {:.3f} | Val roc_auc: {:.3f} '.format(
        epoch + 1, 
        epoch_loss, 
        train_acc, 
        val_roc))
        
        scheduler.step(val_roc)
            
        if val_roc >= best_val:
            best_val = val_roc
            patience = es_patience  # Resetting patience since we have new best validation accuracy
            torch.save(model, model_path)  # Saving current best model
        else:
            patience -= 1
            if patience == 0:
                print('Early stopping. Best Val roc_auc: {:.3f}'.format(best_val))
                break    

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

100%|██████████| 415/415 [27:38<00:00,  4.00s/it]


Epoch 001: | Loss: 54.800 | Train acc: 0.979 | Val roc_auc: 0.793 


100%|██████████| 415/415 [26:06<00:00,  3.77s/it]


Epoch 002: | Loss: 34.582 | Train acc: 0.982 | Val roc_auc: 0.803 


100%|██████████| 415/415 [25:19<00:00,  3.66s/it]


Epoch 003: | Loss: 34.061 | Train acc: 0.982 | Val roc_auc: 0.797 


100%|██████████| 415/415 [24:31<00:00,  3.55s/it]


Epoch 004: | Loss: 33.963 | Train acc: 0.982 | Val roc_auc: 0.805 


100%|██████████| 415/415 [24:49<00:00,  3.59s/it]


Epoch 005: | Loss: 34.061 | Train acc: 0.982 | Val roc_auc: 0.798 


100%|██████████| 415/415 [24:22<00:00,  3.52s/it]


Epoch 006: | Loss: 34.294 | Train acc: 0.982 | Val roc_auc: 0.795 


100%|██████████| 415/415 [24:38<00:00,  3.56s/it]


Epoch 007: | Loss: 32.746 | Train acc: 0.982 | Val roc_auc: 0.802 
Early stopping. Best Val roc_auc: 0.805
