In [19]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torchvision
from torchvision import models, datasets, transforms
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold, StratifiedGroupKFold 
from sklearn.metrics import roc_auc_score, auc, roc_curve
import h5py
from tqdm import tqdm
from PIL import Image
import io
import albumentations as A
from albumentations.pytorch import ToTensorV2
import pickle

import warnings
warnings.filterwarnings('ignore')

device = 'cuda' if torch.cuda.is_available else 'cpu'

In [2]:
model = models.resnet18(pretrained=True)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 151MB/s]


In [3]:
weights = torchvision.models.ResNet18_Weights.DEFAULT
preprocess = weights.transforms()
preprocess

ImageClassification(
    crop_size=[224]
    resize_size=[256]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BILINEAR
)

In [4]:
for params in model.parameters():
    params.requires_grad = False

In [5]:
model.fc = nn.Linear(in_features=512, out_features=1, bias=True)

In [6]:
next(model.parameters()).device

device(type='cpu')

In [7]:
train_data = h5py.File("/kaggle/input/isic-2024-challenge/train-image.hdf5", 'r')
test_data = h5py.File("/kaggle/input/isic-2024-challenge/test-image.hdf5", 'r')

In [8]:
train_meta = pd.read_csv("/kaggle/input/isic-2024-challenge/train-metadata.csv")
test_meta = pd.read_csv("/kaggle/input/isic-2024-challenge/test-metadata.csv")

In [9]:
neg_train_meta = train_meta[train_meta["target"]==0].sample(n=10000, random_state=42)
pos_train_meta = train_meta[train_meta["target"]==1]

In [10]:
new_train_meta = pd.concat([neg_train_meta, pos_train_meta]).sample(frac=1, random_state=42).reset_index(drop=True)
new_train_meta.shape

(10393, 55)

In [11]:
class dataset(Dataset):
    def __init__(self, li, transform, train):
        self.li = li
        self.transform = transform
        self.train = train
        
    def __len__(self):
        return len(self.li)
    
    def __getitem__(self, idx):
        img = np.array(Image.open(io.BytesIO(self.li[idx][0][()])))
        label = self.li[idx][1]
        if self.train:
            img = self.transform(image = img)["image"]
        return {
            "image" : img,
            "label" : label
        }
#         else:
#             img = self.transform(image = img)["image"]
#             return{
#                 "image": img
#             }

In [12]:
aug_transform = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.Resize(224, 224),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

test_transform = A.Compose([
    A.Resize(224, 224),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

In [20]:
train_li = []

for id in tqdm(new_train_meta["isic_id"]):
    img = train_data[id]
    label = new_train_meta[new_train_meta["isic_id"]==id]["target"].item()
    li = [img, label]
    train_li.append(li)

100%|██████████| 10393/10393 [00:16<00:00, 623.39it/s]


In [21]:
len(train_li)

10393

In [23]:
train_dict, val_dict = train_test_split(train_li, test_size=0.2, random_state=42)

In [24]:
data_train = dataset(train_dict, aug_transform, True)
data_val = dataset(val_dict, aug_transform, True)

In [25]:
train_load = DataLoader(data_train, batch_size=32, shuffle=True, num_workers=4)
val_load = DataLoader(data_val, batch_size=32, shuffle=True, num_workers=4)

In [26]:
x, y = next(iter(train_load)).values()
x.shape

torch.Size([32, 3, 224, 224])

In [27]:
loss_fn = nn.BCEWithLogitsLoss()
optimiser = torch.optim.SGD(params=model.parameters(), lr=0.01)
LR_Scheduler = torch.optim.lr_scheduler.StepLR(optimiser, 2, 0.1)

In [28]:
def calculate_pauc(y_true, y_scores, tpr_threshold=0.8):
    fpr, tpr, thresholds = roc_curve(y_true, y_scores)
    
    mask = tpr >= tpr_threshold
#     if np.sum(mask) < 2:
#         raise ValueError("Not enough points above the TPR threshold for pAUC calculation.")
    
    fpr_above_threshold = fpr[mask]
    tpr_above_threshold = tpr[mask]
    
#     try:
    partial_auc = auc(fpr_above_threshold, tpr_above_threshold)
    return partial_auc * (1 - tpr_threshold)
    
#     except:
#         return 0

In [29]:
model.to(device)
next(model.parameters()).device

device(type='cuda', index=0)

In [30]:
n_epochs=10

for epoch in range(n_epochs):
    torch.cuda.empty_cache()
    model.train()
    net_loss = 0.0
    for data in tqdm(train_load):
        X = data["image"]
        y = data["label"]
        X = X.to(device)
        y = y.to(device).to(torch.float32)
        y_pred = model(X).flatten()
        loss = loss_fn(y_pred, y)
        net_loss += loss.item()
        optimiser.zero_grad()
        loss.backward()
        optimiser.step()
        del X, y, y_pred
    net_loss /= len(train_load)
    
    model.eval()
    with torch.inference_mode():
        y_val_all = []
        y_val_pred_all = []
        for val_data in tqdm(val_load):
            X_val = val_data["image"]
            y_val = val_data["label"]
            X_val = X_val.to(device)
            y_val = y_val.to(device)
            y_val_pred = model(X_val)
            y_val_all.append(y_val)
            y_val_pred_all.append(y_val_pred)
#             fpr, tpr, thresholds = roc_curve(y_val.cpu(), y_val_pred.cpu())
#             val_auc = auc(fpr, tpr)
#             pauc = calculate_pauc(y_val.cpu(), y_val_pred.cpu())
            del X_val, y_val, y_val_pred
    y_val_all = torch.cat(y_val_all, dim=0).cpu().numpy()
    y_val_pred_all = torch.cat(y_val_pred_all, dim=0).cpu().numpy()
    pauc = calculate_pauc(y_val_all, y_val_pred_all)
    print(f"epoch: {epoch+1} | train_loss: {net_loss:.5f} | val_pauc: {pauc}")

100%|██████████| 260/260 [00:11<00:00, 22.49it/s]
100%|██████████| 65/65 [00:02<00:00, 23.85it/s]


epoch: 1 | train_loss: 0.16037 | val_pauc: 0.08642696348174084


100%|██████████| 260/260 [00:09<00:00, 26.94it/s]
100%|██████████| 65/65 [00:02<00:00, 29.95it/s]


epoch: 2 | train_loss: 0.14501 | val_pauc: 0.07473361680840418


100%|██████████| 260/260 [00:09<00:00, 28.15it/s]
100%|██████████| 65/65 [00:02<00:00, 30.02it/s]


epoch: 3 | train_loss: 0.13988 | val_pauc: 0.09206728364182089


100%|██████████| 260/260 [00:09<00:00, 28.78it/s]
100%|██████████| 65/65 [00:02<00:00, 28.54it/s]


epoch: 4 | train_loss: 0.13333 | val_pauc: 0.11419834917458727


100%|██████████| 260/260 [00:09<00:00, 27.16it/s]
100%|██████████| 65/65 [00:02<00:00, 30.17it/s]


epoch: 5 | train_loss: 0.13128 | val_pauc: 0.1026025512756378


100%|██████████| 260/260 [00:09<00:00, 28.28it/s]
100%|██████████| 65/65 [00:02<00:00, 30.29it/s]


epoch: 6 | train_loss: 0.12891 | val_pauc: 0.10142071035517757


100%|██████████| 260/260 [00:09<00:00, 28.48it/s]
100%|██████████| 65/65 [00:02<00:00, 30.35it/s]


epoch: 7 | train_loss: 0.12809 | val_pauc: 0.11429089544772383


100%|██████████| 260/260 [00:09<00:00, 27.56it/s]
100%|██████████| 65/65 [00:02<00:00, 30.47it/s]


epoch: 8 | train_loss: 0.12641 | val_pauc: 0.1062281140570285


100%|██████████| 260/260 [00:09<00:00, 28.78it/s]
100%|██████████| 65/65 [00:02<00:00, 30.95it/s]


epoch: 9 | train_loss: 0.12744 | val_pauc: 0.12279014507253624


100%|██████████| 260/260 [00:09<00:00, 28.70it/s]
100%|██████████| 65/65 [00:02<00:00, 30.57it/s]

epoch: 10 | train_loss: 0.12223 | val_pauc: 0.10524762381190593





In [34]:
pickle.dump(model, open('model_saved3.pkl', 'wb'))