In [None]:
import gc
import io
import os
import sys
from dataclasses import dataclass
from io import BytesIO
from pathlib import Path

import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torchmetrics
from PIL import Image
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset,DataLoader
from torchinfo import summary
from torchmetrics import Accuracy
from torchvision import transforms
from tqdm.auto import tqdm
from transformers import AutoImageProcessor,AutoModel

In [None]:
# Kaggle input files for the ISIC 2024 challenge training set.
# NOTE: despite its ``df_`` prefix, ``df_train`` holds a path string, not a DataFrame.
df_train = '/kaggle/input/isic-2024-challenge/train-metadata.csv'  # metadata table (CSV)
fi_hdf5 = '/kaggle/input/isic-2024-challenge/train-image.hdf5'  # image container (HDF5)

In [None]:
class StratifiedBatchSampler:
    """Batch sampler whose batches keep roughly the overall class proportions.

    The labels are cut into ``len(y) // batch_size`` stratified folds with
    ``StratifiedKFold``; the held-out indices of each fold form one batch.
    Every sample therefore appears in exactly one batch per pass, and batches
    may be slightly larger than ``batch_size``.

    Args:
        y: 1-D class labels (tensor on any device, NumPy array, or sequence).
        batch_size: Target number of samples per batch.
        shuffle: When True, a new fold assignment is drawn on every iteration.

    Raises:
        ValueError: If the labels yield fewer than two batches, because
            ``StratifiedKFold`` needs at least two folds.
    """

    def __init__(self, y, batch_size, shuffle=True):
        if torch.is_tensor(y):
            # Tensor.numpy() only works for CPU tensors that do not require grad.
            y = y.detach().cpu().numpy()
        else:
            # Accept plain lists/tuples as well, which have no ``.shape``.
            y = np.asarray(y)
        assert len(y.shape) == 1, 'y must be one-dimensional'

        n_batches = int(len(y) // batch_size)
        if n_batches < 2:
            raise ValueError(
                f"need at least 2 batches, got {n_batches} "
                f"(len(y)={len(y)}, batch_size={batch_size})"
            )

        self.skf = StratifiedKFold(n_splits=n_batches, shuffle=shuffle)
        # split() only needs X for its number of rows, so a constant placeholder
        # is enough and leaves torch's global RNG untouched.
        self.X = np.zeros((len(y), 1))
        self.y = y
        self.shuffle = shuffle
        self.len = n_batches

    def __iter__(self):
        """Yield one array of sample indices per batch."""
        if self.shuffle:
            # Seed the splitter from torch's RNG so torch.manual_seed() controls
            # the batch composition and each pass gets a different assignment.
            self.skf.random_state = torch.randint(0, int(1e8), size=()).item()
        for _, batch_idx in self.skf.split(self.X, self.y):
            yield batch_idx

    def __len__(self):
        """Number of batches produced per pass."""
        return self.len
    
def pauc(y_true, y_pred, min_tpr=0.8):
    """Partial area under the ROC curve restricted to ``TPR >= min_tpr``.

    Labels and scores are both flipped (``1 - value``) so that the region
    ``TPR >= min_tpr`` of the original problem becomes the region
    ``FPR <= 1 - min_tpr`` of the flipped one, which ``roc_auc_score`` can
    integrate through its ``max_fpr`` argument. ``roc_auc_score`` returns the
    McClish-standardized value, which is mapped back to the raw area here.

    Args:
        y_true: Binary ground-truth labels (0/1).
        y_pred: Predicted scores in [0, 1], higher meaning more likely positive.
        min_tpr: Lower bound of the true-positive-rate range to integrate over.

    Returns:
        The unscaled partial AUC; a perfect ranking scores ``1 - min_tpr``
        (0.2 with the default).
    """
    v_gt = np.abs(np.asarray(y_true) - 1)
    v_pred = 1.0 - np.asarray(y_pred)
    max_fpr = abs(1 - min_tpr)
    partial_auc_scaled = roc_auc_score(v_gt, v_pred, max_fpr=max_fpr)

    # Undo the standardization: scaled = 0.5 * (1 + (auc - min_area) / (max_area - min_area))
    # with min_area = 0.5 * max_fpr**2 (chance) and max_area = max_fpr (perfect).
    # The square matters: with ``max_fpr*2`` the scale factor is zero and the
    # function returns max_fpr for every input.
    min_area = 0.5 * max_fpr ** 2
    partial_auc = min_area + (max_fpr - min_area) / (1.0 - 0.5) * (partial_auc_scaled - 0.5)
    return partial_auc

In [None]:
# Read the whole training metadata CSV (located at ``df_train``) into a DataFrame.
target_train = pd.read_csv(filepath_or_buffer=df_train)

In [None]:
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import h5py,io
import albumentations as A
from albumentations.pytorch import ToTensorV2
class HDF5Dataset(Dataset):
    """Image/label dataset backed by an HDF5 file of encoded images.

    A sample is looked up through the ``isic_id`` column of ``metadata``: the
    HDF5 entry stored under that id is decoded with PIL, converted to a float32
    array and divided by 255. The label comes from the ``target`` column.

    The HDF5 file is opened lazily and separately in every process. An h5py
    handle opened in ``__init__`` would be shared with forked ``DataLoader``
    workers (or would have to be pickled for spawned ones), which h5py does
    not support.

    Args:
        data: Whatever ``h5py.File`` accepts as a file name.
        metadata: A ``pandas.DataFrame`` or a CSV path; needs the columns
            ``isic_id`` and ``target``.
        transform: Optional callable invoked as ``transform(image=array)`` that
            returns a mapping with an ``'image'`` entry.
    """

    def __init__(self, data, metadata, transform=None):
        self.data_path = data
        self._file = None
        self._file_pid = None
        # Fail fast on an unreadable file, as the former eager open did, but do
        # not keep the handle around.
        with h5py.File(data, 'r'):
            pass

        if isinstance(metadata, pd.DataFrame):
            self.metadata = metadata
        else:
            self.metadata = pd.read_csv(metadata)
        self.transform = transform

    @property
    def data(self):
        """Read-only HDF5 handle owned by the current process (opened on first use)."""
        pid = os.getpid()
        if self._file is None or self._file_pid != pid:
            self._file = h5py.File(self.data_path, 'r')
            self._file_pid = pid
        return self._file

    def __getstate__(self):
        """Leave the open handle out of the pickled state; it is reopened on demand."""
        state = self.__dict__.copy()
        state['_file'] = None
        state['_file_pid'] = None
        return state

    def __len__(self):
        """Number of samples, i.e. rows of the metadata table."""
        return len(self.metadata)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``."""
        img_name = self.metadata['isic_id'].iloc[idx]
        encoded = np.array(self.data[img_name])
        image = np.array(Image.open(io.BytesIO(encoded)), dtype=np.float32) / 255

        label = int(self.metadata['target'].iloc[idx])

        if self.transform:
            image = self.transform(image=image)['image']

        return image, label

dataset = '/kaggle/input/isic-2024-challenge/train-image.hdf5'
train_data = '/kaggle/input/isic-2024-challenge/train-metadata.csv'

BATCH_SIZE = 256
NUM_WORKERS = 4
VAL_FRACTION = 0.2  # share of the metadata rows held out for validation
SPLIT_SEED = 42     # fixed so the split is identical on every run

# Deterministic preprocessing only, so the same pipeline serves training and
# validation. Random augmentations, if added later, belong in a separate
# train-only pipeline.
train_transform = A.Compose([
    A.Resize(height=224, width=224),
    # HDF5Dataset already scales pixels to [0, 1], hence max_pixel_value=1.0.
    # mean/std are the ImageNet statistics the pretrained backbone expects.
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=1.0),
    ToTensorV2(),
])

# Hold out a stratified slice so the training loop has a real validation
# loader (``train_val_load``) with the same class ratio as the training part.
metadata_all = pd.read_csv(train_data)
metadata_train, metadata_val = train_test_split(
    metadata_all,
    test_size=VAL_FRACTION,
    stratify=metadata_all['target'],
    random_state=SPLIT_SEED,
)

train_dataset = HDF5Dataset(dataset, metadata_train, transform=train_transform)
val_dataset = HDF5Dataset(dataset, metadata_val, transform=train_transform)

train_load = DataLoader(train_dataset,
                        batch_size=BATCH_SIZE,
                        shuffle=True,
                        num_workers=NUM_WORKERS
                       )
# No shuffling for validation: the order does not affect the metric.
train_val_load = DataLoader(val_dataset,
                            batch_size=BATCH_SIZE,
                            shuffle=False,
                            num_workers=NUM_WORKERS
                           )

In [None]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [None]:
# Fetch ResNet-50 with pretrained weights from the torchvision hub repo pinned at v0.10.0.
model = torch.hub.load(
    repo_or_dir='pytorch/vision:v0.10.0',
    model='resnet50',
    pretrained=True,
)

In [None]:
# Freeze the pretrained backbone so that only the new classification head is trained.
for param in model.parameters():
    param.requires_grad = False

# Size the head from the layer it replaces instead of hard-coding the width.
# ResNet-50 feeds 2048 features into ``fc``, so ``nn.Linear(512, 1)`` fails with
# a shape mismatch on the first forward pass.
# The Sequential branch keeps this cell re-runnable after ``fc`` has been replaced.
old_head = model.fc
if isinstance(old_head, nn.Sequential):
    head_in_features = old_head[0].in_features
else:
    head_in_features = old_head.in_features

# Single sigmoid output: probability of the positive class.
# Newly created layers have requires_grad=True, so the head stays trainable.
model.fc = nn.Sequential(
    nn.Linear(head_in_features, 1),
    nn.Sigmoid(),
)

In [None]:
# Place the network on the selected device before building the optimizer.
model = model.to(device)

In [None]:
# Binary cross-entropy on probabilities.
loss_fn = nn.BCELoss()
optimizer = torch.optim.NAdam(params=model.parameters(), lr=0.01)
# Multiply the learning rate by 0.1 after every 20 calls to LR_SCHEDULER.step().
LR_SCHEDULER = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=20, gamma=0.1)

In [None]:
EPOCHS=5


def _split_batch(batch):
    """Return ``(images, labels)`` from a tuple/list batch or an ``{"image", "label"}`` dict batch."""
    if isinstance(batch, dict):
        return batch["image"], batch["label"]
    images, labels = batch
    return images, labels


for epoch in tqdm(range(EPOCHS)):
    # NOTE(review): train() also puts the frozen backbone's BatchNorm layers in
    # training mode, so their running statistics keep updating. Confirm that is intended.
    model.train()
    net_loss = 0.0
    print(f"Epoch: {epoch+1}\n-----------\nTraining")
    for image, label in tqdm(train_load):
        x = image.to(device)
        y = label.to(device)
        pred = model(x)
        # reshape(-1) rather than squeeze(): squeeze() turns a final batch of one
        # sample into a 0-d tensor that no longer matches the (1,) target.
        loss = loss_fn(pred.float().reshape(-1), y.float().reshape(-1))
        net_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # No per-batch del/gc.collect(): the names are rebound on the next
        # iteration and a full collection per step only slows training down.

    net_loss /= len(train_load)

    model.eval()
    val_targets = []
    val_scores = []
    with torch.no_grad():
        print("Validating")
        for batch in train_val_load:
            image, label = _split_batch(batch)
            pred = model(image.to(device))
            # Move results to the CPU: the metric is computed with NumPy/scikit-learn.
            val_scores.append(pred.float().reshape(-1).cpu())
            val_targets.append(label.reshape(-1).cpu())

    # Score the whole validation set at once. A per-batch ROC AUC is undefined
    # for batches containing a single class, and averaging it would not give the
    # dataset-level metric anyway.
    val_pauc = pauc(torch.cat(val_targets).numpy(), torch.cat(val_scores).numpy())
    print(f"Loss: {net_loss:.6f} |pauc:{val_pauc}")
    LR_SCHEDULER.step()

In [None]:
# Persist only the weights (the state_dict) to the Kaggle working directory.
path = "/kaggle/working/model_ezy.pth"
torch.save(model.state_dict(), path)