In [None]:
import os
import sys
import io
from io import BytesIO
from pathlib import Path
import pandas as pd
import h5py
import matplotlib.pyplot as plt
from PIL import Image
from dataclasses import dataclass
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset,DataLoader
from PIL import Image
from sklearn.model_selection import train_test_split
from tqdm.auto import tqdm
import torchmetrics
from torchvision import transforms
from sklearn.model_selection import train_test_split
from torchinfo import summary
from torchmetrics import Accuracy
from transformers import AutoImageProcessor,AutoModel
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold,GroupShuffleSplit
import pickle
import timm
import gc

In [None]:
# --- Load and clean the training metadata -----------------------------------
fi_hdf5 = '/kaggle/input/isic-2024-challenge/train-image.hdf5'
df_train = pd.read_csv('/kaggle/input/isic-2024-challenge/train-metadata.csv')

# Keep a handle on the full-length label column before any rows are sampled away.
train_target = df_train['target']

# Columns removed before modelling (identifiers, diagnosis fields, free-text /
# categorical descriptors).
train_drop = [
    'lesion_id', 'iddx_full', 'iddx_1',
    'iddx_2', 'iddx_3', 'iddx_4', 'iddx_5', 'mel_mitotic_index',
    'mel_thick_mm', 'tbp_lv_dnn_lesion_confidence',
    'patient_id', 'image_type', 'tbp_tile_type', 'tbp_lv_location',
    'attribution', 'tbp_lv_location_simple',
    'copyright_license',
]
df_train = df_train.drop(columns=train_drop)

# Impute missing ages with the median, then discard every remaining column that
# still contains at least one missing value.
temp = df_train['age_approx'].median()
df_train = (
    df_train
    .assign(age_approx=df_train['age_approx'].fillna(temp))
    .dropna(axis=1)
)

In [None]:
# --- Subsample negatives and build disjoint train / validation frames --------
# NOTE: this cell rebinds df_train, so it is not idempotent; re-run the
# metadata-loading cell before re-running this one.
df_neg = df_train[df_train['target'] == 0]
df_pos = df_train[df_train['target'] == 1]

# Keep only 2.5% of the negatives (all positives are kept). The seed makes the
# subsample reproducible across kernel restarts.
df_neg_lg, df_neg_sm = train_test_split(
    df_neg, test_size=0.025, shuffle=True, random_state=7
)
df_sampled = pd.concat([df_neg_sm, df_pos])

# NOTE(review): groups are keyed on isic_id; if that id is unique per row this
# behaves like a plain shuffle split. A patient-level split would need
# patient_id, which the metadata-cleaning cell drops - confirm intent.
splitter = GroupShuffleSplit(test_size=.25, n_splits=2, random_state=7)
split = splitter.split(df_sampled, groups=df_sampled['isic_id'])
train_inds, test_inds = next(split)

# Previously only df_val was sliced and train_inds was ignored, so df_train
# still contained every validation row and the validation score was computed on
# data the model had been trained on.
df_val = df_sampled.iloc[test_inds]
df_train = df_sampled.iloc[train_inds]
y = df_train['target']

assert set(df_train['isic_id']).isdisjoint(df_val['isic_id']), "train/val overlap"

print(df_train.shape,df_val.shape)

In [None]:
def pauc(y_true, y_pred, min_tpr=0.8):
    """Partial area under the ROC curve restricted to TPR >= ``min_tpr``.

    Args:
        y_true: Binary ground-truth labels (0/1). Any array-like accepted by
            ``np.asarray`` (list, ndarray, CPU tensor); flattened to 1-D.
        y_pred: Predicted scores for the positive class, same length as
            ``y_true``; flattened to 1-D, so an ``(N, 1)`` column is fine.
        min_tpr: Lower bound on the true-positive rate. Defaults to 0.8.

    Returns:
        The un-normalised partial AUC as a float. With the default ``min_tpr``
        the maximum attainable value is ``1 - min_tpr`` (0.2).

    Raises:
        ValueError: propagated from ``roc_auc_score``, e.g. when ``y_true``
            contains a single class.
    """
    # Flip labels and scores so the "TPR >= min_tpr" band of the original ROC
    # curve becomes the "FPR <= 1 - min_tpr" band that max_fpr can express.
    v_gt = np.abs(np.asarray(y_true).ravel() - 1)
    v_pred = 1.0 - np.asarray(y_pred).ravel()
    max_fpr = abs(1 - min_tpr)

    partial_auc_scaled = roc_auc_score(v_gt, v_pred, max_fpr=max_fpr)

    # roc_auc_score returns the standardised partial AUC (0.5 = chance,
    # 1.0 = perfect) when max_fpr is given; map it back to the raw area.
    partial_auc = 0.5 * max_fpr**2 + (max_fpr - 0.5 * max_fpr**2) / (1.0 - 0.5) * (partial_auc_scaled - 0.5)
    return partial_auc

In [None]:
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import h5py,io
import albumentations as A
from albumentations.pytorch import ToTensorV2
class HDF5Dataset(Dataset):
    """Image + tabular-metadata dataset backed by an HDF5 file of encoded images.

    Each item is ``(image, label, meta)``:
      * ``image`` - the decoded picture scaled to [0, 1] float32, after the
        optional albumentations-style ``transform`` (called as
        ``transform(image=...)['image']``).
      * ``label`` - ``int`` taken from the ``target`` column.
      * ``meta``  - float32 tensor of every metadata column from position 2
        onward (assumes the first two columns are the id and the target -
        TODO confirm against the frame passed in).

    Args:
        data: Path to the HDF5 file; datasets are looked up by ``isic_id``.
        metadata: A ``pd.DataFrame`` or a CSV path with at least ``isic_id``
            and ``target`` columns.
        transform: Optional callable applied to the image.

    The HDF5 file is opened lazily, once per process. h5py handles must not be
    shared across forked/spawned DataLoader workers, so the handle is never
    pickled and is re-opened whenever the current PID differs from the PID that
    opened it. As a consequence a bad path fails on first item access rather
    than at construction time.
    """

    def __init__(self, data, metadata, transform=None):
        self._h5_path = data
        self._h5_file = None
        self._h5_pid = None
        if isinstance(metadata, pd.DataFrame):
            self.metadata = metadata
        else:
            self.metadata = pd.read_csv(metadata)
        self.transform = transform

    @property
    def data(self):
        """Read-only h5py.File handle owned by the current process."""
        pid = os.getpid()
        if self._h5_file is None or self._h5_pid != pid:
            self._h5_file = h5py.File(self._h5_path, 'r')
            self._h5_pid = pid
        return self._h5_file

    def __getstate__(self):
        # Never ship an open handle to another process; it re-opens on demand.
        state = self.__dict__.copy()
        state['_h5_file'] = None
        state['_h5_pid'] = None
        return state

    def __len__(self):
        return len(self.metadata)

    def __getitem__(self, idx):
        row = self.metadata.iloc[idx]
        img_name = row['isic_id']

        # The HDF5 entry holds the encoded image bytes; decode via PIL.
        encoded = np.array(self.data[img_name])
        with Image.open(io.BytesIO(encoded)) as pil_img:
            # Force 3 channels so the downstream per-channel normalisation
            # cannot fail on a grayscale / RGBA image.
            image = np.array(pil_img.convert('RGB'), dtype=np.float32) / 255

        label = int(row['target'])

        # "is not None" rather than truthiness: a transform object that defines
        # __len__ could otherwise be skipped when empty.
        if self.transform is not None:
            image = self.transform(image=image)['image']

        meta = torch.from_numpy(np.asarray(row)[2:].astype('f'))
        return image, label, meta

dataset = '/kaggle/input/isic-2024-challenge/train-image.hdf5'
train_data = '/kaggle/input/isic-2024-challenge/train-metadata.csv'

# Deterministic preprocessing only (no augmentation), shared by train and
# validation. max_pixel_value=1.0 because HDF5Dataset already rescales pixels
# to [0, 1] before the transform runs.
train_transform = A.Compose([
    A.Resize(height=224, width=224),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=1.0),
    ToTensorV2(),
])

# Training data: reshuffled every epoch.
# NOTE(review): a trailing batch of size 1 would break BatchNorm1d in train
# mode; consider drop_last=True if the training-set size can end up at
# 1 (mod 64).
train_dataset = HDF5Dataset(dataset, df_train, transform=train_transform)
train_load = DataLoader(train_dataset,
                        batch_size=64,
                        shuffle=True,
                        num_workers=4
                       )

# Validation data: fixed order - shuffling adds nothing to evaluation and makes
# per-sample predictions harder to line up with df_val.
val_dataset = HDF5Dataset(dataset, df_val, transform=train_transform)
val_load =   DataLoader(val_dataset,
                        batch_size=64,
                        shuffle=False,
                        num_workers=4
                       )

In [None]:
# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [None]:
# Pretrained DINOv2 backbone used as a frozen feature extractor.
_BACKBONE_ID = 'facebook/dinov2-base'
processor = AutoImageProcessor.from_pretrained(_BACKBONE_ID)
# requires_grad_(False) switches off gradients for every backbone parameter.
model = AutoModel.from_pretrained(_BACKBONE_ID).requires_grad_(False)

In [None]:
class model_4(nn.Module):
    """Image backbone + metadata MLP with one fused head and two auxiliary heads.

    ``forward(x, meta)`` returns three sigmoid probabilities, each ``(batch, 1)``:
      1. fused prediction from the concatenated image and metadata features,
      2. auxiliary prediction from the image features alone,
      3. auxiliary prediction from the metadata features alone.

    ``model`` is the image backbone; element ``[1]`` of its output is used as
    the 768-d image feature (presumably the pooled output of a Hugging Face
    vision model - confirm against the backbone actually passed in).
    ``meta`` must have 34 features per row.

    Attribute names and registration order are part of the checkpoint format
    (state_dict keys), so ``relu`` and ``linear1`` stay even though ``forward``
    does not use them.
    """

    def __init__(self,model):
        super().__init__()
        self.model=model

        # Not used in forward; retained for checkpoint compatibility.
        self.relu=nn.ReLU()
        self.linear1=nn.Linear(768,1)

        self.sigmoid=nn.Sigmoid()

        # Metadata branch: 34 -> 512 -> 128.
        self.linear2=nn.Linear(34,512)
        self.bn1=nn.BatchNorm1d(512)
        self.rel1=nn.ReLU()
        self.drop=nn.Dropout(0.3)
        self.linear3=nn.Linear(512,128)
        self.bn2=nn.BatchNorm1d(128)
        self.rel2=nn.ReLU()

        # Fused head over [image(768) | metadata(128)].
        self.drop1=nn.Dropout(0.5)
        self.linear4=nn.Linear(768+128,1)

        # Auxiliary heads: metadata-only and image-only.
        self.linear5=nn.Linear(128,1)
        self.linear6=nn.Linear(768,1)

    def forward(self,x,meta):
        backbone_out=self.model(x)
        img_feat=backbone_out[1]

        # Image-only auxiliary prediction.
        aux_img=self.sigmoid(self.linear6(img_feat))

        # Metadata branch, one step per line.
        hidden=self.linear2(meta)
        hidden=self.bn1(hidden)
        hidden=self.rel1(hidden)
        hidden=self.drop(hidden)
        hidden=self.linear3(hidden)
        hidden=self.bn2(hidden)
        meta_feat=self.rel2(hidden)

        # Metadata-only auxiliary prediction.
        aux_meta=self.sigmoid(self.linear5(meta_feat))

        # Fused prediction.
        fused=torch.cat((img_feat,meta_feat),dim=1)
        main=self.sigmoid(self.linear4(self.drop1(fused)))
        return main,aux_img,aux_meta

In [None]:
# Restore the full model object saved by a previous run.
# NOTE(security): pickle.load executes arbitrary code from the file; only load
# artifacts you produced yourself. Unpickling also needs the model_4 class to
# be defined in this session first.
with open('/kaggle/input/auxiliary/num_1_sol.sav', 'rb') as model_file:
    model = pickle.load(model_file)
# Alternative: rebuild the network around the backbone and load weights only.
# model=model_4(model)
# model.load_state_dict(torch.load(f="/kaggle/input/version16/ver16.pth"))
model = model.to(device)

In [None]:
# BCELoss expects probabilities, matching the sigmoid outputs of the model heads.
loss_fn=nn.BCELoss()
optimizer=torch.optim.Adam(model.parameters(),lr=0.0001)
# `verbose` and `last_epoch` are left at their defaults: 'deprecated' is the
# library's internal sentinel, not a value to pass. On PyTorch versions where
# `verbose` is a real flag the non-empty string is truthy and prints the LR on
# every step.
# NOTE(review): T_max=100 is in scheduler steps; with one step per epoch the
# cosine only reaches eta_min after 100 epochs - confirm this is intended.
LR_SCHEDULER = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                          T_max=100,
                                                          eta_min=0)

In [None]:
EPOCHS=25
# Loop-invariant; kept as a module-level name for any later cell that reads it.
path="/kaggle/working/model_ezy.pth"

for epoch in tqdm(range(EPOCHS)):
    # ---- training -----------------------------------------------------------
    model.train()
    print(f"Epoch: {epoch+1}\n-----------\nTraining")
    for image,label,meta in tqdm(train_load):
        x=image.to(device)
        meta=meta.to(device)
        y=label.to(device).float()

        pred,pred1,pred2=model(x,meta)

        # reshape(-1) instead of a bare squeeze(): squeeze() turns a size-1
        # batch into a 0-d tensor, which no longer matches y's shape in BCELoss.
        # Weights: 0.6 fused head, 0.2 each for the two auxiliary heads.
        loss1=0.6*loss_fn(pred.float().reshape(-1),y)
        loss2=0.2*loss_fn(pred1.float().reshape(-1),y)
        loss3=0.2*loss_fn(pred2.float().reshape(-1),y)
        loss=loss1+loss2+loss3

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # No per-batch del / gc.collect() / empty_cache(): the names are rebound
        # on the next iteration anyway, and forcing a full GC plus a CUDA cache
        # flush on every step only stalls training.

    # ---- validation ---------------------------------------------------------
    model.eval()
    with torch.inference_mode():
        print("Validating")
        preds=[]
        ys=[]
        for image,label,meta in tqdm(val_load):
            pred,_,_=model(image.to(device),meta.to(device))
            preds.append(pred.reshape(-1).cpu())
            ys.append(label)
        # Hand the metric flat numpy arrays rather than (N, 1) tensors.
        val_preds=torch.cat(preds).numpy()
        val_true=torch.cat(ys).numpy()
        print(f"pauc:{pauc(val_true,val_preds)}")

    # ---- checkpoint + LR schedule (once per epoch) --------------------------
    torch.save(obj=model.state_dict(),
               f=f"/kaggle/working/model_ezy{epoch}.pth")
    LR_SCHEDULER.step()

In [None]:
# Persist the whole model object (architecture + weights). The context manager
# guarantees the file is flushed and closed even if pickling raises.
# NOTE: loading this file later requires the model's class definition to be
# importable; the per-epoch state_dict checkpoints are the more portable format.
with open('/kaggle/working/num_1_sol.sav', 'wb') as model_file:
    pickle.dump(model, model_file)