In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         os.path.join(dirname, filename)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader, random_split
import h5py,io
import albumentations as A
from albumentations.pytorch import ToTensorV2
from PIL import Image
from tqdm.auto import tqdm
from sklearn.metrics import roc_auc_score
import torch.optim.lr_scheduler as lr_scheduler

In [3]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Kaggle input locations for the training split.
train_meta = '/kaggle/input/isic-2024-challenge/train-metadata.csv'  # metadata table (CSV)
train_hdf5 = '/kaggle/input/isic-2024-challenge/train-image.hdf5'  # images, opened later with h5py

In [None]:
# Training-time preprocessing and augmentation pipeline.
# The dataset hands over float32 images already divided by 255, which is why
# Normalize is configured with max_pixel_value=1.0.
# NOTE(review): the RGBShift limits of 15 look like uint8-scale values while the
# images are floats in [0, 1] - confirm how the installed albumentations version
# treats shift limits on float input.
# TODO: the mean/std below are the usual ImageNet statistics; replace them with
# values computed on this dataset.
train_transformations = A.Compose(
    transforms=[
        A.Resize(height=224, width=224),
        # Half of the time, apply exactly one of the two colour perturbations.
        A.OneOf(
            transforms=[
                A.RGBShift(r_shift_limit=15, g_shift_limit=15, b_shift_limit=15),
                A.RandomBrightnessContrast(),
            ],
            p=0.5,
        ),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=1.0,
        ),
        ToTensorV2(),
    ]
)

In [None]:
# Validation preprocessing: deterministic resize + normalisation only.
# Random colour jitter and flips are training-time augmentations; applying them
# during validation makes the reported score change from run to run, so they
# are intentionally left out here.
# Images arrive as float32 in [0, 1], hence max_pixel_value=1.0.
val_transformations = A.Compose([
    A.Resize(height=224, width=224),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=1.0),
    ToTensorV2(),
])

In [None]:

class HDF5Dataset_train(Dataset):
    """Labelled image dataset backed by an HDF5 file of encoded images.

    Indexing returns ``(image, label)``. The entry stored under the row's
    ``isic_id`` is decoded, converted to RGB, scaled to float32 in [0, 1] and,
    if a transform was given, replaced by ``transform(image=...)['image']``.
    The label is the row's ``target`` value cast to ``int``.
    """

    def __init__(self, data, metadata, transform=None):
        """Open the image file and load the metadata table.

        Args:
            data: Path of the HDF5 file; opened read-only with h5py.
            metadata: A pandas DataFrame, or anything ``pd.read_csv`` accepts.
            transform: Optional callable invoked as ``transform(image=array)``
                that returns a mapping containing an ``'image'`` entry.
        """
        self.data = h5py.File(data, 'r')
        self.metadata = self._load_metadata(metadata)
        self.transform = transform

    @staticmethod
    def _load_metadata(metadata):
        """Return ``metadata`` itself if it is a DataFrame, else read it as CSV."""
        # isinstance rather than an exact type comparison, so DataFrame
        # subclasses are used directly instead of being passed to read_csv.
        if isinstance(metadata, pd.DataFrame):
            return metadata
        return pd.read_csv(metadata, low_memory=False)

    def __len__(self):
        """Number of metadata rows, i.e. number of samples."""
        return len(self.metadata)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``."""
        img_name = self.metadata['isic_id'].iloc[idx]
        encoded = np.array(self.data[img_name])
        with Image.open(io.BytesIO(encoded)) as pil_image:
            # Force three channels so greyscale, palette or RGBA files still
            # produce an (H, W, 3) array.
            image = np.array(pil_image.convert('RGB'), dtype=np.float32) / 255

        label = int(self.metadata['target'].iloc[idx])

        if self.transform is not None:
            image = self.transform(image=image)['image']

        return image, label

In [None]:
class HDF5Dataset_val(Dataset):
    """Labelled validation dataset backed by an HDF5 file of encoded images.

    Indexing returns ``(image, label)``. The entry stored under the row's
    ``isic_id`` is decoded, converted to RGB, scaled to float32 in [0, 1] and,
    if a transform was given, replaced by ``transform(image=...)['image']``.
    The label is the row's ``target`` value cast to ``int``.
    """

    def __init__(self, data, metadata, transform=None):
        """Open the image file and load the metadata table.

        Args:
            data: Path of the HDF5 file; opened read-only with h5py.
            metadata: A pandas DataFrame, or anything ``pd.read_csv`` accepts.
            transform: Optional callable invoked as ``transform(image=array)``
                that returns a mapping containing an ``'image'`` entry.
        """
        self.data = h5py.File(data, 'r')
        self.metadata = self._load_metadata(metadata)
        self.transform = transform

    @staticmethod
    def _load_metadata(metadata):
        """Return ``metadata`` itself if it is a DataFrame, else read it as CSV."""
        # isinstance rather than an exact type comparison, so DataFrame
        # subclasses are used directly instead of being passed to read_csv.
        if isinstance(metadata, pd.DataFrame):
            return metadata
        return pd.read_csv(metadata, low_memory=False)

    def __len__(self):
        """Number of metadata rows, i.e. number of samples."""
        return len(self.metadata)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``."""
        img_name = self.metadata['isic_id'].iloc[idx]
        encoded = np.array(self.data[img_name])
        with Image.open(io.BytesIO(encoded)) as pil_image:
            # Force three channels so greyscale, palette or RGBA files still
            # produce an (H, W, 3) array.
            image = np.array(pil_image.convert('RGB'), dtype=np.float32) / 255

        label = int(self.metadata['target'].iloc[idx])

        if self.transform is not None:
            image = self.transform(image=image)['image']

        return image, label

In [None]:
# Build the train / validation datasets from one metadata table.
# The split is positional: the first 90% of rows train the model and the
# remaining rows are held out for validation.
# NOTE(review): the split is neither shuffled nor stratified on `target` -
# confirm that the row order of the CSV makes this acceptable.
train_meta_df = pd.read_csv(train_meta, low_memory=False)

train_size = int(0.9 * len(train_meta_df))

torch.manual_seed(42)

train_set = HDF5Dataset_train(train_hdf5, train_meta_df.iloc[:train_size], train_transformations)
# val_set must exist because the DataLoader cell below consumes it.
val_set = HDF5Dataset_val(train_hdf5, train_meta_df.iloc[train_size:], val_transformations)

train_batch_size = 64
val_batch_size = 64

In [10]:
# train_size

In [None]:
# Batch iterators: training samples are reshuffled each epoch, validation
# samples are visited in a fixed order.
train_data_loader = DataLoader(dataset=train_set, batch_size=train_batch_size, shuffle=True)
val_data_loader = DataLoader(dataset=val_set, batch_size=val_batch_size, shuffle=False)

In [None]:
from torchvision.models import ResNet18_Weights
# Handle to torchvision's IMAGENET1K_V1 weight set for ResNet-18.
# NOTE(review): `weights` is not passed to models.resnet18() anywhere in the
# visible cells - confirm whether this cell is still needed.
weights = ResNet18_Weights.IMAGENET1K_V1

In [None]:
# Build a ResNet-18 with a small binary-classification head and resume from a
# previously saved checkpoint.
model = models.resnet18()
model.fc = nn.Sequential(
    nn.Flatten(),
    nn.Linear(model.fc.in_features, 64),
    nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(64, 1),
    nn.Sigmoid(),  # the network therefore outputs probabilities, not logits
)
model = model.to(device)

# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU still loads in a CPU-only session.
model.load_state_dict(
    torch.load(f='/kaggle/input/resnet-furtrain/model_resnet (3).pth', map_location=device)
)

# Fine-tune the whole network: every parameter receives gradients.
for param in model.parameters():
    param.requires_grad = True

In [None]:
# Optimisation setup for fine-tuning.
epochs = 10

# Binary cross-entropy on probabilities.
loss_1 = nn.BCELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-5)

# Scale the learning rate by 0.1 after every 8 calls to scheduler.step().
scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=8, gamma=0.1)

In [None]:
# Fine-tuning loop: one optimiser step per batch, one scheduler step per epoch.
for epoch in tqdm(range(epochs)):
    model.train()
    running_loss = 0.0
    for images, labels in tqdm(train_data_loader):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        # Drop only the trailing size-1 axis: a bare squeeze() would also
        # collapse a final batch of one sample into a 0-d tensor.
        output = model(images).squeeze(1)
        loss = loss_1(output, labels.type(torch.float))
        loss.backward()
        optimizer.step()
        # loss is a per-batch mean; weight it by the actual batch size so a
        # shorter final batch is not over-counted in the epoch average.
        running_loss += loss.item() * images.size(0)
    avg_epoch_loss = running_loss / len(train_set)
    print(f"epoch {epoch+1}/{epochs} loss : {avg_epoch_loss}")
    scheduler.step()

In [None]:
# Persist only the model's state_dict to the Kaggle working directory.
path = '/kaggle/working/model_resnet.pth'
torch.save(model.state_dict(), path)

## Loading model

In [17]:
# Recreate the architecture (ResNet-18 backbone + binary head) so that saved
# weights can be loaded into it.
model = models.resnet18()
model.fc = nn.Sequential(
    nn.Flatten(),
    nn.Linear(model.fc.in_features, 64),
    nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(64, 1),
    nn.Sigmoid(),
)
model = model.to(device)


In [18]:
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU still loads in a CPU-only session.
model.load_state_dict(
    torch.load(f='/kaggle/input/resnet-v27/model_resnet (4).pth', map_location=device)
)

<All keys matched successfully>

## Validation

In [None]:
def calc_auc(solution: pd.DataFrame, submission: pd.DataFrame, min_tpr: float=0.80):
    '''
    Partial AUC of the ROC curve restricted to the region above ``min_tpr``.

    Labels and scores are both flipped (``|y - 1|`` and ``1 - p``) so that the
    "TPR >= min_tpr" region becomes an "FPR <= 1 - min_tpr" region, which is
    what ``roc_auc_score(max_fpr=...)`` can evaluate. The standardised value it
    returns is then mapped back to an un-normalised area.

    solution   : ground-truth targets
    submission : predicted scores
    min_tpr    : lower TPR bound of the region of interest
    '''
    flipped_truth = np.abs(np.asarray(solution.values) - 1)
    flipped_scores = 1.0 - np.asarray(submission.values)
    max_fpr = abs(1-min_tpr)

    scaled = roc_auc_score(flipped_truth, flipped_scores, max_fpr=max_fpr)

    # Undo the rescaling applied by roc_auc_score when max_fpr is given.
    min_area = 0.5 * max_fpr**2
    max_area = max_fpr
    return min_area + (max_area - min_area) / (1.0 - 0.5) * (scaled - 0.5)

In [None]:
# Score the model on the validation loader: mean BCE loss and partial AUC.
model.eval()

all_labels = []
all_probs = []

running_loss = 0.0

# Gradients are not needed for evaluation; skipping them saves memory and time.
with torch.no_grad():
    for images, labels in tqdm(val_data_loader):
        images, labels = images.to(device), labels.to(device)
        # The model's head already ends in nn.Sigmoid, so `output` holds
        # probabilities. Applying sigmoid a second time would squash them
        # into roughly (0.5, 0.73), so they are used as they are.
        # squeeze(1) keeps a batch of one sample as shape (1,).
        output = model(images).squeeze(1)
        loss = loss_1(output, labels.type(torch.float))
        # Weight the per-batch mean by the batch size for a true sample average.
        running_loss += loss.item() * images.size(0)

        all_labels.extend(labels.cpu().numpy().flatten())
        all_probs.extend(output.cpu().numpy().flatten())

val_loss = running_loss / max(len(all_labels), 1)

pred_df = pd.DataFrame(all_probs, columns=['pred'])
labels_df = pd.DataFrame(all_labels, columns=['targets'])

val_auc_score = calc_auc(labels_df,pred_df)
print(f"val_loss = {val_loss}")
print(f"val_auc_score = {val_auc_score}")


In [21]:
# print(val_output)

## Testing

In [22]:
# Kaggle input locations for the test split.
test_meta = '/kaggle/input/isic-2024-challenge/test-metadata.csv'  # metadata table (CSV)
test_hdf5 = '/kaggle/input/isic-2024-challenge/test-image.hdf5'  # images, opened later with h5py

In [23]:
# Inference preprocessing: resize and normalise only, no random augmentation.
# Images arrive as float32 in [0, 1], hence max_pixel_value=1.0.
# TODO: the mean/std below are the usual ImageNet statistics; replace them with
# values computed on this dataset.
test_transformations = A.Compose(
    transforms=[
        A.Resize(height=224, width=224),
        A.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=1.0,
        ),
        ToTensorV2(),
    ]
)

In [24]:

class HDF5Dataset_test(Dataset):
    """Unlabelled image dataset backed by an HDF5 file of encoded images.

    Indexing returns the image only. The entry stored under the row's
    ``isic_id`` is decoded, converted to RGB, scaled to float32 in [0, 1] and,
    if a transform was given, replaced by ``transform(image=...)['image']``.
    """

    def __init__(self, data, metadata, transform=None):
        """Open the image file and load the metadata table.

        Args:
            data: Path of the HDF5 file; opened read-only with h5py.
            metadata: A pandas DataFrame, or anything ``pd.read_csv`` accepts.
            transform: Optional callable invoked as ``transform(image=array)``
                that returns a mapping containing an ``'image'`` entry.
        """
        self.data = h5py.File(data, 'r')
        self.metadata = self._load_metadata(metadata)
        self.transform = transform

    @staticmethod
    def _load_metadata(metadata):
        """Return ``metadata`` itself if it is a DataFrame, else read it as CSV."""
        # isinstance rather than an exact type comparison, so DataFrame
        # subclasses are used directly instead of being passed to read_csv.
        if isinstance(metadata, pd.DataFrame):
            return metadata
        return pd.read_csv(metadata, low_memory=False)

    def __len__(self):
        """Number of metadata rows, i.e. number of samples."""
        return len(self.metadata)

    def __getitem__(self, idx):
        """Return the preprocessed image for the row at position ``idx``."""
        img_name = self.metadata['isic_id'].iloc[idx]
        encoded = np.array(self.data[img_name])
        with Image.open(io.BytesIO(encoded)) as pil_image:
            # Force three channels so greyscale, palette or RGBA files still
            # produce an (H, W, 3) array.
            image = np.array(pil_image.convert('RGB'), dtype=np.float32) / 255

        if self.transform is not None:
            image = self.transform(image=image)['image']

        return image

In [25]:
# Test dataset plus a loader that yields one image per batch, in file order.
test_data = HDF5Dataset_test(data=test_hdf5, metadata=test_meta, transform=test_transformations)
test_data_loader = DataLoader(dataset=test_data, batch_size=1, shuffle=False)

In [26]:
# One zero-initialised slot per test sample; filled in by the inference loop.
test_outputs = np.zeros(shape=len(test_data))

In [27]:
# Predict one probability per test image and store it at the matching index.
model.eval()
# Inference only: disabling autograd avoids building a graph for every image.
with torch.no_grad():
    for i, image in enumerate(test_data_loader):
        image = image.to(device)
        # .item() relies on the loader yielding exactly one image per batch.
        test_outputs[i] = model(image).item()

In [28]:
# Test metadata; its isic_id column labels the rows of the submission.
df_test = pd.read_csv(filepath_or_buffer='/kaggle/input/isic-2024-challenge/test-metadata.csv')

In [29]:
# Pair every test image id with its predicted probability and write the
# submission file without the index column.
df_sample_submission = pd.DataFrame(
    {
        'isic_id': df_test['isic_id'],
        'target': test_outputs,
    }
)
df_sample_submission.to_csv(path_or_buf='/kaggle/working/submission.csv', index=False)

In [30]:
# Show the submission table as a quick sanity check of ids and predictions.
print(df_sample_submission)

        isic_id    target
0  ISIC_0015657  0.000191
1  ISIC_0015729  0.000053
2  ISIC_0015740  0.000128
