In [None]:
import os
import sys
from io import BytesIO
from pathlib import Path
import pandas as pd
import h5py
import matplotlib.pyplot as plt
from PIL import Image
from dataclasses import dataclass
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset,DataLoader
from PIL import Image
from sklearn.model_selection import train_test_split
from tqdm.auto import tqdm
import torchmetrics
from torchvision import transforms
from sklearn.model_selection import train_test_split
from torchinfo import summary
from torchmetrics import Accuracy
from transformers import AutoImageProcessor,AutoModel
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold
import pickle
import io
import albumentations as A
from albumentations.pytorch import ToTensorV2

In [None]:
class HDF5Dataset(Dataset):
    """Images stored as encoded blobs in an HDF5 file, indexed by a metadata table.

    Args:
        data: Path to the HDF5 file. Each entry is looked up by the value of the
            ``isic_id`` column of the matching metadata row.
        metadata: Either a ``pandas.DataFrame`` or a path to a CSV file; it must
            contain an ``isic_id`` column.
        transform: Optional callable invoked as ``transform(image=ndarray)`` that
            returns a mapping with an ``'image'`` key (the Albumentations
            calling convention).

    The HDF5 file is opened lazily, on first access, rather than in ``__init__``.
    An h5py handle opened in the parent process and then shared with forked
    ``DataLoader`` workers is not safe to read from, and an open handle cannot
    be pickled when workers are spawned. Opening on demand gives every process
    its own handle.
    """

    def __init__(self, data, metadata, transform=None):
        # Remember only the path; the file itself is opened by the `data` property.
        self.data_path = data
        self._data = None
        if isinstance(metadata, pd.DataFrame):
            self.metadata = metadata
        else:
            self.metadata = pd.read_csv(metadata)
        self.transform = transform

    @property
    def data(self):
        """Read-only ``h5py.File`` handle, opened on first use in this process."""
        if self._data is None:
            self._data = h5py.File(self.data_path, 'r')
        return self._data

    def __getstate__(self):
        # Never ship an open HDF5 handle to another process; the receiver
        # reopens the file itself through the `data` property.
        state = self.__dict__.copy()
        state['_data'] = None
        return state

    def __len__(self):
        """Number of samples, i.e. number of metadata rows."""
        return len(self.metadata)

    def __getitem__(self, idx):
        """Return the decoded (and optionally transformed) image at row ``idx``.

        Without a transform the result is a float32 array of pixel values
        divided by 255 (assumes 8-bit source images — TODO confirm).
        """
        img_name = self.metadata['isic_id'].iloc[idx]
        # The HDF5 entry holds the encoded image file; wrap it in a buffer for PIL.
        encoded = np.array(self.data[img_name])
        with Image.open(io.BytesIO(encoded)) as pil_image:
            # Force three channels so the 3-channel normalisation downstream
            # cannot fail on a grayscale/RGBA/palette image.
            image = np.array(pil_image.convert('RGB'), dtype=np.float32) / 255

        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']

        return image

# NOTE: despite the `train_*` names (kept because later cells reference them),
# everything in this cell points at the competition *test* split and is used
# for inference only.
dataset = '/kaggle/input/isic-2024-challenge/test-image.hdf5'
train_data = '/kaggle/input/isic-2024-challenge/test-metadata.csv'

# Deterministic preprocessing only: resize, normalise, convert to a CHW tensor.
# Pixels are already scaled to [0, 1] by the dataset, hence max_pixel_value=1.0.
# The mean/std values are the commonly used ImageNet channel statistics.
train_transform = A.Compose([
    A.Resize(height=224, width=224),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=1.0),
    ToTensorV2(),
])

train_dataset = HDF5Dataset(dataset, train_data, transform=train_transform)
train_load = DataLoader(
    train_dataset,
    batch_size=256,
    # Must stay False: predictions are later paired with `isic_id` purely by
    # position, so the loader has to yield samples in metadata order.
    shuffle=False,
    num_workers=4,
)

In [None]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
from torchvision.models import resnet50

# ResNet-50 backbone with the classification head replaced by a single
# sigmoid unit, so the network outputs one value in (0, 1) per image.
model = resnet50()
model.fc = nn.Sequential(
    nn.Linear(2048, 1),
    nn.Sigmoid(),
)

# map_location lets a checkpoint that was saved from a CUDA device load on a
# CPU-only runtime as well.
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust
# (consider weights_only=True if the installed torch version supports it).
model.load_state_dict(
    torch.load(f="/kaggle/input/resnet50-cancer/model_ezy.pth", map_location=device)
)

In [None]:
# Open the test image archive explicitly read-only: older h5py releases default
# to a writable mode, which cannot work on the read-only input directory.
# NOTE(review): `fi_hdf5` is not used in the visible cells and is never closed;
# drop it if nothing else depends on it.
fi_hdf5 = h5py.File('/kaggle/input/isic-2024-challenge/test-image.hdf5', 'r')

# Test metadata (the `df_train` name is kept because later cells reference it).
df_train = pd.read_csv('/kaggle/input/isic-2024-challenge/test-metadata.csv')

# Image identifiers as a NumPy array of strings, in metadata order.
isic_id = df_train['isic_id'].to_numpy().astype('str')

In [None]:
# Collect one CPU tensor of model outputs per batch.
preds = []
model.to(device)
# Switch BatchNorm/Dropout layers to inference behaviour. inference_mode()
# below only disables autograd; without eval() the BatchNorm layers would
# normalise with per-batch statistics and update their running averages.
model.eval()
with torch.inference_mode():
    for image in train_load:
        x = image.to(device)
        pred = model(x)
        # Move results off the accelerator right away to keep device memory flat.
        preds.append(pred.cpu())
        del x
        del pred

In [None]:
# Flatten the per-batch output tensors into one list with an entry per sample.
# Each entry is whatever `tensor.tolist()` yields for that row of the batch.
predictions = [row for batch in preds for row in batch.tolist()]

In [None]:
# One score per sample as a flat 1-D array. reshape(-1) is used instead of
# squeeze() so that a single prediction stays 1-D (squeeze() would collapse it
# to a 0-d array, which pandas cannot turn into a column).
preds = np.array(predictions, dtype=np.float64).reshape(-1)
# Replace NaN scores with 0 so the submission contains only valid numbers.
preds[np.isnan(preds)] = 0

# Predictions are matched to ids purely by position, so the counts must agree.
assert len(preds) == len(df_train), (
    f"got {len(preds)} predictions for {len(df_train)} metadata rows"
)

# Name the score column explicitly instead of leaving pandas' default `0`.
# NOTE(review): `target` is the column name the competition's submission
# format is expected to use — confirm against the sample submission.
preds = pd.DataFrame({'target': preds})
df_sub = pd.concat([df_train['isic_id'], preds], axis=1)
df_sub.head()

In [None]:
# Persist the submission table to the working directory; the row index is not
# written, so the file contains only the frame's own columns.
df_sub.to_csv(path_or_buf="submission.csv", index=False)