In [1]:
# https://www.kaggle.com/code/nosherwantahir/notebookea3cca46ba

import io

import numpy as np
from scipy.special import softmax
import h5py
import pandas as pd
from PIL import Image

import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow

import torch
from torchvision.transforms import v2 as transforms
from transformers import ViTForImageClassification, ViTImageProcessor
from torch.utils.data import Dataset, DataLoader

from tqdm import tqdm

# Inference-time preprocessing, applied to each decoded PIL image:
#   1. resize to 224x224 (assumed to match the resolution the checkpoint expects
#      -- TODO confirm against the model config),
#   2. convert to a float32 image tensor scaled to [0, 1],
#   3. normalise each channel with the standard ImageNet mean / std.
# v2.ToTensor() is deprecated in torchvision.transforms.v2; the documented
# replacement is ToImage() followed by ToDtype(torch.float32, scale=True).
val_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToImage(),
        transforms.ToDtype(torch.float32, scale=True),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]
)

# Kaggle input locations.
# The model directory can be inspected with:
#   !ls /kaggle/input/vit_base_batch_64/transformers/default/1
model_path = '/kaggle/input/vit_base_batch_64/transformers/default/1/'
# Training images; not referenced by the cells visible in this notebook.
train_hdf_path = '/kaggle/input/isic-2024-challenge/train-image.hdf5'
# Test images that the submission is generated from.
hdf_file_path = '/kaggle/input/isic-2024-challenge/test-image.hdf5'

def load_image_from_encoded_data(encoded_data):
    """Decode an encoded image payload into an RGB PIL image.

    Args:
        encoded_data: Bytes-like encoded image, i.e. anything ``io.BytesIO``
            accepts as its initial contents.

    Returns:
        The decoded image converted to mode ``'RGB'``.
    """
    buffer = io.BytesIO(encoded_data)
    decoded = Image.open(buffer)
    rgb_image = decoded.convert('RGB')
    return rgb_image

class HDF5TestDataset(Dataset):
    """Map-style dataset over encoded images (as read from an HDF5 file) and their IDs.

    Args:
        image_data: Sized, indexable collection of encoded image payloads.
        ids: Indexable collection of identifiers; ``ids[i]`` is returned
            together with the image decoded from ``image_data[i]``.
        transform: Optional callable applied to each decoded RGB image.
    """

    def __init__(self, image_data, ids, transform=None):
        self.image_data = image_data
        self.ids = ids
        self.transform = transform

    def __len__(self):
        """Return the number of encoded images."""
        return len(self.image_data)

    def __getitem__(self, idx):
        """Decode sample ``idx`` and return ``(image, id)``.

        The image is passed through ``transform`` when one was supplied.
        """
        image = load_image_from_encoded_data(self.image_data[idx])

        # Test against None instead of truthiness: a callable that happens to be
        # falsy (e.g. a container-like transform with __len__() == 0) must still
        # be applied.
        if self.transform is not None:
            image = self.transform(image)

        # Size note (https://stackoverflow.com/a/76866298):
        # image.element_size() * image.nelement() for a 3x224x224 float32
        # tensor is 602112 B = 0.574 MB. The original author's unverified
        # estimate was that a 16 GB P100 could hold about 2**14 such images.
        return image, self.ids[idx]

# Read every encoded image of the test HDF5 file into memory.
# The key list is taken once and reused, so ids[i] names image_data[i].
with h5py.File(hdf_file_path, 'r') as f:
    ids = list(f.keys())
    image_data = [f[image_id][()] for image_id in tqdm(ids)]

100%|██████████| 3/3 [00:00<00:00, 1087.36it/s]


In [2]:
# Create the test dataset and dataloader
test_dataset = HDF5TestDataset(image_data=image_data, ids=ids, transform=val_transform)
test_dataloader = DataLoader(test_dataset, batch_size=2**10, shuffle=False, num_workers=4)

# https://github.com/pytorch/pytorch/issues/37726#issuecomment-623064132
# https://github.com/pytorch/pytorch/issues/72117#issuecomment-1242802837
# !nvidia-smi -r
# torch.cuda.empty_cache()

# Use the GPU when one is attached; fall back to the CPU instead of failing
# on a CPU-only runtime.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ViTForImageClassification.from_pretrained(model_path)
model.to(device)
# from_pretrained() documents that it returns the model in evaluation mode;
# stated explicitly so inference never depends on that default.
model.eval()

# Column-wise accumulator for the submission file.
results = {'isic_id': [], 'target': []}

with torch.no_grad():
    for inputs, batch_ids in tqdm(test_dataloader, total=len(test_dataloader)):
        inputs = inputs.to(device)
        # Move the logits to host memory and hand SciPy a real ndarray rather
        # than relying on implicit torch.Tensor -> ndarray conversion.
        logits = model(inputs).logits.cpu().numpy()

        # Probability of class index 1 (presumably the positive / malignant
        # class -- confirm against the checkpoint's label mapping).
        probabilities = softmax(logits, axis=1)[:, 1]

        results['isic_id'].extend(batch_ids)
        results['target'].extend(probabilities)

results_df = pd.DataFrame.from_dict(results)

print(results_df.head())

results_df.to_csv('submission.csv', index=False)

  self.pid = os.fork()
  self.pid = os.fork()
100%|██████████| 1/1 [00:00<00:00,  1.09it/s]

        isic_id    target
0  ISIC_0015657  0.000365
1  ISIC_0015729  0.000057
2  ISIC_0015740  0.000146





In [3]:
# Runtime inspection: list what is mounted under /kaggle/input and print the CPU details.
!ls /kaggle/input/
!lscpu

  pid, fd = os.forkpty()


isic-2024-challenge  vit_base_batch_64
Architecture:             x86_64
  CPU op-mode(s):         32-bit, 64-bit
  Address sizes:          46 bits physical, 48 bits virtual
  Byte Order:             Little Endian
CPU(s):                   4
  On-line CPU(s) list:    0-3
Vendor ID:                GenuineIntel
  Model name:             Intel(R) Xeon(R) CPU @ 2.00GHz
    CPU family:           6
    Model:                85
    Thread(s) per core:   2
    Core(s) per socket:   2
    Socket(s):            1
    Stepping:             3
    BogoMIPS:             4000.38
    Flags:                fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge m
                          ca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht sysc
                          all nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xt
                          opology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq
                           ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt
    