In [1]:
import os
print(os.listdir("/kaggle/input/resnet18-vsloss-v2"))

['resnet18_vsloss_v2']


In [12]:
import torch
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import pandas as pd
import h5py
import numpy as np
import io  # Ensure io is imported for handling byte streams
import logging, os
import torch.nn as nn
import torch.nn.functional as F 

# Define a custom dataset class to handle HDF5 files
class HDF5Dataset(Dataset):
    def __init__(self, hdf5_file, transform=None):
        """
        Args:
            hdf5_file (string): Path to the HDF5 file with images.
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        self.hdf5_file = hdf5_file
        self.transform = transform
        # Open the HDF5 file
        self.file = h5py.File(hdf5_file, 'r')
        self.keys = list(self.file.keys())

    def __len__(self):
        return len(self.keys)

    def __getitem__(self, idx):
        # HDF5 keys can be used to access images
        image_name = self.keys[idx]
        image_data = self.file[image_name][()]
        # Convert image data to PIL Image for consistency with transforms
        image = Image.open(io.BytesIO(image_data)).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, image_name

    def close(self):
        if self.file:
            self.file.close()


class MyDataset(Dataset):
    def __init__(self, csv_file, img_dir, class_mapping, columns=None, transform=None):
        """
        Initializes the dataset.
        :param csv_file: Path to the CSV file containing data.
        :param img_dir: Directory where images are stored.
        :param class_mapping: Dictionary mapping class names to numeric values.
        :param columns: List of column names to include as features. If None, all columns are included.
        :param transform: Optional transform to be applied on a sample.
        """
        self.data_frame = pd.read_csv(csv_file, low_memory=False)
        self.img_dir = img_dir
        self.transform = transform
        # Use specified columns if provided, otherwise use all columns starting from the third column
        if columns is not None:
            self.csv_data = self.data_frame[columns]
        # Ensure the image names include the file extension if it's missing
        self.data_frame['image_name'] = self.data_frame['isic_id'].apply(lambda x: f"{x}.jpg" if not x.lower().endswith('.jpg') else x)
        # Directly use the numeric targets from the dataset
        self.targets = self.data_frame['target'].astype(int)
        self.class_mapping = class_mapping

    def __len__(self):
        return len(self.data_frame)

    def __getitem__(self, idx):
        img_name = self.data_frame.loc[idx, 'image_name']
        img_path = os.path.join(self.img_dir, img_name)
        try:
            image = Image.open(img_path)
            if self.transform:
                image = self.transform(image)
        except FileNotFoundError:
            logging.error(f"Image not found: {img_path}")
            return None  # Consider how you handle missing images in your training loop

        target = self.targets[idx]
        return image, target

# Initialize device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define transformations
transform = transforms.Compose([
    transforms.Resize((64, 64)),  # Adjust the size to 224x224 for ResNet50
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.6984, 0.5219, 0.4197], std=[0.1396, 0.1318, 0.1236]),  # Use ImageNet norms
])

# Create the dataset
# hdf5_file = '/kaggle/input/isic-2024-challenge/test-image.hdf5'
hdf5_file = 'dataset/dump/train-image.hdf5'
val_csv = 'dataset/val.csv'
val_dir = 'dataset/val'
class_mapping = {'benign': 0, 'malignant': 1}
columns_to_use  = None
dataset = MyDataset(csv_file=val_csv, img_dir=val_dir, class_mapping=class_mapping, columns=columns_to_use, transform=transform)

# Create the DataLoader
test_loader = DataLoader(dataset, batch_size=64, shuffle=False)


# Model setup
model = models.resnet50()
num_ftrs = model.fc.in_features


model.fc = nn.Linear(num_ftrs, 2)  # Assuming 2 classes (benign and malignant)
# model = model.to(device)

    
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Load existing model if available
# model_saved_path = os.path.join("/kaggle/input/resnet50-gamma-9/Resnet50_gamma.9")
model_saved_path = os.path.join("checkpoint/30 July 09:37-resnet50-VS_loss-SGD_gamma.9.pt")
if os.path.exists(model_saved_path):
    # Load state dict properly onto the specified device
    model.load_state_dict(torch.load(model_saved_path, map_location=device))
    print(f'Model loaded from {model_saved_path}')
    logging.info(f'Model loaded from {model_saved_path}')
else: 
    print('model not found')


model.eval()



from sklearn.metrics import roc_auc_score
def calculate_auc(model, data_loader, device):
    model.eval()  # Ensure the model is in evaluation mode
    true_labels = []
    predictions = []

    count = 0
    with torch.no_grad():
        for inputs, labels in data_loader:
            inputs = inputs.to(device)

            outputs = model(inputs)
            probabilities = F.softmax(outputs, dim=1)
            class_one_prob = probabilities[:, 1].cpu().numpy()
            predictions.extend(class_one_prob)
            true_labels.extend(labels)
            count += 1
            if count >= 500:
                print(f'Processed {count} samples')
                break

    # Calculate pAUC using sklearn's roc_auc_score with max_fpr
    partial_auc_scaled = roc_auc_score(true_labels, predictions, max_fpr=0.2)

    # Scale from [0.5, 1.0] to [0.0, 0.2]
    partial_auc = (partial_auc_scaled - 0.5) * 0.4

    return partial_auc

print(f'Partial AUC: {calculate_auc(model, test_loader, device):.4f}')


        




Model loaded from checkpoint/30 July 09:37-resnet50-VS_loss-SGD_gamma.9.pt
Processed 500 samples
Partial AUC: 0.1136


In [14]:
from torchvision import transforms as T
from data.transformation import ImgAugTransform
# Define the test time augmentation function
def test_time_augmentation(model, image, transforms):
    image = image.unsqueeze(0)
    predictions = []
    for transform in transforms:
        augmented_image = transform(image)
        augmented_image = augmented_image.unsqueeze(0)  # Add batch dimension
        augmented_image = augmented_image.to(device)
        output = model(augmented_image)
        predictions.append(output)
    return torch.mean(torch.stack(predictions), dim=0)

# Apply TTA during evaluation
tta_transforms = [
    T.Compose([T.Resize((64, 64)), T.ToTensor(), T.Normalize((0.6984, 0.5219, 0.4197), (0.1396, 0.1318, 0.1236))]),
    T.Compose([T.Resize((64, 64)), ImgAugTransform(), T.ToTensor(), T.Normalize((0.6984, 0.5219, 0.4197), (0.1396, 0.1318, 0.1236))]),
    T.Compose([T.Resize((64, 64)), T.RandomHorizontalFlip(p=1), T.ToTensor(), T.Normalize((0.6984, 0.5219, 0.4197), (0.1396, 0.1318, 0.1236))]),
    T.Compose([T.Resize((64, 64)), T.RandomVerticalFlip(p=1), T.ToTensor(), T.Normalize((0.6984, 0.5219, 0.4197), (0.1396, 0.1318, 0.1236))]),
    T.Compose([T.Resize((64, 64)), T.RandomRotation(90), T.ToTensor(), T.Normalize((0.6984, 0.5219, 0.4197), (0.1396, 0.1318, 0.1236))])
]


from sklearn.metrics import roc_auc_score
def calculate_auc(model, data_loader, device):
    model.eval()  # Ensure the model is in evaluation mode
    true_labels = []
    predictions = []

    count = 0
    with torch.no_grad():
        for inputs, labels in data_loader:
            inputs = inputs.to(device)

            outputs = model(inputs)
            probabilities = F.softmax(outputs, dim=1)
            class_one_prob = probabilities[:, 1].cpu().numpy()
            predictions.extend(class_one_prob)
            true_labels.extend(labels)
            count += 1
            if count >= 500:
                print(f'Processed {count} samples')
                break

    # Calculate pAUC using sklearn's roc_auc_score with max_fpr
    partial_auc_scaled = roc_auc_score(true_labels, predictions, max_fpr=0.2)

    # Scale from [0.5, 1.0] to [0.0, 0.2]
    partial_auc = (partial_auc_scaled - 0.5) * 0.4

    return partial_auc

print(f'Partial AUC: {calculate_auc(model, test_loader, device):.4f}')


        

Processed 500 samples
Partial AUC: 0.1136


In [3]:

# Save predictions to CSV

df = pd.DataFrame({
    'isic_id': image_ids,
    'target': predictions
})
df.to_csv('submission.csv', index=False)
print("Predictions saved to submission.csv")

Predictions saved to submission.csv


In [4]:
import pandas as pd

df = pd.read_csv('submission.csv')
df.head()

Unnamed: 0,isic_id,target
0,ISIC_0015657,0.749103
1,ISIC_0015729,0.014884
2,ISIC_0015740,0.00032
