In [None]:
# Team DSTI - ISIC 2024

In [None]:
import warnings
# Silence every UserWarning for the rest of the session. The filter is installed before the
# third-party imports below, so warnings emitted while those packages load are filtered too.
# NOTE(review): a blanket category filter also hides unrelated UserWarnings — consider
# narrowing it to the specific module/message that is noisy.
warnings.filterwarnings("ignore", category=UserWarning)

# Import all necessary packages
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
import h5py
from PIL import Image
import io
import timm

# Disable version checking for albumentations
# NOTE(review): this rebinds an attribute on the already-imported
# `albumentations.core.transforms_interface` module, so it cannot change anything that ran
# during `import albumentations` above. If the goal is to stop the import-time update check,
# albumentations documents the NO_ALBUMENTATIONS_UPDATE environment variable (to be set
# before the import) — confirm which behaviour this line is actually meant to suppress.
A.core.transforms_interface.is_module_available = lambda _: False

In [None]:
# Load Test Data
# Locations of the competition's test images (HDF5) and the matching metadata table (CSV)
test_hdf5_path, test_metadata_path = (
    '/kaggle/input/isic-2024-challenge/test-image.hdf5',
    '/kaggle/input/isic-2024-challenge/test-metadata.csv',
)

# Read the metadata table into a DataFrame
df_test = pd.read_csv(filepath_or_buffer=test_metadata_path)


In [None]:
# Define the EfficientNetWithVSURF model
class EfficientNetWithVSURF(nn.Module):
    """Image backbone fused with a small vector of tabular ("VSURF") features.

    The timm backbone's own classification head is replaced by ``nn.Identity`` so the
    backbone emits a feature vector; that vector is concatenated with the tabular
    features and passed through a two-layer MLP head.

    Args:
        model_name: timm architecture name. Must contain ``'efficientnet'`` or
            ``'resnet'`` so the head attribute to strip can be located.
        num_classes: Number of outputs of the final linear layer.
        vsurf_size: Number of tabular features concatenated to the image features.
        dropout_rate: Dropout probability used inside the MLP head.

    Raises:
        ValueError: If ``model_name`` belongs to neither supported family.
    """

    # (substring of model_name, attribute holding that family's classification head);
    # checked in order, so 'efficientnet' wins if both substrings are present.
    _HEAD_ATTRS = (('efficientnet', 'classifier'), ('resnet', 'fc'))

    def __init__(self, model_name='efficientnet_b0', num_classes=1, vsurf_size=5, dropout_rate=0.5):
        super().__init__()

        # Resolve the head attribute first so an unsupported architecture fails with a
        # clear message instead of an unbound `in_features` further down.
        head_attr = next((attr for family, attr in self._HEAD_ATTRS if family in model_name), None)
        if head_attr is None:
            raise ValueError(
                f"Unsupported model_name {model_name!r}: expected a name containing "
                "'efficientnet' or 'resnet'"
            )

        self.base_model = timm.create_model(model_name, pretrained=False, num_classes=num_classes)

        # Remember the head's input width, then strip the head so the backbone
        # returns features rather than logits.
        in_features = getattr(self.base_model, head_attr).in_features
        setattr(self.base_model, head_attr, nn.Identity())

        # Submodule names and order (base_model, pool, fc) are kept unchanged so that
        # existing checkpoints keep loading with the same state_dict keys.
        self.pool = nn.AdaptiveMaxPool2d((1, 1))
        self.vsurf_size = vsurf_size
        combined_feature_size = in_features + self.vsurf_size

        self.fc = nn.Sequential(
            nn.Linear(combined_feature_size, 512),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(512, num_classes)
        )

    def forward(self, img_inputs, vsurf_features):
        """Return the head output for a batch of images and tabular features.

        Args:
            img_inputs: Batch passed unchanged to the backbone.
            vsurf_features: Tensor of shape ``(batch, vsurf_size)``; cast to the image
                features' dtype and device before concatenation.

        Returns:
            Tensor of shape ``(batch, num_classes)`` with raw (pre-sigmoid) outputs.
        """
        img_features = self.base_model(img_inputs)

        # A backbone that already pooled returns (batch, C); add singleton spatial
        # dims so the pooling step below handles both the 2-D and the 4-D case.
        if img_features.dim() == 2:
            img_features = img_features.unsqueeze(-1).unsqueeze(-1)

        img_features = self.pool(img_features).flatten(1)

        vsurf_features = vsurf_features.to(device=img_features.device, dtype=img_features.dtype)
        combined_features = torch.cat([img_features, vsurf_features], dim=1)

        return self.fc(combined_features)

In [None]:
# Define Dataset Class and Transformation
import numpy as np

class ISICDataset(Dataset):
    """Dataset yielding ``(image, vsurf_features)`` pairs.

    Each image is stored in an HDF5 file as encoded image bytes under its ``isic_id``;
    the tabular features come from the matching row of ``df``.

    Args:
        hdf5_file: Path of the HDF5 file with one encoded image per ``isic_id``.
        df: Metadata frame with an ``isic_id`` column and the VSURF columns.
        transform: Optional callable invoked as ``transform(image=array)`` that returns
            a mapping with an ``'image'`` entry (albumentations convention).
    """

    def __init__(self, hdf5_file, df, transform=None):
        # Only remember the path here. The handle is opened on first access so that
        # every DataLoader worker process ends up with its own h5py.File rather than
        # a handle inherited from (or pickled by) the parent process.
        self.hdf5_path = hdf5_file
        self._hdf5_file = None
        self.df = df
        self.transform = transform
        self.vsurf_columns = ['clin_size_long_diam_mm', 'tbp_lv_H', 'tbp_lv_deltaLBnorm', 'tbp_lv_perimeterMM', 'tbp_lv_Hext']

    @property
    def hdf5_file(self):
        """Read-only ``h5py.File`` for this process, opened on first use."""
        if self._hdf5_file is None:
            self._hdf5_file = h5py.File(self.hdf5_path, 'r')
        return self._hdf5_file

    def __getstate__(self):
        # Never ship an open handle to another process; the receiver reopens lazily.
        state = self.__dict__.copy()
        state['_hdf5_file'] = None
        return state

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return the (optionally transformed) image and a float32 feature tensor."""
        row = self.df.iloc[idx]

        img_bytes = self.hdf5_file[row['isic_id']][()]
        with Image.open(io.BytesIO(img_bytes)) as pil_img:
            # Force 3 channels so grayscale/RGBA inputs match the 3-channel normalisation.
            img = np.array(pil_img.convert('RGB'))

        if self.transform is not None:
            img = self.transform(image=img)['image']

        return img, self._vsurf_tensor(row)

    def _vsurf_tensor(self, row):
        """Build the float32 VSURF feature tensor from one metadata row."""
        values = [self._to_float(val) for val in row[self.vsurf_columns].values]
        return torch.tensor(values, dtype=torch.float32)

    @staticmethod
    def _to_float(value):
        """Convert one metadata cell to float; non-numeric or NaN cells become 0.0."""
        # NumPy scalar types are listed explicitly: np.int64 / np.float32 are not
        # subclasses of the builtin int / float and would otherwise be zeroed.
        if isinstance(value, (int, float, np.integer, np.floating)):
            number = float(value)
            # A NaN feature would turn the whole prediction into NaN, so it gets the
            # same 0.0 fallback as any other non-numeric value.
            return 0.0 if np.isnan(number) else number
        return 0.0

    def close(self):
        """Close the HDF5 handle if this process opened one."""
        # getattr: the attribute may be missing if __init__ never ran to completion.
        handle = getattr(self, '_hdf5_file', None)
        if handle is not None:
            handle.close()
            self._hdf5_file = None

    def __del__(self):
        self.close()

In [None]:
# Load Pretrained Model
import warnings
# Ignore every FutureWarning from this point on.
# NOTE(review): presumably this hides the FutureWarning that recent PyTorch releases emit
# from `torch.load` about the `weights_only` default — confirm, and prefer narrowing the
# filter, since a process-wide ignore also hides unrelated deprecation notices.
warnings.simplefilter(action='ignore', category=FutureWarning)

def load_model(model_path):
    """Rebuild the EfficientNet-B0 + VSURF network and load its trained weights.

    Args:
        model_path: Path to a checkpoint that contains the model's ``state_dict``.

    Returns:
        The model on CPU, in evaluation mode.

    Raises:
        RuntimeError: If the checkpoint's keys or shapes do not match the architecture
            (raised by ``load_state_dict``).
    """
    model = EfficientNetWithVSURF(model_name='efficientnet_b0', num_classes=1, vsurf_size=5, dropout_rate=0.5)
    # map_location='cpu' keeps loading independent of the device the weights were saved
    # from. weights_only=True restricts unpickling to tensors and plain containers, so a
    # checkpoint file cannot execute arbitrary code while being loaded.
    state_dict = torch.load(model_path, map_location='cpu', weights_only=True)
    model.load_state_dict(state_dict)
    model.eval()
    return model

# Prediction function
def predict(model, test_loader, device):
    """Run inference over a loader and return one sigmoid probability per output.

    Args:
        model: Module called as ``model(images, vsurf_features)`` that returns logits.
        test_loader: Iterable yielding ``(images, vsurf_features)`` tensor batches.
        device: Device (string or ``torch.device``) the batches are moved to; the
            model is expected to live on that device already.

    Returns:
        1-D NumPy array with the flattened probabilities in loader order; an empty
        float32 array when the loader yields no batches.
    """
    model.eval()
    batch_probs = []

    with torch.no_grad():
        for inputs, vsurf_features in test_loader:
            logits = model(inputs.to(device), vsurf_features.to(device))
            batch_probs.append(torch.sigmoid(logits).cpu().numpy())

    # np.concatenate raises on an empty list, so handle an empty loader explicitly.
    if not batch_probs:
        return np.empty(0, dtype=np.float32)

    return np.concatenate(batch_probs).flatten()

In [None]:
#Make Prediction

import logging
logging.basicConfig(level=logging.INFO)


def get_transforms(*, data):
    """Build the albumentations pipeline for the requested split.

    Args:
        data: Split name. Only ``'valid'`` (the deterministic inference pipeline) exists.

    Returns:
        An ``A.Compose`` that resizes to 224x224, normalises with the per-channel
        mean/std listed below and converts the image to a tensor.

    Raises:
        ValueError: If ``data`` names a split for which no pipeline is defined, instead
            of silently returning ``None`` (which would disable all preprocessing).
    """
    if data == 'valid':
        return A.Compose([
            A.Resize(224, 224),
            A.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2(),
        ])
    raise ValueError(f"No transforms defined for data={data!r}; expected 'valid'")


if __name__ == "__main__":
    try:
        # Set up paths
        test_hdf5_path = '/kaggle/input/isic-2024-challenge/test-image.hdf5'
        test_metadata_path = '/kaggle/input/isic-2024-challenge/test-metadata.csv'
        model_path = '/kaggle/input/isic-2024-en0-bes/best_model.pth'

        # Set device
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        print(f"Using device: {device}")

        # Load test data
        df_test = pd.read_csv(test_metadata_path)
        print(f"Loaded test data with {len(df_test)} samples")

        # Create test dataset and dataloader (shuffle=False keeps predictions aligned
        # with the row order of df_test)
        test_dataset = ISICDataset(test_hdf5_path, df_test, transform=get_transforms(data='valid'))
        test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)
        print("Created test dataloader")

        # Load model
        model = load_model(model_path)
        model = model.to(device)
        print("Loaded model")

        # Make predictions
        predictions = predict(model, test_loader, device)
        print(f"Made predictions for {len(predictions)} samples")

        # Fail with an explicit message if predictions and metadata rows are misaligned
        if len(predictions) != len(df_test):
            raise ValueError(
                f"Got {len(predictions)} predictions for {len(df_test)} metadata rows"
            )
        # NaN scores usually mean missing inputs; surface them rather than submit silently
        if np.isnan(predictions).any():
            logging.warning("Predictions contain %d NaN value(s)", int(np.isnan(predictions).sum()))

        # Create output DataFrame with raw probabilities
        output_df = pd.DataFrame({
            'isic_id': df_test['isic_id'],
            'target': predictions  # Use the raw prediction probabilities
        })

        # Save the predictions to a CSV file for submission
        output_df.to_csv('submission.csv', index=False)
        print("Submission file created!")

        # Display the first few rows
        print("First few rows of predictions:")
        print(output_df.head())

        print("Execution completed successfully")
    except Exception as e:
        # Report the failure in the cell output, then re-raise so the run still fails
        print(f"An error occurred: {str(e)}")
        raise