In [1]:
import os
import random
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset

class MoleDataset(Dataset):
    def __init__(self, image_dir, metadata_path, transform=None):
        """
        Args:
            image_dir (str): Path to the directory containing all images.
            metadata_path (str): Path to the metadata CSV file.
            transform (callable, optional): Optional transform to be applied to the images.
        """
        self.image_dir = image_dir
        self.transform = transform
        self.images = []
        self.labels = []
        
        # Load the metadata CSV file
        df = pd.read_csv(metadata_path)
        
        # Get the ISIC_ids for target == 1 and target == 0
        target_1_ids = df[df['target'] == 1]['isic_id'].tolist()
        target_0_ids = df[df['target'] == 0]['isic_id'].tolist()
        
        # Subsample target_0 images to 1/100 of their original size
        subsample_size = max(1, len(target_0_ids) // 100)  # Ensure at least 1 image is sampled
        target_0_ids_subsampled = random.sample(target_0_ids, subsample_size)
        
        # Add all target_1 images
        for isic_id in target_1_ids:
            img_path = os.path.join(self.image_dir, f"{isic_id}.jpg")  # Assuming images are named as ISIC_id.jpg
            if os.path.isfile(img_path):  # Check if the file exists
                self.images.append(img_path)
                self.labels.append(1)
        
        # Add the subsampled target_0 images
        for isic_id in target_0_ids_subsampled:
            img_path = os.path.join(self.image_dir, f"{isic_id}.jpg")  # Assuming images are named as ISIC_id.jpg
            if os.path.isfile(img_path):  # Check if the file exists
                self.images.append(img_path)
                self.labels.append(0)
        
    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, idx):
        img_path = self.images[idx]
        image = Image.open(img_path).convert("RGB")  # Open image and convert to RGB
        label = self.labels[idx]
        
        if self.transform:
            image = self.transform(image)
        
        return image, label

In [2]:
# # This Python 3 environment comes with many helpful analytics libraries installed
# # It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# # For example, here's several helpful packages to load

# import numpy as np # linear algebra
# import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# # Input data files are available in the read-only "../input/" directory
# # For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# # You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# # You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [3]:
# Path to the image directory and metadata CSV file
image_dir = "/kaggle/input/isic-2024-challenge/train-image/image"
metadata_path = "/kaggle/input/isic-2024-challenge/train-metadata.csv"

# Create the dataset
dataset = MoleDataset(image_dir=image_dir, metadata_path=metadata_path, transform=None)

# Access the first image and label
image, label = dataset[0]
print(f"Image shape: {image.size}, Label: {label}")

  df = pd.read_csv(metadata_path)


Image shape: (137, 137), Label: 1


In [4]:
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import torch
from torchvision import transforms
from sklearn.model_selection import train_test_split
import pytorch_lightning as pl

In [5]:
class MoleDataModule(pl.LightningDataModule):
    def __init__(self, image_dir, metadata_path, batch_size=32, val_split=0.005):
        super().__init__()
        self.image_dir = image_dir
        self.metadata_path = metadata_path
        self.batch_size = batch_size
        self.val_split = val_split

        # Define separate transforms for training and validation
        self.train_transform = transforms.Compose([
            transforms.Resize((224, 224)),  # Resize to a larger size
            
            #Color transformations relevant for dermatological images
            transforms.ColorJitter(
                brightness=0.1,
                contrast=0.1,
                saturation=0.1,
                hue=0.05
            ),

            # Random affine transformations
            transforms.RandomAffine(
                degrees=0,
                translate=(0.05, 0.05),
                scale=(0.95, 1.05),
                fill=0
            ),
            
            # Gaussian blur
            transforms.RandomApply([
                transforms.GaussianBlur(kernel_size=3)
            ], p=0.1
            ),

            # Random rotations and flips
            transforms.RandomRotation(
                degrees=180,
                fill=0
            ),
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),

            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Normalization

            transforms.RandomErasing(
                p=0.2,
                scale=(0.02, 0.06),
                ratio=(0.3, 3.3),
                value=0
            )
        ])
        
        self.val_transform = transforms.Compose([
            transforms.Resize((224, 224)),  # Resize to a larger size
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalization
        ])

    def setup(self, stage=None):
        # Initialize datasets with separate transforms
        full_dataset = MoleDataset(self.image_dir, self.metadata_path, transform=None)  # Initially no transform
        labels = full_dataset.labels

        # Split into train and validation indices with stratification
        train_indices, val_indices = train_test_split(
            range(len(full_dataset)),
            test_size=self.val_split,
            stratify=labels,
            random_state=42
        )

        # Apply different transforms for training and validation datasets
        train_dataset = MoleDataset(self.image_dir, self.metadata_path, transform=self.train_transform)
        val_dataset = MoleDataset(self.image_dir, self.metadata_path, transform=self.val_transform)

        self.train_dataset = torch.utils.data.Subset(train_dataset, train_indices)
        self.val_dataset = torch.utils.data.Subset(val_dataset, val_indices)

    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True, num_workers=3)

    def val_dataloader(self):
        return DataLoader(self.val_dataset, batch_size=self.batch_size, shuffle=False, num_workers=3)

In [6]:
# Create the data module
data_module = MoleDataModule(image_dir=image_dir, metadata_path=metadata_path, batch_size=32, val_split=0.1)

# Setup the data module
data_module.setup()

# Access the train and validation dataloaders
train_loader = data_module.train_dataloader()
val_loader = data_module.val_dataloader()

# Example: Iterate through the first batch of the training dataloader
for images, labels in train_loader:
    print(f"Batch images shape: {images.shape}, Batch labels: {labels}")
    break

  df = pd.read_csv(metadata_path)
  df = pd.read_csv(metadata_path)
  df = pd.read_csv(metadata_path)


Batch images shape: torch.Size([32, 3, 224, 224]), Batch labels: tensor([0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0])


In [7]:
import torch
import torch.nn as nn
import torchvision.models as models

class MoleCNN(nn.Module):
    def __init__(self, num_classes=1, weight_path="/kaggle/input/resnet50/pytorch/default/1/resnet50.pth"):  
        super(MoleCNN, self).__init__()
        
        # Load ResNet50 architecture
        self.resnet = models.resnet50()
        
        # Load pretrained weights from local file
        self.resnet.load_state_dict(torch.load(weight_path, map_location=torch.device('cuda' if torch.cuda.is_available() else 'cpu')))
        
        # Freeze all layers except layer4
        for param in self.resnet.parameters():
            param.requires_grad = True
        # for param in self.resnet.layer4.parameters():
        #     param.requires_grad = True

        # Replace the final fully connected layer
        self.resnet.fc = nn.Linear(2048, num_classes)
        
    def forward(self, x):
        return self.resnet(x)  # Now returning raw logits



In [8]:
from collections import Counter
import torch

# MoleDataset instanzieren (ohne Transformationspipeline)
dataset = MoleDataset(image_dir="/kaggle/input/isic-2024-challenge/train-image/image", metadata_path="/kaggle/input/isic-2024-challenge/train-metadata.csv")
labels = dataset.labels

# Klassenverteilung berechnen
class_counts = Counter(labels)  # Gibt ein Dictionary mit {Klasse: Anzahl} zurück
print("Class counts:", class_counts)

# Berechnung der Gewichte
total_samples = sum(class_counts.values())
class_weights = {cls: total_samples / count for cls, count in class_counts.items()}
print("Class weights:", class_weights)

# Gewicht für positive Klasse (target_1)
pos_weight = torch.tensor([class_weights[1]], dtype=torch.float)
print("Positive class weight (target_1):", pos_weight)

  df = pd.read_csv(metadata_path)


Class counts: Counter({0: 4006, 1: 393})
Class weights: {1: 11.193384223918574, 0: 1.0981028457314028}
Positive class weight (target_1): tensor([11.1934])


In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pytorch_lightning as pl
import torch
from torch import nn
from torchmetrics.classification import BinaryAccuracy
from torchvision.utils import make_grid
from sklearn.metrics import roc_curve
import numpy as np
from pytorch_lightning.loggers import TensorBoardLogger
import matplotlib.pyplot as plt
import io
from torchmetrics import Accuracy


In [10]:
import numpy as np
from sklearn.metrics import roc_curve
import pandas as pd
from sklearn.metrics import roc_auc_score

In [11]:
def calculate_pauc(y_true, y_pred, min_tpr=0.8):
    """
    Calculate partial AUC (pAUC) for the region TPR >= min_tpr using the comp_score method.
    
    Args:
        y_true (np.array): True binary labels (0 or 1).
        y_pred (np.array): Predicted probabilities for the positive class.
        min_tpr (float): Minimum TPR threshold (e.g., 0.8).
        
    Returns:
        float: pAUC score.
    """
    def comp_score(solution: pd.DataFrame, submission: pd.DataFrame, row_id_column_name: str, min_tpr: float = 0.8):
        v_gt = abs(np.floor(np.asarray(solution.values)) - 1)
        v_pred = np.array([1.0 - x for x in submission.values])
        max_fpr = abs(1 - min_tpr)
        partial_auc_scaled = roc_auc_score(v_gt, v_pred, max_fpr=max_fpr)
        partial_auc = (
            0.5 * max_fpr**2
            + (max_fpr - 0.5 * max_fpr**2) / (1.0 - 0.5) * (partial_auc_scaled - 0.5)
        )
        return partial_auc

    def pauc_score_func(y_true, y_pred, min_tpr=0.8):
        y_true = np.asarray(y_true).flatten()
        y_pred = np.asarray(y_pred).flatten()
        y_true_df = pd.DataFrame(y_true, columns=["target"])
        y_pred_df = pd.DataFrame(y_pred, columns=["prediction"])
        return comp_score(y_true_df, y_pred_df, "", min_tpr)

    return pauc_score_func(y_true, y_pred, min_tpr)



In [12]:
class MoleLightningModel(pl.LightningModule):
    def __init__(self):
        super(MoleLightningModel, self).__init__()
        self.model = MoleCNN()
        self.loss_fn = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
        # self.accuracy = BinaryAccuracy()
        self.accuracy = Accuracy(task='binary')
        self.validation_step_outputs = []
        self.best_pauc = 0.0
        self.save_hyperparameters()

    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_pred = self(x).squeeze(1)  # Raw logits
        loss = self.loss_fn(y_pred, y.float())
    
        y_pred_probs = torch.sigmoid(y_pred)  # Convert logits to probabilities
        acc = self.accuracy(y_pred_probs, y.int())
    
        self.log('train_loss', loss, on_step=False, on_epoch=True, prog_bar=True)
        self.log('train_acc', acc, on_step=False, on_epoch=True, prog_bar=True)
        return loss
        
       
    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_pred = self(x).squeeze(1)  # Raw logits
        loss = self.loss_fn(y_pred, y.float())
    
        y_pred_probs = torch.sigmoid(y_pred)  # Convert logits to probabilities
        acc = self.accuracy(y_pred_probs, y.int())
    
        self.validation_step_outputs.append({
            'y_true': y.cpu(),
            'y_pred': y_pred_probs.cpu(),  # Store probabilities for pAUC calculation
            'val_loss': loss,
            'val_acc': acc
        })
    
        self.log('val_loss', loss, on_step=False, on_epoch=True, prog_bar=True)
        self.log('val_acc', acc, on_step=False, on_epoch=True, prog_bar=True)
        return loss
    
    def on_validation_epoch_end(self):
        # Calculate and log pAUC for this epoch
        y_true = torch.cat([x['y_true'] for x in self.validation_step_outputs]).numpy()
        y_pred = torch.cat([x['y_pred'] for x in self.validation_step_outputs]).numpy()
    
        pauc_score = calculate_pauc(y_true, y_pred, min_tpr=0.8)
        self.log('val_pAUC', pauc_score, on_epoch=True, prog_bar=True)
    
        # Update best pAUC if necessary
        if pauc_score > self.best_pauc:
            self.best_pauc = pauc_score
    
        # Clear the outputs list
        self.validation_step_outputs.clear()


    def on_fit_end(self):
        """Create and save ROC plot at the end of training"""
        # Get predictions for the entire validation set
        self.eval()
        all_preds = []
        all_labels = []
        
        with torch.no_grad():
            val_loader = self.trainer.datamodule.val_dataloader()
            for batch in val_loader:
                x, y = batch
                x = x.to(self.device)
                y_pred = self(x).squeeze(1)
                y_pred_probs = torch.sigmoid(y_pred)
                
                all_preds.extend(y_pred_probs.cpu().numpy())
                all_labels.extend(y.cpu().numpy())
        
        # Convert to numpy arrays
        y_true = np.array(all_labels)
        y_pred = np.array(all_preds)
        
        # Calculate final pAUC
        final_pauc = calculate_pauc(y_true, y_pred, min_tpr=0.8)
        
        # Create ROC curve plot
        fpr, tpr, _ = roc_curve(y_true, y_pred)
        
        plt.figure(figsize=(10, 10))
        plt.plot(fpr, tpr, 'b-', label=f'pAUC = {final_pauc:.4f}')
        plt.plot([0, 1], [0.8, 0.8], 'r--', label='TPR = 0.8')
        plt.fill_between(fpr, tpr, 0.8, where=(tpr >= 0.8), alpha=0.3, color='blue')
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title(f'Final ROC Curve\nBest pAUC = {self.best_pauc:.4f}')
        plt.legend()
        plt.grid(True)
        
        # Save plot to TensorBoard
        buf = io.BytesIO()
        plt.savefig(buf, format='png')
        buf.seek(0)
        img = plt.imread(buf)
        plt.close()
        
        self.logger.experiment.add_image('final_roc_curve', 
                                       torch.from_numpy(img).permute(2, 0, 1),
                                       0)
         
        print(f"\nTraining completed!")
        print(f"Best validation pAUC: {self.best_pauc:.4f}")
        print(f"Final validation pAUC: {final_pauc:.4f}")

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-3)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
        return [optimizer], [scheduler]

In [13]:
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping

In [14]:
# Initialize the model and data module
mole_model1 = MoleLightningModel()
mole_data_module1 = MoleDataModule(image_dir, metadata_path, batch_size=32)
# Initialize the model and data module
mole_model2 = MoleLightningModel()
mole_data_module2 = MoleDataModule(image_dir, metadata_path, batch_size=32)
# Initialize the model and data module
mole_model3 = MoleLightningModel()
mole_data_module3 = MoleDataModule(image_dir, metadata_path, batch_size=32)
# Initialize the model and data module
mole_model4 = MoleLightningModel()
mole_data_module4 = MoleDataModule(image_dir, metadata_path, batch_size=32)
# Initialize the model and data module
mole_model5 = MoleLightningModel()
mole_data_module5 = MoleDataModule(image_dir, metadata_path, batch_size=32)
# Initialize the model and data module
mole_model6 = MoleLightningModel()
mole_data_module6 = MoleDataModule(image_dir, metadata_path, batch_size=32)

# Update the trainer with the logger
trainer1 = Trainer(
    accelerator="gpu",
    devices=1,
    max_epochs=8,
    logger=None,
    log_every_n_steps=2
)
trainer2 = Trainer(
    accelerator="gpu",
    devices=1,
    max_epochs=8,
    logger=None,
    log_every_n_steps=2
)
trainer3 = Trainer(
    accelerator="gpu",
    devices=1,
    max_epochs=8,
    logger=None,
    log_every_n_steps=2
)
trainer4 = Trainer(
    accelerator="gpu",
    devices=1,
    max_epochs=8,
    logger=None,
    log_every_n_steps=2
)
trainer5 = Trainer(
    accelerator="gpu",
    devices=1,
    max_epochs=8,
    logger=None,
    log_every_n_steps=2
)
trainer6 = Trainer(
    accelerator="gpu",
    devices=1,
    max_epochs=8,
    logger=None,
    log_every_n_steps=2
)

# Train the model
trainer1.fit(mole_model1, mole_data_module1)
trainer2.fit(mole_model2, mole_data_module2)
trainer3.fit(mole_model3, mole_data_module3)
trainer4.fit(mole_model4, mole_data_module4)
trainer5.fit(mole_model5, mole_data_module5)
trainer6.fit(mole_model6, mole_data_module6)


  self.resnet.load_state_dict(torch.load(weight_path, map_location=torch.device('cuda' if torch.cuda.is_available() else 'cpu')))
  df = pd.read_csv(metadata_path)
  df = pd.read_csv(metadata_path)
  df = pd.read_csv(metadata_path)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]


Training completed!
Best validation pAUC: 0.1900
Final validation pAUC: 0.1300


  df = pd.read_csv(metadata_path)
  df = pd.read_csv(metadata_path)
  df = pd.read_csv(metadata_path)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]


Training completed!
Best validation pAUC: 0.1900
Final validation pAUC: 0.1300


  df = pd.read_csv(metadata_path)
  df = pd.read_csv(metadata_path)
  df = pd.read_csv(metadata_path)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]


Training completed!
Best validation pAUC: 0.2000
Final validation pAUC: 0.2000


  df = pd.read_csv(metadata_path)
  df = pd.read_csv(metadata_path)
  df = pd.read_csv(metadata_path)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]


Training completed!
Best validation pAUC: 0.2000
Final validation pAUC: 0.1800


  df = pd.read_csv(metadata_path)
  df = pd.read_csv(metadata_path)
  df = pd.read_csv(metadata_path)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]


Training completed!
Best validation pAUC: 0.1800
Final validation pAUC: 0.1700


  df = pd.read_csv(metadata_path)
  df = pd.read_csv(metadata_path)
  df = pd.read_csv(metadata_path)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]


Training completed!
Best validation pAUC: 0.2000
Final validation pAUC: 0.2000


In [15]:
# class MoleLightningModel(pl.LightningModule):
#     def __init__(self, weight_path="/kaggle/input/molemodel/pytorch/default/1"):
#         super(MoleLightningModel, self).__init__()
#         self.model = MoleCNN()  # Uses ResNet50 backbone

#         # Load pre-trained weights
#         self.model.load_state_dict(torch.load(weight_path, map_location=torch.device('cuda' if torch.cuda.is_available() else 'cpu')))
#         self.model.eval()  # Set to evaluation mode
    
#     def forward(self, x):
#         return self.model(x)


In [16]:
import h5py
import os
from PIL import Image
import io

# Paths
hdf5_path = "/kaggle/input/isic-2024-challenge/test-image.hdf5"
output_folder = "/kaggle/working/jpg_test_images"

# Create output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Open HDF5 file
with h5py.File(hdf5_path, "r") as f:
    for isic_id in f.keys():  # Iterate over image keys
        raw_data = f[isic_id][()]  # Read raw image data

        try:
            image = Image.open(io.BytesIO(raw_data))  # Decode image
            image = image.convert("RGB")  # Ensure RGB format

            # Save as JPG
            save_path = os.path.join(output_folder, f"{isic_id}.jpg")
            image.save(save_path, "JPEG")

            print(f"Saved: {save_path}")
        except Exception as e:
            print(f"Skipping {isic_id}: {e}")


Saved: /kaggle/working/jpg_test_images/ISIC_0015657.jpg
Saved: /kaggle/working/jpg_test_images/ISIC_0015729.jpg
Saved: /kaggle/working/jpg_test_images/ISIC_0015740.jpg


In [17]:
 class TestMoleDataset(Dataset):
    def __init__(self, image_dir, transform=None):
        self.image_dir = image_dir
        self.transform = transform
        self.images = [os.path.join(image_dir, img) for img in os.listdir(image_dir) if os.path.isfile(os.path.join(image_dir, img))]
    
    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, idx):
        img_path = self.images[idx]
        image = Image.open(img_path).convert("RGB")  # Open and convert to RGB

        if self.transform:
            image = self.transform(image)

        return image, img_path  # Return image path to track predictions


In [18]:
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

test_dataset = TestMoleDataset(image_dir=output_folder, transform=test_transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=0)


In [19]:
import torch
import pandas as pd

# Load trained models (adjust path if needed)
mole_model1.eval()
mole_model2.eval()
mole_model3.eval()
mole_model4.eval()
mole_model5.eval()
mole_model6.eval()

# Ensure models run on GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
mole_model1.to(device)
mole_model2.to(device)
mole_model3.to(device)
mole_model4.to(device)
mole_model5.to(device)
mole_model6.to(device)

# Store results
results = []

# Disable gradients for inference
with torch.no_grad():
    for images, img_paths in test_loader:
        images = images.to(device)
        logits1 = mole_model1(images)
        logits2 = mole_model2(images)
        logits3 = mole_model3(images)
        logits4 = mole_model4(images)
        logits5 = mole_model5(images)
        logits6 = mole_model6(images)
        
        probabilities1 = torch.sigmoid(logits1.squeeze(1))
        probabilities2 = torch.sigmoid(logits2.squeeze(1))
        probabilities3 = torch.sigmoid(logits3.squeeze(1))
        probabilities4 = torch.sigmoid(logits4.squeeze(1))
        probabilities5 = torch.sigmoid(logits5.squeeze(1))
        probabilities6 = torch.sigmoid(logits6.squeeze(1))

        probabilities = (probabilities1 + probabilities2 + probabilities3 + 
                         probabilities4 + probabilities5 + probabilities6) / 6

        # Store probabilities with image names (excluding the extension)
        for img_path, prob in zip(img_paths, probabilities.cpu().numpy()):
            isic_id = os.path.splitext(os.path.basename(img_path))[0]
            results.append({"isic_id": isic_id, "target": prob})

# Convert results to DataFrame and save
df = pd.DataFrame(results)
df.to_csv("/kaggle/working/submission.csv", index=False)

print("Predictions saved to submission.csv")
df.head()


Predictions saved to submission.csv


Unnamed: 0,isic_id,target
0,ISIC_0015657,0.500291
1,ISIC_0015740,0.405188
2,ISIC_0015729,0.272928


In [20]:
import os
import pandas as pd
import shutil

file_path = "/kaggle/working/submission.csv"

# Check if the file exists
if os.path.exists(file_path):
    print("✅ Submission file saved successfully!")

    # Display first few rows
    df_check = pd.read_csv(file_path)
    print(df_check.head())
else:
    print("❌ Submission file NOT found. Check the file path.")

# Cleanup: Remove all files and directories except submission.csv
working_dir = "/kaggle/working/"
file_to_keep = "submission.csv"

# List all files and folders in the directory
items = os.listdir(working_dir)

# Iterate through all items
for item in items:
    item_path = os.path.join(working_dir, item)

    # Remove files other than submission.csv
    if os.path.isfile(item_path) and item != file_to_keep:
        os.remove(item_path)
        print(f"Deleted file: {item_path}")

    # Remove any directories
    elif os.path.isdir(item_path):
        shutil.rmtree(item_path)
        print(f"Deleted folder: {item_path}")

print("Cleanup complete.")


✅ Submission file saved successfully!
        isic_id    target
0  ISIC_0015657  0.500291
1  ISIC_0015740  0.405188
2  ISIC_0015729  0.272928
Deleted folder: /kaggle/working/lightning_logs
Deleted file: /kaggle/working/__notebook__.ipynb
Deleted folder: /kaggle/working/jpg_test_images
Cleanup complete.
