In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

# import numpy as np # linear algebra
# import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/working'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# print('Directory Searched')        

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import os
import io
import time
import h5py
import base64
import logging
import hashlib
import pickle
import multiprocessing

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from tqdm import tqdm

from sklearn.metrics import f1_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

import imblearn
from imblearn.over_sampling import RandomOverSampler

from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.models import resnet50, ResNet50_Weights

from tqdm import tqdm

In [3]:
logging.basicConfig(level=logging.DEBUG,
                    format='(asctime)s - %(name)s - %(levelname)s - %(message)s',
                    handlers=[logging.StreamHandler()])

# Data paths and parameters
TRAIN_METADATA_PATH = "/kaggle/input/isic-2024-challenge/train-metadata.csv"
TRAIN_IMAGE_DIR = '/kaggle/input/isic-2024-challenge/train-image.hdf5'

SUBMISSION_METADATA_PATH = '/kaggle/input/isic-2024-challenge/test-metadata.csv'
SUBMISSION_IMAGE_PATH = '/kaggle/input/isic-2024-challenge/test-image.hdf5'

BATCH_SIZE = 32
OVERSAMPLING_RATIO = 0.1 # .1 = 1:10, .2 = 1:5
    
# Due to different datatype, need to separate features for processing
FEATURES = ['age_approx', 'sex', 'anatom_site_general', 'clin_size_long_diam_mm', 'image_type', 'tbp_tile_type']
NUMERIC_FEATURES = ['age_approx', 'clin_size_long_diam_mm']
CATEGORICAL_FEATURES = ['sex', 'anatom_site_general', 'image_type', 'tbp_tile_type']

def load_and_preprocess_metadata(metadata_path):
    """Loads and preprocesses the metadata."""
    df = pd.read_csv(metadata_path, low_memory=False)

    if 'isic_id' not in df.columns:
        raise ValueError("CSV file does not contain 'isic_id' column")

    # Fill missing values
    df['age_approx'] = df['age_approx'].fillna(df['age_approx'].mean())
    df['sex'] = df['sex'].fillna('unknown')
    df['anatom_site_general'] = df['anatom_site_general'].fillna('unknown')

    return df


# Prepare features using the existing encoder
def prepare_features(df, encoder):
    """Prepares features for the model."""
    df[NUMERIC_FEATURES] = df[NUMERIC_FEATURES].fillna(df[NUMERIC_FEATURES].mean())

    # One-hot encode categorical features using the pre-fitted encoder
    encoded_features = encoder.transform(df[CATEGORICAL_FEATURES])
    encoded_feature_names = encoder.get_feature_names_out(CATEGORICAL_FEATURES)

    # Combine numeric and encoded features
    all_feature_names = NUMERIC_FEATURES + list(encoded_feature_names)
    feature_array = np.hstack((df[NUMERIC_FEATURES].values, encoded_features))
    features_df = pd.DataFrame(feature_array, columns=all_feature_names)
    
    return features_df, all_feature_names

# Define the Dataset class
class SkinLesionDataset(Dataset):
    def __init__(self, hdf5_file, metadata_df, features_df, features, transform=None, return_target=True, use_cache=True, cache_dir='./dataset_cache'):
        self.hdf5_file = h5py.File(hdf5_file, 'r')
        self.dataframe = metadata_df
        self.features_df = features_df
        self.all_features_names = all_features
        self.transform = transform
        self.use_cache = use_cache
        self.cache_dir = cache_dir
        self.return_target = return_target

        if self.use_cache:
            os.makedirs(self.cache_dir, exist_ok=True)

        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.DEBUG)

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        try:
            if self.use_cache:
                cache_path = self._get_cache_path(idx)
                if os.path.exists(cache_path):
                    item = self._load_from_cache(cache_path)
                    return item if self.return_target else item[:2]
                else:
                    # Load and save to cache
                    item = self._load_item(idx)
                    self._save_to_cache(cache_path, item)
                    return item
            else:
                # Caching disabled, just load and transform
                return self._load_item(idx)
        except Exception as e:
            print(f"Error getting item at index {idx}: {e}")
            self.logger.error(f"Error getting item at index {idx}: {e}")
            raise

    @staticmethod
    def positive_transforms(image):
        positive_transforms = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            transforms.RandomRotation(30),
            transforms.ColorJitter(brightness=0.2,
                                   contrast=0.2, saturation=0.2, hue=0.1)
        ])
        return positive_transforms(image)
    @staticmethod
    def negative_transforms(image):
        negative_transforms = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.ColorJitter(brightness=0.1, contrast=0.1),
        ])
        return negative_transforms(image)
    
    def _load_item(self, idx):
        try:
            image_id = self.dataframe.iloc[idx]['isic_id']

            # HDF5 decoding (using base64 encoding/decoding)
            image_data = self.hdf5_file[image_id][()]
            encoded_image_data = base64.b64encode(image_data).decode('ascii')
            image_bytes = base64.b64decode(encoded_image_data)
            image = Image.open(io.BytesIO(image_bytes))

            # Apply the transform to the image
            if self.transform is not None:
                image = self.transform(image)

            features = torch.tensor(self.features_df.iloc[idx].values, dtype=torch.float)

            # Only return the target if it's required and exists in the dataframe
            if self.return_target and 'target' in self.dataframe.columns:
                target_value = self.dataframe.iloc[idx]['target']
                target = torch.tensor(target_value, dtype=torch.long)
                return image, features, target
            else:
                return image, features

        except Exception as e:
            self.logger.error(f"Error loading item at index {idx}: {e}")
            raise
    def _get_cache_path(self, idx):
        filename = f"{idx}_{self.dataframe.iloc[idx]['image_filename']}"
        hashed_filename = hashlib.md5(filename.encode()).hexdigest()
        return os.path.join(self.cache_dir, f"{hashed_filename}.pkl")

    def _save_to_cache(self, cache_path, item):
        try:
            with open(cache_path, 'wb') as f:
                pickle.dump(item, f)
        except Exception as e:
            # self.logger.error(f"Error Saving item to disk at index {idx}: {e}")
            raise
            
    def _load_from_cache(self, cache_path):
        try:
            with open(cache_path, 'rb') as f:
                return pickle.load(f)
        except Exception as e:
            # self.logger.error(f"Error loading item from cache at index {idx}: {e}")
            raise
    def preload(self):
        for i in tqdm(range(len(self)), desc='Preloading Data'):
            _ = self[i]
        # print(f'Preloaded {len(self)} items into cache.')

In [4]:
# Executing data preparation
if __name__ == "__main__":
    # Load and preprocess metadata
    df = load_and_preprocess_metadata(TRAIN_METADATA_PATH)
#     print(df.isna().sum())

    # Split data into features and target
    features_data = df[FEATURES]
    target_labels = df['target']

    # Initial train-test split (before oversampling)
    features_train, features_temp, target_train, target_temp = train_test_split(
        features_data, target_labels, test_size=0.25, random_state=42, stratify=target_labels
    )

    # Add an extra index column to features_train
    features_train['original_index'] = features_train.index
    
    # Oversample the minority class in training set
    oversampler = RandomOverSampler(sampling_strategy=OVERSAMPLING_RATIO, random_state=42)
    features_train_resampled, target_train_resampled = oversampler.fit_resample(features_train, target_train)
    
    # Ensure that resampled data is used
#     print("Length of features_train_resampled:", len(features_train_resampled))
#     print("Length of target_train_resampled:", len(target_train_resampled))
    
    # Extract original indices from the oversampled features
    original_sample_indices = features_train_resampled['original_index']
    
#     print(original_sample_indices.dtype)    
#     # Check for NaN values in target_train_resampled
#     print(target_train_resampled.isna().sum())  # Should be 0
#     print(features_train_resampled.isna().sum())  # Check this as well
    
#     # Now, align 'target_train_resampled' using the 'original_sampled_indices'
#     print(target_train_resampled.index.dtype)
    
    # Train-test split with stratification
    features_train, features_temp, target_train, target_temp = train_test_split(
        features_train_resampled, target_train_resampled, test_size=0.25, random_state=42, stratify=target_train_resampled
    )
    features_val, features_test, target_val, target_test = train_test_split(
        features_temp, target_temp, test_size=0.5, random_state=42, stratify=target_temp
    )

    
    # Print NaN checks for each prepared feature set
#     print(f"Train features NaN check:\n{train_features.isna().sum()}")
#     print(f"Val features NaN check:\n{val_features.isna().sum()}")
#     print(f"Test features NaN check:\n{test_features.isna().sum()}")

#     print(f"Train features shape: {train_features.shape}, dtype: {train_features.dtypes}")
#     print(f"Val features shape: {val_features.shape}, dtype: {val_features.dtypes}")
#     print(f"Test features shape: {test_features.shape}, dtype: {test_features.dtypes}")

    
    # Image transformation
    transform = transforms.Compose([
        transforms.Resize((160, 160)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Reset indices for alignment
    features_train_resampled = features_train_resampled.reset_index(drop=True)
    target_train_resampled = target_train_resampled.reset_index(drop=True)
    features_val = features_val.reset_index(drop=True)
    target_val = target_val.reset_index(drop=True)
    features_test = features_test.reset_index(drop=True)
    target_test = target_test.reset_index(drop=True)

    # Align the 'isic_id' with the indices
    isic_id_train = df.loc[features_train_resampled.index, 'isic_id'].reset_index(drop=True)
    isic_id_val = df.loc[features_val.index, 'isic_id'].reset_index(drop=True)
    isic_id_test = df.loc[features_test.index, 'isic_id'].reset_index(drop=True)

    # Reconstruct DataFrames for each set
    train_df = pd.concat([features_train_resampled, target_train_resampled, isic_id_train], axis=1)
    val_df = pd.concat([features_val, target_val, isic_id_val], axis=1)
    test_df = pd.concat([features_test, target_test, isic_id_test], axis=1)

    # Fit encoder for datasets
    encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
    encoder.fit(train_df[CATEGORICAL_FEATURES])

    # Prepare features for each split using the fitted encoder
    train_features, all_features = prepare_features(train_df, encoder)
    val_features, _ = prepare_features(val_df, encoder)
    test_features, _ = prepare_features(test_df, encoder)

    # Create datasets
    train_dataset = SkinLesionDataset(TRAIN_IMAGE_DIR, train_df, train_features, all_features, transform=transform, use_cache=False, cache_dir='./train_cache', return_target = True)
    val_dataset = SkinLesionDataset(TRAIN_IMAGE_DIR, val_df, val_features, all_features, transform=transform, use_cache=False, cache_dir='/val_cache', return_target = True)
    test_dataset = SkinLesionDataset(TRAIN_IMAGE_DIR, test_df, test_features, all_features, transform=transform, use_cache=False, cache_dir='./test_cache', return_target = True)

    # Create DataLoaders
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

    # Creating the submission dataset and dataloader
    submission_df = load_and_preprocess_metadata(SUBMISSION_METADATA_PATH)
    submission_features_df, submission_all_features = prepare_features(submission_df, encoder)

    submission_dataset = SkinLesionDataset(
    SUBMISSION_IMAGE_PATH, 
    submission_df, 
    submission_features_df, 
    submission_all_features, 
    transform=transform, 
    use_cache=False, 
    cache_dir='./submission_cache',
    return_target = False
    )
    
    submission_loader = DataLoader(submission_dataset, batch_size=BATCH_SIZE, shuffle=False)

    print('Data loaders and datasets created successfully!')

Data loaders and datasets created successfully!


In [5]:
class SkinLesionModel(nn.Module):
    def __init__(self, num_classes, num_features):
        super(SkinLesionModel, self).__init__()

        # Image feature extractor (resnet model)
        self.resnet = resnet50(weights=None)
        self.resnet.fc = nn.Identity() # Remove the final fully connected layer
    
        # Freeze the parameters of the ResNet
        for param in self.resnet.parameters():
            param.requires_grad = True
    
        # Additional features processing
        self.feature_fc = nn.Sequential(
            nn.Linear(num_features, 64),
            nn.ReLU(),
            nn.Dropout(0.3)
        )
    
        # Combine image features and additional features
        self.classifier = nn.Sequential(
            nn.Linear(2048 + 64, 512), # 2048 from ResNet50, 64 from additional features
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )
    def forward(self, image, features):
        # Process image through ResNet
        # Input shape: [batch_size, 3, 160, 160]
        # Output shape: [batch_size, 2048]
        image_features = self.resnet(image)

        # Process additional features
        # Input shape: [batch_size, num_features]
        # Output shape: [batch_size, 64]
        processed_features = self.feature_fc(features)

        # Combine features
        # Output shape: [batch_size, 2048 + 64]
        combined_features = torch.cat((image_features, processed_features), dim=1)

        # Final classification
        # Output shape: [batch_size, num_classes]
        output = self.classifier(combined_features)

        return output

def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10, device='cuda'):
    model.to(device)
    print(f'Sending model to device: {device}')
    for epoch in range(num_epochs):
        print(f'\nEpoch {epoch+1}/{num_epochs}')
        print('-' * 60)

        # Training phase
        model.train()
        train_loss = 0.0
        train_correct = 0
        train_total = 0
        train_positives = 0
        train_true_positives = 0
        train_false_positives = 0
        train_predictions = []
        train_targets = []

        print("Checking Data Loader...")
        for i, (images, features, targets) in enumerate(train_loader):
            if i == 0:
                print(f'First batch loaded. Shapes: images: {images.shape}, features {features.shape}, targets {targets.shape}')
                break
        print("Data Loader Check Complete")
        
        print('Training:')
        progress_bar = tqdm(train_loader, desc='Training')
        for batch_idx, (images, features, targets) in enumerate(progress_bar):
            # print(f'Processing batch {batch_idx+1}/{len(train_loader)}')
            # print(f'Images shape: {images.shape}, Features shape: {features.shape}, Targets shape: {targets.shape}')
            images = images.to(device)
            features = features.to(device)
            targets = targets.to(device)

            optimizer.zero_grad()
            outputs = model(images, features)
            loss = criterion(outputs, targets.float().unsqueeze(1))
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            predictions = (torch.sigmoid(outputs) > 0.5).squeeze()
            train_correct += (predictions == targets).sum().item()
            train_total += targets.size(0)

            batch_positives = predictions.sum().item()
            train_positives += batch_positives
            train_true_positives += ((predictions == 1) & (targets == 1)).sum().item()
            train_false_positives += ((predictions == 1) & (targets == 0)).sum().item()
            train_false_negatives = ((predictions == 0) & (targets == 1)).sum().item()

            progress_bar.set_postfix({
                'loss': f'{loss.item():.4f}',
                'acc': f'{train_correct/train_total:.4f}',
                'pos_pred': f'{batch_positives}/{targets.size(0)}'
            })

        train_loss /= len(train_loader)
        train_accuracy = train_correct / train_total
        train_precision = train_true_positives / (train_positives + 1e-8)
        train_recall = train_true_positives / (train_true_positives + train_false_negatives + 1e-8)
        train_f1 = 2 * (train_precision * train_recall) / (train_precision + train_recall + 1e-8)
            
        print('\nTraining Results:')
        print(f'Loss: {train_loss:.4f}, Accuracy: {train_accuracy}')
        print(f'Total Predictions: {train_total}, Positive Predictions: {train_positives}')
        print(f'True Positives: {train_true_positives}, False Positives: {train_false_positives}')
        print(f'Precision: {train_precision:.4f}, Recall: {train_recall:.4f}, F1: {train_f1:.4f}')
        


        # Validation
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0
        val_predictions = []
        val_targets = []

        with torch.no_grad():
            progress_bar = tqdm(val_loader, desc='Validation')
            for batch_idx, (images, features, targets) in enumerate(progress_bar):
                images = images.to(device)
                features = features.to(device)
                targets = targets.to(device)

                outputs = model(images, features)
                loss = criterion(outputs, targets.float().unsqueeze(1))

                val_loss += loss.item()
                predictions = (torch.sigmoid(outputs) > 0.5).squeeze()
                val_correct += (predictions == targets).sum().item()
                val_total += targets.size(0)

                val_predictions.extend(predictions.cpu().numpy())
                val_targets.extend(targets.cpu().numpy())
                
                progress_bar.set_postfix({
                    'loss': f'{loss.item():.4f}',
                    'acc': f'{val_correct/val_total:.4f}'
                })

        val_loss /= len(val_loader)
        val_accuracy = val_correct / val_total
        val_f1 = f1_score(val_targets, val_predictions)
        val_confusion_matrix = confusion_matrix(val_targets, val_predictions)

        print(f'\nValidation Results:')
        print(f'Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}, F1: {val_f1:.4f}')
        print('Confusion Matrix:')
        print(val_confusion_matrix)
        print('-' * 60)

    print("Training Complete!")

In [6]:
# Hyperparameters and setup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Setting up Hyperparameters\nDevice = {device}")

num_classes = 1
num_features = len(all_features)
batch_size = 32
num_epochs = 5
learning_rate = 0.001

# Create model, loss function, and optimizer
model = SkinLesionModel(num_classes, num_features)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

Setting up Hyperparameters
Device = cuda


In [7]:
# # Define the save directory and path
MODEL_SAVE_DIR = './Model_Weights/Attempt_2'
MODEL_SAVE_PATH = os.path.join(MODEL_SAVE_DIR, 'submission_weights.pth')

# Ensure the directory exists
os.makedirs(MODEL_SAVE_DIR, exist_ok=True)

# Train the model
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device)

# After training is complete, save the model weights
torch.save(model.state_dict(), MODEL_SAVE_PATH)

print(f'Model saved to: {MODEL_SAVE_PATH}')

Sending model to device: cuda

Epoch 1/5
------------------------------------------------------------
Checking Data Loader...
First batch loaded. Shapes: images: torch.Size([32, 3, 160, 160]), features torch.Size([32, 14]), targets torch.Size([32])
Data Loader Check Complete
Training:


Training: 100%|██████████| 10330/10330 [31:34<00:00,  5.45it/s, loss=0.0574, acc=0.9219, pos_pred=0/20]



Training Results:
Loss: 0.2247, Accuracy: 0.9219145177099847
Total Predictions: 330548, Positive Predictions: 9570
True Positives: 6904, False Positives: 2666
Precision: 0.7214, Recall: 1.0000, F1: 0.8382


Validation: 100%|██████████| 1292/1292 [01:55<00:00, 11.18it/s, loss=0.3596, acc=0.9322]



Validation Results:
Loss: 0.2192, Accuracy: 0.9322, F1: 0.4782
Confusion Matrix:
[[37235   327]
 [ 2473  1283]]
------------------------------------------------------------

Epoch 2/5
------------------------------------------------------------
Checking Data Loader...
First batch loaded. Shapes: images: torch.Size([32, 3, 160, 160]), features torch.Size([32, 14]), targets torch.Size([32])
Data Loader Check Complete
Training:


Training: 100%|██████████| 10330/10330 [28:16<00:00,  6.09it/s, loss=0.1617, acc=0.9302, pos_pred=2/20]



Training Results:
Loss: 0.1949, Accuracy: 0.9302219344845529
Total Predictions: 330548, Positive Predictions: 13684
True Positives: 10334, False Positives: 3350
Precision: 0.7552, Recall: 0.9999, F1: 0.8605


Validation: 100%|██████████| 1292/1292 [01:56<00:00, 11.12it/s, loss=0.3173, acc=0.9276]



Validation Results:
Loss: 0.2070, Accuracy: 0.9276, F1: 0.3810
Confusion Matrix:
[[37404   158]
 [ 2835   921]]
------------------------------------------------------------

Epoch 3/5
------------------------------------------------------------
Checking Data Loader...
First batch loaded. Shapes: images: torch.Size([32, 3, 160, 160]), features torch.Size([32, 14]), targets torch.Size([32])
Data Loader Check Complete
Training:


Training: 100%|██████████| 10330/10330 [28:51<00:00,  5.97it/s, loss=0.3473, acc=0.9314, pos_pred=1/20]



Training Results:
Loss: 0.1906, Accuracy: 0.9313564141970304
Total Predictions: 330548, Positive Predictions: 13767
True Positives: 10563, False Positives: 3204
Precision: 0.7673, Recall: 0.9998, F1: 0.8682


Validation: 100%|██████████| 1292/1292 [02:02<00:00, 10.53it/s, loss=0.3387, acc=0.9306]



Validation Results:
Loss: 0.1799, Accuracy: 0.9306, F1: 0.4279
Confusion Matrix:
[[37380   182]
 [ 2684  1072]]
------------------------------------------------------------

Epoch 4/5
------------------------------------------------------------
Checking Data Loader...
First batch loaded. Shapes: images: torch.Size([32, 3, 160, 160]), features torch.Size([32, 14]), targets torch.Size([32])
Data Loader Check Complete
Training:


Training: 100%|██████████| 10330/10330 [29:03<00:00,  5.92it/s, loss=0.1061, acc=0.9315, pos_pred=2/20]



Training Results:
Loss: 0.1878, Accuracy: 0.93154095623026
Total Predictions: 330548, Positive Predictions: 13940
True Positives: 10680, False Positives: 3260
Precision: 0.7661, Recall: 1.0000, F1: 0.8676


Validation: 100%|██████████| 1292/1292 [02:03<00:00, 10.50it/s, loss=0.2906, acc=0.9352]



Validation Results:
Loss: 0.1762, Accuracy: 0.9352, F1: 0.5277
Confusion Matrix:
[[37144   418]
 [ 2260  1496]]
------------------------------------------------------------

Epoch 5/5
------------------------------------------------------------
Checking Data Loader...
First batch loaded. Shapes: images: torch.Size([32, 3, 160, 160]), features torch.Size([32, 14]), targets torch.Size([32])
Data Loader Check Complete
Training:


Training: 100%|██████████| 10330/10330 [29:07<00:00,  5.91it/s, loss=0.2295, acc=0.9320, pos_pred=2/20]



Training Results:
Loss: 0.1869, Accuracy: 0.9320007986737175
Total Predictions: 330548, Positive Predictions: 14078
True Positives: 10825, False Positives: 3253
Precision: 0.7689, Recall: 0.9998, F1: 0.8693


Validation: 100%|██████████| 1292/1292 [02:00<00:00, 10.71it/s, loss=0.3097, acc=0.9324]



Validation Results:
Loss: 0.1754, Accuracy: 0.9324, F1: 0.4555
Confusion Matrix:
[[37354   208]
 [ 2587  1169]]
------------------------------------------------------------
Training Complete!
Model saved to: ./Model_Weights/Attempt_2/submission_weights.pth


In [8]:
# Load the model and weights
model = SkinLesionModel(num_classes=1, num_features=len(all_features))
model.load_state_dict(torch.load(MODEL_SAVE_PATH), strict=False)
model.eval()

# Send the model to the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

print('Beginning evaluation')
# Perform inference and generate submission.csv
all_predictions = []
with torch.no_grad():
    for images, features in tqdm(submission_loader, desc="Evaluating"):
        images = images.to(device)
        features = features.to(device)
        outputs = model(images, features)
        probabilities = torch.sigmoid(outputs).cpu().numpy()  # Get probability of class 1 (malignant)
        all_predictions.extend(probabilities)

print('Creating submission dataframe')

# Create submission dataframe
final_submission_df = pd.DataFrame({
    'isic_id': submission_df['isic_id'],
    'target': [round(float(pred[0]), 5) for pred in all_predictions]  # Ensure the target is rounded to 5 decimal places
})

print('Submitting to CSV')
# Save to CSV
final_submission_df.to_csv('submission.csv', index=False)

  model.load_state_dict(torch.load(MODEL_SAVE_PATH), strict=False)


Beginning evaluation


Evaluating: 100%|██████████| 1/1 [00:00<00:00, 22.54it/s]

Creating submission dataframe
Submitting to CSV



