# SIIM-ISIC Melanoma Classification
## Deep Learning Model for Melanoma Detection

This notebook implements a CNN-based classifier for melanoma detection using the SIIM-ISIC dataset.

In [None]:
# Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import os
import cv2
from tqdm import tqdm

# Deep Learning
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications import EfficientNetB3
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint

# Sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, confusion_matrix, classification_report

# Seed NumPy and TensorFlow with the same fixed value for reproducibility
for _seed_fn in (np.random.seed, tf.random.set_seed):
    _seed_fn(42)

# Report the TensorFlow build and any GPUs it can see
print(f"TensorFlow version: {tf.__version__}")
_gpu_devices = tf.config.list_physical_devices('GPU')
print(f"GPU Available: {_gpu_devices}")

## Configuration

In [None]:
# Training hyperparameters
IMG_SIZE = 224            # images are resized to IMG_SIZE x IMG_SIZE
BATCH_SIZE = 32
EPOCHS = 50               # epoch budget for the fine-tuning phase
LEARNING_RATE = 1e-3      # initial Adam learning rate

# Dataset locations (adjust based on your Kaggle data structure)
_DATA_ROOT = '/kaggle/input/siim-isic-melanoma-classification'
TRAIN_CSV = f'{_DATA_ROOT}/train.csv'
TEST_CSV = f'{_DATA_ROOT}/test.csv'
TRAIN_IMG_DIR = f'{_DATA_ROOT}/jpeg/train/'
TEST_IMG_DIR = f'{_DATA_ROOT}/jpeg/test/'

## Load and Explore Data

In [None]:
# Read the train and test metadata tables
train_df, test_df = (pd.read_csv(_csv_path) for _csv_path in (TRAIN_CSV, TEST_CSV))

# Dataset sizes
print(f"Training samples: {len(train_df)}")
print(f"Test samples: {len(test_df)}")

# Number of rows per value of the 'target' label column
_target_counts = train_df['target'].value_counts()
print(f"\nClass distribution:")
print(_target_counts)

# Preview the first rows (rendered as the cell output)
train_df.head()

In [None]:
# Bar chart with one bar per value of the 'target' column
plt.figure(figsize=(8, 6))
_count_ax = sns.countplot(data=train_df, x='target')
_count_ax.set_title('Class Distribution (0: Benign, 1: Malignant)')
_count_ax.set_xlabel('Target')
_count_ax.set_ylabel('Count')
plt.show()

# Quantify the imbalance: the sum of the 0/1 targets is the malignant count,
# and every remaining row is benign
melanoma_count = train_df['target'].sum()
benign_count = len(train_df) - melanoma_count
_imbalance_ratio = benign_count / melanoma_count
print(f"\nClass Imbalance Ratio: {_imbalance_ratio:.2f}:1")

## Data Preprocessing

In [None]:
# Attach the on-disk JPEG location of every image to both tables
for _frame, _img_dir in ((train_df, TRAIN_IMG_DIR), (test_df, TEST_IMG_DIR)):
    _frame['image_path'] = [
        os.path.join(_img_dir, f'{_name}.jpg') for _name in _frame['image_name']
    ]

# Hold out 20% of the labelled data for validation, stratified on the target
# so both splits keep the same class ratio.
# NOTE(review): if the dataset holds several images per patient, a
# patient-grouped split may be needed to avoid leakage — confirm.
_split_options = dict(
    test_size=0.2,
    random_state=42,
    stratify=train_df['target'],
)
train_data, val_data = train_test_split(train_df, **_split_options)

print(f"Train set: {len(train_data)} samples")
print(f"Validation set: {len(val_data)} samples")

In [None]:
# Visualize sample images in a 2x4 grid: first row benign, second row malignant
fig, axes = plt.subplots(2, 4, figsize=(16, 8))
axes = axes.ravel()

# Draw 4 random benign and 4 random malignant rows; concatenation order
# determines which grid cell each image lands in
benign_samples = train_df[train_df['target'] == 0].sample(4)
malignant_samples = train_df[train_df['target'] == 1].sample(4)
samples = pd.concat([benign_samples, malignant_samples])

for ax, (_, row) in zip(axes, samples.iterrows()):
    # The context manager closes the file handle as soon as the image is
    # drawn, instead of leaving it open until garbage collection
    with Image.open(row['image_path']) as img:
        ax.imshow(img)
    ax.set_title('Malignant' if row['target'] == 1 else 'Benign')
    ax.axis('off')

plt.tight_layout()
plt.show()

## Data Augmentation and Generators

In [None]:
# Class weights to counter the imbalance, computed on the training split only:
# weight_c = total / (2 * count_c), so the rarer class gets the larger weight
total = len(train_data)
melanoma_count = train_data['target'].sum()
benign_count = total - melanoma_count

class_weights = {
    _label: total / (2 * _count)
    for _label, _count in ((0, benign_count), (1, melanoma_count))
}

print(f"Class weights: {class_weights}")

In [None]:
# Random geometric augmentations applied to training images only
_augmentation = dict(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
)

# Both generators scale pixel values by 1/255; validation gets no augmentation
train_datagen = ImageDataGenerator(rescale=1./255, **_augmentation)
val_datagen = ImageDataGenerator(rescale=1./255)

# Settings shared by the training and validation flows
_flow_options = dict(
    x_col='image_path',
    y_col='target',
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='raw',
)

# Training batches are shuffled; validation order is kept fixed so that
# predictions line up with the rows of val_data
train_generator = train_datagen.flow_from_dataframe(
    train_data, shuffle=True, **_flow_options
)
val_generator = val_datagen.flow_from_dataframe(
    val_data, shuffle=False, **_flow_options
)

## Model Architecture

In [None]:
def build_model():
    """
    Build an EfficientNetB3-based transfer learning model.

    The image generators in this notebook scale pixels to [0, 1]
    (``rescale=1./255``), while the Keras EfficientNet applications expect raw
    [0, 255] pixel values because they normalize inputs internally. A
    ``Rescaling(255.0)`` layer at the front of the model undoes the generator
    scaling so the pre-trained backbone sees the value range it was trained on.

    Returns:
        tuple: ``(model, base_model)`` where ``model`` is the full classifier
        ending in a single sigmoid unit and ``base_model`` is the (initially
        frozen) EfficientNetB3 backbone, returned so it can be unfrozen later
        for fine-tuning.
    """
    input_shape = (IMG_SIZE, IMG_SIZE, 3)

    # Load pre-trained EfficientNetB3 without its ImageNet classification head
    base_model = EfficientNetB3(
        include_top=False,
        weights='imagenet',
        input_shape=input_shape
    )

    # Freeze base model initially so only the new head is trained
    base_model.trainable = False

    # Build model
    model = keras.Sequential([
        keras.Input(shape=input_shape),
        # Map [0, 1] inputs back to the [0, 255] range EfficientNet expects
        layers.Rescaling(255.0),
        base_model,
        layers.GlobalAveragePooling2D(),
        layers.BatchNormalization(),
        layers.Dropout(0.5),
        layers.Dense(256, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.3),
        layers.Dense(1, activation='sigmoid')
    ])

    return model, base_model

model, base_model = build_model()
model.summary()

## Compile and Train Model

In [None]:
# Compile for binary classification; AUC is tracked under the name 'auc',
# so its validation counterpart is reported as 'val_auc'
model.compile(
    loss='binary_crossentropy',
    optimizer=keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    metrics=['accuracy', keras.metrics.AUC(name='auc')],
)

# Every callback watches validation AUC, where higher is better
_track_val_auc = dict(monitor='val_auc', mode='max', verbose=1)

# Halve the learning rate after 3 epochs without improvement (floor: 1e-7)
reduce_lr = ReduceLROnPlateau(
    factor=0.5,
    patience=3,
    min_lr=1e-7,
    **_track_val_auc,
)

# Stop after 7 stagnant epochs and roll back to the best weights seen
early_stop = EarlyStopping(
    patience=7,
    restore_best_weights=True,
    **_track_val_auc,
)

# Write the model to disk only when validation AUC improves
checkpoint = ModelCheckpoint(
    'best_model.h5',
    save_best_only=True,
    **_track_val_auc,
)

In [None]:
# Phase 1: the backbone is frozen, so only the new classification head learns.
# Runs for a fixed 15 epochs; early stopping is not used in this phase.
print("Phase 1: Training top layers...")
_phase1_callbacks = [reduce_lr, checkpoint]
history_phase1 = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=15,
    class_weight=class_weights,
    callbacks=_phase1_callbacks,
)

In [None]:
# Phase 2: unfreeze the backbone and fine-tune the whole network
print("\nPhase 2: Fine-tuning entire model...")
base_model.trainable = True

# Recompile with a fresh optimizer at a tenth of the initial learning rate
_fine_tune_lr = LEARNING_RATE/10
model.compile(
    loss='binary_crossentropy',
    optimizer=keras.optimizers.Adam(learning_rate=_fine_tune_lr),
    metrics=['accuracy', keras.metrics.AUC(name='auc')],
)

# Same callbacks as phase 1, plus early stopping
_phase2_callbacks = [reduce_lr, early_stop, checkpoint]
history_phase2 = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=EPOCHS,
    class_weight=class_weights,
    callbacks=_phase2_callbacks,
)

## Training Visualization

In [None]:
# Stitch the per-epoch metrics of both training phases into one timeline
_tracked_metrics = ('loss', 'val_loss', 'auc', 'val_auc')
history = {
    _key: history_phase1.history[_key] + history_phase2.history[_key]
    for _key in _tracked_metrics
}

# One panel per metric:
# (train key, val key, train legend, val legend, title, y-axis label)
_panels = (
    ('loss', 'val_loss', 'Train Loss', 'Val Loss', 'Model Loss', 'Loss'),
    ('auc', 'val_auc', 'Train AUC', 'Val AUC', 'Model AUC', 'AUC'),
)

fig, axes = plt.subplots(1, 2, figsize=(15, 5))

for _ax, (_train_key, _val_key, _train_lbl, _val_lbl, _title, _ylabel) in zip(axes, _panels):
    _ax.plot(history[_train_key], label=_train_lbl)
    _ax.plot(history[_val_key], label=_val_lbl)
    _ax.set_title(_title)
    _ax.set_xlabel('Epoch')
    _ax.set_ylabel(_ylabel)
    _ax.legend()
    _ax.grid(True)

plt.tight_layout()
plt.show()

## Model Evaluation

In [None]:
# Restore the weights written by the checkpoint callback
model.load_weights('best_model.h5')

# Predicted probabilities for the validation set, plus hard labels obtained
# by thresholding at 0.5
val_predictions = model.predict(val_generator, verbose=1)
val_predictions_binary = (val_predictions > 0.5).astype(int).flatten()

# Ground-truth labels, in the same row order as the unshuffled generator
_val_labels = val_data['target'].values

# Threshold-free ranking quality
val_auc = roc_auc_score(_val_labels, val_predictions)
print(f"\nValidation AUC: {val_auc:.4f}")

# Confusion matrix of true vs. thresholded predicted labels
cm = confusion_matrix(_val_labels, val_predictions_binary)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()

# Per-class precision / recall / F1
_report = classification_report(
    _val_labels,
    val_predictions_binary,
    target_names=['Benign', 'Malignant'],
)
print("\nClassification Report:")
print(_report)

## Predictions on Test Set

In [None]:
# Test images get the same 1/255 scaling as training/validation, no augmentation
test_datagen = ImageDataGenerator(rescale=1./255)

# No labels exist for the test set (class_mode=None); order is kept fixed so
# predictions line up with the rows of test_df
_test_flow_options = dict(
    x_col='image_path',
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode=None,
    shuffle=False,
)
test_generator = test_datagen.flow_from_dataframe(test_df, **_test_flow_options)

# Predicted malignancy probability for every test image
test_predictions = model.predict(test_generator, verbose=1)

# Submission table: image identifier plus predicted probability
submission = test_df[['image_name']].copy()
submission['target'] = test_predictions.flatten()

submission.to_csv('submission.csv', index=False)
print("\nSubmission file created: submission.csv")
print(submission.head())

## Save Model

In [None]:
# Persist the final model twice: once as an .h5 file and once under an
# extension-less path (TensorFlow SavedModel format, recommended).
# NOTE(review): newer Keras releases may reject extension-less save paths —
# confirm against the installed TensorFlow/Keras version.
_exports = (
    ('melanoma_classifier_final.h5', "Model saved as melanoma_classifier_final.h5"),
    ('melanoma_classifier_saved_model', "Model saved as melanoma_classifier_saved_model/"),
)

for _export_path, _done_message in _exports:
    model.save(_export_path)
    print(_done_message)

## Model Analysis - Sample Predictions

In [None]:
# Visualize predictions for 8 randomly chosen validation images
sample_indices = np.random.choice(len(val_data), 8, replace=False)
sample_data = val_data.iloc[sample_indices]

fig, axes = plt.subplots(2, 4, figsize=(16, 8))
axes = axes.ravel()

for ax, (_, row) in zip(axes, sample_data.iterrows()):
    # Show the original image; the context manager closes the file handle
    # once the pixels have been drawn
    with Image.open(row['image_path']) as img:
        ax.imshow(img)

    # Build the model input. cv2.imread returns channels in BGR order, while
    # the Keras generators used for training load images as RGB, so convert
    # before predicting to keep the channels in the order the model learned.
    img_array = cv2.imread(row['image_path'])
    img_array = cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB)
    img_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
    img_array = img_array / 255.0  # same 1/255 scaling as the data generators
    img_array = np.expand_dims(img_array, axis=0)  # add the batch dimension
    pred = model.predict(img_array, verbose=0)[0][0]

    true_label = 'Malignant' if row['target'] == 1 else 'Benign'
    pred_label = 'Malignant' if pred > 0.5 else 'Benign'
    # Green title for a correct prediction, red for a wrong one
    color = 'green' if true_label == pred_label else 'red'

    ax.set_title(f"True: {true_label}\nPred: {pred_label} ({pred:.2f})", color=color)
    ax.axis('off')

plt.tight_layout()
plt.show()