In [1]:
# Import Required Libraries
import os
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
import seaborn as sns
import cv2
from PIL import Image
import warnings
# Keep notebook output readable by hiding library warnings
warnings.filterwarnings('ignore')

# Seed NumPy and TensorFlow with the same value so runs are repeatable
for apply_seed in (np.random.seed, tf.random.set_seed):
    apply_seed(42)

# Report the environment the notebook is running in
gpu_devices = tf.config.list_physical_devices('GPU')
print("All libraries imported successfully!")
print(f"TensorFlow version: {tf.__version__}")
print(f"GPU Available: {gpu_devices}")

ModuleNotFoundError: No module named 'tensorflow'


This notebook performs binary classification of skin cancer images (benign vs. malignant) using transfer learning with MobileNetV2.

## Load and Explore Dataset

In [None]:
# Define dataset paths
data_dir = Path("../data")

# Loading configuration shared by the training and validation splits
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
SEED = 42
VALIDATION_SPLIT = 0.2


def load_split(subset):
    """Load one split ('training' or 'validation') of the image folder.

    Both splits must be created with the same seed and validation_split so
    that they partition the files consistently and never overlap.

    Note: Keras requires `validation_split` and `subset` to be given together;
    passing a split fraction with `subset=None` raises a ValueError, which is
    why no separate "full dataset" load is attempted here.
    """
    return tf.keras.preprocessing.image_dataset_from_directory(
        data_dir,
        seed=SEED,
        image_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        validation_split=VALIDATION_SPLIT,
        subset=subset
    )


# Split into train and validation
train_dataset = load_split('training')
val_dataset = load_split('validation')

# class_names must be read here: the attribute only exists on the dataset
# object returned by the loader, not on datasets derived from it via map().
class_names = train_dataset.class_names
num_classes = len(class_names)

# The classifier built later has a single sigmoid output, so anything other
# than exactly two class folders would silently train on wrong labels.
assert num_classes == 2, (
    f"Expected exactly 2 class folders in {data_dir}, found {num_classes}: {class_names}"
)

print(f"Classes: {class_names}")
print(f"Number of classes: {num_classes}")
print(f"Training batches: {len(train_dataset)}")
print(f"Validation batches: {len(val_dataset)}")

In [None]:
# Show up to nine images from the first training batch in a 3x3 grid
fig = plt.figure(figsize=(12, 8))
for images, labels in train_dataset.take(1):
    shown = min(9, len(images))
    for position, (img, lbl) in enumerate(zip(images[:shown], labels[:shown]), start=1):
        ax = fig.add_subplot(3, 3, position)
        # Raw pixels are floats in 0-255 at this stage; cast for display
        ax.imshow(img.numpy().astype("uint8"))
        label_name = class_names[int(lbl.numpy())]
        ax.set_title(f"Label: {label_name}")
        ax.axis("off")
fig.suptitle("Sample Images from Dataset", fontsize=16)
fig.tight_layout()
plt.show()

# Count files per class folder (any file name containing a dot is counted)
print("\nDataset Statistics:")
benign_count = sum(1 for _ in (data_dir / "benign").glob("*.*"))
malignant_count = sum(1 for _ in (data_dir / "malignant").glob("*.*"))
total_count = benign_count + malignant_count
print(f"Benign images: {benign_count}")
print(f"Malignant images: {malignant_count}")
print(f"Total images: {total_count}")

## Data Preprocessing and Augmentation

In [None]:
# Normalize pixel values and create augmentation pipeline
def normalize(image, label):
    """Scale pixel intensities from 0-255 down to 0-1; the label passes through unchanged."""
    scaled_image = image / 255.0
    return scaled_image, label

# Apply normalization to datasets
# NOTE: these assignments rebind train_dataset/val_dataset, so re-running this
# cell without re-running the loading cell would divide by 255 a second time.
train_dataset = train_dataset.map(normalize).prefetch(tf.data.AUTOTUNE)
val_dataset = val_dataset.map(normalize).prefetch(tf.data.AUTOTUNE)

# Create data augmentation pipeline
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.2),
    layers.RandomZoom(0.2),
    # The images are already scaled to 0-1 at this point. RandomBrightness
    # defaults to value_range=(0, 255), which would shift pixels by up to
    # 0.2 * 255 and saturate every image, so the range must be stated.
    layers.RandomBrightness(0.2, value_range=(0.0, 1.0)),
], name="data_augmentation")

# Apply augmentation to training data only (validation stays untouched)
train_dataset_augmented = train_dataset.map(
    lambda x, y: (data_augmentation(x, training=True), y),
    num_parallel_calls=tf.data.AUTOTUNE
).prefetch(tf.data.AUTOTUNE)

print("Data augmentation pipeline created successfully!")
print("\nAugmentation layers:")
print("- RandomFlip (horizontal)")
print("- RandomRotation (0.2)")
print("- RandomZoom (0.2)")
print("- RandomBrightness (0.2)")

In [None]:
# Show up to nine augmented images from the first training batch
fig = plt.figure(figsize=(12, 8))
for images, labels in train_dataset_augmented.take(1):
    shown = min(9, len(images))
    for position, (img, lbl) in enumerate(zip(images[:shown], labels[:shown]), start=1):
        ax = fig.add_subplot(3, 3, position)
        # Pixels are floats here, so they are displayed without a uint8 cast
        ax.imshow(img.numpy())
        label_name = class_names[int(lbl.numpy())]
        ax.set_title(f"Augmented: {label_name}")
        ax.axis("off")
fig.suptitle("Sample Augmented Images", fontsize=16)
fig.tight_layout()
plt.show()

## Build Transfer Learning Model with MobileNetV2

In [None]:
# Load pretrained MobileNetV2 model (feature extractor only, no ImageNet head)
base_model = MobileNetV2(
    input_shape=(224, 224, 3),
    include_top=False,
    weights="imagenet"
)

# Freeze base model weights so only the new classification head is trained
base_model.trainable = False

print(f"Base model loaded: MobileNetV2")
print(f"Base model parameters: {base_model.count_params():,}")

# Build classification head
model = models.Sequential([
    layers.Input(shape=(224, 224, 3)),
    # The data pipeline feeds images scaled to 0-1, but the ImageNet weights
    # of MobileNetV2 were trained on inputs in [-1, 1]. Remapping inside the
    # model (x * 2 - 1) keeps the external input contract at 0-1 and makes the
    # saved model self-contained.
    layers.Rescaling(scale=2.0, offset=-1.0, name='to_mobilenet_range'),
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation='relu', name='dense_1'),
    layers.Dropout(0.3, name='dropout_1'),
    # Single sigmoid unit: the output is the probability of class index 1
    layers.Dense(1, activation='sigmoid', name='output')
], name='skin_cancer_classifier')

print("\n" + "="*50)
print("Model Architecture Summary")
print("="*50)
model.summary()

In [None]:
# Metrics reported during training and validation
tracked_metrics = [
    'accuracy',
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
]

# Binary cross-entropy pairs with the single sigmoid output unit
optimizer = Adam(learning_rate=1e-4)
model.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=tracked_metrics
)

for summary_line in (
    "\nModel compiled successfully!",
    "Loss: binary_crossentropy",
    "Optimizer: Adam (lr=1e-4)",
    "Metrics: Accuracy, Precision, Recall",
):
    print(summary_line)

## Train the Model

In [None]:
# Both callbacks react to the validation loss and log what they do
shared_callback_args = dict(monitor='val_loss', verbose=1)

# Stop once validation loss has not improved for 5 epochs and roll back to
# the best weights seen so far
early_stopping = EarlyStopping(
    patience=5,
    restore_best_weights=True,
    **shared_callback_args
)

# Halve the learning rate after 3 stagnant epochs, never going below 1e-7
reduce_lr = ReduceLROnPlateau(
    factor=0.5,
    patience=3,
    min_lr=1e-7,
    **shared_callback_args
)

print("\n".join([
    "Callbacks configured:",
    "- EarlyStopping (patience=5, monitor=val_loss)",
    "- ReduceLROnPlateau (factor=0.5, patience=3)",
]))

In [None]:
# Upper bound on epochs; early stopping may end training sooner
EPOCHS = 25
banner = "=" * 50

print("Starting model training...")
print(banner)

# Train on the augmented stream, validate on the un-augmented split
training_callbacks = [early_stopping, reduce_lr]
history = model.fit(
    x=train_dataset_augmented,
    epochs=EPOCHS,
    validation_data=val_dataset,
    callbacks=training_callbacks,
    verbose=1
)

print(banner)
print("Training completed!")

## Evaluate Model Performance

In [None]:
# Collect true labels and predictions
# Labels and predictions are gathered in the SAME pass over the dataset so
# they stay aligned even if the dataset reshuffles between iterations.
batch_labels = []
batch_probs = []

for images, labels in val_dataset:
    # predict_on_batch runs a single forward pass; calling model.predict()
    # inside a Python loop rebuilds its data pipeline on every call.
    batch_probs.append(np.asarray(model.predict_on_batch(images)).reshape(-1))
    batch_labels.append(labels.numpy().reshape(-1))

y_true = np.concatenate(batch_labels).astype(int)
y_pred_prob = np.concatenate(batch_probs)
# Probabilities above 0.5 are assigned to class index 1
y_pred = (y_pred_prob > 0.5).astype(int)

# Calculate metrics (class index 1 is treated as the positive class).
# zero_division=0 states explicitly what happens when a class is never
# predicted instead of relying on a warning that this notebook suppresses.
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, zero_division=0)
recall = recall_score(y_true, y_pred, zero_division=0)
f1 = f1_score(y_true, y_pred, zero_division=0)

print("\n" + "="*50)
print("MODEL EVALUATION METRICS")
print("="*50)
print(f"Accuracy:  {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"F1-Score:  {f1:.4f}")
print("="*50)

# Print classification report
# Passing the full label list keeps the report aligned with class_names even
# if one class happens to be absent from the validation labels/predictions.
label_ids = list(range(len(class_names)))
print("\nClassification Report:")
print(classification_report(
    y_true,
    y_pred,
    labels=label_ids,
    target_names=class_names,
    zero_division=0
))

In [None]:
# Confusion matrix: rows are true classes, columns are predicted classes
cm = confusion_matrix(y_true, y_pred)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    cbar_kws={'label': 'Count'},
    ax=ax,
)
ax.set_title('Confusion Matrix - Validation Set', fontsize=14, fontweight='bold')
ax.set_ylabel('True Label', fontsize=12)
ax.set_xlabel('Predicted Label', fontsize=12)
fig.tight_layout()
plt.show()

# Also emit the raw counts as text
print("\nConfusion Matrix:")
print(cm)

In [None]:
# Plot training history: one panel per tracked quantity, train vs validation
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# (history key, human-readable name) for the left and right panel
panels = [('loss', 'Loss'), ('accuracy', 'Accuracy')]

for ax, (history_key, pretty_name) in zip(axes, panels):
    ax.plot(history.history[history_key],
            label=f'Training {pretty_name}', linewidth=2)
    ax.plot(history.history[f'val_{history_key}'],
            label=f'Validation {pretty_name}', linewidth=2)
    ax.set_xlabel('Epoch', fontsize=12)
    ax.set_ylabel(pretty_name, fontsize=12)
    ax.set_title(f'Training vs Validation {pretty_name}', fontsize=14, fontweight='bold')
    ax.legend(fontsize=11)
    ax.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

print("Training and validation curves displayed.")

## Save Model and Labels

In [None]:
# Both artifacts live side by side in the model directory
model_dir = Path("../model")
model_save_path = model_dir / "model.h5"
labels_save_path = model_dir / "labels.txt"

# Make sure the target directory exists before writing anything
model_save_path.parent.mkdir(parents=True, exist_ok=True)

# Save model
model.save(model_save_path)
print(f"Model saved to: {model_save_path}")

# Save labels: one class name per line, in class-index order
labels_save_path.write_text("".join(f"{label}\n" for label in class_names))
print(f"Labels saved to: {labels_save_path}")

print("\nModel and labels saved successfully!")

## Generate Grad-CAM Visualizations

In [None]:
# Grad-CAM Implementation
def _forward_with_activation(model, batch, layer_name):
    """Run `batch` through `model`; return (activation of `layer_name`, model output).

    For a Sequential model the layers are applied one by one, so the target
    may be a direct layer or a layer inside a nested sub-model (such as a
    pretrained base network). For any other model the target must be a direct
    layer of `model`.
    """
    if not isinstance(model, tf.keras.Sequential):
        # Functional model: a two-output view of the same graph is enough.
        grad_model = tf.keras.models.Model(
            model.inputs,
            [model.get_layer(layer_name).output, model.output]
        )
        activation, predictions = grad_model(batch, training=False)
        return activation, predictions

    x = batch
    activation = None
    for layer in model.layers:
        if isinstance(layer, tf.keras.layers.InputLayer):
            continue

        holds_target = (
            activation is None
            and layer.name != layer_name
            and isinstance(layer, tf.keras.Model)
            and any(inner.name == layer_name for inner in layer.layers)
        )
        if holds_target:
            # The target sits inside a nested model. Its tensors belong to the
            # nested model's own graph, so the extractor must be built from
            # the nested model's inputs, not from the outer model's inputs.
            target_output = layer.get_layer(layer_name).output
            if target_output is layer.output:
                # Target is the nested model's final output: one call suffices.
                x = layer(x, training=False)
                activation = x
            else:
                extractor = tf.keras.models.Model(
                    layer.inputs, [target_output, layer.output]
                )
                activation, x = extractor(x, training=False)
        else:
            x = layer(x, training=False)
            if activation is None and layer.name == layer_name:
                activation = x

    if activation is None:
        raise ValueError(
            f"Layer '{layer_name}' was not found in model '{model.name}' "
            "or in any of its nested sub-models."
        )
    return activation, x


def generate_gradcam(model, img_array, layer_name):
    """
    Generate Grad-CAM heatmap for model prediction visualization

    Args:
        model: Trained Keras model whose output column 0 is the score to explain
        img_array: Single input image without batch axis, preprocessed exactly
            like the data the model was trained on
        layer_name: Name of convolutional layer for visualization. For a
            Sequential model this may also be a layer inside a nested
            sub-model (e.g. a pretrained base network).

    Returns:
        heatmap: 2-D Grad-CAM heatmap scaled to 0-1. Its size is the spatial
            size of the chosen layer's feature map, NOT the input image size.
            It is all zeros when the layer has no positive contribution.

    Raises:
        ValueError: If the layer cannot be found or no gradient reaches it.
    """
    # Add the batch axis and use a float tensor so the tape can watch it
    batch = tf.cast(np.expand_dims(img_array, axis=0), tf.float32)

    # Record operations for automatic differentiation
    with tf.GradientTape() as tape:
        # Only trainable variables are watched automatically. With a frozen
        # base network nothing upstream of the head would be recorded, and the
        # gradient w.r.t. the feature map would come back as None.
        tape.watch(batch)
        conv_outputs, predictions = _forward_with_activation(model, batch, layer_name)
        loss = predictions[:, 0]

    # Compute gradients of the score w.r.t. the feature map
    grads = tape.gradient(loss, conv_outputs)
    if grads is None:
        raise ValueError(
            f"No gradient flows from the model output to layer '{layer_name}'."
        )
    output = conv_outputs[0]
    grads = grads[0]

    # Compute weights (average pooling of gradients over the spatial axes)
    weights = tf.reduce_mean(grads, axis=(0, 1))

    # Generate heatmap: channel-weighted sum of the feature map
    heatmap = tf.reduce_sum(tf.multiply(weights, output), axis=-1)
    heatmap = tf.maximum(heatmap, 0)  # ReLU

    # Normalize to 0-1; skip the division for an all-zero map to avoid NaNs
    peak = tf.math.reduce_max(heatmap)
    if peak > 0:
        heatmap = heatmap / peak

    return heatmap.numpy()

def overlay_gradcam(img_array, heatmap, alpha=0.5):
    """
    Overlay Grad-CAM heatmap on original image

    Args:
        img_array: Original RGB image with values in 0-1, shape (H, W, 3)
        heatmap: 2-D Grad-CAM heatmap with values in 0-1; any spatial size,
            it is resized to the image's height and width
        alpha: Transparency of heatmap overlay

    Returns:
        overlaid_img: uint8 RGB image (H, W, 3) with overlaid heatmap
    """
    image = np.asarray(img_array)
    height, width = image.shape[:2]

    # Replace NaN/inf (e.g. from a degenerate normalization) and clamp to 0-1
    # so the uint8 conversion below is well defined.
    heat = np.nan_to_num(
        np.asarray(heatmap, dtype=np.float32), nan=0.0, posinf=1.0, neginf=0.0
    )
    heat = np.clip(heat, 0.0, 1.0)

    # Resize heatmap to match image size (cv2 expects (width, height))
    heatmap_resized = cv2.resize(heat, (width, height))

    # Create color heatmap (red for high activation)
    heatmap_colored = cv2.applyColorMap(
        (heatmap_resized * 255).astype(np.uint8),
        cv2.COLORMAP_JET
    )
    # OpenCV produces BGR; convert so it matches the RGB input image
    heatmap_colored = cv2.cvtColor(heatmap_colored, cv2.COLOR_BGR2RGB)

    # Overlay on image; clip first so out-of-range pixels cannot wrap around
    # when converted to uint8
    img_for_overlay = (np.clip(image, 0.0, 1.0) * 255).astype(np.uint8)
    overlaid = cv2.addWeighted(img_for_overlay, 1 - alpha, heatmap_colored, alpha, 0)

    return overlaid

print("Grad-CAM functions defined successfully!")

In [None]:
# Generate Grad-CAM example visualization
# Get a sample image from validation set
sample_images, sample_labels = next(iter(val_dataset.take(1)))
sample_img = sample_images[0].numpy()
sample_label = sample_labels[0].numpy()

# Generate Grad-CAM
# Target the last feature map of the MobileNetV2 base. The name is looked up
# from base_model instead of being hard-coded: the layer lives inside the
# nested base model and no layer called 'mobilenetv2_1_out_relu' exists
# (the final activation is named 'out_relu' in current Keras releases).
layer_name = base_model.layers[-1].name
heatmap = generate_gradcam(model, sample_img, layer_name)
overlaid_img = overlay_gradcam(sample_img, heatmap, alpha=0.4)

# Get prediction
prediction = model.predict(np.expand_dims(sample_img, axis=0), verbose=0)[0][0]
pred_class = class_names[int(prediction > 0.5)]
# Confidence is reported for the predicted class, whichever side of 0.5 it is
pred_confidence = (prediction * 100) if prediction > 0.5 else ((1 - prediction) * 100)

# Visualize
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

# Original image
axes[0].imshow(sample_img)
axes[0].set_title('Original Image', fontsize=12, fontweight='bold')
axes[0].axis('off')

# Heatmap
axes[1].imshow(heatmap, cmap='jet')
axes[1].set_title('Grad-CAM Heatmap', fontsize=12, fontweight='bold')
axes[1].axis('off')

# Overlaid image
axes[2].imshow(overlaid_img)
axes[2].set_title(f'Overlaid (Pred: {pred_class} - {pred_confidence:.2f}%)', 
                  fontsize=12, fontweight='bold')
axes[2].axis('off')

plt.tight_layout()
gradcam_path = Path("../model/gradcam_example.png")
# Create the output folder here too, so this cell does not depend on the
# model-saving cell having been run first
gradcam_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(gradcam_path, dpi=150, bbox_inches='tight')
print(f"Grad-CAM example saved to: {gradcam_path}")
plt.show()