<h1 style = 'text-align: center'>Skin Cancer Detection Using Machine Learning</h1>

<h2 style = 'text-align: center'>Environment Setup and Library Imports</h2>

In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

In [None]:
! pip install kaggle

In [None]:
# Mount Google Drive into the Colab runtime; the Kaggle credentials are copied from it below
from google.colab import drive

drive.mount(mountpoint = '/content/drive')

In [None]:
! mkdir ~/.kaggle

In [None]:
# Copy the Kaggle API credentials file from the mounted Drive into ~/.kaggle
!cp /content/drive/MyDrive/kaggle_API_credentials/kaggle.json ~/.kaggle/kaggle.json

In [None]:
# Restrict the credentials file to read/write for the owner only
! chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the melanoma skin cancer dataset archive with the Kaggle CLI
! kaggle datasets download -d hasnainjaved/melanoma-skin-cancer-dataset-of-10000-images

In [None]:
! unzip /content/melanoma-skin-cancer-dataset-of-10000-images.zip

In [None]:
import numpy as np
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
from tensorflow.keras import backend
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.applications import ResNet101V2, DenseNet121, InceptionV3
from tensorflow.keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, Flatten, Dense, Dropout

<h2 style = 'text-align : center'>Data Preprocessing</h2>

<h3 style = 'text-align : center'>Define data directory</h3>

In [None]:
# Folder with the training images (the validation split is also drawn from it)
data_dir = '/content/melanoma_cancer_dataset/train'

# Folder with the held-out test images
test_dir = '/content/melanoma_cancer_dataset/test'

<h3 style = 'text-align : center'>Define batch size and image size</h3>

In [None]:
# Number of images per batch (shared by the training, validation and test loaders)
batch_size = 64

# Every image is resized to a square of this many pixels per side
img_height = img_width = 224

<h3 style = 'text-align : center'>Load the training, validation and test datasets</h3>

In [None]:
# Training subset: the 'training' side of a 20% validation split over data_dir.
# The validation loader uses the same seed and split fraction.

train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    directory = data_dir,
    validation_split = 0.2,
    subset = 'training',
    seed = 42,
    image_size = (img_height, img_width),
    batch_size = batch_size,
)

In [None]:
# Validation subset: the 'validation' side of the same 20% split over data_dir
# (same seed and split fraction as the training loader).

val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    directory = data_dir,
    validation_split = 0.2,
    subset = 'validation',
    seed = 42,
    image_size = (img_height, img_width),
    batch_size = batch_size,
)

In [None]:
# Test images are only rescaled from the 0 - 255 range to 0 - 1 (no augmentation)

test_generator = ImageDataGenerator(rescale = 1.0 / 255.0)

# Binary labels, fixed (unshuffled) order so that predictions can be compared
# position by position with test_ds.labels

test_ds = test_generator.flow_from_directory(
    directory = test_dir,
    target_size = (img_height, img_width),
    batch_size = batch_size,
    class_mode = 'binary',
    shuffle = False,
)

<h3 style = 'text-align : center'>Data Visualisation</h3>

In [None]:
# Get the class names from the training dataset.
# NOTE(review): this has to run before the augmentation cell rebinds train_ds to a
# mapped dataset, which presumably no longer exposes `class_names` — confirm.

class_names = train_ds.class_names

# Print the class names

print(class_names)

In [None]:
# Count plot of the classes in the training split

# Walk the dataset batch by batch and keep only the labels (images are ignored)
labels = []
for _, label_batch in train_ds:
    labels += label_batch.numpy().tolist()

labels_array = np.array(labels)

# Distinct class indices and the number of samples carrying each one
unique_labels, label_counts = np.unique(labels_array, return_counts = True)

# One bin per class index, centred on the index itself
bin_edges = np.arange(labels_array.min(), labels_array.max() + 2) - 0.5

plt.figure(figsize = (8, 6))
plt.hist(labels_array, bins = bin_edges, rwidth = 0.8, alpha = 0.75,
         color = 'pink', edgecolor = 'k')

plt.xlabel("Dataset Classes")
plt.ylabel("Count")
plt.xticks(unique_labels, class_names)
plt.title("Count Plot of the Classes in the Train Set", y = 1.05)

# Write the sample count just above each bar
for class_index, count in zip(unique_labels, label_counts):
    plt.text(class_index, count, str(count), ha = 'center', va = 'bottom')

plt.show()

In [None]:
# Show the first 16 images of one training batch in a 4 x 4 grid, titled with their class

plt.figure(figsize = (10, 10))

# take(1) yields a single batch of (images, labels)
for images, labels in train_ds.take(1):

  for position in range(1, 17):

    # Grid cells are numbered from 1, batch entries from 0
    plt.subplot(4, 4, position)

    # Cast to uint8 so imshow interprets the values on the 0 - 255 scale
    plt.imshow(images[position - 1].numpy().astype("uint8"))

    # Look up the class name that belongs to this image's label index
    plt.title(class_names[labels[position - 1]])

    plt.axis("off")

plt.show()

<h3 style = 'text-align : center'>Data Augmentation</h3>

In [None]:
# Augmentation pipeline for the training images

preprocessing = tf.keras.layers.experimental.preprocessing

data_augmentation = tf.keras.Sequential([
    preprocessing.RandomFlip('horizontal'),   # Random left-right flip
    preprocessing.RandomRotation(0.2),        # Random rotation; the factor is a fraction of a full turn (0.2 -> up to about +/-72 degrees)
    preprocessing.RandomZoom(0.2),            # Random zoom in or out by up to 20%
    preprocessing.Rescaling(1.0 / 255),       # Rescale the pixel values to range 0 - 1
])

# Apply the pipeline to every training batch.
# NOTE: train_ds is rebound here, so re-running this cell stacks a second round of
# augmentation and rescaling on top of the first; re-create train_ds before re-running.

train_ds = train_ds.map(lambda images, targets: (data_augmentation(images, training = True), targets))

<h3 style = 'text-align : center'>Normalization</h3>

In [None]:
# The validation images are only rescaled to the 0 - 1 range (no augmentation)

val_normalization = tf.keras.layers.experimental.preprocessing.Rescaling(1./255)

# Apply the rescaling to the validation dataset.
# NOTE: val_ds is rebound here, so re-running this cell rescales the data a second time.

val_ds = val_ds.map(lambda images, targets: (val_normalization(images), targets))

<h3 style = 'text-align : center'>Class Weights</h3>

In [None]:
# Compute balanced class weights from the training labels

# Gather the labels of every training batch into one flat list
labels = np.concatenate([label_batch.numpy() for _, label_batch in train_ds]).tolist()

# Class indices that occur in the training split
label_values = np.unique(labels)

# One weight per class index, computed by scikit-learn's "balanced" heuristic
weight_values = compute_class_weight(
    class_weight = "balanced",
    classes = label_values,
    y = labels,
)

# Dictionary mapping each class index (not the class name) to its weight
class_weights = dict(zip(label_values, weight_values))

class_weights

## CNN Model

#### Define the CNN Architecture

In [None]:
# Build the baseline CNN one layer at a time

model = Sequential()

# First convolution block; it also declares the shape of the input images
model.add(Conv2D(16, 3, padding = 'same', activation = 'relu', input_shape = (img_height, img_width, 3)))
model.add(MaxPooling2D())

# Two further convolution blocks with 32 filters each
for n_filters in (32, 32):
    model.add(Conv2D(n_filters, 3, padding = 'same', activation = 'relu'))
    model.add(MaxPooling2D())

# Flatten the feature maps and classify with a small dense head
model.add(Flatten())
model.add(Dense(64, activation = 'relu'))
model.add(Dropout(0.5))   # dropout regularisation

# Single sigmoid unit for the binary output
model.add(Dense(1, activation = 'sigmoid'))

# Print the model's architecture summary
model.summary()

#### Train the CNN Model

In [None]:
# Configure training: Adam optimizer (learning rate 1e-5), binary cross-entropy loss,
# accuracy as the reported metric

model.compile(
    optimizer = Adam(learning_rate = 1e-5),
    loss = 'BinaryCrossentropy',
    metrics = ['accuracy'],
)

In [None]:
# Number of passes over the training data
epochs = 50

# Fit on train_ds, validate on val_ds, and weight the loss per class
history = model.fit(
    x = train_ds,
    epochs = epochs,
    validation_data = val_ds,
    class_weight = class_weights,
)

# Per-epoch curves recorded during training
metrics_log = history.history
acc, val_acc = metrics_log['accuracy'], metrics_log['val_accuracy']
loss, val_loss = metrics_log['loss'], metrics_log['val_loss']

# Epoch indices used as x-axis values by the plots
epochs_range = range(epochs)

#### CNN Model Visualisation

In [None]:
# Plot the CNN model's training and validation accuracy per epoch

plt.figure(figsize = (8, 8))

plt.plot(epochs_range, acc, label = 'Training Accuracy')
plt.plot(epochs_range, val_acc, label = 'Validation Accuracy')

# Axis labels so the figure can be read without the surrounding code
plt.xlabel('Epoch')
plt.ylabel('Accuracy')

plt.legend(loc = 'lower right')
plt.title('CNN Model Training and Validation Accuracy')

# Display the plot

plt.show()

In [None]:
# Plot the CNN model's training and validation loss per epoch

plt.figure(figsize = (8, 8))

plt.plot(epochs_range, loss, label = 'Training Loss')
plt.plot(epochs_range, val_loss, label = 'Validation Loss')

# Axis labels so the figure can be read without the surrounding code
plt.xlabel('Epoch')
plt.ylabel('Loss')

plt.legend(loc = 'upper right')

# Title spelled consistently with the accuracy plot ("CNN Model Training ...")
plt.title('CNN Model Training and Validation Loss')

# Display the plot

plt.show()

In [None]:
# Ground-truth class indices of the test images, as stored on the test iterator
y_true = test_ds.labels

# Display names used in the report and the confusion matrix.
# NOTE(review): assumes class index 0 is benign and 1 is malignant — confirm
# against test_ds.class_indices.
class_labels = ['Benign', 'Malignant']

In [None]:
# Sigmoid outputs of the trained CNN for every test image
y_prob = model.predict(test_ds)

# Round each output to 0 or 1 and flatten to a 1-D array of predicted class indices
y_pred = np.round(y_prob).flatten()

In [None]:
# Per-class precision, recall and F1 of the CNN model on the test set

print(classification_report(y_true = y_true, y_pred = y_pred, target_names = class_labels))

In [None]:
# Confusion matrix of the CNN model on the test set, drawn as an annotated heatmap

cnn_cm = confusion_matrix(y_true = y_true, y_pred = y_pred)

plt.figure(figsize = (8, 6))

# Integer cell annotations, class names on both axes
heatmap_options = dict(annot = True, fmt = "d", cmap = "Blues", cbar = True,
                       xticklabels = class_labels, yticklabels = class_labels)
sns.heatmap(cnn_cm, **heatmap_options)

plt.title('CNN Model Confusion Matrix')
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')

plt.show()

## Pretrained Models

In [None]:
# Helper class wrapping the two-phase transfer-learning workflow used for every pretrained model

class Finetune_pretrained_models:
    """Train a binary classifier on top of an ImageNet-pretrained Keras backbone.

    Intended call order:
        freeze_pretrained_model_layers() -> define_pretrained_model_architecture()
        -> fit_model() -> unfreeze_pretrained_model_layers() -> fit_fine_tuned_model()
    followed by plot_accuracy_and_loss() and
    classification_report_and_confusion_metrics().

    The fit methods read the notebook-level globals ``train_ds``, ``val_ds`` and
    ``class_weights``.

    NOTE(review): the notebook feeds every backbone images rescaled to 0 - 1; each
    tf.keras.applications model ships its own ``preprocess_input`` — confirm that
    skipping it is intended.

    Attributes:
        base_model: The backbone instantiated without its classification top.
        input_shape: Image shape the backbone and the full model are built for.
        model: The full Keras model (None until the architecture is defined).
        history: History of the frozen-backbone phase (None until fit_model()).
        fine_tuned_history: History of the fine-tuning phase
            (None until fit_fine_tuned_model()).
        model_name: Name of the backbone constructor, used in plot titles.
    """

    def __init__(self, base_model, input_shape = (224, 224, 3)):
        """Instantiate the backbone with ImageNet weights.

        Args:
            base_model: A constructor such as ``tf.keras.applications.VGG16`` that
                accepts ``weights``, ``include_top`` and ``input_shape``.
            input_shape: (height, width, channels) of the input images.
        """
        # Clears the Keras backend session before building a new model
        tf.keras.backend.clear_session()

        self.input_shape = tuple(input_shape)
        self.base_model = base_model(weights = 'imagenet', include_top = False,
                                     input_shape = self.input_shape)
        self.model = None
        self.history = None
        self.fine_tuned_history = None
        # Remember which backbone this is so that plots can be titled correctly
        self.model_name = getattr(base_model, '__name__', None)

    @property
    def finetuned_history(self):
        """Backward-compatible alias for ``fine_tuned_history``."""
        return self.fine_tuned_history

    def _require_model(self):
        """Raise a clear error when the full model has not been defined yet."""
        if self.model is None:
            raise RuntimeError('Call define_pretrained_model_architecture() first.')

    def _compile(self, learning_rate):
        """Compile the full model with Adam, binary cross-entropy and accuracy."""
        self.model.compile(
            optimizer = Adam(learning_rate),
            loss = 'BinaryCrossentropy',
            metrics = ['accuracy']
        )

    def _merged_metric(self, key):
        """Return one metric's per-epoch values across both training phases."""
        merged = list(self.history.history[key])
        if self.fine_tuned_history is not None:
            merged += self.fine_tuned_history.history[key]
        return merged

    def freeze_pretrained_model_layers(self):
        """Mark the backbone as non-trainable."""
        self.base_model.trainable = False

    def unfreeze_pretrained_model_layers(self, learning_rate):
        """Mark the backbone as trainable again and recompile for fine-tuning.

        Args:
            learning_rate: Learning rate for the Adam optimizer used while fine-tuning.

        Raises:
            RuntimeError: If the architecture has not been defined yet.
        """
        self._require_model()

        self.base_model.trainable = True

        self.model.summary()

        # Recompile after changing `trainable`, with the fine-tuning learning rate
        self._compile(learning_rate)

    def define_pretrained_model_architecture(self, dropout_value, learning_rate):
        """Stack pooling, dropout and a sigmoid unit on the backbone, then compile.

        Args:
            dropout_value: Dropout rate applied before the output layer.
            learning_rate: Learning rate for the Adam optimizer.

        Returns:
            Whatever ``model.summary()`` returns (the summary itself is printed).
        """
        inputs = tf.keras.Input(shape = self.input_shape)

        # The backbone is always called with training = False
        x = self.base_model(inputs, training = False)
        x = tf.keras.layers.GlobalAveragePooling2D()(x)
        x = tf.keras.layers.Dropout(dropout_value)(x)

        outputs = tf.keras.layers.Dense(1, activation = 'sigmoid')(x)

        self.model = tf.keras.Model(inputs, outputs)

        self._compile(learning_rate)

        return self.model.summary()

    def fit_model(self, epochs):
        """Train on the global train_ds / val_ds and store the result in ``history``.

        Args:
            epochs: Number of epochs to train for.
        """
        self._require_model()
        self.history = self.model.fit(
            train_ds,
            epochs = epochs,
            class_weight = class_weights,
            validation_data = val_ds
        )

    def fit_fine_tuned_model(self, epochs):
        """Train again (after unfreezing) and store the result in ``fine_tuned_history``.

        Args:
            epochs: Number of fine-tuning epochs.
        """
        self._require_model()
        self.fine_tuned_history = self.model.fit(
            train_ds,
            epochs = epochs,
            class_weight = class_weights,
            validation_data = val_ds,
        )

    def plot_accuracy_and_loss(self, model_name, acc_y_lower_lim, loss_y_upper_limit):
        """Plot accuracy and loss curves over both training phases.

        Args:
            model_name: Name shown in the subplot titles.
            acc_y_lower_lim: Lower y-limit of the accuracy subplot (upper limit is 1).
            loss_y_upper_limit: Upper y-limit of the loss subplot (lower limit is 0).

        Raises:
            RuntimeError: If fit_model() has not been run yet.
        """
        if self.history is None:
            raise RuntimeError('Call fit_model() before plotting the training curves.')

        # The first phase occupies x positions 0 .. n-1, so fine-tuning starts at n;
        # draw the marker just before that position.
        fine_tune_start = len(self.history.epoch) - 0.15
        show_marker = self.fine_tuned_history is not None

        # (history key, label text, y-limits, legend location) for each subplot
        panels = [
            ('accuracy', 'Accuracy', [acc_y_lower_lim, 1], 'lower right'),
            ('loss', 'Loss', [0, loss_y_upper_limit], 'upper right'),
        ]

        plt.figure(figsize = (8, 8))

        for row, (key, label, y_limits, legend_loc) in enumerate(panels, start = 1):
            plt.subplot(2, 1, row)
            plt.plot(self._merged_metric(key), label = f'Training {label}')
            plt.plot(self._merged_metric(f'val_{key}'), label = f'Validation {label}')

            plt.ylim(y_limits)
            if show_marker:
                plt.plot([fine_tune_start, fine_tune_start],
                         plt.ylim(), label = 'Start Fine Tuning')
            plt.legend(loc = legend_loc)
            plt.title(f'Training and Validation {label} for {model_name}')

        plt.xlabel('epoch')

        plt.show()

    def classification_report_and_confusion_metrics(self, test_ds):
        """Print a classification report and plot the confusion matrix.

        Args:
            test_ds: Test iterator exposing ``labels``; it is also passed to
                ``model.predict``.
        """
        self._require_model()

        y_true = test_ds.labels

        class_labels = ['Benign', 'Malignant']

        # Round the sigmoid outputs to 0 / 1 and flatten to a 1-D array
        y_pred = np.round(self.model.predict(test_ds)).flatten()

        print(classification_report(y_true, y_pred, target_names = class_labels, digits = 4))

        cm = confusion_matrix(y_true, y_pred)

        plt.figure(figsize = (10, 8))

        sns.heatmap(cm, annot = True, fmt = "d", cmap = "Blues", cbar = True,
                    xticklabels = class_labels, yticklabels = class_labels)

        # Title the matrix with the backbone's name rather than a fixed "CNN Model"
        title_name = self.model_name or 'Pretrained Model'
        plt.title(f'{title_name} Confusion Matrix')
        plt.xlabel('Predicted Labels')
        plt.ylabel('True Labels')

        plt.show()

#### VGG16

In [None]:
# Wrap the VGG16 backbone in the fine-tuning helper

VGG16 = Finetune_pretrained_models(base_model = tf.keras.applications.VGG16)

In [None]:
# Freeze the VGG16 backbone (marks it as non-trainable) for the first training phase

VGG16.freeze_pretrained_model_layers()

In [None]:
# Build and compile the VGG16-based classifier (dropout 0.2, learning rate 1e-4) and print its summary

VGG16.define_pretrained_model_architecture(dropout_value = 0.2, learning_rate = 1e-4)

In [None]:
# First phase: train the VGG16 model for 10 epochs with the backbone frozen

VGG16.fit_model(epochs = 10)

In [None]:
# Unfreeze the VGG16 backbone and recompile with a much smaller learning rate

VGG16.unfreeze_pretrained_model_layers(learning_rate = 1e-7)

In [None]:
# Second phase: fine-tune the unfrozen VGG16 model for 10 more epochs

VGG16.fit_fine_tuned_model(epochs = 10)

In [None]:
# Accuracy and loss curves of both VGG16 training phases

VGG16.plot_accuracy_and_loss(model_name = 'VGG16', acc_y_lower_lim = 0.50, loss_y_upper_limit = 1.0)

In [None]:
# Evaluate VGG16 on the test set: classification report plus confusion-matrix heatmap

VGG16.classification_report_and_confusion_metrics(test_ds = test_ds)

#### InceptionV3

In [None]:
# Wrap the InceptionV3 backbone in the fine-tuning helper.
# NOTE: this rebinds the name InceptionV3, shadowing the class imported from
# tensorflow.keras.applications in the import cell.

InceptionV3 = Finetune_pretrained_models(base_model = tf.keras.applications.InceptionV3)

In [None]:
# Freeze the InceptionV3 backbone (marks it as non-trainable) for the first training phase

InceptionV3.freeze_pretrained_model_layers()

In [None]:
# Build and compile the InceptionV3-based classifier (dropout 0.2, learning rate 1e-4) and print its summary

InceptionV3.define_pretrained_model_architecture(dropout_value = 0.2, learning_rate = 1e-4)

In [None]:
# First phase: train the InceptionV3 model for 10 epochs with the backbone frozen

InceptionV3.fit_model(epochs = 10)

In [None]:
# Unfreeze the InceptionV3 backbone and recompile with a much smaller learning rate

InceptionV3.unfreeze_pretrained_model_layers(learning_rate = 1e-7)

In [None]:
# Second phase: fine-tune the unfrozen InceptionV3 model for 10 more epochs

InceptionV3.fit_fine_tuned_model(epochs = 10)

In [None]:
# Accuracy and loss curves of both InceptionV3 training phases

InceptionV3.plot_accuracy_and_loss(model_name = 'InceptionV3', acc_y_lower_lim = 0.50, loss_y_upper_limit = 1.0)

In [None]:
# Evaluate InceptionV3 on the test set: classification report plus confusion-matrix heatmap

InceptionV3.classification_report_and_confusion_metrics(test_ds = test_ds)

#### ResNet101

In [None]:
# Wrap the ResNet101 backbone in the fine-tuning helper

ResNet101 = Finetune_pretrained_models(base_model = tf.keras.applications.ResNet101)

In [None]:
# Freeze the ResNet101 backbone (marks it as non-trainable) for the first training phase

ResNet101.freeze_pretrained_model_layers()

In [None]:
# Build and compile the ResNet101-based classifier (dropout 0.2, learning rate 1e-4) and print its summary

ResNet101.define_pretrained_model_architecture(dropout_value = 0.2, learning_rate = 1e-4)

In [None]:
# First phase: train the ResNet101 model for 10 epochs with the backbone frozen

ResNet101.fit_model(epochs = 10)

In [None]:
# Unfreeze the ResNet101 backbone and recompile with a much smaller learning rate

ResNet101.unfreeze_pretrained_model_layers(learning_rate = 1e-7)

In [None]:
# Second phase: fine-tune the unfrozen ResNet101 model for 10 more epochs

ResNet101.fit_fine_tuned_model(epochs = 10)

In [None]:
# Accuracy and loss curves of both ResNet101 training phases

ResNet101.plot_accuracy_and_loss(model_name = 'ResNet101', acc_y_lower_lim = 0.50, loss_y_upper_limit = 1.0)

In [None]:
# Evaluate ResNet101 on the test set: classification report plus confusion-matrix heatmap

ResNet101.classification_report_and_confusion_metrics(test_ds = test_ds)

#### MobileNetV2

In [None]:
# Wrap the MobileNetV2 backbone in the fine-tuning helper

MobileNetV2 = Finetune_pretrained_models(base_model = tf.keras.applications.MobileNetV2)

In [None]:
# Freeze the MobileNetV2 backbone (marks it as non-trainable) for the first training phase

MobileNetV2.freeze_pretrained_model_layers()

In [None]:
# Build and compile the MobileNetV2-based classifier (dropout 0.2, learning rate 1e-4) and print its summary

MobileNetV2.define_pretrained_model_architecture(dropout_value = 0.2, learning_rate = 1e-4)

In [None]:
# First phase: train the MobileNetV2 model for 10 epochs with the backbone frozen

MobileNetV2.fit_model(epochs = 10)

In [None]:
# Unfreeze the MobileNetV2 backbone and recompile with a much smaller learning rate

MobileNetV2.unfreeze_pretrained_model_layers(learning_rate = 1e-7)

In [None]:
# Second phase: fine-tune the unfrozen MobileNetV2 model for 10 more epochs

MobileNetV2.fit_fine_tuned_model(epochs = 10)

In [None]:
# Accuracy and loss curves of both MobileNetV2 training phases

MobileNetV2.plot_accuracy_and_loss(model_name = 'MobileNetV2', acc_y_lower_lim = 0.50, loss_y_upper_limit = 1.0)

In [None]:
# Evaluate MobileNetV2 on the test set: classification report plus confusion-matrix heatmap

MobileNetV2.classification_report_and_confusion_metrics(test_ds = test_ds)