# Importing Libraries

In [None]:
import tensorflow as tf
tf.random.set_seed(42)
import tensorflow_hub as hub
from tensorflow.keras import layers
import os
from glob import glob
import pandas as pd
import numpy as np
np.random.seed(42)
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator

# Data Processing

## Building a dictionary of image paths

In [None]:
# Root folder of the HAM10000 dataset
base_dir = "/content/drive/MyDrive/Deep Learning/Skin Cancer MNIST HAM10000"

# Index every .jpg found one folder level below base_dir (the extracted
# HAM10000_images_part1 / HAM10000_images_part2 folders) in a single dictionary:
# image name without extension -> full image path
image_id_path_dict = {}
for image_path in glob(os.path.join(base_dir, '*', '*.jpg')):
    image_id_path_dict[os.path.splitext(os.path.basename(image_path))[0]] = image_path

## Renaming and Adding Labels

In [None]:
# Load the metadata table that holds the labels
metadata_csv_path = os.path.join(base_dir, 'HAM10000_metadata.csv')
scd_df = pd.read_csv(metadata_csv_path)

# Readable names for the short codes stored in the 'dx' column
renamed_cell_type = dict(
    nv='Melanocytic nevi',
    mel='Melanoma',
    bkl='Benign keratosis-like lesions',
    bcc='Basal cell carcinoma',
    akiec='Actinic keratoses',
    vasc='Vascular lesions',
    df='Dermatofibroma',
)
scd_df['cell_type'] = scd_df['dx'].map(renamed_cell_type)

# Look up the file path of every image id (None when the id has no file)
scd_df['path'] = scd_df['image_id'].map(image_id_path_dict.get)

# Sorted array of the distinct cell type names
unique_cell = np.unique(scd_df['cell_type'])

unique_cell

## Filling null values

In [None]:
# Fill missing 'age' values with the column mean.
# The result is assigned back to the column: calling fillna(inplace=True) on
# scd_df['age'] is chained in-place mutation, which raises a FutureWarning on
# recent pandas and silently leaves scd_df unchanged under Copy-on-Write.
scd_df['age'] = scd_df['age'].fillna(scd_df['age'].mean())

## EDA

In [None]:
# Explore how the samples are distributed over the cell types:
# a bar chart of the counts, followed by the raw numbers.

cell_type_counts = scd_df['cell_type'].value_counts()

fig, ax1 = plt.subplots(nrows=1, ncols=1, figsize=(10, 5))
cell_type_counts.plot(kind='bar', ax=ax1)

cell_type_counts

## Balancing data by cell_type

In [None]:
# One group of rows per distinct value of the 'cell_type' column
groups = scd_df.groupby(by='cell_type')

# Number of rows every cell type should have after balancing
number_of_sample = 1000

# Define a function for resampling each group
def resample_group(group):
    """Return exactly ``number_of_sample`` rows drawn from ``group``.

    Groups that already have at least ``number_of_sample`` rows are
    undersampled WITHOUT replacement, so no row is duplicated while other
    rows are thrown away. Smaller groups are oversampled WITH replacement,
    the only way to reach the target size. Both cases use a fixed
    ``random_state`` so the balanced dataset is reproducible.

    Args:
        group (pd.DataFrame): Rows belonging to one cell type.

    Returns:
        pd.DataFrame: ``number_of_sample`` rows taken from ``group``.
    """
    # Replacement is only needed when the group is too small to supply
    # number_of_sample distinct rows.
    needs_oversampling = len(group) < number_of_sample
    return group.sample(n=number_of_sample, replace=needs_oversampling, random_state=42)

# Resample every cell-type group and stack the results into one DataFrame
# with a fresh 0..n-1 index.
# The groups are iterated explicitly instead of using groups.apply(): applying
# a function to frames that still contain the grouping column is deprecated in
# pandas, and once that column is excluded 'cell_type' would only survive in
# the index and be lost on reset_index(drop=True).
scd_df_balanced = pd.concat(
    [resample_group(group) for _cell_type, group in groups],
    ignore_index=True,
)

# Plotting the balanced dataframe
fig, ax1 = plt.subplots(1, 1, figsize=(10, 5))
scd_df_balanced['cell_type'].value_counts().plot(kind='bar', ax=ax1)

In [None]:
# Preview the first three rows of the balanced dataframe
scd_df_balanced.head(3)

In [None]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Augmentation settings applied on the fly to every image the generator yields
augmentation_settings = dict(
    rescale=1./255,           # scale pixel values by 1/255
    rotation_range=30,        # random rotation
    zoom_range=0.2,           # random zoom
    horizontal_flip=True,     # random horizontal flip
    vertical_flip=True,       # random vertical flip
    width_shift_range=0.1,    # random horizontal shift
    height_shift_range=0.1,   # random vertical shift
    fill_mode='nearest',      # how pixels created by a transform are filled
)

# Generator configured with the augmentations above
datagen = ImageDataGenerator(**augmentation_settings)

## Splitting the dataset into training and validation sets

In [None]:
# Split the data into training and validation sets.
# scd_df_balanced was built by sampling with replacement, so the same image
# can occur in many rows. Splitting rows directly would put copies of one
# image on both sides and inflate the validation scores, so the split is done
# on unique image ids and all rows of an image follow its id.
unique_image_ids = scd_df_balanced['image_id'].drop_duplicates()
train_ids, test_ids = train_test_split(unique_image_ids, test_size=0.2, random_state=42)
train_df = scd_df_balanced[scd_df_balanced['image_id'].isin(train_ids)]
test_df = scd_df_balanced[scd_df_balanced['image_id'].isin(test_ids)]

# Create a flow for the training set (with random augmentation)
train_dataset = datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col='path',
    y_col='cell_type',
    target_size=(128, 128),
    batch_size=32,
    class_mode='categorical'
)

# Validation images must not be randomly augmented, otherwise the metrics
# change from run to run and do not describe the real images. Only the same
# rescaling as for training is applied.
val_datagen = ImageDataGenerator(rescale=1./255)

# Create a flow for the validation set; shuffle=False keeps the order
# deterministic for evaluation
val_dataset = val_datagen.flow_from_dataframe(
    dataframe=test_df,
    x_col='path',
    y_col='cell_type',
    target_size=(128, 128),
    batch_size=32,
    class_mode='categorical',
    shuffle=False
)

## Turning data into batches

In [None]:
# Batch size constant. NOTE(review): the flow_from_dataframe calls in this
# notebook pass batch_size=32 as a literal rather than reading this name.
BATCH_SIZE = 32

### Visualizing Data Batches

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Pull the next batch of images and their labels from train_dataset
images, labels = next(train_dataset)

# Class mapping exposed by the train_dataset object
class_indices = train_dataset.class_indices

# Swap keys and values of class_indices, so the former values (index numbers)
# become the keys and the former keys (cell type labels) become the values
inv_class_indices = dict((index, label) for label, index in class_indices.items())

# Cell type labels as a list, positioned by their index number
cell_type_labels = list(map(inv_class_indices.__getitem__, range(len(inv_class_indices))))

# Helper that draws images in a 5x5 grid, titled with their cell types
def plot_images(images, cell_types):
    """Show up to 25 images in a 5x5 grid.

    Args:
        images: Sequence of images accepted by ``Axes.imshow``.
        cell_types: Sequence of integer positions into the module-level
            ``cell_type_labels`` list, one per image.

    Only as many images are drawn as there are grid cells (25); extra images
    are ignored.
    """
    figure, grid = plt.subplots(nrows=5, ncols=5, figsize=(10, 10))
    for axis, picture, label_index in zip(grid.flatten(), images, cell_types):
        axis.imshow(picture)
        axis.set_title(cell_type_labels[label_index])
        axis.axis('off')
    plt.tight_layout()
    plt.show()

# Turn each label vector into the position of its largest entry, then draw the grid
cell_types = list(np.argmax(labels, axis=1))
plot_images(images, cell_types)

# Building a model

## Creating callback function

In [None]:
# TensorBoard callback

# Loading the TensorBoard notebook extension
%load_ext tensorboard

import datetime

# Factory for TensorBoard callbacks
def create_tensorboard_callback(model_name):
    """Build a TensorBoard callback that logs into a fresh, timestamped folder.

    Args:
        model_name (str): Sub-folder of the logs directory used for this model.

    Returns:
        tf.keras.callbacks.TensorBoard: Callback writing to
        ``<logs>/<model_name>/<month><day>-<hour><minute><second>``.
    """
    # Timestamp taken when the callback is created, so each run gets its own folder
    run_stamp = datetime.datetime.now().strftime("%m%d-%H%M%S")
    model_log_root = f"/content/drive/MyDrive/Deep Learning/Skin Cancer MNIST HAM10000/logs/{model_name}"

    return tf.keras.callbacks.TensorBoard(os.path.join(model_log_root, run_stamp))

## Setting input and output and models

In [None]:
# Side length (in pixels) of the square images fed to the models
IMG_SIZE = 128

# Input shape of the models: height, width, colour channels
INPUT_SHAPE = [IMG_SIZE, IMG_SIZE, 3]

# TF Hub handles of the pretrained backbones.
# All three use the "feature_vector" variant: the models are used as frozen
# feature extractors with a new Dense output layer on top. The
# "classification" variants would place that layer on top of ImageNet class
# scores instead of image features.
efficientnet_url = "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_l/feature_vector/2"
mobilenetV2_url = "https://tfhub.dev/google/imagenet/mobilenet_v2_050_128/feature_vector/5"
resnet50V2_url = "https://tfhub.dev/google/imagenet/resnet_v2_50/feature_vector/5"

## Setting up Keras model learning function

In [None]:
# Model factory
def create_model(model_url):
    """Build and compile a classifier on top of a pretrained TF Hub module.

    Args:
        model_url (str): TF Hub handle of the pretrained module.

    Returns:
        tf.keras.Sequential: Compiled model consisting of the non-trainable
        hub layer followed by a 7-unit softmax output layer.
    """
    NUM_OF_LABELS = 7

    # Pretrained module wrapped as a Keras layer; trainable=False keeps its weights fixed
    feature_extractor_layer = hub.KerasLayer(
        model_url,
        trainable=False,
        name="feature_extraction_layer",
        input_shape=INPUT_SHAPE,
    )

    # New output layer with one unit per label
    output_layer = layers.Dense(NUM_OF_LABELS, activation="softmax", name="output-layer")

    model = tf.keras.Sequential([feature_extractor_layer, output_layer])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )

    return model

## Creating TF Hub Feature Extraction model

In [None]:
NUM_EPOCHS = 50  # Maximum number of epochs (one epoch = one full pass over the training data)

# Build a function to train and return a trained model
def train_model(model_url, model_name):
    """Create a model from a TF Hub handle and train it on ``train_dataset``.

    Training runs for at most ``NUM_EPOCHS`` epochs, is validated on
    ``val_dataset`` after every epoch, and stops early once ``val_loss`` has
    not improved for 5 epochs.

    Args:
        model_url (str): TF Hub handle passed to ``create_model``.
        model_name (str): Name used for the TensorBoard log folder.

    Returns:
        tuple: ``(model, history)`` - the fitted model and the History object
        returned by ``model.fit``.
    """
    # Create a model
    model = create_model(model_url)

    # restore_best_weights=True rolls the model back to the epoch with the
    # lowest val_loss. Without it the returned (and later saved) model would
    # carry the weights from `patience` epochs after the best one.
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=5,
        restore_best_weights=True,
    )

    # Fit the model to the data passing it the callbacks we created
    history = model.fit(
        x=train_dataset,
        epochs=NUM_EPOCHS,
        validation_data=val_dataset,
        validation_freq=1,
        callbacks=[create_tensorboard_callback(model_name), early_stopping],
    )

    # Return the fitted model together with its training history
    return model, history

In [None]:
# Train the ResNet-50 V2 based model, then store it on Drive
resnet_save_dir = "/content/drive/MyDrive/Deep Learning/Skin Cancer MNIST HAM10000/Models/resnet"
resnet50V2_model, resnet50V2_history = train_model(model_url=resnet50V2_url, model_name="resnet50V2")
resnet50V2_model.save(resnet_save_dir)

In [None]:
# Train the MobileNet V2 based model, then store it on Drive
mobilenet_save_dir = "/content/drive/MyDrive/Deep Learning/Skin Cancer MNIST HAM10000/Models/mobilenet"
mobilenetV2_model, mobilenetV2_history = train_model(model_url=mobilenetV2_url, model_name="mobilenetV2")
mobilenetV2_model.save(mobilenet_save_dir)

In [None]:
# Train the EfficientNet V2-L based model, then store it on Drive
efficientnet_save_dir = "/content/drive/MyDrive/Deep Learning/Skin Cancer MNIST HAM10000/Models/efficientnet"
efficientnet_model, efficientnet_history = train_model(model_url=efficientnet_url, model_name="efficientnetV2-L")
efficientnet_model.save(efficientnet_save_dir)

In [None]:
import tensorflow as tf
import datetime
# Load the TensorBoard notebook extension
%load_ext tensorboard

# Start TensorBoard
%tensorboard --logdir /content/drive/MyDrive/Deep\ Learning/Skin\ Cancer\ MNIST\ HAM10000/logs


## Evaluation

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def _plot_history_metric(model_history, metric, metric_label, model_name, save_directory):
    """Plot, save and show the training and validation curve of one metric."""
    plt.figure(figsize=(12, 6))
    plt.plot(model_history.history[metric], 'r-o', label=f'Training {metric_label}')
    plt.plot(model_history.history[f'val_{metric}'], 'b-o', label=f'Validation {metric_label}')
    plt.title(f'{model_name} Model {metric_label}')
    plt.ylabel(metric_label)
    plt.xlabel('Epoch')
    plt.legend(loc='best')
    plt.grid(True)
    # Save before show(): the figure is written while it is still the current one
    file_path = os.path.join(save_directory, f'{model_name}_{metric}.png')
    plt.savefig(file_path)
    print(f'Successfully saved model {metric} plot to {file_path}.')
    plt.show()


def plot_model_history(model_history, model_name, save_directory):
    """
    Plots and saves the accuracy and loss curves for a given model history.

    Two figures are produced, each showing the training and validation values
    per epoch. They are written to ``<model_name>_accuracy.png`` and
    ``<model_name>_loss.png`` inside ``save_directory`` and then displayed.

    Args:
        model_history (keras.callbacks.History): History object obtained from model training;
            its ``history`` dict must contain 'accuracy', 'val_accuracy', 'loss' and 'val_loss'
        model_name (str): Name of the model for labeling the plots and naming the files
        save_directory (str): Directory the image files are written to; created if missing

    Returns:
        None
    """
    # savefig does not create missing folders, so make sure the target exists
    os.makedirs(save_directory, exist_ok=True)

    _plot_history_metric(model_history, 'accuracy', 'Accuracy', model_name, save_directory)
    _plot_history_metric(model_history, 'loss', 'Loss', model_name, save_directory)


# Folder that receives the training-curve images
save_path = "/content/drive/MyDrive/Deep Learning/Skin Cancer MNIST HAM10000/Evaluation"

# Draw the accuracy/loss curves of every trained model, in training order
histories_to_plot = (
    (resnet50V2_history, "Resnet-50 V2"),
    (mobilenetV2_history, "Mobilenet V2"),
    (efficientnet_history, "Efficientnet V2"),
)
for trained_history, display_name in histories_to_plot:
    plot_model_history(trained_history, display_name, save_path)

## Plotting the confusion matrix and printing the accuracy, precision, recall and F1 scores

In [None]:
def plot_confusion_matrix(model, predictions, predicted_labels, true_labels, save_path, name=None):
    """Plot, save and show the confusion matrix of one model.

    Args:
        model: Unused; kept so all evaluation helpers share one call signature.
        predictions: Unused; kept for the shared call signature.
        predicted_labels: Predicted class index per sample.
        true_labels: True class index per sample.
        save_path (str): Directory that receives ``<name>_cm.png``; created if missing.
        name (str, optional): Model name for the title and file name. When
            omitted, the module-level ``model_name`` variable is used, which
            is how existing callers (the evaluation loop) supply it.

    Returns:
        None
    """
    # Fall back to the module-level variable for callers that do not pass a name
    plot_name = model_name if name is None else name

    # Compute the confusion matrix
    cm = confusion_matrix(true_labels, predicted_labels)
    classes = unique_labels(true_labels, predicted_labels)

    # Plot the confusion matrix
    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    ax.figure.colorbar(im, ax=ax)
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           xticklabels=classes, yticklabels=classes,
           title=f'Confusion Matrix for {plot_name}',
           ylabel='True label',
           xlabel='Predicted label')

    # Cells above half of the largest count get white text so the number
    # stays readable on the dark end of the colour map
    threshold = cm.max() / 2
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, cm[i, j],
                    ha="center", va="center",
                    color="white" if cm[i, j] > threshold else "black")

    # Save the plot as an image file (savefig does not create missing folders)
    os.makedirs(save_path, exist_ok=True)
    file_name = f'{plot_name}_cm.png'
    file_path = os.path.join(save_path, file_name)
    plt.savefig(file_path)

    plt.show()

def plot_roc(model, predictions, predicted_labels, true_labels, save_path, name=None):
    """Plot, save and show one-vs-rest ROC curves for the 7 classes.

    The curves are computed from the predicted class scores in
    ``predictions``. Using the hard argmax labels instead would give each
    curve a single operating point and a misleading AUC.

    Args:
        model: Unused; kept so all evaluation helpers share one call signature.
        predictions: Array of shape (n_samples, 7) with one score per class.
        predicted_labels: Unused; kept for the shared call signature.
        true_labels: True class index per sample.
        save_path (str): Directory that receives ``<name>_roc.png``; created if missing.
        name (str, optional): Model name for the title and file name. When
            omitted, the module-level ``model_name`` variable is used, which
            is how existing callers (the evaluation loop) supply it.

    Returns:
        None
    """
    # Fall back to the module-level variable for callers that do not pass a name
    plot_name = model_name if name is None else name

    num_classes = 7

    # One indicator column per class, built from the labels passed in
    # (not from a module-level array)
    true_onehot = label_binarize(true_labels, classes=np.arange(num_classes))

    # Compute the ROC curve and AUC score for each class from its scores
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(num_classes):
        fpr[i], tpr[i], _ = roc_curve(true_onehot[:, i], predictions[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Plot the ROC curves
    plt.figure()
    colors = cycle(['aqua', 'darkorange', 'cornflowerblue', 'purple', 'green', 'red', 'brown'])
    for i, color in zip(range(num_classes), colors):
        plt.plot(fpr[i], tpr[i], color=color, lw=2, label=f'ROC curve class {i} (area = {roc_auc[i]:.2f})')
    # Diagonal reference line
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(f'ROC Curve for {plot_name}')
    plt.legend(loc="lower right")

    # Save the plot as an image file (savefig does not create missing folders)
    os.makedirs(save_path, exist_ok=True)
    file_name = f'{plot_name}_roc.png'
    file_path = os.path.join(save_path, file_name)
    plt.savefig(file_path)

    plt.show()


def plot_precision_recall(model, predictions, predicted_labels, true_labels, save_path, name=None):
    """Plot, save and show one-vs-rest precision-recall curves for the 7 classes.

    Args:
        model: Unused; kept so all evaluation helpers share one call signature.
        predictions: Array of shape (n_samples, 7) with one score per class.
        predicted_labels: Unused; kept for the shared call signature.
        true_labels: True class index per sample.
        save_path (str): Directory that receives ``<name>_precision_recall.png``;
            created if missing.
        name (str, optional): Model name for the title and file name. When
            omitted, the module-level ``model_name`` variable is used, which
            is how existing callers (the evaluation loop) supply it.

    Returns:
        None
    """
    # Fall back to the module-level variable for callers that do not pass a name
    plot_name = model_name if name is None else name

    num_classes = 7

    # One indicator column per class, built from the labels passed in
    # (not from a module-level array)
    true_onehot = label_binarize(true_labels, classes=np.arange(num_classes))

    # Compute the precision-recall curve and average precision score for each class
    precision = dict()
    recall = dict()
    average_precision = dict()
    for i in range(num_classes):
        precision[i], recall[i], _ = precision_recall_curve(true_onehot[:, i], predictions[:, i])
        average_precision[i] = average_precision_score(true_onehot[:, i], predictions[:, i])

    # Plot the precision-recall curves
    plt.figure()
    colors = cycle(['aqua', 'darkorange', 'cornflowerblue', 'purple', 'green', 'red', 'brown'])
    for i, color in zip(range(num_classes), colors):
        plt.plot(recall[i], precision[i], color=color, lw=2, label=f'Precision-Recall curve class {i} (AP = {average_precision[i]:.2f})')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title(f'Precision-Recall Curve for {plot_name}')
    # Place the legend outside the axes, to the right of the plot
    plt.legend(bbox_to_anchor=(1.05, 1.0), loc='upper left')

    # Save the plot as an image file (savefig does not create missing folders);
    # bbox_inches='tight' keeps the outside legend inside the saved image
    os.makedirs(save_path, exist_ok=True)
    file_name = f'{plot_name}_precision_recall.png'
    file_path = os.path.join(save_path, file_name)
    plt.savefig(file_path, bbox_inches='tight')

    plt.show()


def plot_classification_report(model, predictions, predicted_labels, test_labels, save_path, name=None):
    """Print the classification report of one model and save it as an Excel file.

    Args:
        model: Unused; kept so all evaluation helpers share one call signature.
        predictions: Array of shape (n_samples, 7) with one score per class.
        predicted_labels: Ignored; the predicted classes are recomputed from
            ``predictions``.
        test_labels: True class index per sample.
        save_path (str): Directory that receives
            ``<name>_classification_report.xlsx``; created if missing.
        name (str, optional): Model name for the heading and file name. When
            omitted, the module-level ``model_name`` variable is used, which
            is how existing callers (the evaluation loop) supply it.

    Returns:
        None
    """
    # Fall back to the module-level variable for callers that do not pass a name
    report_name = model_name if name is None else name

    # Class names of the dataset (module-level array of distinct cell types)
    class_names = list(unique_cell)

    # Convert true labels to one-hot encoded format
    true_labels = label_binarize(test_labels, classes=np.arange(7))

    # Convert predictions to one-hot encoded format
    predicted_labels = label_binarize(np.argmax(predictions, axis=1), classes=np.arange(7))

    # Text version of the report for printing
    report = classification_report(true_labels, predicted_labels, target_names=class_names)

    # Structured version of the same report for the spreadsheet. Parsing the
    # formatted text with read_csv does not yield usable columns, so the
    # metrics are requested as a dict: one row per class/average, one column
    # per metric.
    report_dict = classification_report(
        true_labels, predicted_labels, target_names=class_names, output_dict=True
    )
    report_df = pd.DataFrame(report_dict).transpose()

    # Save the data frame to an Excel file with the model name in the title.
    # The index holds the class/average names, so it is written too.
    os.makedirs(save_path, exist_ok=True)
    file_name = f'{report_name}_classification_report.xlsx'
    file_path = os.path.join(save_path, file_name)
    report_df.to_excel(file_path)

    # Print the classification report to the console
    print(f'Classification Report for {report_name}:')
    print(report)

In [None]:
# Initialize empty lists for images and labels
images_list = []
labels_list = []

# Number of batches that make up one full pass over the validation data.
# Taken from the iterator itself instead of assuming a batch size of 32, so
# the loop stays correct if the batch size of val_dataset is changed.
num_val_batches = len(val_dataset)

# Loop through the dataset and extract images and labels from each batch
for batch in val_dataset:
    images = batch[0]
    labels = batch[1]
    images_list.append(images)
    labels_list.append(labels)

    # Break the loop once one full pass has been collected; the explicit
    # break is what ends the iteration
    if len(images_list) >= num_val_batches:
        break

# Concatenate the lists to get the final arrays
test_images = np.concatenate(images_list, axis=0)
test_labels = np.concatenate(labels_list, axis=0)

# Print the shape of the arrays
print("Images shape:", test_images.shape)
print("Labels shape:", test_labels.shape)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import label_binarize
import os
import io
from sklearn.metrics import classification_report
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import precision_recall_curve, average_precision_score
from itertools import cycle
from sklearn.utils.multiclass import unique_labels

# Output folders for the evaluation artefacts, one per kind of plot/report
evaluation_root = "/content/drive/MyDrive/Deep Learning/Skin Cancer MNIST HAM10000/Evaluation"
save_path_roc = f"{evaluation_root}/ROC"
save_path_cm = f"{evaluation_root}/CM"
save_path_precision_recall = f"{evaluation_root}/precision_recall"
save_path_classification_report = f"{evaluation_root}/classification_report"


# Saved models to evaluate: display name -> folder the model was saved to
models_root = "/content/drive/MyDrive/Deep Learning/Skin Cancer MNIST HAM10000/Models"
models = dict([
    ('resnet50-V2', f"{models_root}/resnet"),
    ('mobilenet-V2', f"{models_root}/mobilenet"),
    ('efficientnet-V2', f"{models_root}/efficientnet"),
])

# Evaluate every saved model on the collected validation arrays.
# The loop variable must stay named `model_name`: the plotting helpers read it.
for model_name, model_path in models.items():
    # Restore the trained model from disk
    model = tf.keras.models.load_model(model_path)

    # Class scores for every collected image
    predictions = model.predict(test_images)
    # Position of the highest score per sample = predicted class index
    predicted_labels = predictions.argmax(axis=1)
    # Position of the largest entry in each label vector = true class index
    true_labels = test_labels.argmax(axis=1)

    # Run each report helper with its own output folder, in the original order
    evaluation_steps = (
        (plot_confusion_matrix, save_path_cm),
        (plot_roc, save_path_roc),
        (plot_precision_recall, save_path_precision_recall),
        (plot_classification_report, save_path_classification_report),
    )
    for run_step, output_dir in evaluation_steps:
        run_step(model, predictions, predicted_labels, true_labels, output_dir)