Importing necessary libraries

In [None]:
import matplotlib.pyplot as plt
from PIL import Image
import seaborn as sns
import numpy as np
import pandas as pd
import os
from tensorflow.keras.utils import to_categorical
from glob import glob

from sklearn.model_selection import train_test_split
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
import tensorflow as tf

from keras.utils import plot_model
from tensorflow.keras.metrics import Recall

from tensorflow.keras import layers
from keras.optimizers import Adam
from keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.applications.densenet import DenseNet121
from tensorflow.keras.callbacks import *
from tensorflow.keras import backend as K

import pickle

# The Dataset



`HAM10000_metadata.csv` is the main CSV file; it holds the metadata for every training image. Its columns are:
1. lesion_id
2. image_id
3. dx
4. dx_type
5. age
6. sex
7. localization



In [None]:
import os
# Show what is mounted under the Kaggle input directory. The listing is printed
# explicitly: a cell only echoes its last expression, and here an assignment follows.
print(os.listdir("/kaggle/input"))

# Reading the data from HAM10000_metadata.csv
df = pd.read_csv('/kaggle/input/ham10000/archive/HAM10000_metadata.csv')

In [None]:
# Preview the first rows of the metadata table.
df.head()

In [None]:
# Show the dtype pandas inferred for each column.
df.dtypes

In [None]:
# Summary statistics for the numeric columns.
df.describe()

A statistical analysis of numerical attributes of the dataset (age)

### Data Cleaning and Management

Removing NULL values from the dataset

In [None]:
# Count missing values per column before cleaning.
df.isnull().sum()

There are 57 NULL values in the 'age' feature.
Rather than dropping those rows, we replace the NULL values with the mean of 'age'.

In [None]:
# Impute missing ages with the column mean. The result is assigned back to the
# column instead of calling fillna(..., inplace=True) on the df['age'] selection:
# that chained in-place form acts on an intermediate object, raises a warning in
# recent pandas releases and does not update df at all under Copy-on-Write.
df['age'] = df['age'].fillna(df['age'].mean())

In [None]:
# Re-count missing values per column after the imputation step.
df.isnull().sum()

All NULL values are now resolved.

Next, we make a lesion type dictionary to map each lesion type code to a readable name in a new dataframe column.

In [None]:
# Human-readable name for each diagnosis code used in the `dx` column.
lesion_type_dict = {
    'nv': 'Melanocytic nevi',
    'mel': 'Melanoma',
    # No trailing space: the stray space would otherwise end up in plot titles
    # and in the category names derived from this mapping.
    'bkl': 'Benign keratosis-like lesions',
    'bcc': 'Basal cell carcinoma',
    'akiec': 'Actinic keratoses',
    'vasc': 'Vascular lesions',
    'df': 'Dermatofibroma'
}

base_skin_dir = '/kaggle/input/ham10000/archive'

# Map every image id (file name without its extension) to the file's full path.
# The '*' directory wildcard picks up the .jpg files from every sub-folder of
# base_skin_dir, so images split across folders end up in one dictionary.
imageid_path_dict = {
    os.path.splitext(os.path.basename(image_path))[0]: image_path
    for image_path in glob(os.path.join(base_skin_dir, '*', '*.jpg'))
}

In [None]:
# Attach, per row: the image file path (looked up by image id) and the readable
# lesion name (looked up by diagnosis code). Ids/codes missing from the lookup
# dictionaries map to None.
df = df.assign(
    path=df['image_id'].map(imageid_path_dict.get),
    cell_type=df['dx'].map(lesion_type_dict.get),
)
# Integer class code per lesion name. pd.Categorical sorts its inferred
# categories, so the codes follow the alphabetical order of the names.
df['cell_type_idx'] = pd.Categorical(df['cell_type']).codes
df.head()

# Image Preprocessing

We resize the images from their original dimensions (400 * 600 * 3) down to 224 * 224 * 3, because the model would take much longer to run on the full-size images.

In [None]:
IMAGE_SIZE = (224, 224)  # (width, height) handed to PIL's resize


def _load_resized(path):
    """Open the image at `path`, resize it to IMAGE_SIZE and return it as a NumPy array."""
    # The context manager releases the file handle as soon as the resized
    # pixels have been copied out, instead of leaving that to garbage collection.
    with Image.open(path) as img:
        return np.asarray(img.resize(IMAGE_SIZE))


# One resized pixel array per metadata row.
df['image'] = df['path'].map(_load_resized)

Plotting some images from each class of dataset.

In [None]:
# Draw a 7 x n_samples grid: one row per lesion type, each showing a fixed
# random sample of that type's images. The figure is also written to disk.
n_samples = 5
rows_by_type = df.sort_values(['cell_type']).groupby('cell_type')
fig, m_axs = plt.subplots(7, n_samples, figsize = (4*n_samples, 3*7))
for row_axes, (type_name, type_rows) in zip(m_axs, rows_by_type):
    # Only the first panel of each row carries the lesion-type title.
    row_axes[0].set_title(type_name)
    sampled_rows = type_rows.sample(n_samples, random_state=2018)
    for panel, (_, record) in zip(row_axes, sampled_rows.iterrows()):
        panel.imshow(record['image'])
        panel.axis('off')
fig.savefig('category_samples.png', dpi=300)

In [None]:
# Tally the array shape of every loaded image; a single row in the result
# means all images share one size.
df['image'].map(np.shape).value_counts()

In [None]:
# Separate the integer class code (the prediction target) from the rest of the frame.
target = df['cell_type_idx']
features = df.drop(columns='cell_type_idx')

features.head()

In [None]:
# Hold out 25% of the rows as the test set; the fixed seed keeps the split repeatable.
split_parts = train_test_split(
    features,
    target,
    test_size=0.25,
    random_state=666,
)
x_train_o, x_test_o, y_train_o, y_test_o = split_parts

# Distinct lesion names present in the training rows (tf.unique also returns
# the index of each row's value within that list).
unique_values = tf.unique(x_train_o.cell_type.values)

In [None]:
# Stack the per-row image arrays of each split into one NumPy array.
x_train = np.asarray(x_train_o['image'].tolist())
x_test = np.asarray(x_test_o['image'].tolist())

# Global (all pixels, all channels) statistics of the TRAINING images only.
x_train_mean = np.mean(x_train)
x_train_std = np.std(x_train)

# Standardise both splits with the training statistics. The test set must go
# through exactly the transform the model was trained on; scaling it with its
# own mean/std would feed the model a differently shifted/scaled input and
# would let test-set statistics leak into preprocessing.
x_train = (x_train - x_train_mean)/x_train_std
x_test = (x_test - x_train_mean)/x_train_std

In [None]:
# One-hot encode the integer class codes of both splits (7 columns, one per class).
y_train, y_test = (
    to_categorical(class_codes, num_classes = 7)
    for class_codes in (y_train_o, y_test_o)
)
y_test

In [None]:
# Carve 10% of the training data off as a validation set (fixed seed).
x_train, x_validate, y_train, y_validate = train_test_split(
    x_train, y_train, test_size = 0.1, random_state = 999
)

# Give every split the explicit layout (num_images, height = 224, width = 224, channels = 3)
image_dims = (224, 224, 3)
x_train = x_train.reshape((x_train.shape[0],) + image_dims)
x_test = x_test.reshape((x_test.shape[0],) + image_dims)
x_validate = x_validate.reshape((x_validate.shape[0],) + image_dims)

In [None]:
# Shape of the training image array.
x_train.shape

In [None]:
# Shape of the test image array.
x_test.shape

In [None]:
# Augmentation settings meant to reduce overfitting: small random rotations,
# zooms, shifts and flips. No centering, scaling or whitening is requested.
# NOTE(review): as written, the model.fit(...) calls in this notebook receive
# x_train / y_train directly, so this generator only takes effect if training is
# switched to datagen.flow(x_train, y_train, batch_size=...).
datagen = ImageDataGenerator(
        featurewise_center=False,  # do not zero-centre over the dataset
        samplewise_center=False,  # do not zero-centre each sample
        featurewise_std_normalization=False,  # do not divide by the dataset std
        samplewise_std_normalization=False,  # do not divide each sample by its std
        zca_whitening=False,  # no ZCA whitening
        rotation_range=10,  # rotate randomly by up to 10 degrees
        zoom_range = 0.1, # zoom randomly by up to 10%
        width_shift_range=0.12,  # shift horizontally by up to 12% of the width
        height_shift_range=0.12,  # shift vertically by up to 12% of the height
        horizontal_flip=True,  # random left-right flips
        vertical_flip=True)  # random up-down flips

# datagen.fit(x_train) is intentionally not called: fit() only computes the
# statistics for featurewise centering/normalisation and ZCA whitening, all of
# which are disabled above, so it would just copy the whole training array.

In [None]:
# Shape of the training image array.
x_train.shape

In [None]:
# Shared callback: halve the learning rate after 3 epochs without improvement
# in validation accuracy, never going below 1e-5.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.5,
    patience=3,
    min_lr=1e-5,
    verbose=1,
)

# EfficientNet B1

In [None]:
from tensorflow.keras.applications import EfficientNetB1
from tensorflow.keras.layers import BatchNormalization, Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Load the EfficientNetB1 model, without the top (final classification) layer
base_model = EfficientNetB1(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Add custom layers on top of EfficientNetB1
x = base_model.output
x = GlobalAveragePooling2D()(x)  # Global average pooling to reduce feature maps
x = Dense(1024, activation='relu')(x)  # Fully connected layer for additional learning
predictions = Dense(7, activation='softmax')(x)  # 7 classes for classification

# Combine base model and new custom layers
model = Model(inputs=base_model.input, outputs=predictions)

# Stage 1: freeze the whole backbone so only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# Training runs directly on the in-memory, already standardised arrays; no
# ImageDataGenerator is involved in this cell.
# NOTE(review): the Keras EfficientNet documentation states that these models
# include their own input preprocessing and expect raw pixel values in [0, 255];
# x_train was standardised in an earlier cell - confirm this is intended.
history = model.fit(x=x_train,
                    y=y_train,
                    epochs=10,
                    batch_size=32,
                    validation_data=(x_validate, y_validate),
                    callbacks=[learning_rate_reduction]  # Keras expects a list of callbacks
)

# Stage 2: unfreeze the last 30 backbone layers for fine-tuning, but keep
# BatchNormalization layers frozen so their pre-trained statistics are not
# overwritten by updates from small batches.
for layer in base_model.layers[-30:]:
    if not isinstance(layer, BatchNormalization):
        layer.trainable = True

# Recompile so the changed trainable flags take effect, with a lower learning rate
model.compile(optimizer=Adam(learning_rate=0.00001), loss='categorical_crossentropy', metrics=['accuracy'])

# Fine-tune model
history_finetune = model.fit(x=x_train,
                             y=y_train,
                             epochs=5,
                             batch_size=32,
                             validation_data=(x_validate, y_validate),
                             callbacks=[learning_rate_reduction]
)


In [None]:
# evaluate() returns [loss, accuracy] for this compiled model; report the accuracy.
train_scores = model.evaluate(x_train, y_train, verbose=1)
accuracy = train_scores[1]
print("Train accuracy = ", accuracy*100, "%")

In [None]:
# evaluate() returns [loss, accuracy] for this compiled model; report the accuracy.
test_scores = model.evaluate(x_test, y_test, verbose=1)
accuracy = test_scores[1]
print("Test: accuracy = ",accuracy*100,"%")

# EfficientNetB0

In [None]:
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import BatchNormalization, Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Load the EfficientNetB0 model without the top classification layer
base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Add custom layers on top of EfficientNetB0
x = base_model.output
x = GlobalAveragePooling2D()(x)  # Global average pooling reduces each feature map to a single value
x = Dense(1024, activation='relu')(x)  # Dense layer to introduce trainable parameters
predictions = Dense(7, activation='softmax')(x)  # 7 classes for skin diseases

# Combine base model and new custom layers
model = Model(inputs=base_model.input, outputs=predictions)

# Stage 1: freeze the whole backbone so only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

# Compile model with Adam optimizer
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# Training runs directly on the in-memory, already standardised arrays; no
# ImageDataGenerator is involved in this cell.
# NOTE(review): the Keras EfficientNet documentation states that these models
# include their own input preprocessing and expect raw pixel values in [0, 255];
# x_train was standardised in an earlier cell - confirm this is intended.
history = model.fit(x=x_train,
                    y=y_train,
                    epochs=10,
                    batch_size=32,
                    validation_data=(x_validate, y_validate),
                    callbacks=[learning_rate_reduction])  # Keras expects a list of callbacks

# Stage 2: unfreeze the last 30 backbone layers for fine-tuning, but keep
# BatchNormalization layers frozen so their pre-trained statistics are not
# overwritten by updates from small batches.
for layer in base_model.layers[-30:]:
    if not isinstance(layer, BatchNormalization):
        layer.trainable = True

# Recompile so the changed trainable flags take effect, with a lower learning rate
model.compile(optimizer=Adam(learning_rate=0.00001), loss='categorical_crossentropy', metrics=['accuracy'])

# Fine-tune model
history_finetune = model.fit(x=x_train,
                             y=y_train,
                             epochs=5,
                             batch_size=32,
                             validation_data=(x_validate, y_validate),
                             callbacks=[learning_rate_reduction])


In [None]:
# evaluate() returns [loss, accuracy] for this compiled model; report the accuracy.
train_scores = model.evaluate(x_train, y_train, verbose=1)
accuracy = train_scores[1]
print("Train accuracy = ", accuracy*100, "%")

In [None]:
# evaluate() returns [loss, accuracy] for this compiled model; report the accuracy.
test_scores = model.evaluate(x_test, y_test, verbose=1)
accuracy = test_scores[1]
print("Test: accuracy = ",accuracy*100,"%")

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Get predictions for the test set
y_pred = model.predict(x_test)
y_pred_classes = np.argmax(y_pred, axis=1)  # Convert predictions to class labels
y_true_classes = np.argmax(y_test, axis=1)  # Convert one-hot encoded labels to class labels

# Class names in label-code order. The integer labels were produced by
# pd.Categorical(df['cell_type']).codes, so position i of .categories is the
# name of class i (alphabetical order). A hand-written list in any other order
# would put the wrong name on every axis tick and report row.
class_names = [name.strip() for name in pd.Categorical(df['cell_type']).categories]
class_labels = np.arange(len(class_names))

# Compute the confusion matrix. Passing `labels` pins it to one row/column per
# class even if some class never occurs in the test labels or predictions.
cm = confusion_matrix(y_true_classes, y_pred_classes, labels=class_labels)

# Plot the confusion matrix as a heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()

# Per-class precision / recall / F1, using the same label order as the matrix
print(classification_report(y_true_classes, y_pred_classes, labels=class_labels, target_names=class_names))

# InceptionV3

In [None]:
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.layers import BatchNormalization, Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Load the InceptionV3 model, without the top (final classification) layer
base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Add custom layers on top of InceptionV3
x = base_model.output
x = GlobalAveragePooling2D()(x)  # Global average pooling reduces each feature map to a single value
x = Dense(1024, activation='relu')(x)  # Add a dense layer for more learnable parameters
predictions = Dense(7, activation='softmax')(x)  # 7 classes for skin diseases

# Combine base model and new custom layers
model = Model(inputs=base_model.input, outputs=predictions)

# Stage 1: freeze the whole backbone so only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

# Compile model with Adam optimizer
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# Training runs directly on the in-memory, already standardised arrays; no
# ImageDataGenerator is involved in this cell.
history = model.fit(x=x_train,
                    y=y_train,
                    epochs=10,
                    batch_size=32,
                    validation_data=(x_validate,y_validate),
                    callbacks=[learning_rate_reduction]  # Keras expects a list of callbacks
)

# Stage 2: unfreeze the last 30 backbone layers for fine-tuning, but keep
# BatchNormalization layers frozen so their pre-trained statistics are not
# overwritten by updates from small batches.
for layer in base_model.layers[-30:]:
    if not isinstance(layer, BatchNormalization):
        layer.trainable = True

# Recompile so the changed trainable flags take effect, with a lower learning rate
model.compile(optimizer=Adam(learning_rate=0.00001), loss='categorical_crossentropy', metrics=['accuracy'])

# Fine-tune model
history_finetune = model.fit(x=x_train,
                             y=y_train,
                             epochs=5,
                             batch_size=32,
                             validation_data=(x_validate,y_validate),
                             callbacks=[learning_rate_reduction]
)


In [None]:
# evaluate() returns [loss, accuracy] for this compiled model; report the accuracy.
train_scores = model.evaluate(x_train, y_train, verbose=1)
accuracy = train_scores[1]
print("Train accuracy = ", accuracy*100, "%")

In [None]:
# evaluate() returns [loss, accuracy] for this compiled model; report the accuracy.
test_scores = model.evaluate(x_test, y_test, verbose=1)
accuracy = test_scores[1]
print("Test: accuracy = ",accuracy*100,"%")

Saving the model

In [None]:
# Write the current `model` to 'inceptionv3_model.h5' (path relative to the working directory).
model.save('inceptionv3_model.h5')

Predicting the class of a sample image. image class = "Benign keratosis-like lesions"

In [None]:
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image

# Load the saved model from the same working-directory-relative path that
# model.save('inceptionv3_model.h5') wrote to.
model = load_model('inceptionv3_model.h5')

# Map each class index to its lesion name. The training labels come from
# pd.Categorical(df['cell_type']).codes, which numbers the lesion names in
# alphabetical order, so the dictionary must follow that order for a predicted
# index to be translated into the right name.
class_indices = {
    0: 'Actinic keratoses',
    1: 'Basal cell carcinoma',
    2: 'Benign keratosis-like lesions',
    3: 'Dermatofibroma',
    4: 'Melanocytic nevi',
    5: 'Melanoma',
    6: 'Vascular lesions'
}

# Load and preprocess the image
def preprocess_image(img_path, mean=None, std=None):
    """Load one image and turn it into a model-ready batch of size 1.

    The image is resized to 224 x 224 and standardised as (pixels - mean) / std,
    which is the transform applied to the training images. Dividing by 255
    instead would give the network inputs on a different scale from the one it
    was trained on.

    Args:
        img_path: Path of the image file to load.
        mean: Pixel mean to subtract. Defaults to the notebook-level
            `x_train_mean` computed from the training images.
        std: Pixel standard deviation to divide by. Defaults to the
            notebook-level `x_train_std`.

    Returns:
        Array of shape (1, 224, 224, channels) holding the standardised image.
    """
    if mean is None:
        mean = x_train_mean
    if std is None:
        std = x_train_std
    img = image.load_img(img_path, target_size=(224, 224))  # Target size for InceptionV3
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension
    return (img_array - mean) / std

# Predict the class of a new image
def predict_image_class(img_path):
    """Return the lesion name the loaded model predicts for the image at `img_path`.

    The image is preprocessed with `preprocess_image`, scored with the
    notebook-level `model`, and the highest-probability index is translated
    through `class_indices`.
    """
    batch = preprocess_image(img_path)
    class_probabilities = model.predict(batch)
    # The batch holds a single image, so take entry 0 of the per-row argmax.
    best_index = np.argmax(class_probabilities, axis=1)[0]
    return class_indices[best_index]

# Example usage: classify one image, located through the image id -> path
# lookup built from the dataset folders. A hard-coded '/content/...' path does
# not exist in the environment the rest of this notebook reads its data from.
img_path = imageid_path_dict['ISIC_0024313']
class_name = predict_image_class(img_path)
print(f"Predicted Class: {class_name}")


# MobileNet V2

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import BatchNormalization, Dense, GlobalAveragePooling2D, Dropout

# Set parameters
input_shape = (224, 224, 3)
num_classes = 7

# Load the MobileNetV2 model with pre-trained ImageNet weights, exclude the top layer
base_model = tf.keras.applications.MobileNetV2(weights='imagenet', include_top=False, input_shape=input_shape)

# Stage 1: freeze the whole backbone so only the new head is trained
base_model.trainable = False

# Add custom layers for classification
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.2)(x)  # Adding dropout to reduce overfitting
x = Dense(1024, activation='relu')(x)
x = Dropout(0.2)(x)  # Additional dropout layer
predictions = Dense(num_classes, activation='softmax')(x)  # Final layer for classification

# Create the complete model
model = Model(inputs=base_model.input, outputs=predictions)

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model. Training runs directly on the in-memory, already
# standardised arrays; no ImageDataGenerator is involved in this cell.
epochs = 10  # You can adjust the number of epochs
history = model.fit(x=x_train,
                    y=y_train,
                    epochs=epochs,  # use the setting above instead of a repeated literal
                    batch_size=32,
                    validation_data=(x_validate,y_validate),
                    callbacks=[learning_rate_reduction]  # Keras expects a list of callbacks
)

# Stage 2: fine-tune. Re-enable training on the backbone, then freeze the first
# 100 layers again, plus every BatchNormalization layer, so the pre-trained
# normalisation statistics are not overwritten by updates from small batches.
base_model.trainable = True
for index, layer in enumerate(base_model.layers):
    if index < 100 or isinstance(layer, BatchNormalization):
        layer.trainable = False

# Re-compile so the changed trainable flags take effect, with a low learning rate
model.compile(optimizer=tf.keras.optimizers.Adam(1e-5), loss='categorical_crossentropy', metrics=['accuracy'])

fine_tune_epochs = 5
history_finetune = model.fit(x=x_train,
                             y=y_train,
                             epochs=fine_tune_epochs,
                             batch_size=32,
                             validation_data=(x_validate,y_validate),
                             callbacks=[learning_rate_reduction]
)


In [None]:
# evaluate() returns [loss, accuracy] for this compiled model; report the accuracy.
train_scores = model.evaluate(x_train, y_train, verbose=1)
accuracy = train_scores[1]
print("Train accuracy = ", accuracy*100, "%")

In [None]:
# evaluate() returns [loss, accuracy] for this compiled model; report the accuracy.
test_scores = model.evaluate(x_test, y_test, verbose=1)
accuracy = test_scores[1]
print("Test: accuracy = ",accuracy*100,"%")

Saving the model

In [None]:
# Write the current `model` to 'MobileNetV2_model.h5' (path relative to the working directory).
model.save('MobileNetV2_model.h5')

# Implementation of CNN

The CNN model : DenseNet 121

Optimizer: Adam

Activation functions used: ReLU (hidden dense layer) and Softmax (output layer)

In [None]:
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.layers import BatchNormalization, Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Load the DenseNet121 model without the top layer
base_model = DenseNet121(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Add custom layers on top of DenseNet121
x = base_model.output
x = GlobalAveragePooling2D()(x)  # Global average pooling reduces each feature map to a single value
x = Dense(1024, activation='relu')(x)  # Dense layer for more learnable parameters
predictions = Dense(7, activation='softmax')(x)  # 7 classes for skin diseases

# Combine base model and new custom layers
model = Model(inputs=base_model.input, outputs=predictions)

# Stage 1: freeze the whole backbone so only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

# Compile model with Adam optimizer
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model. Training runs directly on the in-memory, already
# standardised arrays; no ImageDataGenerator is involved in this cell.
history = model.fit(
    x=x_train,
    y=y_train,
    epochs=10,
    batch_size=32,
    validation_data=(x_validate, y_validate),
    callbacks=[learning_rate_reduction]
)

# Stage 2: unfreeze the last 30 backbone layers for fine-tuning, but keep
# BatchNormalization layers frozen so their pre-trained statistics are not
# overwritten by updates from small batches.
for layer in base_model.layers[-30:]:
    if not isinstance(layer, BatchNormalization):
        layer.trainable = True

# Recompile so the changed trainable flags take effect, with a lower learning rate
model.compile(optimizer=Adam(learning_rate=0.00001), loss='categorical_crossentropy', metrics=['accuracy'])

# Fine-tune model
history_finetune = model.fit(
    x=x_train,
    y=y_train,
    epochs=5,
    batch_size=32,
    validation_data=(x_validate, y_validate),
    callbacks=[learning_rate_reduction]
)


In [None]:
# evaluate() returns [loss, accuracy] for this compiled model; report the accuracy.
train_scores = model.evaluate(x_train, y_train, verbose=1)
accuracy = train_scores[1]
print("Train accuracy = ", accuracy*100, "%")

In [None]:
# evaluate() returns [loss, accuracy] for this compiled model; report the accuracy.
test_scores = model.evaluate(x_test, y_test, verbose=1)
accuracy = test_scores[1]
print("Test: accuracy = ",accuracy*100,"%")

In [None]:
# Write the current `model` to 'DenseNet121_model.h5' (path relative to the working directory).
model.save('DenseNet121_model.h5')