In [None]:
import os
import numpy as np
import pandas as pd
from glob import glob
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
from sklearn.preprocessing import label_binarize
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import itertools
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, callbacks
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import legacy as legacy_optimizers
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
from keras.optimizers import Adam
import zipfile
from google.colab import drive
from tensorflow.keras.regularizers import l2

In [None]:
def plot_model_history(model_history):
    """Plot training vs. validation accuracy and loss curves side by side.

    Args:
        model_history: Object exposing a ``history`` dict with the keys
            'accuracy', 'val_accuracy', 'loss' and 'val_loss', each mapping
            to a per-epoch sequence of values (e.g. the object returned by
            ``model.fit``).
    """
    fig, axs = plt.subplots(1, 2, figsize=(15, 5))

    # (axes, history key, panel title, y-axis label) for each panel.
    panels = (
        (axs[0], 'accuracy', 'Model Accuracy', 'Accuracy'),
        (axs[1], 'loss', 'Model Loss', 'Loss'),
    )
    for ax, metric, title, ylabel in panels:
        train_values = model_history.history[metric]
        val_values = model_history.history['val_' + metric]
        n_epochs = len(train_values)

        ax.plot(range(1, n_epochs + 1), train_values)
        ax.plot(range(1, len(val_values) + 1), val_values)
        ax.set_title(title)
        ax.set_ylabel(ylabel)
        ax.set_xlabel('Epoch')
        # Aim for roughly ten ticks. The step is clamped to at least 1 because
        # n_epochs // 10 is 0 for runs shorter than 10 epochs, and a zero step
        # makes np.arange raise.
        tick_step = max(1, n_epochs // 10)
        ax.set_xticks(np.arange(1, n_epochs + 1, tick_step))
        ax.legend(['train', 'val'], loc='best')
    plt.show()

In [None]:
# Mount Google Drive at /content/drive; the dataset is read from there below.
drive.mount('/content/drive')

In [None]:
# Root folder of the dataset on the mounted Drive.
base_skin_dir = os.path.join('/content/drive/MyDrive', 'skin_disease_dataset')

# Merge the images from every sub-folder of the dataset into a single lookup:
# image id (file name without its extension) -> full path of the .jpg file.
imageid_path_dict = dict(
    (os.path.splitext(os.path.basename(jpg_path))[0], jpg_path)
    for jpg_path in glob(os.path.join(base_skin_dir, '*', '*.jpg'))
)

# Short diagnosis code -> human-readable lesion name.
lesion_type_dict = dict(
    nv='Melanocytic nevi',
    mel='Melanoma',
    bkl='Benign keratosis-like lesions ',
    bcc='Basal cell carcinoma',
    akiec='Actinic keratoses',
    vasc='Vascular lesions',
    df='Dermatofibroma',
)

In [None]:
# Load the metadata table and derive three helper columns:
#   path          - image file location looked up by image_id
#   cell_type     - readable lesion name looked up by the 'dx' code
#   cell_type_idx - integer category code of cell_type
skin_df = pd.read_csv(os.path.join(base_skin_dir, 'HAM10000_metadata.csv')).assign(
    path=lambda frame: frame['image_id'].map(imageid_path_dict.get),
    cell_type=lambda frame: frame['dx'].map(lesion_type_dict.get),
    cell_type_idx=lambda frame: pd.Categorical(frame['cell_type']).codes,
)

skin_df.head()

In [None]:
# Count the missing values in each column.
skin_df.isnull().sum()

In [None]:
# Impute missing ages with the mean age.
# The result is assigned back to the column rather than calling
# fillna(inplace=True) on the `skin_df['age']` selection: that chained in-place
# form warns on recent pandas and leaves skin_df unchanged under Copy-on-Write.
skin_df['age'] = skin_df['age'].fillna(skin_df['age'].mean())
# Re-check the per-column missing-value counts after imputation.
skin_df.isnull().sum()

In [None]:
# Show the dtype of every column in the metadata frame.
print(skin_df.dtypes)

In [None]:
# Bar chart: number of records per lesion (cell) type, one colour per bar.
fig, ax1 = plt.subplots(figsize=(10, 5))
colors = list('rgbcmyk')
skin_df['cell_type'].value_counts().plot.bar(ax=ax1, color=colors)
ax1.set_ylabel('Count')
ax1.set_xlabel('Cell Type')
ax1.set_title('Distribution of Cell Types')

In [None]:
# Bar chart: number of records per value of the 'dx_type' column.
plt.figure(figsize=(8, 6))
skin_df['dx_type'].value_counts().plot.bar(color=list('rgbcm'))
plt.title('Distribution of Diagnosis Types')
plt.xlabel('Diagnosis Type')
plt.ylabel('Count')
plt.show()

In [None]:
# Count plot: number of records per lesion localization.
fig, ax1 = plt.subplots(figsize=(10, 5))
sns.countplot(x='localization', data=skin_df, palette='viridis', ax=ax1)
ax1.set(
    xlabel='Localization',
    ylabel='Count',
    title='Distribution of Lesion Localizations',
)
# Rotate the category labels so they do not overlap.
ax1.tick_params(axis='x', rotation=90)
plt.show()

In [None]:
# Density histogram of patient age with a KDE curve overlaid.
fig, ax1 = plt.subplots(figsize=(10, 5))
sns.histplot(x='age', data=skin_df, stat='density', kde=True, color='purple', ax=ax1)
ax1.set(xlabel='Age', ylabel='Density', title='Age Distribution')
plt.show()

In [None]:
# Count plot: number of records per value of the 'sex' column.
fig, ax1 = plt.subplots(figsize=(10, 5))
sns.countplot(x='sex', data=skin_df, palette='mako', ax=ax1)
ax1.set(xlabel='Sex', ylabel='Count', title='Distribution of Patient Gender')
plt.show()

In [None]:
# Scatter of age against the lesion class index, coloured by lesion name.
sns.scatterplot(data=skin_df, x='age', y='cell_type_idx', hue='cell_type')
# Anchor the legend's upper-left corner just outside the right edge of the axes.
plt.legend(loc='upper left', bbox_to_anchor=(1.05, 1), borderaxespad=0.)
plt.show()

In [None]:
# Define a function to read and resize images
def read_image(filepath, size):
    """Load an image file, resize it and return the pixels as a NumPy array.

    Args:
        filepath: Path of the image file, handed to ``Image.open``.
        size: Target size handed to ``Image.resize``; PIL expects it as
            ``(width, height)``.

    Returns:
        ``np.ndarray`` built from the resized image.
    """
    # The context manager closes the underlying file as soon as the pixels
    # have been copied out, instead of leaving the handle to the garbage
    # collector while thousands of images are being loaded.
    with Image.open(filepath) as img:
        return np.asarray(img.resize(size))

# Load every file listed in 'path' through read_image with size (100, 75) and
# store the resulting arrays in a new 'image' column.
# NOTE(review): a row whose 'path' is missing (image_id not found on disk)
# would be passed to read_image as-is - confirm every id resolves to a file.
skin_df['image'] = skin_df['path'].apply(lambda x: read_image(x, (100, 75)))

In [None]:
# Grid of example images: one row per cell type, n_samples images per row.
n_samples = 5
fig, m_axs = plt.subplots(7, n_samples, figsize=(4 * n_samples, 3 * 7))
rows_by_type = skin_df.sort_values(['cell_type']).groupby('cell_type')
for ax_row, (type_name, type_rows) in zip(m_axs, rows_by_type):
    # The row's label goes on its first panel.
    ax_row[0].set_title(type_name)
    # Fixed seed so the same examples are drawn on every run.
    picked = type_rows.sample(n_samples, random_state=1234)
    for c_ax, sample_image in zip(ax_row, picked['image']):
        c_ax.imshow(sample_image)
        c_ax.axis('off')
plt.suptitle('Sample Images for Each Cell Type', fontsize=20)
plt.tight_layout()
# Leave head-room for the figure-level title.
plt.subplots_adjust(top=0.9)
fig.savefig('category_samples.png', dpi=300)
plt.show()

In [None]:
# Tally the distinct array shapes found in the 'image' column.
skin_df['image'].map(lambda x: x.shape).value_counts()

In [None]:
# Model inputs: every column except the class index; labels: the class index.
target = skin_df['cell_type_idx']
features = skin_df.drop(columns=['cell_type_idx'])

In [None]:
# Hold out 20% of the rows as the test set; the fixed random_state keeps the
# split identical across runs.
x_train_o, x_test_o, y_train_o, y_test_o = train_test_split(features, target, test_size=0.20,random_state=1234)

In [None]:
# Split data into train and test sets
# NOTE(review): same arguments and random_state as the split in the previous
# cell, so x_train_o / y_train_o are simply re-assigned; x_test / y_test here
# are frames that later cells overwrite with image arrays / one-hot labels.
x_train_o, x_test, y_train_o, y_test = train_test_split(features, target, test_size=0.20, random_state=1234)

# Split train set into train and validation sets
# NOTE(review): x_train, x_validate, y_train and y_validate produced here are
# all re-assigned by later cells (which use a different validation fraction),
# so the shapes printed below do not describe the arrays the model is fit on.
x_train, x_validate, y_train, y_validate = train_test_split(x_train_o, y_train_o, test_size=0.25, random_state=1234)

# Print the shapes of the resulting sets
print("Train set shape: ", x_train.shape, y_train.shape)
print("Validation set shape: ", x_validate.shape, y_validate.shape)
print("Test set shape: ", x_test.shape, y_test.shape)

In [None]:
# Stack the per-row image arrays of each split into one array per split.
x_train = np.asarray(x_train_o['image'].tolist())
x_test = np.asarray(x_test_o['image'].tolist())

# Standardisation statistics are taken from the training images only.
x_train_mean = np.mean(x_train)
x_train_std = np.std(x_train)

# Still computed so the names remain available for inspection, but they are
# deliberately NOT used for scaling (see below).
x_test_mean = np.mean(x_test)
x_test_std = np.std(x_test)

# Apply the *training* mean/std to both splits. Scaling the test set with its
# own statistics would feed the model differently-distributed inputs than it
# was trained on and let test-set information shape the preprocessing.
x_train = (x_train - x_train_mean) / x_train_std
x_test = (x_test - x_train_mean) / x_train_std

In [None]:
# One-hot encode the integer class labels of both splits (7 classes).
y_train, y_test = (
    to_categorical(class_indices, num_classes=7)
    for class_indices in (y_train_o, y_test_o)
)

In [None]:
# Carve 10% of the training arrays (images and their labels) out as the
# validation set; random_state fixes the split.
x_train, x_validate, y_train, y_validate = train_test_split(x_train, y_train, test_size = 0.1, random_state = 2)

In [None]:
# Give every split the layout (samples, height=75, width=100, channels=3).
x_train, x_test, x_validate = (
    images.reshape((images.shape[0], 75, 100, 3))
    for images in (x_train, x_test, x_validate)
)

In [None]:
# Training configuration used by the model-definition and fitting cells.
input_shape = (75, 100, 3)  # matches the (75, 100, 3) reshape applied to the image arrays
num_classes = 7  # size of the final softmax layer
l2_regularizer = l2(0.001)  # one regularizer object passed as kernel_regularizer to the layers
batch_size = 16  # batch size handed to datagen.flow
epochs = 100  # number of epochs handed to model.fit

In [None]:
# CNN classifier: three convolutional stages (64, 128, 256 filters) followed by
# a dense head. Each stage is two Conv-BatchNorm-ReLU units, a 2x2 max-pool and
# 25% dropout.
model = models.Sequential()

is_first_conv = True
for n_filters in (64, 128, 256):
    for conv_idx in range(2):
        # Only the very first layer declares the input shape.
        shape_kwargs = {'input_shape': input_shape} if is_first_conv else {}
        is_first_conv = False
        model.add(layers.Conv2D(n_filters, (3, 3), padding='same',
                                kernel_regularizer=l2_regularizer, **shape_kwargs))
        model.add(layers.BatchNormalization())
        model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(layers.Dropout(0.25))

# Classification head: 512-unit dense layer with BatchNorm/ReLU and 50%
# dropout, then a softmax over the classes.
model.add(layers.Flatten())
model.add(layers.Dense(512, kernel_regularizer=l2_regularizer))
model.add(layers.BatchNormalization())
model.add(layers.Activation('relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(num_classes, activation='softmax'))
model.summary()


In [None]:
# Adam with a small learning rate. `learning_rate` is the supported keyword;
# the old `lr` alias is deprecated and rejected by current Keras releases.
optimizer = Adam(learning_rate=0.0001)
# Multi-class setup: categorical cross-entropy on one-hot labels, tracking accuracy.
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Compile the model
# NOTE(review): the previous cell already compiles the model with the same
# optimizer, loss and metrics, so this second call looks redundant.
model.compile(optimizer = optimizer , loss = "categorical_crossentropy", metrics=["accuracy"])

In [None]:
# Set a learning rate annealer: halve the learning rate when validation
# accuracy has not improved for 3 epochs, never going below 1e-5.
# The monitored key must be 'val_accuracy': the model is compiled with
# metrics=['accuracy'], so that is the name under which the metric is logged
# (the same key plot_model_history reads). With 'val_acc' the callback never
# finds its metric and the learning rate is never reduced.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy',
                                            patience=3,
                                            verbose=1,
                                            factor=0.5,
                                            min_lr=0.00001)

In [None]:
# With data augmentation to prevent overfitting
datagen = ImageDataGenerator(
    # Random geometric transforms.
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
    # Random brightness factor drawn from this range.
    brightness_range=(0.9, 1.1),
    # All generator-side centering / normalisation / whitening is switched off.
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
)

datagen.fit(x_train)

In [None]:
# Fit the model on augmented batches, validating on the held-out split.
history = model.fit(
    datagen.flow(x_train, y_train, batch_size=batch_size),
    epochs=epochs,
    validation_data=(x_validate, y_validate),
    verbose=1,
    # One epoch = one pass over the training set, i.e. samples // batch_size
    # batches. (The original line also lacked the trailing comma before
    # `callbacks`, which made the whole call a SyntaxError.)
    steps_per_epoch=x_train.shape[0] // batch_size,
    callbacks=[learning_rate_reduction],
)

In [None]:
# Evaluate on the test split, then on the validation split; each call is
# unpacked as (loss, accuracy).
loss, accuracy = model.evaluate(x_test, y_test, verbose=1)
loss_v, accuracy_v = model.evaluate(x_validate, y_validate, verbose=1)
print("Validation: accuracy = %f  ;  loss_v = %f" % (accuracy_v, loss_v))
print("Test: accuracy = %f  ;  loss = %f" % (accuracy, loss))
# Persist the trained model to Google Drive as an HDF5 file.
model.save('/content/drive/MyDrive/neww_dr_skin_model.h5')

In [None]:
# Draw the train/validation accuracy and loss curves for the run above.
plot_model_history(history)