In [None]:
from numpy.random import seed
seed(101)
import tensorflow as tf
tf.random.set_seed(101) 

import pandas as pd
import numpy as npy

import tensorflow
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

import os as op

from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
import itertools
import shutil
import matplotlib.pyplot as plots
%matplotlib inline
import time

In [None]:
# Show what is inside the local dataset archive folder.
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
op.listdir(r'C:\Users\Downloads\skin_cancer\archive')

In [None]:
# Folder layout consumed later through `flow_from_directory`:
#   root_directory/{training_directory, validation_directory}/<class folder>/
root_directory = 'root_directory'
training_directory = op.path.join(root_directory, 'training_directory')
validation_directory = op.path.join(root_directory, 'validation_directory')

# Class folder names; images are later copied into the folder named after their `dx` label.
lesion_class_folders = ['nv', 'mel', 'bkl', 'bcc', 'akiec', 'vasc', 'df']

# `makedirs(..., exist_ok=True)` creates missing parents and leaves existing
# folders alone, so re-running this cell does not raise FileExistsError.
for split_directory in (training_directory, validation_directory):
    for class_folder in lesion_class_folders:
        op.makedirs(op.path.join(split_directory, class_folder), exist_ok=True)



In [None]:
# Load the HAM10000 metadata table; the path is relative to the working directory.
skn_org = pd.read_csv('HAM10000_metadata.csv')

# Preview the first rows.
skn_org.head()

In [None]:
# Count rows per `lesion_id` and keep only the lesions whose `image_id`
# count is exactly 1, then turn `lesion_id` back into a regular column.
skn_dat = (
    skn_org.groupby('lesion_id')
    .count()
    .loc[lambda counts: counts['image_id'] == 1]
    .reset_index()
)

skn_dat.head()

In [None]:
# Tag every row by whether its `lesion_id` occurs more than once.
# `keep=False` flags all occurrences of a repeated lesion, not only the later ones.
duplicate_labels = {True: 'duplicated', False: 'not duplicated'}
skn_org['is_duplicate'] = skn_org['lesion_id'].duplicated(keep=False).map(duplicate_labels)

In [None]:
# Preview the table again, now including the `is_duplicate` column.
skn_org.head()

In [None]:
# (rows, columns) of the full metadata table.
skn_org.shape

In [None]:
# How many rows fall into each of the two duplicate categories.
skn_org['is_duplicate'].value_counts()

In [None]:
# Restrict to rows whose lesion appears only once; the validation set is drawn from these.
single_image_rows = skn_org['is_duplicate'].eq('not duplicated')
skn_dat = skn_org[single_image_rows]

skn_dat.shape

In [None]:
# Diagnosis label of each single-image lesion, used to stratify the split.
y = skn_dat['dx']

# Hold out 18% of the single-image lesions for validation. Stratifying on `dx`
# keeps the class proportions of `skn_dat` in the hold-out set; the other part
# of the split is discarded because the training rows are derived later.
_, valid_skn_dat = train_test_split(skn_dat, test_size=0.18, random_state=11, stratify=y)


In [None]:
# Number of validation rows per diagnosis label.
valid_skn_dat['dx'].value_counts()

In [None]:
# Validation image ids, collected once into a set so every membership test is
# O(1) instead of rebuilding and scanning a list for each row.
validation_image_ids = set(valid_skn_dat['image_id'])


def identify_val_rows(s):
    """Return 'val' when image id `s` belongs to the validation split, else 'trn'.

    `s` is converted with `str()` before the lookup.
    """
    return 'val' if str(s) in validation_image_ids else 'trn'


# Tag every metadata row with its split; all rows not held out for validation
# (including those of multi-image lesions) become training rows.
skn_org['validation_or_train'] = skn_org['image_id'].apply(identify_val_rows)

skn_dat_train = skn_org[skn_org['validation_or_train'] == 'trn']


print("No. of imgs for training",len(skn_dat_train))
print("No. of imgs for validation",len(valid_skn_dat))

In [None]:
# Number of training rows per diagnosis label.
skn_dat_train['dx'].value_counts()

In [None]:
# Number of validation rows per diagnosis label, shown next to the training counts.
valid_skn_dat['dx'].value_counts()

In [None]:
# Index the metadata by `image_id` so a row can be looked up with `.loc[image_id]`.
# The guard makes re-running this cell a no-op; without it the second run
# raises KeyError because the `image_id` column has already become the index.
if skn_org.index.name != 'image_id':
    skn_org.set_index('image_id', inplace=True)

In [None]:
# Folders holding the HAM10000 image files.
# NOTE(review): machine-specific absolute paths; adjust before running elsewhere.
image_source_directories = [
    r'C:\Users\Downloads\skin_cancer\archive\HAM10000_images_part_1',
    r'C:\Users\Downloads\skin_cancer\archive\HAM10000_images_part_2',
]

part_1 = op.listdir(image_source_directories[0])
part_2 = op.listdir(image_source_directories[1])

images_training = list(skn_dat_train['image_id'])
images_validation = list(valid_skn_dat['image_id'])


def _copy_split_images(split_frame, destination_root, source_listings):
    """Copy each image of `split_frame` to `destination_root/<dx>/<image_id>.jpg`.

    Parameters
    ----------
    split_frame : DataFrame with `image_id` and `dx` columns.
    destination_root : folder that contains one sub-folder per `dx` value.
    source_listings : iterable of (folder, set of file names in that folder).

    Every source folder is checked in order; an image id whose file is in
    none of them is skipped silently.
    """
    # The label is read from the split's own row, so the copy does not depend
    # on how the full metadata table is indexed.
    for image_id, lbl in zip(split_frame['image_id'], split_frame['dx']):
        f_name = image_id + '.jpg'
        for source_directory, available_files in source_listings:
            if f_name in available_files:
                srrc = op.path.join(source_directory, f_name)
                dest = op.path.join(destination_root, lbl, f_name)
                shutil.copyfile(srrc, dest)


# Sets make the "is this file in the folder?" test O(1) per image.
source_listings = list(zip(image_source_directories, (set(part_1), set(part_2))))

_copy_split_images(skn_dat_train, training_directory, source_listings)
_copy_split_images(valid_skn_dat, validation_directory, source_listings)
        

In [None]:
# Image count per class folder of the training split.
# The captions (including their trailing spaces) are printed exactly as listed.
training_folder_prefix = 'root_directory/training_directory/'
class_captions = [
    ("Melanocytic Nevi ", 'nv'),
    ("Melanoma ", 'mel'),
    ("Benign Keratosis", 'bkl'),
    ("Basal Cell Carcinoma ", 'bcc'),
    ("Actinic Keratosis ", 'akiec'),
    ("Vascular Lesion ", 'vasc'),
    ("Dermatofibroma", 'df'),
]
for caption, class_folder in class_captions:
    print(caption, len(op.listdir(training_folder_prefix + class_folder)))

In [None]:
# Image count per class folder of the validation split.
# The captions (including their trailing spaces) are printed exactly as listed.
validation_folder_prefix = 'root_directory/validation_directory/'
class_captions = [
    ("Melanocytic Nevi ", 'nv'),
    ("Melanoma ", 'mel'),
    ("Benign Keratosis", 'bkl'),
    ("Basal Cell Carcinoma ", 'bcc'),
    ("Actinic Keratosis ", 'akiec'),
    ("Vascular Lesion ", 'vasc'),
    ("Dermatofibroma", 'df'),
]
for caption, class_folder in class_captions:
    print(caption, len(op.listdir(validation_folder_prefix + class_folder)))

In [None]:
# Classes to oversample; 'nv' is not listed, so its folder is left untouched.
classes = ['mel','bkl','bcc','akiec','vasc','df']

# Settings shared by every class.
augment_directory = 'augment_directory'   # scratch folder, recreated per class
batchsize = 50
image_size = 224
number_of_augmented_images = 6000         # approximate target file count per class

# Random transforms applied to every generated image.
datagenerator = ImageDataGenerator(
    width_shift_range=0.3,
    zoom_range=0.3,
    rotation_range=181,
    vertical_flip=True,
    horizontal_flip=True,
    fill_mode='nearest',
    height_shift_range=0.3
    )

for name in classes:
    img_cls = name
    savefile_path = 'root_directory/training_directory/' + img_cls

    # Start from an empty scratch folder even if an earlier run was interrupted
    # before it could clean up.
    shutil.rmtree(augment_directory, ignore_errors=True)
    image_directory = op.path.join(augment_directory, 'image_dir')
    op.makedirs(image_directory)

    try:
        # `flow_from_directory` reads images from sub-folders, so the class's
        # images are staged in a single sub-folder of the scratch directory.
        for f_name in op.listdir(savefile_path):
            srrc = op.path.join(savefile_path, f_name)
            dst = op.path.join(image_directory, f_name)
            shutil.copyfile(srrc, dst)

        # Every batch drawn from this iterator is also written to the class's
        # training folder through `save_to_dir`.
        augment_data_generator = datagenerator.flow_from_directory(augment_directory,
                                               save_to_dir=savefile_path,
                                               save_format='jpg',
                                               target_size=(image_size,image_size),
                                               batch_size=batchsize)

        number_of_files = len(op.listdir(image_directory))
        number_of_batches = int(npy.ceil((number_of_augmented_images - number_of_files)/batchsize))

        # A class that already holds at least the target count yields a
        # non-positive batch count, so the loop body is skipped.
        # The last batch stays bound to `images`, which the preview cell reads.
        for _ in range(number_of_batches):
            images, labels = next(augment_data_generator)
    finally:
        # Remove the scratch folder even when a step above failed, so the
        # cell can be re-run.
        shutil.rmtree(augment_directory, ignore_errors=True)

In [None]:
# Image count per class folder of the training split, after augmentation.
# The captions (including their trailing spaces) are printed exactly as listed.
print("Number of images per class after data augmentation")
training_folder_prefix = 'root_directory/training_directory/'
class_captions = [
    ("Melanocytic Nevi ", 'nv'),
    ("Melanoma ", 'mel'),
    ("Benign Keratosis", 'bkl'),
    ("Basal Cell Carcinoma ", 'bcc'),
    ("Actinic Keratosis ", 'akiec'),
    ("Vascular Lesion ", 'vasc'),
    ("Dermatofibroma", 'df'),
]
for caption, class_folder in class_captions:
    print(caption, len(op.listdir(training_folder_prefix + class_folder)))

In [None]:
# Image count per class folder of the validation split.
# The captions (including their trailing spaces) are printed exactly as listed.
validation_folder_prefix = 'root_directory/validation_directory/'
class_captions = [
    ("Melanocytic Nevi ", 'nv'),
    ("Melanoma ", 'mel'),
    ("Benign Keratosis", 'bkl'),
    ("Basal Cell Carcinoma ", 'bcc'),
    ("Actinic Keratosis ", 'akiec'),
    ("Vascular Lesion ", 'vasc'),
    ("Dermatofibroma", 'df'),
]
for caption, class_folder in class_captions:
    print(caption, len(op.listdir(validation_folder_prefix + class_folder)))

In [None]:
import numpy as np
import matplotlib.pyplot as plot

def skin_square(images, figure_size=(18, 9), row_count=6, interp=False):
    """Draw `images` on a grid with `row_count` rows and hidden axes.

    Parameters
    ----------
    images : indexable collection of images. When the first element is a
        NumPy array, the collection is stacked, cast to uint8 and, if its
        last axis is not of length 3, transposed with axes (0, 2, 3, 1).
    figure_size : figure size passed to matplotlib.
    row_count : number of grid rows; the column count is derived from it.
    interp : when falsy, images are drawn with interpolation 'none';
        otherwise matplotlib's default interpolation is used.
    """
    if isinstance(images[0], npy.ndarray):
        images = npy.array(images).astype(npy.uint8)
        last_axis_is_rgb = images.shape[-1] == 3
        if not last_axis_is_rgb:
            images = images.transpose((0, 2, 3, 1))

    interpolation = None if interp else 'none'
    fig = plot.figure(figsize=figure_size)
    # Smallest column count that fits all images into `row_count` rows.
    col_count = (len(images) - 1) // row_count + 1

    for position in range(1, len(images) + 1):
        space = fig.add_subplot(row_count, col_count, position)
        space.axis('off')
        plot.imshow(images[position - 1], interpolation=interpolation)

    plot.show()


# Preview a batch of augmented images. `images` is not defined in this cell:
# it is the last batch left behind by the augmentation loop.
skin_square(images)

In [None]:
train_imgs_path = 'root_directory/training_directory'
validation_imgs_path = 'root_directory/validation_directory'

# Row counts of the metadata splits.
# NOTE(review): `total_train_images` counts metadata rows, not the files in the
# training folder, so it does not include the augmented images.
total_train_images = len(skn_dat_train)
total_val_images = len(valid_skn_dat)
t_batchsize = 10
v_batchsize = 10
img_size = 224

# Batches needed to see every counted image once. `npy.ceil` returns a float,
# so the result is cast to the integer step count Keras documents.
steps_for_train = int(npy.ceil(total_train_images / t_batchsize))
steps_for_val = int(npy.ceil(total_val_images / v_batchsize))


In [None]:
# One generator configuration for all three iterators: every image goes
# through EfficientNet's `preprocess_input` and is resized to a square.
datagenerator = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.efficientnet.preprocess_input
)
square_size = (img_size, img_size)

# Training batches.
btch_trn = datagenerator.flow_from_directory(
    train_imgs_path, target_size=square_size, batch_size=t_batchsize
)

# Validation batches used while fitting.
btch_val = datagenerator.flow_from_directory(
    validation_imgs_path, target_size=square_size, batch_size=v_batchsize
)

# Same validation folder, read one image at a time without shuffling; used
# for the final evaluation and for predictions.
tst_btch = datagenerator.flow_from_directory(
    validation_imgs_path, target_size=square_size, batch_size=1, shuffle=False
)

In [None]:
from tensorflow.keras.metrics import categorical_accuracy, top_k_categorical_accuracy

def best3_acc(y_actual, y_predict_value):
    """Top-3 categorical accuracy of `y_predict_value` against `y_actual`.

    Kept as a named function because the callbacks in this notebook monitor
    it as 'val_best3_acc'.
    """
    return top_k_categorical_accuracy(y_true=y_actual, y_pred=y_predict_value, k=3)

def best2_acc(y_actual, y_predict_value):
    """Top-2 categorical accuracy of `y_predict_value` against `y_actual`."""
    return top_k_categorical_accuracy(y_true=y_actual, y_pred=y_predict_value, k=2)

In [None]:
# Input geometry handed to the model constructors: height, width, colour channels.
height, width, channels = 224, 224, 3
img_shape = (height, width, channels)

In [None]:
model_name = 'EfficientNetB4'

# ImageNet-initialised EfficientNetB4 without its classification top;
# `pooling='max'` makes the backbone output one vector per image.
root_model = tf.keras.applications.EfficientNetB4(
    include_top=False, weights="imagenet", input_shape=img_shape, pooling='max'
)

# Classification head: batch norm -> regularised 256-unit dense -> dropout -> 7-way softmax.
z = root_model.output
z = tf.keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001)(z)
# The L2 factor is passed positionally: the `l=` keyword is only a legacy
# alias that not every Keras release accepts.
z = Dense(
    256,
    kernel_regularizer=tf.keras.regularizers.l2(0.016),
    activity_regularizer=tf.keras.regularizers.l1(0.006),
    bias_regularizer=tf.keras.regularizers.l1(0.006),
    activation='relu',
)(z)
z = Dropout(rate=.45, seed=123)(z)
output_layer = Dense(7, activation='softmax')(z)

mdl = Model(inputs=root_model.input, outputs=output_layer)
mdl.compile(
    tf.keras.optimizers.Adamax(learning_rate=.001),
    loss='categorical_crossentropy',
    metrics=[categorical_accuracy, best2_acc, best3_acc],
)

In [None]:
# Print the layer-by-layer summary of the model held in `mdl`.
mdl.summary()

In [None]:
# Loss weight per class index (passed to `fit` as `class_weight`): every class
# counts once, except index 4, which counts double.
# NOTE(review): index 4 is presumably 'mel', going by the label order used for
# the confusion matrix; confirm against `class_indices`.
weights = {class_index: 1.0 for class_index in range(7)}
weights[4] = 2.0

In [None]:
#pip install --user tensorflow==2.9.1

In [None]:
# Record the TensorFlow version this run uses.
print(tf.__version__)

In [None]:
# Checkpoint file; only the epoch with the best validation top-3 accuracy is kept.
file_path = "modeltensorefficentb4.h5"
ckpt = ModelCheckpoint(
    file_path,
    monitor='val_best3_acc',
    save_best_only=True,
    mode='max',
)

# Halve the learning rate when validation top-3 accuracy has not improved for 4 epochs.
lr_scheduler = ReduceLROnPlateau(
    monitor='val_best3_acc',
    factor=0.5,
    patience=4,
    mode='max',
)

callbacks = [ckpt, lr_scheduler]

# Each epoch draws 128 training batches and `steps_for_val` validation batches.
model_cycle = mdl.fit(
    btch_trn,
    steps_per_epoch=128,
    class_weight=weights,
    validation_data=btch_val,
    validation_steps=steps_for_val,
    epochs=50,
    verbose=1,
    callbacks=callbacks,
)

In [None]:
# Names of the values `evaluate` returns, in order (the loss plus the compiled metrics).
mdl.metrics_names

In [None]:
# Evaluate the model as it stands after the last training epoch.
# `Model.evaluate` accepts generators directly and replaces the deprecated
# `evaluate_generator`. `tst_btch` yields one unshuffled image per step, so
# `len(valid_skn_dat)` steps are requested.
(validation_loss,
 validation_cat_accuracy,
 validation_top2_accuracy,
 validation_top3_accuracy) = mdl.evaluate(tst_btch, steps=len(valid_skn_dat))

print('The validation loss:', validation_loss)
print('The validation category accuracy:', validation_cat_accuracy)
print('The top two accuarcy:', validation_top2_accuracy)
print('The top three validation accuarcy', validation_top3_accuracy)

In [None]:
# Restore the weights saved by the checkpoint callback, then evaluate again.
mdl.load_weights('modeltensorefficentb4.h5')

# `Model.evaluate` accepts generators directly and replaces the deprecated
# `evaluate_generator`. `tst_btch` yields one unshuffled image per step, so
# `len(valid_skn_dat)` steps are requested.
(validation_loss,
 validation_cat_accuracy,
 validation_top2_accuracy,
 validation_top3_accuracy) = mdl.evaluate(tst_btch, steps=len(valid_skn_dat))

print('The validation loss:', validation_loss)
print('The validation category accuracy:', validation_cat_accuracy)
print('The top two accuarcy:', validation_top2_accuracy)
print('The top three validation accuarcy', validation_top3_accuracy)

In [None]:
import matplotlib.pyplot as plots

# History key -> [legend label of the training curve, legend label of the validation curve].
metrics = {
    'loss': ['Train loss', 'Validation loss'],
    'categorical_accuracy': ['Train catagorical accuracy', 'Validation catagorical accuracy'],
    'best2_acc': ['Train top 2 accuracy', 'Validation top 2 accuracy'],
    'best3_acc': ['Train top 3 accuracy', 'Validation top 3 accuracy']
}

# One subplot per metric, stacked vertically.
fig, axes = plots.subplots(len(metrics), 1, figsize=(8, 6 * len(metrics)))

for i, (metric, titles) in enumerate(metrics.items()):
    # `fit` returns a History object; its per-epoch values live in `.history`,
    # with the validation series stored under the 'val_' prefix.
    train_metric = model_cycle.history[metric]
    validation_metric = model_cycle.history['val_' + metric]

    ax = axes[i]
    ax.plot(train_metric, 'bo', label=titles[0])
    ax.plot(validation_metric, 'b', label=titles[1])
    ax.set_title(titles[0])
    ax.set_xlabel('Epoch')
    ax.legend()

plots.tight_layout()
plots.show()

### Create a Confusion Matrix

In [None]:
# Labels exposed by the evaluation generator's `classes` attribute; used
# below as the ground truth for the confusion matrix.
test_labels = tst_btch.classes

In [None]:
# Display the label array.
test_labels

In [None]:
# Show the class-name -> index mapping of the evaluation generator.
tst_btch.class_indices

In [None]:
# Class scores for the evaluation generator. The trained model is bound to
# `mdl`; no variable called `model` exists in this notebook.
predictions = mdl.predict(tst_btch, steps=len(valid_skn_dat), verbose=1)

In [None]:
# Shape of the prediction array.
predictions.shape

In [None]:
def cfsn_matrix(matrix, labels, name='Confusion_matrix', color_map='Blues'):
    """Draw `matrix` as an annotated heat map with `labels` on both axes.

    Parameters
    ----------
    matrix : 2-D array of integer counts, indexed [true label, predicted label]
        (cells are formatted with the 'd' format code).
    labels : tick labels, used for both the x and the y axis.
    name : plot title.
    color_map : matplotlib colormap name.
    """
    print('Confusion matrix')

    plots.imshow(matrix, cmap=color_map, aspect='auto')
    plots.title(name)
    plots.colorbar()

    markers = npy.arange(len(labels))
    plots.xticks(markers, labels, rotation=45, ha='right')
    plots.yticks(markers, labels)

    # Cells darker than half the maximum get white text so the number stays readable.
    threshold = matrix.max() / 2.
    for row, col in npy.ndindex(matrix.shape):
        cell = matrix[row, col]
        colour = "white" if cell > threshold else "black"
        # x is the column (predicted label), y the row (true label).
        plots.text(col, row, f"{cell:d}", ha='center', va='center', color=colour)

    plots.xlabel('Predicted label')
    plots.ylabel('True label')
    plots.tight_layout()
    plots.show()



In [None]:
# Shape of the ground-truth label array, for comparison with `predictions.shape`.
test_labels.shape

In [None]:
# Import the scikit-learn function under an alias: the result keeps the name
# `confusion_matrix` (read by the plotting cell), which would otherwise
# overwrite the function and make a re-run of this cell fail.
from sklearn.metrics import confusion_matrix as compute_confusion_matrix

# Rows are true labels, columns are predicted labels (argmax of the class scores).
confusion_matrix = compute_confusion_matrix(test_labels, predictions.argmax(axis=1))

In [None]:
# Show the class-name -> index mapping again, to check the label order used for the plot.
tst_btch.class_indices

In [None]:
# Class names ordered by their generator index, so tick labels always match
# the matrix rows and columns instead of relying on a hand-typed list.
confusion_matrix_labels = sorted(tst_btch.class_indices, key=tst_btch.class_indices.get)

cfsn_matrix(confusion_matrix, confusion_matrix_labels, name='Confusion Matrix')

In [None]:
# Ground truth from the generator, and the predicted class taken as the
# index of the highest score in each prediction row.
y_actual = tst_btch.classes
y_predict_value = predictions.argmax(axis=1)

In [None]:
from sklearn.metrics import classification_report

# Text report comparing `y_actual` with `y_predict_value`, with the classes named by `confusion_matrix_labels`.
clf_rep = classification_report(y_actual, y_predict_value, target_names=confusion_matrix_labels)

print(clf_rep)

In [None]:
model_name = 'resnet101'

# ImageNet-initialised ResNet101 without its classification top;
# `pooling='max'` makes the backbone output one vector per image.
# NOTE(review): the generators this model is trained on apply EfficientNet's
# `preprocess_input`; ResNet presumably expects
# `tf.keras.applications.resnet.preprocess_input`. Confirm and rebuild if so.
base_model = tf.keras.applications.resnet.ResNet101(
    include_top=False, weights="imagenet", input_shape=img_shape, pooling='max'
)

# Same classification head as the EfficientNet model.
zz = base_model.output
zz = tf.keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001)(zz)
# The L2 factor is passed positionally: the `l=` keyword is only a legacy
# alias that not every Keras release accepts.
zz = Dense(
    256,
    kernel_regularizer=tf.keras.regularizers.l2(0.016),
    activity_regularizer=tf.keras.regularizers.l1(0.006),
    bias_regularizer=tf.keras.regularizers.l1(0.006),
    activation='relu',
)(zz)
zz = Dropout(rate=.45, seed=123)(zz)
output = Dense(7, activation='softmax')(zz)

# Rebinds `mdl`: from here on it refers to the ResNet model.
mdl = Model(inputs=base_model.input, outputs=output)
# The top-k metrics defined in this notebook are `best2_acc` / `best3_acc`;
# the callbacks monitor 'val_best3_acc', so these exact functions are needed.
mdl.compile(
    tf.keras.optimizers.Adamax(learning_rate=.001),
    loss='categorical_crossentropy',
    metrics=[categorical_accuracy, best2_acc, best3_acc],
)

In [None]:
# Print the layer-by-layer summary of the model currently held in `mdl`.
mdl.summary()

In [None]:
# Checkpoint file; only the epoch with the best validation top-3 accuracy is kept.
file_path = "resnet101.h5"
ckpt = ModelCheckpoint(file_path, monitor='val_best3_acc', verbose=1,
                       save_best_only=True, mode='max')

# Halve the learning rate when validation top-3 accuracy has not improved for 4 epochs.
lr_scheduler = ReduceLROnPlateau(monitor='val_best3_acc', factor=0.5, patience=4,
                                 verbose=1, mode='max')

callbacks = [ckpt, lr_scheduler]

# `steps_for_val` is the validation step count computed with the other
# training settings; `val_steps` is not defined anywhere in this notebook.
model_cycle = mdl.fit(btch_trn, steps_per_epoch=128,
                      class_weight=weights,
                      validation_data=btch_val,
                      validation_steps=steps_for_val,
                      epochs=1, verbose=1,
                      callbacks=callbacks)

In [None]:
# Restore the weights saved by the checkpoint callback, then evaluate.
mdl.load_weights('resnet101.h5')

# `Model.evaluate` accepts generators directly and replaces the deprecated
# `evaluate_generator`. `tst_btch` yields one unshuffled image per step, so
# `len(valid_skn_dat)` steps are requested.
(validation_loss,
 validation_cat_accuracy,
 validation_top2_accuracy,
 validation_top3_accuracy) = mdl.evaluate(tst_btch, steps=len(valid_skn_dat))

print('The validation loss:', validation_loss)
print('The validation category accuracy:', validation_cat_accuracy)
print('The top two accuarcy:', validation_top2_accuracy)
print('The top three validation accuarcy', validation_top3_accuracy)