## Skin Cancer Classification baseline model

In [1]:
import os
import shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import sklearn

### Loading Dataset

In [2]:
os.mkdir("HAM_Dataset")
base = "HAM_Dataset"

os.mkdir(os.path.join(base, "MEL"))
os.mkdir(os.path.join(base, "NV"))
os.mkdir(os.path.join(base, "BCC"))
os.mkdir(os.path.join(base, "AKIEC"))
os.mkdir(os.path.join(base, "BKL"))
os.mkdir(os.path.join(base, "DF"))
os.mkdir(os.path.join(base, "VASC"))

print(os.listdir(base))

['BKL', 'NV', 'AKIEC', 'MEL', 'DF', 'BCC', 'VASC']


In [None]:
for image in os.listdir('ISIC2018_Task3_Training_Input'):
    if "jpg" not in image:
        os.remove('ISIC2018_Task3_Training_Input/'+image)

for image in os.listdir('ISIC2018_Task3_Training_Input'):
    if "jpg" not in image:
        print(image)

In [None]:
mapping = {0:"MEL", 1:"NV", 2:"BCC", 3:"AKIEC", 4:"BKL", 5:"DF", 6:"VASC"}

df_labels = pd.read_csv("../input/isictruth/ISIC2018GroundTruth.csv")
for i in range(len(df_labels)):
    labels = df_labels.iloc[i,1:]
    df_labels.loc[i,"label"] = mapping[list(labels).index(1)]

#df_labels["label"]=df_labels["label"].astype(int)
df_labels.set_index('image', inplace=True)

In [5]:
df_labels['label'].value_counts()

NV       6705
MEL      1113
BKL      1099
BCC       514
AKIEC     327
VASC      142
DF        115
Name: label, dtype: int64


### Computing class weights

In [10]:
from sklearn.utils import class_weight

class_weights = class_weight.compute_class_weight('balanced',
                                                  classes=['AKIEC', 'BCC', 'BKL', 'DF', 'MEL', 'NV', 'VASC'],
                                                  y=df_labels["label"])
class_wt_dict=dict(enumerate(class_weights))
class_wt_dict

{0: 4.375273044997815,
 1: 2.78349082823791,
 2: 1.301832835044846,
 3: 12.440993788819876,
 4: 1.2854575792581184,
 5: 0.21338020666879728,
 6: 10.075452716297788}


In [18]:
from tqdm import tqdm

images = os.listdir('ISIC2018_Task3_Training_Input')

for image in tqdm(images):
    fname=image[:-4]
    label=df_labels.loc[fname, "label"]
    src = os.path.join('ISIC2018_Task3_Training_Input', image)
    dst = os.path.join('HAM_Dataset', label, image)
    shutil.copyfile(src, dst)


100%|██████████| 10015/10015 [00:06<00:00, 1625.40it/s]



### Data Augmentation

In [11]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.resnet_v2 import preprocess_input as base_preprocess

image_gen = ImageDataGenerator(rotation_range=30,
                               width_shift_range=0.1,
                               height_shift_range=0.1,
                               shear_range=0.1,
                               zoom_range=0.2,
                               horizontal_flip=True,
                               fill_mode='nearest',
                               rescale=1/255,
                               validation_split=0.15)

data_dir = 'HAM_Dataset'
batch_size = 128
target_size = (224,224)
train_image_gen = image_gen.flow_from_directory(data_dir, 
                                          target_size=target_size,
                                          color_mode='rgb',
                                          batch_size=batch_size,
                                          class_mode='categorical',
                                          subset="training")

test_image_gen = image_gen.flow_from_directory(data_dir, 
                                          target_size=target_size, 
                                          color_mode='rgb',
                                          batch_size=batch_size,
                                          class_mode='categorical',
                                          shuffle=False,
                                          subset="validation")

print(test_image_gen.class_indices)

Found 8516 images belonging to 7 classes.
 Found 1499 images belonging to 7 classes.
 {'AKIEC': 0, 'BCC': 1, 'BKL': 2, 'DF': 3, 'MEL': 4, 'NV': 5, 'VASC': 6}


### Baseline CNN Model

In [14]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Flatten, Dropout

model = Sequential()

model.add( Conv2D(filters=64, kernel_size=(3,3), input_shape=(224,224,3), activation="relu") )
model.add( MaxPool2D(pool_size=(2,2)) )

model.add( Conv2D(filters=64, kernel_size=(3,3), activation="relu") )
model.add( MaxPool2D(pool_size=(2,2)) )

model.add( Conv2D(filters=128, kernel_size=(3,3), activation="relu") )
model.add( MaxPool2D(pool_size=(2,2)) )

model.add( Conv2D(filters=256, kernel_size=(3,3), activation="relu") )
model.add( MaxPool2D(pool_size=(2,2)) )

model.add(Flatten())
model.add(Dense(128, activation="relu"))
model.add(Dropout(0.5))

model.add(Dense(7, activation="softmax"))

model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model.summary()


Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_12 (Conv2D)           (None, 222, 222, 64)      1792      
_________________________________________________________________
max_pooling2d_12 (MaxPooling (None, 111, 111, 64)      0         
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 109, 109, 64)      36928     
_________________________________________________________________
max_pooling2d_13 (MaxPooling (None, 54, 54, 64)        0         
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 52, 52, 128)       73856     
_________________________________________________________________
max_pooling2d_14 (MaxPooling (None, 26, 26, 128)       0         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 24, 24, 256)     

### Training and Validation

In [None]:
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint

lr_reduce = ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=1,mode='max', min_lr=0.00001,verbose=1)
early_stop = EarlyStopping(monitor="val_loss", patience=2, verbose=1)
model_chkpt = ModelCheckpoint('best_model_dn121.hdf5',save_best_only=True, monitor='val_accuracy',verbose=1)

callback_list = [model_chkpt,lr_reduce]

In [19]:
history = model.fit(train_image_gen,
                    epochs=15, 
                    validation_data = test_image_gen,
                    class_weight=class_wt_dict,
                    callbacks=callback_list)


Epoch 42/45

Epoch 00001: val_accuracy improved from 0.61107 to 0.67578, saving model to best_model_dn121.hdf5
Epoch 43/45

Epoch 00002: val_accuracy improved from 0.67578 to 0.68512, saving model to best_model_dn121.hdf5
Epoch 44/45

Epoch 00003: val_accuracy improved from 0.68512 to 0.71114, saving model to best_model_dn121.hdf5
Epoch 45/45




### Model Evaluation

In [16]:
import sklearn
from sklearn.metrics import classification_report, confusion_matrix

cm = confusion_matrix(test_labels, predictions)
cm


array([[ 12,  16,   7,   1,   5,   8,   0],
       [  2,  46,   6,   4,   9,  10,   0],
       [  6,  23,  67,   2,  30,  34,   2],
       [  3,   4,   2,   6,   1,   1,   0],
       [  3,   4,  29,   0,  78,  51,   1],
       [  8,  27,  44,   6,  78, 836,   6],
       [  0,   3,   1,   0,   0,   2,  15]])



In [17]:
print(classification_report(test_labels, predictions))


precision    recall  f1-score   support

           0       0.35      0.24      0.29        49
           1       0.37      0.60      0.46        77
           2       0.43      0.41      0.42       164
           3       0.32      0.35      0.33        17
           4       0.39      0.47      0.43       166
           5       0.89      0.83      0.86      1005
           6       0.62      0.71      0.67        21

    accuracy                           0.71      1499
   macro avg       0.48      0.52      0.49      1499
weighted avg       0.73      0.71      0.71      1499

