In [0]:
#Mount Drive

from google.colab import drive
drive.mount("/content/drive") 

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
#Extract Zip Files

from zipfile import ZipFile
file_name = "/content/drive/My Drive/Ham10000/HAM10000_images_part_1.zip"

with ZipFile(file_name,'r') as zip:
  zip.extractall("/HAM10000_images_part_1")
  print('Done')

Done


In [0]:
from zipfile import ZipFile
file_name = "/content/drive/My Drive/Ham10000/HAM10000_images_part_2.zip"

with ZipFile(file_name,'r') as zip:
  zip.extractall("/HAM10000_images_part_2")
  print('Done')

Done


In [0]:
#Image Preprocessing for HAM10000_images_part_1

import cv2
import glob
import os,sys 

def process(image) :
    src = cv2.imread(image)

    print( src.shape )
    cv2.imshow("original Image" , src )


    # Convert the original image to grayscale
    grayScale = cv2.cvtColor( src, cv2.COLOR_RGB2GRAY )
    
    median = cv2.medianBlur(grayScale, 5)

    return(median)

    ## Get all the png image in the PATH_TO_IMAGES
imgnames = sorted(glob.glob("/base_skin_dir/input/HAM10000_images_part_1/*.jpg"))

for imgname in imgnames:
   
    res = preprocess(imgname)

    cv2.imwrite(imgname, res, [int(cv2.IMWRITE_JPEG_QUALITY), 90])

In [0]:
#Image Preprocessing for HAM10000_images_part_2

def process(image) :
    src = cv2.imread(image)

    print( src.shape )
    cv2.imshow("original Image" , src )


    # Convert the original image to grayscale
    grayScale = cv2.cvtColor( src, cv2.COLOR_RGB2GRAY )
    
    median = cv2.medianBlur(grayScale, 5)

    return(median)

    # Get all the png image in the PATH_TO_IMAGES
imgnames = sorted(glob.glob("/base_skin_dir/input/HAM10000_images_part_2/*.jpg"))

for imgname in imgnames:
   
    res = preprocess(imgname)

    cv2.imwrite(imgname, res, [int(cv2.IMWRITE_JPEG_QUALITY), 90])

In [0]:
#Libraries

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from glob import glob
import seaborn as sns
from PIL import Image
np.random.seed(123)
from sklearn.preprocessing import label_binarize
from sklearn.metrics import confusion_matrix
import itertools

import keras
from keras.utils.np_utils import to_categorical # used for converting labels to one-hot-encoding
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from keras import backend as K
import itertools
from keras.layers.normalization import BatchNormalization
from keras.utils.np_utils import to_categorical # convert to one-hot-encoding

from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau
from sklearn.model_selection import train_test_split

In [0]:
#Making Dictionary of images and labels

base_skin_dir = os.path.join('/base_skin_dir','input')

imageid_path_dict = {os.path.splitext(os.path.basename(x))[0]: x
                     for x in glob(os.path.join(base_skin_dir, '*', '*.jpg'))}

lesion_type_dict = {
    'nv': 'Melanocytic nevi',
    'mel': 'Melanoma',  #cancer
    'bkl': 'Benign keratosis-like lesions ',
    'bcc': 'Basal cell carcinoma',  #cancer
    'akiec': 'Actinic keratoses',   #pre-cancer
    'vasc': 'Vascular lesions',
    'df': 'Dermatofibroma'
}

In [0]:
#Reading & Processing data

skin_df = pd.read_csv(os.path.join(base_skin_dir, '/base_skin_dir/HAM10000_metadata.csv'))

# We have read the csv by adding the path of the image file
skin_df['path'] = skin_df['image_id'].map(imageid_path_dict.get)
skin_df['cell_type'] = skin_df['dx'].map(lesion_type_dict.get) 

# We have categorized the lesion type into codes from 0 to 6
skin_df['cell_type_idx'] = pd.Categorical(skin_df['cell_type']).codes

# head() function shows first 5 rows.
skin_df.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,path,cell_type,cell_type_idx
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,/base_skin_dir/input/HAM10000_images_part_1/IS...,Benign keratosis-like lesions,2
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,/base_skin_dir/input/HAM10000_images_part_1/IS...,Benign keratosis-like lesions,2
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,/base_skin_dir/input/HAM10000_images_part_1/IS...,Benign keratosis-like lesions,2
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,/base_skin_dir/input/HAM10000_images_part_1/IS...,Benign keratosis-like lesions,2
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,/base_skin_dir/input/HAM10000_images_part_2/IS...,Benign keratosis-like lesions,2


In [0]:
# Resizing of images
#The original dimension of images are 450 x 600 x3 which TensorFlow can't handle, so that’s why we resize it into 100 x 75

skin_df['image'] = skin_df['path'].map(lambda x: np.asarray(Image.open(x).resize((100,75))))


In [0]:
skin_df.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,path,cell_type,cell_type_idx,image
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,/base_skin_dir/input/HAM10000_images_part_1/IS...,Benign keratosis-like lesions,2,"[[[191, 152, 195], [192, 154, 195], [191, 153,..."
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,/base_skin_dir/input/HAM10000_images_part_1/IS...,Benign keratosis-like lesions,2,"[[[23, 13, 23], [24, 14, 24], [25, 14, 29], [3..."
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,/base_skin_dir/input/HAM10000_images_part_1/IS...,Benign keratosis-like lesions,2,"[[[185, 127, 137], [189, 133, 147], [193, 136,..."
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,/base_skin_dir/input/HAM10000_images_part_1/IS...,Benign keratosis-like lesions,2,"[[[23, 11, 17], [26, 13, 22], [38, 21, 32], [5..."
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,/base_skin_dir/input/HAM10000_images_part_2/IS...,Benign keratosis-like lesions,2,"[[[137, 92, 115], [148, 102, 125], [159, 115, ..."


In [0]:
#We have specified features(inputs) columns without the cell_type_idx column. The cell_type_idx column is the column we want to predict. It will be output.

features=skin_df.drop(columns=['cell_type_idx'],axis=1)

target=skin_df['cell_type_idx']

In [0]:
#We have split the dataset into training and testing set in the ratio of 80:20.

x_train_o, x_test_o, y_train_o, y_test_o = train_test_split(features, target, test_size=0.20,random_state=123)


In [0]:
#Normalization with z-score normalization.

x_train = np.asarray(x_train_o['image'].tolist())
x_test = np.asarray(x_test_o['image'].tolist())

x_train_mean = np.mean(x_train)
x_train_std = np.std(x_train)

x_test_mean = np.mean(x_test)
x_test_std = np.std(x_test)

x_train = (x_train - x_train_mean)/x_train_std
x_test = (x_test - x_test_mean)/x_test_std

In [0]:
# Perform one-hot encoding on the labels

# We will encode labels which are 7 different classes of skin cancer types from 0 to 6
y_train = to_categorical(y_train_o, num_classes = 7)
y_test = to_categorical(y_test_o, num_classes = 7)

In [0]:
# We split the train data set as validation set and train set in the ratio of 90:10.
# Validation set is used to prevent overfitting
x_train, x_validate, y_train, y_validate = train_test_split(x_train, y_train, test_size = 0.1, random_state = 2)


In [0]:
# Reshape image in 3 dimensions (height = 75px, width = 100px , canal = 3)
x_train = x_train.reshape(x_train.shape[0], *(75, 100, 3))
x_test = x_test.reshape(x_test.shape[0], *(75, 100, 3))
x_validate = x_validate.reshape(x_validate.shape[0], *(75, 100, 3))

In [0]:
#Model Building

input_shape = (75, 100, 3)
num_classes = 7

model = Sequential()
#Input Layer
#Convolutional Layer
model.add(Conv2D(32, kernel_size=(3, 3),activation='relu',padding = 'Same',input_shape=input_shape))
model.add(Conv2D(32,kernel_size=(3, 3), activation='relu',padding = 'Same',))
#Pooling Layer
model.add(MaxPool2D(pool_size = (2, 2)))
#Dropout Layer
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), activation='relu',padding = 'Same'))
model.add(Conv2D(64, (3, 3), activation='relu',padding = 'Same'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Dropout(0.40))

#Flattening Layer
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_13 (Conv2D)           (None, 75, 100, 32)       896       
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 75, 100, 32)       9248      
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 37, 50, 32)        0         
_________________________________________________________________
dropout_10 (Dropout)         (None, 37, 50, 32)        0         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 37, 50, 64)        18496     
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 37, 50, 64)        36928     
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 18, 25, 64)       

In [0]:
# Define the optimizer
#Optimizer=rate of train, 
optimizer = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)

# Compile the model
#categorical_crossentropy for classification
#accuracy is used to evaluate the performance our model
model.compile(optimizer = optimizer , loss = "categorical_crossentropy", metrics=["accuracy"])

# Set a learning rate annealer
learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc', 
                                            patience=3, 
                                            verbose=1, 
                                            factor=0.5, 
                                            min_lr=0.00001)

In [0]:
# With data augmentation to prevent overfitting 

datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=10,  # randomly rotate images in the range (degrees, 0 to 180)
        zoom_range = 0.1, # Randomly zoom image 
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=False,  # randomly flip images
        vertical_flip=False)  # randomly flip images

datagen.fit(x_train)

In [0]:
# Fitting the model
#small batch size is more efficiently for training

epochs = 50 
batch_size = 10
history = model.fit_generator(datagen.flow(x_train,y_train, batch_size=batch_size),
                              epochs = epochs, validation_data = (x_validate,y_validate),
                              verbose = 1, steps_per_epoch=x_train.shape[0] // batch_size
                              , callbacks=[learning_rate_reduction])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [0]:
loss, accuracy = model.evaluate(x_test, y_test, verbose=1)
loss_v, accuracy_v = model.evaluate(x_validate, y_validate, verbose=1)
print("Validation: accuracy = %f  ;  loss_v = %f" % (accuracy_v, loss_v))
print("Test: accuracy = %f  ;  loss = %f" % (accuracy, loss))
model.save("appDermisModel.h5")

Validation: accuracy = 0.773067  ;  loss_v = 0.638849
Test: accuracy = 0.762356  ;  loss = 0.638496
