In [2]:
# https://www.kaggle.com/bertramdhooge/mole-detection-becode

# For this project I relied heavily on other Kaggle notebooks for help and inspiration, specifically:
# https://www.kaggle.com/code/udayasrimandadapu/cnn-sc
# https://www.kaggle.com/code/rslu2000/skin-cancer-model-97-88-accuracy

# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import matplotlib as mpl
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import os
import cv2
from glob import glob
import seaborn as sns
from PIL import Image
from sklearn.preprocessing import label_binarize
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import itertools
import tensorflow
import tensorflow.keras
from tensorflow.keras.applications import ResNet152, Xception,VGG16,EfficientNetB4
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Activation,Dense, Dropout, Flatten, Conv2D, MaxPool2D,AveragePooling2D,GlobalMaxPooling2D
from tensorflow.keras import backend as K
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras import regularizers
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

#for dirname, _, filenames in os.walk('/kaggle/input'):
 #   for filename in filenames:
  #      print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [4]:
# Locate the dataset under the Kaggle input directory and load the
# HAM10000 metadata table into a DataFrame.
base_skin_dir = os.path.join('..', 'input')
metadata_csv_path = os.path.join(
    base_skin_dir,
    "skin-cancer-mnist-ham10000/",
    'HAM10000_metadata.csv',
)
skin_df = pd.read_csv(metadata_csv_path)

# Preview the first rows of the metadata.
skin_df.head()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_4 (Conv2D)           (None, 120, 120, 16)      448       
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 60, 60, 16)       0         
 2D)                                                             
                                                                 
 conv2d_5 (Conv2D)           (None, 60, 60, 32)        4640      
                                                                 
 max_pooling2d_5 (MaxPooling  (None, 30, 30, 32)       0         
 2D)                                                             
                                                                 
 conv2d_6 (Conv2D)           (None, 30, 30, 64)        18496     
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 15, 15, 64)      

In [None]:
# Resolve every metadata row to the path of its JPEG file.
#
# The images are split over two folders. Each folder is listed exactly once
# and kept as a set, so the per-image lookup is a constant-time membership
# test instead of a fresh directory listing for every row.
image_part_dirs = ("HAM10000_images_part_1", "HAM10000_images_part_2")
files_by_part = {
    part: set(os.listdir(os.path.join(base_skin_dir, "skin-cancer-mnist-ham10000/", f"{part}/")))
    for part in image_part_dirs
}

images = []

for image in skin_df.image_id:
    filename = f"{image}.jpg"
    # part_1 is checked before part_2, matching the original lookup order.
    for part in image_part_dirs:
        if filename in files_by_part[part]:
            images.append(f"../input/skin-cancer-mnist-ham10000/{part}/{filename}")
            break
    else:
        # `images` is assigned as a DataFrame column afterwards, so it must
        # contain exactly one entry per metadata row; fail loudly here rather
        # than with a length-mismatch error later on.
        raise FileNotFoundError(
            f"{filename} was not found in any of: {', '.join(image_part_dirs)}"
        )

In [None]:
from skimage.filters import unsharp_mask


def _load_preprocessed_image(path):
    """Load one image file and return its preprocessed pixel array.

    Steps, in order: open the file with PIL, resize to 120x120, convert to a
    NumPy array, apply a median blur with kernel size 5, then apply
    `unsharp_mask` with its default arguments.
    """
    resized = Image.open(path).resize((120, 120))
    denoised = cv2.medianBlur(np.asarray(resized), 5)
    # NOTE(review): unsharp_mask is called without any channel argument --
    # confirm the installed scikit-image version handles the colour axis as intended.
    return unsharp_mask(denoised)


# Store the file path of every image next to its metadata, then load and
# preprocess each image into the "images" column.
skin_df["path"] = images
skin_df["images"] = skin_df["path"].map(_load_preprocessed_image)

In [None]:
# As seen in https://www.kaggle.com/code/udayasrimandadapu/cnn-sc
from tensorflow.keras.utils import to_categorical

# Maps the short diagnosis codes found in the "dx" column to readable names.
lesion_type_dict = {'akiec': 'Actinic keratoses',
                    'bcc': 'Basal cell carcinoma',
                    'bkl': 'Benign keratosis-like lesions ',
                    'df': 'Dermatofibroma',
                    'nv': 'Melanocytic nevi',
                    'mel': 'Melanoma',
                    'vasc': 'Vascular lesions'}

# Readable class name per row, plus its integer code (the training target).
skin_df["cell_type"] = skin_df["dx"].map(lesion_type_dict.get)
skin_df["cell_type_idx"] = pd.Categorical(skin_df["cell_type"]).codes

features = skin_df.drop(columns=['cell_type_idx'])
target = skin_df['cell_type_idx']

# Hold out 20% of the rows as the test set.
x_train_o, x_test_o, y_train_o, y_test_o = train_test_split(
    features, target, test_size=0.2, random_state=666)

# Stack the per-row image arrays into one array per split.
x_train = np.asarray(x_train_o['images'].tolist())
x_test = np.asarray(x_test_o['images'].tolist())

x_train_mean = np.mean(x_train)
x_train_std = np.std(x_train)
# Test-set statistics are still computed so the names remain available, but
# they are intentionally NOT used for normalisation below.
x_test_mean = np.mean(x_test)
x_test_std = np.std(x_test)

# Standardise both splits with the TRAINING statistics: the model must see
# test images on the same scale it was trained on, and no information from
# the test set may leak into preprocessing.
x_train = (x_train - x_train_mean) / x_train_std
x_test = (x_test - x_train_mean) / x_train_std

# One-hot encode the 7 lesion classes.
y_train = to_categorical(y_train_o, num_classes=7)
y_test = to_categorical(y_test_o, num_classes=7)

# Carve 10% of the training data off as the validation set.
x_train, x_validate, y_train, y_validate = train_test_split(
    x_train, y_train, test_size=0.1, random_state=999)

# Preview the held-out rows instead of printing the whole frame.
x_test_o.head()

In [None]:
# Give each split an explicit (n_samples, 120, 120, 3) layout: 120x120 pixels, 3 channels (RGB).
target_image_shape = (120, 120, 3)
x_train = x_train.reshape((x_train.shape[0],) + target_image_shape)
x_test = x_test.reshape((x_test.shape[0],) + target_image_shape)
x_validate = x_validate.reshape((x_validate.shape[0],) + target_image_shape)

In [None]:
# Model configuration: image geometry fed to the network and number of output classes.
input_shape = (120, 120, 3)
num_classes = 7

# Adam optimizer; epsilon and decay are left at the optimizer's own defaults
# rather than being passed explicitly.
optimizer = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False)

# Halve the learning rate when val_accuracy has not improved for 6 epochs,
# never going below 1e-5. The lower bound is passed as `min_lr`, the name the
# callback actually defines.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy', patience=6, verbose=1, factor=0.5, min_lr=0.00001)

# Save the model to ./model.h5 whenever val_accuracy improves.
# The callback is reached through the `tensorflow` module imported at the top
# of the notebook (no `tf` alias is defined there).
mcp = tensorflow.keras.callbacks.ModelCheckpoint(filepath='./model.h5', monitor='val_accuracy', verbose=1, save_best_only=True, mode='auto')

# NOTE: `learning_rate_reduction` and `mcp` only take effect when they are
# included in the `callbacks` list given to `model.fit`.

# Four Conv2D + MaxPool2D stages with 16, 32, 64 and 128 filters, followed by
# a small dense classifier head ending in a softmax over the classes.
model = Sequential()
model.add(Conv2D(16, kernel_size = (3,3), input_shape = input_shape, activation = 'relu', padding = 'same'))
model.add(MaxPool2D(pool_size = (2,2)))

model.add(Conv2D(32, kernel_size = (3,3), activation = 'relu', padding = 'same'))
model.add(MaxPool2D(pool_size = (2,2), padding = 'same'))

model.add(Conv2D(64, kernel_size = (3,3), activation = 'relu', padding = 'same'))
model.add(MaxPool2D(pool_size = (2,2), padding = 'same'))

model.add(Conv2D(128, kernel_size = (3,3), activation = 'relu', padding = 'same'))
model.add(MaxPool2D(pool_size = (2,2), padding = 'same'))

model.add(Flatten())
model.add(Dense(64, activation = 'relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))

# Categorical cross-entropy matches the one-hot encoded targets.
model.compile(loss = 'categorical_crossentropy',
              optimizer = optimizer,
              metrics = ['accuracy'])

In [None]:
# Options for the image augmentation generator. All centering, normalisation
# and whitening switches are explicitly disabled; the remaining entries
# configure random rotations, zooms, shifts, flips and shearing.
augmentation_options = {
    "featurewise_center": False,
    "samplewise_center": False,
    "featurewise_std_normalization": False,
    "samplewise_std_normalization": False,
    "zca_whitening": False,
    "rotation_range": 90,
    "zoom_range": 0.1,
    "width_shift_range": 0.1,
    "height_shift_range": 0.1,
    "horizontal_flip": True,
    "vertical_flip": True,
    "shear_range": 10,
}

datagen = ImageDataGenerator(**augmentation_options)
datagen.fit(x_train)

In [None]:
# Multiply the learning rate by 0.1 once val_loss has stalled for 3 epochs.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=3,
    verbose=1,
    mode='auto',
)

# Stop training once val_loss has stalled for 10 epochs.
# restore_best_weights is not enabled on this callback.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=10,
    verbose=1,
    mode='auto',
)

training_callbacks = [reduce_lr, early_stop]

# NOTE(review): training runs on the raw arrays; the `datagen` augmentation
# generator built earlier in this notebook is not passed to `fit` -- confirm
# this is intended.
history = model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=100,
    validation_data=(x_validate, y_validate),
    callbacks=training_callbacks,
)

In [None]:
# Persist the trained model under the relative path "model" (no file extension is given).
# NOTE(review): the on-disk format chosen for an extension-less path depends on
# the installed TensorFlow/Keras version -- confirm it matches what the loader expects.
model.save("model")