In [46]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical

In [47]:
ham10000_metadata = pd.read_csv("dataset/HAM10000_metadata.csv")

ham10000_metadata.head(10)

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear
5,HAM_0001466,ISIC_0027850,bkl,histo,75.0,male,ear
6,HAM_0002761,ISIC_0029176,bkl,histo,60.0,male,face
7,HAM_0002761,ISIC_0029068,bkl,histo,60.0,male,face
8,HAM_0005132,ISIC_0025837,bkl,histo,70.0,female,back
9,HAM_0005132,ISIC_0025209,bkl,histo,70.0,female,back


In [48]:
IMAGE_NAME = 1
ILLNESS = 2
DIAGNOSTIC = 3
AGE = 4
SEX = 5
LOCALIZATION = 6

In [49]:
ham10000_metadata = ham10000_metadata.dropna(how='any',axis=0) 


In [50]:
import os

from glob import glob

base_skin_dir = "dataset/HAM10000_images"

imageid_path_dict = {
    os.path.splitext(os.path.basename(x))[0]: x for x in glob(os.path.join(base_skin_dir, "*.jpg"))
}

ham10000_metadata["path"] = ham10000_metadata["image_id"].map(imageid_path_dict.get)

In [51]:
lesion_type_dict = {
    "nv": "Melanocytic nevi",
    "mel": "Melanoma",
    "bkl": "Benign keratosis-like lesions ",
    "bcc": "Basal cell carcinoma",
    "akiec": "Actinic keratoses",
    "vasc": "Vascular lesions",
    "df": "Dermatofibroma"
}

ham10000_metadata["cell_type"] = ham10000_metadata["dx"].map(lesion_type_dict.get)
ham10000_metadata["cell_type_idx"] = pd.Categorical(ham10000_metadata["cell_type"]).codes

In [52]:
ham10000_metadata.head(10)

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,path,cell_type,cell_type_idx
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,dataset/HAM10000_images/ISIC_0027419.jpg,Benign keratosis-like lesions,2
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,dataset/HAM10000_images/ISIC_0025030.jpg,Benign keratosis-like lesions,2
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,dataset/HAM10000_images/ISIC_0026769.jpg,Benign keratosis-like lesions,2
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,dataset/HAM10000_images/ISIC_0025661.jpg,Benign keratosis-like lesions,2
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,dataset/HAM10000_images/ISIC_0031633.jpg,Benign keratosis-like lesions,2
5,HAM_0001466,ISIC_0027850,bkl,histo,75.0,male,ear,dataset/HAM10000_images/ISIC_0027850.jpg,Benign keratosis-like lesions,2
6,HAM_0002761,ISIC_0029176,bkl,histo,60.0,male,face,dataset/HAM10000_images/ISIC_0029176.jpg,Benign keratosis-like lesions,2
7,HAM_0002761,ISIC_0029068,bkl,histo,60.0,male,face,dataset/HAM10000_images/ISIC_0029068.jpg,Benign keratosis-like lesions,2
8,HAM_0005132,ISIC_0025837,bkl,histo,70.0,female,back,dataset/HAM10000_images/ISIC_0025837.jpg,Benign keratosis-like lesions,2
9,HAM_0005132,ISIC_0025209,bkl,histo,70.0,female,back,dataset/HAM10000_images/ISIC_0025209.jpg,Benign keratosis-like lesions,2


In [53]:
from PIL import Image


ham10000_metadata["image"] = ham10000_metadata["path"].map(lambda x: np.asarray(Image.open(x).resize((200,150))))

In [54]:
from sklearn.model_selection import train_test_split

x = ham10000_metadata.drop(columns=["cell_type_idx"],axis=1)
y = ham10000_metadata["cell_type_idx"]

x_train_o, x_test_o, y_train_o, y_test_o = train_test_split(x, y, test_size=0.20, random_state=123)

In [55]:
print("train shape:", x_train_o.shape)
print("test shape:", x_test_o.shape)

print("Distribuição de classes em train:", y_train_o.nunique())
print("Distribuição de classes em test:", y_test_o.nunique())

train shape: (7966, 10)
test shape: (1992, 10)
Distribuição de classes em train: 7
Distribuição de classes em test: 7


In [56]:
x_train = np.asarray(x_train_o["image"].tolist())
x_test = np.asarray(x_test_o["image"].tolist())

y_train = to_categorical(y_train_o, num_classes = y_train_o.nunique())
y_test = to_categorical(y_test_o, num_classes = y_test_o.nunique())

x_train, x_validate, y_train, y_validate = train_test_split(x_train, y_train, test_size=0.1, random_state=123)

x_train = x_train.reshape(x_train.shape[0], *(150, 200, 3))
x_test = x_test.reshape(x_test.shape[0], *(150, 200, 3))
x_validate = x_validate.reshape(x_validate.shape[0], *(150, 200, 3))

In [57]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
from keras.callbacks import ReduceLROnPlateau, EarlyStopping

epochs = 100
batch_size = 2032

early_stopping_monitor = EarlyStopping(patience=100,monitor='val_accuracy')
model_checkpoint_callback = ModelCheckpoint(filepath='model.keras',
                                            save_weights_only=False,
                                            monitor='val_accuracy',
                                            mode='auto',
                                            save_best_only=True,
                                            verbose=1)
batch_size = 32
epochs = 100
optimizer = Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-3)
model.compile(optimizer = optimizer, loss = 'categorical_crossentropy', metrics=['accuracy'])

datagen = ImageDataGenerator(zoom_range = 0.2, horizontal_flip=True, shear_range=0.2)

datagen.fit(x_train)

def history(model):
    model.compile(
        optimizer="adam",
        loss="categorical_crossentropy",
        metrics=["accuracy"]
    )
    history = model.fit(
        datagen.flow(x_train, y_train),
        epochs=epochs,
        batch_size=batch_size,
        shuffle=True,
        callbacks=[early_stopping_monitor,model_checkpoint_callback],
        validation_data=(x_validate, y_validate)
    )

    loss, accuracy = model.evaluate(x_test, y_test, verbose=0)
    predictions = model.predict(x_test)
    loss_v, accuracy_v = model.evaluate(x_validate, y_validate, verbose=0)
    loss_t, accuracy_t = model.evaluate(x_train, y_train, verbose=0)

    return (predictions, accuracy_t, accuracy_v, accuracy)

In [58]:
from keras.models import Sequential
from keras.layers import Activation, Dense, Dropout, Flatten, Conv2D, AveragePooling2D
from tensorflow.keras.layers import BatchNormalization

model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation="relu", padding="Same", input_shape=(150, 200, 3)))
model.add(BatchNormalization())

model.add(Conv2D(64, (3, 3), activation="relu", padding="Same"))
model.add(BatchNormalization())
model.add(AveragePooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), activation="relu", padding="Same"))
model.add(BatchNormalization())

model.add(Conv2D(64, (3, 3), activation="relu", padding="Same"))
model.add(BatchNormalization())
model.add(AveragePooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), activation="relu", padding="Same"))
model.add(BatchNormalization())
model.add(AveragePooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), activation="relu", padding="Same"))
model.add(BatchNormalization())

model.add(Conv2D(64, (3, 3), activation="relu", padding="Same"))
model.add(BatchNormalization())
model.add(AveragePooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())

model.add(BatchNormalization())
model.add(Dense(128, activation="relu"))
model.add(Activation("relu"))
model.add(Dropout(0.25))

#Output
model.add(BatchNormalization())
model.add(Dense(y_train_o.nunique(), activation="softmax"))

y_pred, accuracy_t,accuracy_v,accuracy = history(model)
print("Training: accuracy = %f" % (accuracy_t))
print("Validation: accuracy = %f" % (accuracy_v))
print("Test: accuracy = %f" % (accuracy))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100


  self._warn_if_super_not_called()


[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.4962 - loss: 1.6619
Epoch 1: val_accuracy improved from -inf to 0.67629, saving model to model.keras
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m434s[0m 2s/step - accuracy: 0.4966 - loss: 1.6604 - val_accuracy: 0.6763 - val_loss: 1.1884
Epoch 2/100
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.6984 - loss: 0.8949
Epoch 2: val_accuracy did not improve from 0.67629
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m429s[0m 2s/step - accuracy: 0.6984 - loss: 0.8948 - val_accuracy: 0.6324 - val_loss: 1.1134
Epoch 3/100
[1m 59/225[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m5:35[0m 2s/step - accuracy: 0.7085 - loss: 0.8114