In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from glob import glob

from sklearn.model_selection import train_test_split
from keras.preprocessing import image

In [2]:
from tensorflow.keras import layers
from tensorflow import keras

In [3]:
# Image dimensions shared by the image loader and the network input layer.
# Both consumers use them positionally: target_size_W first, target_size_H second.
# NOTE(review): Keras treats the first dimension as height, so the W/H names
# look swapped relative to how the values are used — confirm before renaming.
target_size_W, target_size_H = 225, 300

In [25]:
import tensorflow as tf

In [4]:
# Load the HAM10000 metadata table; later cells read its `image_id` and `dx` columns.
df = pd.read_csv(filepath_or_buffer='HAM10000_metadata.csv')

In [5]:
# Keep only the two columns needed to map an image id to its diagnosis code.
labels_df = df.loc[:, ['image_id', 'dx']]

In [6]:
def get_label(id_img):
    """Return the ``dx`` value stored for the image id ``id_img``.

    The id is looked up in the module-level ``labels_df`` table and the ``dx``
    entry of the first matching row is returned.

    Raises
    ------
    IndexError
        If no row of ``labels_df`` has ``image_id`` equal to ``id_img``.
    """
    matching_dx = labels_df.loc[labels_df['image_id'] == id_img, 'dx']
    return matching_dx.iloc[0]

In [7]:
# Reference key for the seven lesion-type abbreviations. It is left as a bare
# string literal, so the notebook echoes it as this cell's output.
'''
Actinic keratoses and intraepithelial carcinoma / Bowen's disease (akiec), basal cell carcinoma (bcc),
benign keratosis-like lesions (solar lentigines / seborrheic keratoses and lichen-planus like keratoses, bkl),
dermatofibroma (df),
melanoma (mel), 
melanocytic nevi (nv) ,
vascular lesions (angiomas, angiokeratomas, pyogenic granulomas and hemorrhage, vasc)
'''

"\nActinic keratoses and intraepithelial carcinoma / Bowen's disease (akiec), basal cell carcinoma (bcc),\nbenign keratosis-like lesions (solar lentigines / seborrheic keratoses and lichen-planus like keratoses, bkl),\ndermatofibroma (df),\nmelanoma (mel), \nmelanocytic nevi (nv) ,\nvascular lesions (angiomas, angiokeratomas, pyogenic granulomas and hemorrhage, vasc)\n"

In [8]:
# Peek at the first matching file to see what the paths returned by glob look like.
# The backslash is doubled because '\*' is not a valid Python escape sequence
# (it only worked by accident and triggers a warning on recent Python versions).
glob('HAM10000_images_part_1\\*')[0]

'HAM10000_images_part_1\\ISIC_0024306.jpg'

In [9]:
# Module-level accumulators filled by process_image():
#   images - one array per successfully loaded image
#   labels - the label of each loaded image, in the same order as `images`
#   errors - ids of images for which no label could be found
images, labels, errors = [], [], []

In [10]:
def process_image(url, normalize = 1):
    """Load one image file, look up its label and append both to the module-level lists.

    Parameters
    ----------
    url : str
        Path to the image file. The image id is the file name without its
        extension; backslashes and forward slashes are both accepted as
        directory separators, at any nesting depth.
    normalize : int, default 1
        When equal to 1, the pixel array is divided by 255.

    Returns
    -------
    int
        1 when the image and its label were appended to ``images`` and
        ``labels``; 0 when no label was found, in which case the id is
        appended to ``errors`` and the image is not loaded.
    """
    # Take the last path component regardless of separator style, then strip
    # the extension, instead of assuming exactly one backslash in the path.
    file_name = url.replace('\\', '/').rsplit('/', 1)[-1]
    id_img = file_name.rsplit('.', 1)[0]
    try:
        label = get_label(id_img)
    except IndexError:
        # get_label raises IndexError when the id is absent from the metadata.
        # Anything else (e.g. KeyboardInterrupt, NameError) should surface.
        errors.append(id_img)
        return 0
    # load_img expects target_size as a 2-tuple; the channel count is not part of it.
    img = image.load_img(url, target_size=(target_size_W, target_size_H))
    img = image.img_to_array(img)
    if normalize == 1:
        img = img / 255
    images.append(img)
    labels.append(label)
    return 1

In [11]:
# Load every image of part 1 into the accumulator lists.
# The pattern's backslash is doubled because '\*' is not a valid escape
# sequence. Sorting makes the load order, and therefore the seeded train/test
# split further down, independent of the order glob happens to return.
for file in tqdm(sorted(glob('HAM10000_images_part_1\\*'))):
    process_image(file)

100%|██████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:52<00:00, 95.90it/s]


In [12]:
# Load every image of part 2 into the accumulator lists.
# The pattern's backslash is doubled because '\*' is not a valid escape
# sequence. Sorting makes the load order, and therefore the seeded train/test
# split further down, independent of the order glob happens to return.
for file in tqdm(sorted(glob('HAM10000_images_part_2\\*'))):
    process_image(file)

100%|██████████████████████████████████████████████████████████████████████████████| 5015/5015 [01:28<00:00, 56.70it/s]


In [13]:
# Stack the per-image arrays collected in `images` into one feature array.
X = np.asarray(images)

In [14]:
# One-column frame holding the label collected for each loaded image.
# Not to be confused with `labels_df`, the metadata lookup table.
label_df = pd.DataFrame(data=labels, columns=['tipo'])

In [15]:
# One-hot encode the label column and convert the indicator frame to an array.
y = np.array(
    pd.get_dummies(data=label_df, prefix='', columns=['tipo'])
)

In [16]:
# Hold out 10% of the samples; the fixed seed keeps the split repeatable
# for a given ordering of X and y.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

In [17]:
#X.shape

In [18]:
#y.shape

# Model

In [19]:
# CNN classifier: four 5x5 convolution stages (max-pooling after the last
# three), followed by two dense layers and a 7-way output. Dropout follows
# every stage.
model = keras.Sequential([
    layers.Conv2D(filters=16, kernel_size=(5, 5), activation="relu", input_shape=(target_size_W,target_size_H,3)),
    layers.Dropout(0.25),
    layers.Conv2D(filters=32, kernel_size=(5, 5), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.25),
    layers.Conv2D(filters=64, kernel_size=(5, 5), activation="relu"),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.25),
    layers.Conv2D(filters=64, kernel_size=(5, 5), activation="relu"),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.25),
    layers.Flatten(),
    layers.Dense(128,activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(64,activation="relu"),
    layers.Dropout(0.5),
    # Softmax rather than sigmoid: the targets are one-hot (exactly one class
    # per image), so the seven outputs should form a single probability
    # distribution instead of seven independent yes/no scores.
    layers.Dense(7,activation="softmax")
])


In [20]:
# Print the layer-by-layer output shapes and parameter counts of the network.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 221, 296, 16)      1216      
_________________________________________________________________
dropout (Dropout)            (None, 221, 296, 16)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 217, 292, 32)      12832     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 108, 146, 32)      0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 108, 146, 32)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 104, 142, 64)      51264     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 52, 71, 64)        0

In [21]:
# Categorical cross-entropy matches the one-hot, single-label targets in `y`.
# With binary cross-entropy, Keras resolves 'accuracy' to per-output binary
# accuracy, which is inflated because most of the seven targets are 0 for
# every sample. With this loss, 'accuracy' means "predicted class == true class".
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [22]:
# Train for 10 epochs in mini-batches of 64; the held-out split is used as
# the validation set that Keras evaluates after each epoch.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=10,
    validation_data=(X_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x178c546c588>

# Saving Model

In [23]:
# Persist the trained model; the TFLite conversion below reads it back from this path.
model.save(filepath='models/my_model')

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: models/my_model\assets


# Saving TFLite

In [26]:
# Convert the SavedModel written to 'models/my_model' into a TFLite flatbuffer.
converter = tf.lite.TFLiteConverter.from_saved_model('models/my_model')
tflite_model = converter.convert()

# Write through a context manager so the file handle is closed (and the data
# flushed) deterministically instead of whenever the object is garbage-collected.
with open("models/premodel.tflite", "wb") as tflite_file:
    bytes_written = tflite_file.write(tflite_model)

# Echo the number of bytes written as the cell output.
bytes_written


26662320