In [1]:
import os

import tensorflow as tf
from tensorflow.keras.metrics import categorical_accuracy, top_k_categorical_accuracy

from PIL import Image
import pandas as pd

import numpy as np
import sys

print(tf.__version__)

2.9.1


In [2]:
# modelName = input("Model Name: ")

# Import metadata for age, sex, and localization stats
database = pd.read_csv(r"C:\Users\Aiden\Desktop\Disease Recognition\dataverse_files\HAM10000_metadata.csv")

# Get all the unique values for localization
values = list(set(database.localization.values))
localizations = {values[i]: i for i in range(len(values))}

sex = {"female": 1, "male": 2, "unknown": 0}

metaData_x = [[int(database.age.values[i] if not pd.isna(database.age.values[i]) else -1),
               sex[database.sex.values[i]],
               localizations[database.localization.values[i]],
               database.image_id.values[i]

               ] for i in range(len(database.age.values))
              ]

# Numerical ids for output diseases
values = list(set(database.dx.values))
values.sort()
diseases = {values[i]: i for i in range(len(values))}
invDiseases = {v: k for k, v in diseases.items()}
print(diseases)
dataset_y_Sparse = [diseases[database.dx.values[i]] for i in range(len(database.dx.values))]

# One hot encode the output
dataset_y = tf.keras.utils.to_categorical(dataset_y_Sparse, len(diseases))

{'akiec': 0, 'bcc': 1, 'bkl': 2, 'df': 3, 'mel': 4, 'nv': 5, 'vasc': 6}


In [3]:
# Load all images in the dataset
print("\nLoading Images")
dataset_x = []
dataset_yS = []
metaDat_x = []
metaDataCount = 0
totalImages = sum([len(files) for r, d, files in
                   os.walk(r"C:\Users\Aiden\Desktop\Disease Recognition\dataverse_files\HAM10000_categories")])
for i in range(7):
    for j in os.listdir(r"C:\Users\Aiden\Desktop\Disease Recognition\dataverse_files\HAM10000_categories\{}".format(
            invDiseases[i])):
        sys.stdout.write("\rLoading image {} of {} of category {}. Total Completion: {}%".format(
            os.listdir(r"C:\Users\Aiden\Desktop\Disease Recognition\dataverse_files\HAM10000_categories\{}".format(
                invDiseases[i])).index(j),
            len(os.listdir(r"C:\Users\Aiden\Desktop\Disease Recognition\dataverse_files\HAM10000_categories\{}".format(
                invDiseases[i]))),
            invDiseases[i],
            round((metaDataCount / totalImages) * 100, 2))
        )
        img = Image.open(r"C:\Users\Aiden\Desktop\Disease Recognition\dataverse_files\HAM10000_categories\{}".format(
            invDiseases[i]) + "\\" + j)
        img = img.resize((50, 50))

        # Normalize image
        img = np.array(img)
        img = img / 255.0
        dataset_x.append(img)
        ind = 0
        for l in metaData_x:
            if l[3] in j:
                metaDat_x.append(l[:3])
                ind = metaData_x.index(l)
                metaDataCount += 1
                break
        dataset_yS.append(dataset_y[ind])

dataset_y = dataset_yS

metaData_x = metaDat_x

if len(dataset_x) == len(dataset_y) == len(metaData_x) == metaDataCount:
    print("\n\n~------ CHECK PASSED ------~\n\n")
else:
    print("ERROR: Data not equal in length. Terminating due to data corruption.")
    exit(0)


Loading Images
Loading image 141 of 142 of category vasc. Total Completion: 99.99%%

~------ CHECK PASSED ------~




In [4]:
# Randomize Data
randomize = np.arange(len(metaData_x))
np.random.shuffle(randomize)
metaData_x = np.array(metaData_x)[randomize]
dataset_x = np.array(dataset_x)[randomize]
dataset_y = np.array(dataset_y)[randomize]

In [5]:
# Split data into training and testing
split = int(len(metaData_x) * 0.9)
train_x = dataset_x[:split]
train_y = dataset_y[:split]
train_meta = metaData_x[:split]

test_x = dataset_x[split:]
test_y = dataset_y[split:]
test_meta = metaData_x[split:]

In [12]:
# Model
inputConv = tf.keras.layers.Input(shape=(50, 50, 3), name="img")
xConv = tf.keras.layers.Conv2D(256, (3, 3), activation='relu')(inputConv)
xConv = tf.keras.layers.Dropout(0.2)(xConv)
xConv = tf.keras.layers.MaxPooling2D((2, 2))(xConv)
xConv = tf.keras.layers.BatchNormalization()(xConv)
xConv = tf.keras.layers.Conv2D(512, (3, 3), activation='relu')(xConv)
xConv = tf.keras.layers.Dropout(0.2)(xConv)
xConv = tf.keras.layers.MaxPooling2D((2, 2))(xConv)
xConv = tf.keras.layers.BatchNormalization()(xConv)
xConv = tf.keras.layers.Conv2D(1024, (3, 3), activation='relu')(xConv)
xConv = tf.keras.layers.Dropout(0.2)(xConv)
xConv = tf.keras.layers.MaxPooling2D((1, 1))(xConv)
xConv = tf.keras.layers.Conv2D(1024, (3, 3), activation='relu')(xConv)
xConv = tf.keras.layers.Dropout(0.2)(xConv)
xConv = tf.keras.layers.MaxPooling2D((1, 1))(xConv)
xConv = tf.keras.layers.BatchNormalization()(xConv)
xConv = tf.keras.layers.Conv2D(1024, (3, 3), activation='relu')(xConv)
xConv = tf.keras.layers.Dropout(0.2)(xConv)
xConv = tf.keras.layers.MaxPooling2D((1, 1))(xConv)
xConv = tf.keras.layers.BatchNormalization()(xConv)
xConv = tf.keras.layers.Flatten()(xConv)
xConv = tf.keras.layers.Dense(256, activation='relu')(xConv)
xConv = tf.keras.layers.Dropout(0.4)(xConv)

inputMeta = tf.keras.layers.Input(shape=(3,), name="meta")
xMeta = tf.keras.layers.Dense(32, activation='relu')(inputMeta)
xMeta = tf.keras.layers.Dropout(0.2)(xMeta)

concat = tf.keras.layers.concatenate([xConv, xMeta])
xCombined = tf.keras.layers.Dense(128, activation='leaky_relu')(concat)
xCombined = tf.keras.layers.Dropout(0.3)(xCombined)
output = tf.keras.layers.Dense(7, activation='softmax')(xCombined)

model = tf.keras.Model(inputs=[inputConv, inputMeta], outputs=output)

def accuracyK(y_true, y_pred):
    return top_k_categorical_accuracy(y_true, y_pred, k=2)


model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss='categorical_crossentropy',
              metrics=["accuracy"])

In [13]:
# Setup early stopping
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy', min_delta=0, patience=5, verbose=2,
    mode='max', baseline=None, restore_best_weights=True
)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.5,
    patience=2,
    verbose=1,
    cooldown=0,
    mode='auto',
    min_delta=0.0001,
    min_lr=0)

# Train Model
class_weights = {0: 4.375273044997816, 1: 2.7834908282379103, 2: 1.301832835044846, 3: 12.440993788819876, 4: 1.2854575792581184, 5: 0.21338020666879728, 6: 10.075452716297788}
class_weights = {0:1,1:0.5,2:1,3:1,4:1,5:1,6:1}
model.fit([train_x, train_meta], train_y,
          epochs=100,
          class_weight=class_weights,
          validation_data=([test_x, test_meta], test_y),
          callbacks=[early_stopping, reduce_lr])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 6: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 11: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 12/100
Epoch 13/100
Epoch 13: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 14/100
Epoch 14: early stopping


<keras.callbacks.History at 0x2bb94ec5310>

In [9]:
# Evaluate
model.evaluate([test_x[:20], test_meta[:20]], test_y[:20])

# Test for overfit
model.predict([test_x[:5], test_meta[:5]])
print(test_y[:5])



[2.3450992107391357, 0.6986027956008911]

In [19]:
# Save Model
save = input("Save Model? (y/n): ")
if save != "y":
    print("\nModel not saved..\n")
    exit(0)
print("\nSaving Model")
model.save(input("Model Name: ") + ".h5")


Saving Model
