In [1]:
import pandas as pd
from tensorflow.keras.preprocessing import image

import tensorflow as tf

import matplotlib.pyplot as plt

from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.resnet_v2 import ResNet152V2


from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras import activations
from tqdm import tqdm
import numpy as np

In [None]:
df = pd.read_csv('./data/HAM10000_Metadata')

In [2]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


(Warning for Sunlight, increased risk of cancer) Actinic keratoses and intraepithelial carcinoma / Bowen's disease (__akiec__), 0

(Skin cancer) basal cell carcinoma (__bcc__), 1

(HARMLESS) benign keratosis-like lesions (solar lentigines / seborrheic keratoses and lichen-planus like keratoses, __bkl__), 2

(HARMLESS) dermatofibroma (__df__),  3

melanoma (__mel__),  4

(Not entirely harmless) melanocytic nevi (__nv__)   5

(HARMLESS) vascular lesions (angiomas, angiokeratomas, pyogenic granulomas and hemorrhage, __vasc__). 6 


More than 50% of lesions are confirmed through histopathology (histo), the ground truth for the rest of the cases is either follow-up examination (follow_up), expert consensus (consensus), or confirmation by in-vivo confocal microscopy (confocal). The dataset includes lesions with multiple images, which can be tracked by the lesion_id-column within the HAM10000_metadata file.

In [None]:
# Number of cases where we can use LSTM

(df['lesion_id'].value_counts() > 1).sum()

In [None]:
df.head()

In [None]:
df = df.sample(frac = 1).reset_index()

df.head()

In [None]:
df['localization'].unique()

In [None]:
temp = []

for i in range(0, 10015):
    loc = df.iloc[i]['localization']
    if loc == 'abdomen':
        temp.append(tf.one_hot(0, 15))
    elif loc == 'scalp':
        temp.append(tf.one_hot(1, 15))
    elif loc == 'lower extremity':
        temp.append(tf.one_hot(2, 15))
    elif loc == 'trunk':
        temp.append(tf.one_hot(3, 15))
    elif loc == 'upper extremity':
        temp.append(tf.one_hot(4, 15))
    elif loc == 'back':
        temp.append(tf.one_hot(5, 15))
    elif loc == 'neck':
        temp.append(tf.one_hot(6, 15))
    elif loc == 'face':
        temp.append(tf.one_hot(7, 15))
    elif loc == 'chest':
        temp.append(tf.one_hot(8, 15))
    elif loc == 'foot':
        temp.append(tf.one_hot(9, 15))
    elif loc == 'ear':
        temp.append(tf.one_hot(10, 15))
    elif loc == 'unknown':
        temp.append(tf.one_hot(11, 15))
    elif loc == 'hand':
        temp.append(tf.one_hot(12, 15))
    elif loc == 'acral':
        temp.append(tf.one_hot(13, 15))
    elif loc == 'genital':
        temp.append(tf.one_hot(14, 15))

In [None]:
one_hot_loc = np.array(temp)

one_hot_loc

In [None]:
# VGG-16
preprocessed = []

VGG_load = VGG16(weights='imagenet', include_top=False)
model = models.Sequential()
model.add(VGG_load)
model.add(layers.Flatten())


for i in tqdm(range(0, 10015)):
    file = df.iloc[i]['image_id']
    ohl = one_hot_loc[i]
    
    img = image.load_img('./Data/HAM10000_images/' + file + '.jpg', target_size= (224,224))
    img = image.img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = tf.keras.applications.vgg16.preprocess_input(img)
    preds = model.predict(img).reshape(25088)

    preprocessed.append(np.concatenate((preds, ohl)))
    
df['VGG16'] = preprocessed

In [None]:
# Inception
preprocessed = []

IV3_load = InceptionV3(weights='imagenet', include_top=False)
model = models.Sequential()
model.add(IV3_load)
model.add(layers.Flatten())

for i in tqdm(range(0, 10015)):
    file = df.iloc[i]['image_id']
    ohl = one_hot_loc[i]
    
    img = image.load_img('./Data/HAM10000_images/' + file + '.jpg', target_size= (299,299))
    img = image.img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = tf.keras.applications.inception_v3.preprocess_input(img)
    preds = model.predict(img).reshape(131072)

    preprocessed.append(np.concatenate((preds, ohl)))
    
df['IV3'] = preprocessed

In [None]:
# ResNet
preprocessed = []

resnet_load = ResNet152V2(weights='imagenet', include_top=False)
model = models.Sequential()
model.add(resnet_load)
model.add(layers.Flatten())


for i in tqdm(range(0, 10015)):
    file = df.iloc[i]['image_id']
    ohl = one_hot_loc[i]    
    
    img = image.load_img('./Data/HAM10000_images/' + file + '.jpg', target_size= (224,224))
    img = image.img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = tf.keras.applications.resnet_v2.preprocess_input(img)
    preds = model.predict(img).reshape(100352)

    preprocessed.append(np.concatenate((preds, ohl)))
    
df['resnet'] = preprocessed

In [None]:
labels = []
for i in range(0, 10015):
    dx = df.iloc[i]['dx']
    if dx == 'akiec':
        labels.append(tf.one_hot(0, 7))
    elif dx == 'bcc':
        labels.append(tf.one_hot(1, 7))
    elif dx == 'bkl':
        labels.append(tf.one_hot(2, 7))
    elif dx == 'df':
        labels.append(tf.one_hot(3, 7))
    elif dx == 'mel':
        labels.append(tf.one_hot(4, 7))
    elif dx == 'nv':
        labels.append(tf.one_hot(5, 7))
    elif dx == 'vasc':
        labels.append(tf.one_hot(6, 7))
        
df['one_hot'] = labels

In [None]:
df.head()

(Warning for Sunlight, increased risk of cancer) Actinic keratoses and intraepithelial carcinoma / Bowen's disease (__akiec__), 0

(Skin cancer) basal cell carcinoma (__bcc__), 1

(HARMLESS) benign keratosis-like lesions (solar lentigines / seborrheic keratoses and lichen-planus like keratoses, __bkl__), 2

(HARMLESS) dermatofibroma (__df__),  3

melanoma (__mel__),  4

(Not entirely harmless) melanocytic nevi (__nv__)   5

(HARMLESS) vascular lesions (angiomas, angiokeratomas, pyogenic granulomas and hemorrhage, __vasc__). 6 


More than 50% of lesions are confirmed through histopathology (histo), the ground truth for the rest of the cases is either follow-up examination (follow_up), expert consensus (consensus), or confirmation by in-vivo confocal microscopy (confocal). The dataset includes lesions with multiple images, which can be tracked by the lesion_id-column within the HAM10000_metadata file.

In [None]:
risk = []

for i in range(0, 10015):
    if df.iloc[i]['dx'] in ['bkl', 'df', 'vasc']:
        risk.append(0.0)
    elif df.iloc[i]['dx'] == 'nv':
        risk.append(1.0)
    elif df.iloc[i]['dx'] == 'akiec':
        risk.append(2.0)
    elif df.iloc[i]['dx'] in ['mel', 'bcc']:
        risk.append(3.0)
        
df['risk'] = risk

df.head()

In [None]:
df[df['risk'] == 3.0]['dx'].value_counts()

In [None]:
VGG_X = np.array(df['VGG16'])
IV3_X = np.array(df['IV3'])
RES_X = np.array(df['resnet'])

In [None]:
#y = np.array(df['one_hot'])

y = np.array(df['risk'])

In [None]:
vgg_model = models.Sequential()
vgg_model.add(layers.Dense(8192, activation='relu'))
vgg_model.add(layers.Dense(4096, activation='relu'))
vgg_model.add(layers.Dense(2048, activation='relu'))
vgg_model.add(layers.Dense(1024, activation='relu'))
vgg_model.add(layers.Dense(512, activation='relu'))
vgg_model.add(layers.Dense(128, activation='relu'))
vgg_model.add(layers.Dense(4))

In [None]:
new_VGG = np.asarray(new_VGG).astype('float32')

In [None]:
new_VGG.shape

In [None]:
new_y = []
for ele in y:
    new_y.append(np.array(ele))
    
new_y = np.array(new_y)

new_y.shape

In [None]:
model.compile(optimizer='Adam',
              loss=tf.nn.softmax_cross_entropy_with_logits,
              metrics=['accuracy'])
model.fit(new_VGG, new_y, epochs=10, validation_split=0.1)

In [None]:
res_model = models.Sequential()
res_model.add(layers.Dense(8192, activation='relu'))
res_model.add(layers.Dense(4096, activation='relu'))
res_model.add(layers.Dense(2048, activation='relu'))
res_model.add(layers.Dense(1024, activation='relu'))
res_model.add(layers.Dense(512, activation='relu'))
res_model.add(layers.Dense(128, activation='relu'))
res_model.add(layers.Dense(4))

In [None]:
res_model.compile(optimizer='Adam',
              loss=tf.nn.softmax_cross_entropy_with_logits,
              metrics=['accuracy'])
res_model.fit(new_RES, new_y, epochs=10, validation_split=0.1)

In [None]:
iv3_model = models.Sequential()
iv3_model.add(layers.Dense(8192, activation='relu'))
iv3_model.add(layers.Dense(4096, activation='relu'))
iv3_model.add(layers.Dense(2048, activation='relu'))
iv3_model.add(layers.Dense(1024, activation='relu'))
iv3_model.add(layers.Dense(512, activation='relu'))
iv3_model.add(layers.Dense(128, activation='relu'))
iv3_model.add(layers.Dense(4))

In [None]:
iv3_model.compile(optimizer='Adam',
              loss=tf.nn.softmax_cross_entropy_with_logits,
              metrics=['accuracy'])
iv3_model.fit(new_IV3, new_y, epochs=10, validation_split=0.1)

# New Original Model

In [3]:
df = pd.read_csv('./data/HAM10000_Metadata')

In [4]:
df = df.sample(frac = 1).reset_index()

df.head()

Unnamed: 0,index,lesion_id,image_id,dx,dx_type,age,sex,localization,dataset
0,161,HAM_0005386,ISIC_0033973,bkl,histo,45.0,male,back,vidir_modern
1,6143,HAM_0005593,ISIC_0029658,nv,follow_up,45.0,male,back,vidir_molemax
2,2738,HAM_0005757,ISIC_0026496,bcc,histo,70.0,male,lower extremity,rosendahl
3,9515,HAM_0005115,ISIC_0033766,nv,consensus,35.0,male,upper extremity,vidir_modern
4,1257,HAM_0004720,ISIC_0027277,mel,histo,40.0,male,lower extremity,vidir_modern


In [5]:
risk = []

for i in range(0, 10015):
    if df.iloc[i]['dx'] in ['bkl', 'df', 'vasc']:
        risk.append(0.0)
    elif df.iloc[i]['dx'] == 'nv':
        risk.append(1.0)
    elif df.iloc[i]['dx'] == 'akiec':
        risk.append(2.0)
    elif df.iloc[i]['dx'] in ['mel', 'bcc']:
        risk.append(3.0)
        
df['risk'] = risk

df.head()

Unnamed: 0,index,lesion_id,image_id,dx,dx_type,age,sex,localization,dataset,risk
0,161,HAM_0005386,ISIC_0033973,bkl,histo,45.0,male,back,vidir_modern,0.0
1,6143,HAM_0005593,ISIC_0029658,nv,follow_up,45.0,male,back,vidir_molemax,1.0
2,2738,HAM_0005757,ISIC_0026496,bcc,histo,70.0,male,lower extremity,rosendahl,3.0
3,9515,HAM_0005115,ISIC_0033766,nv,consensus,35.0,male,upper extremity,vidir_modern,1.0
4,1257,HAM_0004720,ISIC_0027277,mel,histo,40.0,male,lower extremity,vidir_modern,3.0


In [6]:
X = []

for i in tqdm(range(0, 10015)):
    file = df.iloc[i]['image_id']
    
    img = image.load_img('./Data/HAM10000_images/' + file + '.jpg', target_size= (150,200))
    img = image.img_to_array(img) / 255.0
    
    
    X.append(img)

100%|███████████████████████████████████████████████████████████████████████████| 10015/10015 [00:36<00:00, 274.00it/s]


In [7]:
X = np.array(X)

X.shape

(10015, 150, 200, 3)

In [8]:
risk = []

for ele in df['risk']:
    risk.append(tf.one_hot(int(ele), 4))
    
y = np.array(risk)
y.shape

(10015, 4)

In [19]:
ori_model = models.Sequential()
ori_model.add(layers.Conv2D(64, (3, 3), activation='tanh', input_shape=(150,200,3)))
ori_model.add(layers.BatchNormalization())
ori_model.add(layers.Activation(activations.elu))
ori_model.add(layers.MaxPooling2D(2,2))


ori_model.add(layers.Conv2D(32, (3, 3), activation='tanh'))
ori_model.add(layers.BatchNormalization())
ori_model.add(layers.Activation(activations.elu))
ori_model.add(layers.MaxPooling2D(2,2))


ori_model.add(layers.Conv2D(16, (3, 3), activation='tanh'))
ori_model.add(layers.BatchNormalization())
ori_model.add(layers.Activation(activations.elu))
ori_model.add(layers.MaxPooling2D(2,2))


ori_model.add(layers.Flatten(name="feature_output"))

ori_model.add(layers.Dense(1024, activation='relu'))
ori_model.add(layers.Dense(256, activation='relu'))
ori_model.add(layers.Dense(64, activation='relu'))
ori_model.add(layers.Dense(4))

In [20]:
ori_model.compile(optimizer='Adam',
              loss=tf.nn.softmax_cross_entropy_with_logits,
              metrics=['accuracy'])
ori_model.fit(X, y, epochs=25, validation_split=0.1, batch_size=16)

Train on 9013 samples, validate on 1002 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<tensorflow.python.keras.callbacks.History at 0x1854f2b0e08>

In [None]:
loss = ori_model.history.history['loss']
val_loss = ori_model.history.history['val_loss']
epochs = range(1, len(loss) + 1)
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.savefig('Origin_Loss.png', dpi = 300)
plt.show()

In [None]:
plt.clf()
acc = ori_model.history.history['accuracy']
val_acc = ori_model.history.history['val_accuracy']
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Acc')
plt.legend()
plt.savefig('Origin_Acc.png', dpi = 300)
plt.show()

In [10]:
feature_vector = []

for i in tqdm(range(0, 10015)):
    sex = 0 if df.iloc[i]['sex'] == 'male' else 1
    age = df.iloc[i]['age']
    loc = df.iloc[i]['localization']
    
    feat = np.array([sex, age])
    
    if loc == 'abdomen':
        feat = np.concatenate((feat, tf.one_hot(0, 15)))
    elif loc == 'scalp':
        feat = np.concatenate((feat, tf.one_hot(1, 15)))
    elif loc == 'lower extremity':
        feat = np.concatenate((feat, tf.one_hot(2, 15)))
    elif loc == 'trunk':
        feat = np.concatenate((feat, tf.one_hot(3, 15)))
    elif loc == 'upper extremity':
        feat = np.concatenate((feat, tf.one_hot(4, 15)))
    elif loc == 'back':
        feat = np.concatenate((feat,tf.one_hot(5, 15)))
    elif loc == 'neck':
        feat = np.concatenate((feat,tf.one_hot(6, 15)))
    elif loc == 'face':
        feat = np.concatenate((feat,tf.one_hot(7, 15)))
    elif loc == 'chest':
        feat = np.concatenate((feat,tf.one_hot(8, 15)))
    elif loc == 'foot':
        feat = np.concatenate((feat,tf.one_hot(9, 15)))
    elif loc == 'ear':
        feat = np.concatenate((feat,tf.one_hot(10, 15)))
    elif loc == 'unknown':
        feat = np.concatenate((feat,tf.one_hot(11, 15)))
    elif loc == 'hand':
        feat = np.concatenate((feat,tf.one_hot(12, 15)))
    elif loc == 'acral':
        feat = np.concatenate((feat,tf.one_hot(13, 15)))
    elif loc == 'genital':
        feat = np.concatenate((feat,tf.one_hot(14, 15)))

    feature_vector.append(feat)

100%|██████████████████████████████████████████████████████████████████████████| 10015/10015 [00:05<00:00, 1719.31it/s]


In [11]:
feature_vector = np.array(feature_vector)

In [12]:
feature_vector.shape

(10015, 17)

In [21]:
feature_extractor = tf.keras.Model(
    inputs=ori_model.inputs,
    outputs=ori_model.get_layer(name="feature_output").output,
)

In [22]:
image_features = feature_extractor(X)

ResourceExhaustedError: OOM when allocating tensor with shape[10015,148,198,64] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:Conv2D]

In [None]:
X = []
for img_feat, feat in zip(image_features, feature_vector):
    X.append(np.concatenate((img_feat, feat)))

X = np.array(X)

In [None]:
model = models.Sequential()

model.add(layers.Dense(1024, activation='relu'))
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(4))

In [None]:
model.compile(optimizer='Adam',
              loss=tf.nn.softmax_cross_entropy_with_logits,
              metrics=['accuracy'])
model.fit(X, y, epochs=25, validation_split=0.1, batch_size=16)