In [1]:
import pandas as pd
from tensorflow.keras.preprocessing import image

import tensorflow as tf

from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.resnet_v2 import ResNet152V2


from tensorflow.keras import layers
from tensorflow.keras import models
from tqdm import tqdm
import numpy as np

In [2]:
df = pd.read_csv('./Data/HAM10000_Metadata')

Actinic keratoses and intraepithelial carcinoma / Bowen's disease (__akiec__), 0

basal cell carcinoma (__bcc__), 1

benign keratosis-like lesions (solar lentigines / seborrheic keratoses and lichen-planus like keratoses, __bkl__), 2

dermatofibroma (__df__),  3

melanoma (__mel__),  4

melanocytic nevi (__nv__)   5

vascular lesions (angiomas, angiokeratomas, pyogenic granulomas and hemorrhage, __vasc__). 6 


More than 50% of lesions are confirmed through histopathology (histo), the ground truth for the rest of the cases is either follow-up examination (follow_up), expert consensus (consensus), or confirmation by in-vivo confocal microscopy (confocal). The dataset includes lesions with multiple images, which can be tracked by the lesion_id-column within the HAM10000_metadata file.

In [3]:
df.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,dataset
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,vidir_modern
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,vidir_modern
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,vidir_modern
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,vidir_modern
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,vidir_modern


In [4]:
img = image.load_img('./Data/HAM10000_images/' + 'ISIC_0027419' + '.jpg', target_size= (224,224))
img = image.img_to_array(img) / 255.0

In [5]:
img = np.expand_dims(img, axis=0)

In [6]:
# VGG-16
preprocessed = []

model = VGG16(weights='imagenet', include_top=False)

for file in tqdm(df['image_id']):
    img = image.load_img('./Data/HAM10000_images/' + file + '.jpg', target_size= (224,224))
    img = image.img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = tf.keras.applications.vgg16.preprocess_input(img)
    preds = model.predict(img)
    preprocessed.append(preds)
    
df['VGG16'] = preprocessed

  0%|          | 4/10015 [00:04<3:22:16,  1.21s/it]


KeyboardInterrupt: 

In [7]:
# Inception
preprocessed = []

model = InceptionV3(weights='imagenet', include_top=False)

for file in tqdm(df['image_id']):
    img = image.load_img('./Data/HAM10000_images/' + file + '.jpg', target_size= (299,299))
    img = image.img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = tf.keras.applications.inception_v3.preprocess_input(img)
    preds = model.predict(img)
    preprocessed.append(preds)
    
df['IV3'] = preprocessed

  0%|          | 7/10015 [00:07<2:57:45,  1.07s/it] 


KeyboardInterrupt: 

In [8]:
# ResNet
preprocessed = []

model = ResNet152V2(weights='imagenet', include_top=False)

for file in tqdm(df['image_id']):
    img = image.load_img('./Data/HAM10000_images/' + file + '.jpg', target_size= (224,224))
    img = image.img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = tf.keras.applications.resnet_v2.preprocess_input(img)
    preds = model.predict(img)
    preprocessed.append(preds)
    
df['resnet'] = preprocessed

  0%|          | 0/10015 [00:08<?, ?it/s]


KeyboardInterrupt: 

In [12]:
labels = []
for i in range(0, 10015):
    dx = df.iloc[i]['dx']
    if dx == 'akiec':
        labels.append(tf.one_hot(0, 7))
    elif dx == 'bcc':
        labels.append(tf.one_hot(1, 7))
    elif dx == 'bkl':
        labels.append(tf.one_hot(2, 7))
    elif dx == 'df':
        labels.append(tf.one_hot(3, 7))
    elif dx == 'mel':
        labels.append(tf.one_hot(4, 7))
    elif dx == 'nv':
        labels.append(tf.one_hot(5, 7))
    elif dx == 'vasc':
        labels.append(tf.one_hot(6, 7))
        
df['one_hot'] = labels

In [13]:
df.to_csv('./Data/data.csv')