In [1]:
# Dependencies
import matplotlib.pyplot as plt
%matplotlib inline

import os
import numpy as np
import tensorflow as tf
import pandas as pd

import keras
from keras.preprocessing import image
import operator

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
 # Load the previously saved Keras model from disk; `model` is the global that predict() calls for inference.
from keras.models import load_model
model = load_model("models/deep21.h5")

In [3]:
# Diagnosis codes as they appear in the original data, and the English name
# for each one. The two lists are parallel: position i of `diagnoses`
# describes position i of `dx`.
dx = ['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc']
diagnoses = [
    "Bowen's Disease",
    "Basal Cell Carcinoma",
    "Benign Keratosis-like Lesion",
    "Dermatofibroma",
    "Melanoma",
    "Melanocytic Nevi",
    "Vascular Lesions",
]

# Pair each code with its English name, preserving the order of `dx`.
dx_legend = dict(zip(dx, diagnoses))
print(dx_legend)

{'akiec': "Bowen's Disease", 'bcc': 'Basal Cell Carcinoma', 'bkl': 'Benign Keratosis-like Lesion', 'df': 'Dermatofibroma', 'mel': 'Melanoma', 'nv': 'Melanocytic Nevi', 'vasc': 'Vascular Lesions'}


In [4]:
# Read in the DataFrame of stored images and their diagnosis types.
# Each row names an image file (image_id) and its diagnosis code (dx), along
# with lesion_id, dx_type, age, sex and localization columns.
metadata_df=pd.read_csv("Data/HAM10000_metadata.csv")
# Show the first rows as a quick sanity check of the load.
metadata_df.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization
0,HAM_0000118,ISIC_0027419.jpg,bkl,histo,80.0,male,scalp
1,HAM_0000118,ISIC_0025030.jpg,bkl,histo,80.0,male,scalp
2,HAM_0002730,ISIC_0026769.jpg,bkl,histo,80.0,male,scalp
3,HAM_0002730,ISIC_0025661.jpg,bkl,histo,80.0,male,scalp
4,HAM_0001466,ISIC_0031633.jpg,bkl,histo,75.0,male,ear


In [5]:
 # Default image size for sklesion; predict() passes this as target_size when loading each image.
image_size = (200, 200)

In [6]:
def predict(image_path):
    """Run the loaded model on one image file and return its raw scores.

    The image is loaded at the notebook-wide ``image_size``, converted to an
    array, and wrapped in a batch of one before being handed to ``model``.

    Parameters
    ----------
    image_path : path to the image file to classify.

    Returns
    -------
    list
        The model's output for that single image, one entry per output
        unit, with no labels attached.
    """
    # NOTE(review): pixel values are passed to the model without rescaling
    # — confirm this matches the preprocessing used when it was trained.
    loaded = image.load_img(image_path, target_size=image_size)
    # The model expects a batch, so add a leading axis of length one.
    batch = np.expand_dims(image.img_to_array(loaded), axis=0)
    first_row = model.predict(batch)[0]
    return list(first_row)

In [7]:
def predictdx(predictions):
    """Pick the most confident diagnosis from a list of scores.

    Parameters
    ----------
    predictions : list of confidences, ordered like the global ``dx`` list.

    Returns
    -------
    tuple
        ``(dx_code, confidence)`` for the highest-scoring entry; on ties the
        earliest entry wins.
    """
    top_score = max(predictions)
    # list.index returns the first position holding the top score.
    position = predictions.index(top_score)
    return (dx[position], predictions[position])

In [8]:
def accu_rate(sampledx, predictions):
    """Return the confidence the model gave to the sample's true diagnosis.

    Parameters
    ----------
    sampledx : diagnosis code of the sample; must be one of the entries in
        the global ``dx`` list (``ValueError`` otherwise).
    predictions : sequence of confidences, ordered like ``dx``.
    """
    # Position of the true diagnosis in `dx` selects the matching score.
    return predictions[dx.index(sampledx)]

In [9]:
def dxtranslate(sampledx):
    """Translate a dx code into its English diagnosis name.

    Parameters
    ----------
    sampledx : diagnosis code, e.g. ``'mel'``.

    Returns
    -------
    str
        The English name stored for that code in ``dx_legend``.

    Raises
    ------
    KeyError
        If ``sampledx`` is not a key of ``dx_legend``.
    """
    # The code-to-name table in this notebook is called `dx_legend`; the
    # previously referenced `diagnoses_legend` is never defined, so every
    # call raised NameError.
    return dx_legend[sampledx]

In [10]:
def map_predict(predictions):
    """Label a list of scores with their diagnosis codes.

    Parameters
    ----------
    predictions : sequence of at least seven confidences, ordered like the
        global ``dx`` list.

    Returns
    -------
    dict
        Maps each of the first seven ``dx`` codes to the score at the same
        position in ``predictions``.
    """
    return {dx[slot]: predictions[slot] for slot in range(7)}

In [11]:
def age_bin(age):
    """Map a numeric age to one of four age-range labels.

    Parameters
    ----------
    age : number (may be NaN).

    Returns
    -------
    str
        ``"0-18"`` for ages below 19, ``"19-44"`` for ages below 45,
        ``"45-64"`` for ages below 65 or a missing (NaN) age, and
        ``"65+"`` otherwise.
    """
    if age < 19:
        return "0-18"
    if age < 45:
        return "19-44"
    # NaN fails every `<` comparison, so it is caught here by its string
    # form and placed in the 45-64 bin rather than falling through to "65+".
    if age < 65 or str(age) == 'nan':
        return "45-64"
    return "65+"

In [12]:
# Score every image listed in the metadata and bin every age, collecting the
# results row by row so they can be merged back onto metadata_df by index.
all_preds = []
age_ranges = []

entries = len(metadata_df)
# Walk the two columns that are actually needed in a single positional pass.
# This visits rows in the same order as indexing by 0..entries-1 on the
# default integer index produced by read_csv, without a label lookup per
# cell. (The per-row `dx` value was fetched before but never used.)
for sample, (image_id, age) in enumerate(
    zip(metadata_df["image_id"], metadata_df["age"])
):
    image_path = os.path.join("Images/HAM10000_images", image_id)
    predictions = predict(image_path)
    pred_map = map_predict(predictions)
    age_range = age_bin(age)
    # Progress marker every 100 images.
    if (sample % 100) == 0:
        print(f"{sample} samples predicted")
    all_preds.append(pred_map)
    age_ranges.append(age_range)

# Every metadata row must have produced exactly one prediction and one bin,
# otherwise the index-based merge that follows would silently drop rows.
assert len(all_preds) == len(age_ranges) == entries

# One column per diagnosis code, and a single age_range column.
all_preds_df = pd.DataFrame(all_preds)
age_ranges_df = pd.DataFrame({"age_range": age_ranges})

0 samples predicted
100 samples predicted
200 samples predicted
300 samples predicted
400 samples predicted
500 samples predicted
600 samples predicted
700 samples predicted
800 samples predicted
900 samples predicted
1000 samples predicted
1100 samples predicted
1200 samples predicted
1300 samples predicted
1400 samples predicted
1500 samples predicted
1600 samples predicted
1700 samples predicted
1800 samples predicted
1900 samples predicted
2000 samples predicted
2100 samples predicted
2200 samples predicted
2300 samples predicted
2400 samples predicted
2500 samples predicted
2600 samples predicted
2700 samples predicted
2800 samples predicted
2900 samples predicted
3000 samples predicted
3100 samples predicted
3200 samples predicted
3300 samples predicted
3400 samples predicted
3500 samples predicted
3600 samples predicted
3700 samples predicted
3800 samples predicted
3900 samples predicted
4000 samples predicted
4100 samples predicted
4200 samples predicted
4300 samples predicted


In [13]:
# Attach the age bins first, then the per-diagnosis scores, matching rows on
# the DataFrame index in both merges.
metadata_predict_df = (
    metadata_df
    .merge(age_ranges_df, left_index=True, right_index=True)
    .merge(all_preds_df, left_index=True, right_index=True)
)
# Preview the combined table.
metadata_predict_df.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,age_range,akiec,bcc,bkl,df,mel,nv,vasc
0,HAM_0000118,ISIC_0027419.jpg,bkl,histo,80.0,male,scalp,65+,0.0,9.054522e-09,0.007991,0.0,0.0,0.992009,0.0
1,HAM_0000118,ISIC_0025030.jpg,bkl,histo,80.0,male,scalp,65+,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,HAM_0002730,ISIC_0026769.jpg,bkl,histo,80.0,male,scalp,65+,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,HAM_0002730,ISIC_0025661.jpg,bkl,histo,80.0,male,scalp,65+,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,HAM_0001466,ISIC_0031633.jpg,bkl,histo,75.0,male,ear,65+,2.05461e-25,0.01015431,0.989846,0.0,0.0,0.0,0.0


In [14]:
# Save the metadata combined with age bins and per-diagnosis model scores.
# NOTE(review): the DataFrame index is written as well (no index=False) — confirm downstream readers expect that extra column.
metadata_predict_df.to_csv("Data/model2_train_data.csv")