In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from glob import glob
from PIL import Image
# Seed NumPy's global random generator so that np.random-based steps give the
# same results on every run.
# NOTE(review): only NumPy is seeded here; no TensorFlow seed is set in this cell.
np.random.seed(123)

import tensorflow as tf

### Load Data

In [8]:
# Directory that is searched (non-recursively) for the dataset's JPEG files.
image_dir = "./HAM10000"

# Lookup table: file name without directory and extension -> path as returned by glob.
imageid_path_dict = dict(
    (os.path.splitext(os.path.basename(jpg_path))[0], jpg_path)
    for jpg_path in glob(os.path.join(image_dir, '*.jpg'))
)

# Short diagnosis code -> human-readable lesion name.
lesion_type_dict = dict(
    nv='Melanocytic nevi',
    mel='Melanoma',
    bkl='Benign keratosis-like lesions ',
    bcc='Basal cell carcinoma',
    akiec='Actinic keratoses',
    vasc='Vascular lesions',
    df='Dermatofibroma',
)

# Size every image is resized to, and the same size with a trailing
# 3-entry channel axis appended.
input_dims = (32, 32)
input_shape = (*input_dims, 3)

In [3]:

# Build the metadata + image table `tile_df`, or restore it from a cache.
#
# First run: read the metadata CSV, attach the image path, the readable lesion
# name and its integer code, load every image resized to `input_dims`, and
# write the result to CACHE_PATH.  Later runs: read the table back from
# CACHE_PATH instead of touching the image files again.
#
# Relies on `imageid_path_dict`, `lesion_type_dict` and `input_dims` from the
# configuration cell.

CACHE_PATH = "preprocessed_data.npz"

# Column order assumed for caches written before the column names were stored
# inside the archive itself.
LEGACY_CACHE_COLUMNS = [
    "lesion_id", "image_id", "dx", "dx_type", "age", "sex", "localization",
    "dataset", "path", "cell_type", "cell_type_index", "image",
]


def _load_image(path):
    """Open one image file, resize it to ``input_dims`` and return it as an ndarray.

    The ``with`` block releases the underlying file handle as soon as the
    pixel data has been copied into the array.
    """
    with Image.open(path) as img:
        return np.asarray(img.resize(input_dims))


# Forget the results of a previous run of this cell so the old table can be
# garbage collected before a new one is created.  Each name is removed
# independently and a missing name is simply ignored; no other error is hidden.
globals().pop("loaded_data", None)
globals().pop("tile_df", None)

if not os.path.exists(CACHE_PATH):
    print("Loading from raw...")
    tile_df = pd.read_csv("./HAM10000_metadata.csv")

    print("Adding data")
    tile_df["path"] = tile_df["image_id"].map(imageid_path_dict.get)
    tile_df["cell_type"] = tile_df["dx"].map(lesion_type_dict.get)
    tile_df["cell_type_index"] = pd.Categorical(tile_df["cell_type"]).codes

    # An image id without a matching file maps to None; report that clearly
    # here instead of failing later inside Image.open.
    _missing_ids = tile_df.loc[tile_df["path"].isna(), "image_id"]
    if not _missing_ids.empty:
        raise FileNotFoundError(
            "{} image(s) listed in the metadata were not found, e.g. {}".format(
                len(_missing_ids), _missing_ids.head().tolist()
            )
        )

    print("Loading images...")
    tile_df["image"] = tile_df["path"].map(_load_image)

    # Save to npz file.  The column names are stored next to the values so the
    # loader does not have to guess the schema.
    print("Saving to npz")
    np.savez_compressed(
        CACHE_PATH,
        data=tile_df.values,
        columns=np.array(tile_df.columns.tolist(), dtype=str),
    )

else:
    print("Loading from npz...")
    # SECURITY: allow_pickle=True is needed because "data" is an object array,
    # but unpickling can execute arbitrary code.  Only load a cache file that
    # this notebook wrote itself.
    with np.load(CACHE_PATH, allow_pickle=True) as loaded_data:
        if "columns" in loaded_data.files:
            _cache_columns = loaded_data["columns"].tolist()
        else:
            # Cache written by an older version of this cell.
            _cache_columns = LEGACY_CACHE_COLUMNS
        # "data" comes back as a plain ndarray; wrap it in a DataFrame again.
        tile_df = pd.DataFrame(loaded_data["data"], columns=_cache_columns)


print("Done")

Loading from npz...
Done


In [4]:
# Display the last rows of the table as this cell's output.
tile_df.tail()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,dataset,path,cell_type,cell_type_index,image
10010,HAM_0002867,ISIC_0033084,akiec,histo,40.0,male,abdomen,vidir_modern,./HAM10000\ISIC_0033084.jpg,Actinic keratoses,0,"[[[181, 164, 179], [179, 162, 176], [180, 163,..."
10011,HAM_0002867,ISIC_0033550,akiec,histo,40.0,male,abdomen,vidir_modern,./HAM10000\ISIC_0033550.jpg,Actinic keratoses,0,"[[[4, 5, 3], [24, 22, 21], [101, 88, 88], [128..."
10012,HAM_0002867,ISIC_0033536,akiec,histo,40.0,male,abdomen,vidir_modern,./HAM10000\ISIC_0033536.jpg,Actinic keratoses,0,"[[[132, 119, 120], [157, 139, 138], [177, 158,..."
10013,HAM_0000239,ISIC_0032854,akiec,histo,80.0,male,face,vidir_modern,./HAM10000\ISIC_0032854.jpg,Actinic keratoses,0,"[[[160, 123, 144], [163, 131, 152], [166, 128,..."
10014,HAM_0003521,ISIC_0032258,mel,histo,70.0,female,back,vidir_modern,./HAM10000\ISIC_0032258.jpg,Melanoma,5,"[[[175, 141, 121], [180, 149, 131], [180, 149,..."


### Actual Deep Learning  
  
##### Preprocessing  
- Mean Subtraction
- Normalization

In [10]:
from functools import partial

# Pre-bind dtype=tf.int32 so the converter can be handed to Series.map, which
# calls it with a single argument (one entry of the "image" column).
tensor_creation = partial(tf.convert_to_tensor, dtype=tf.int32)
# Convert every entry of the "image" column separately; the result is a pandas
# Series holding one tensor per row, not a single stacked tensor.
# NOTE(review): int32 tensors cannot hold fractional values — presumably a cast
# to a float dtype is needed before mean subtraction / normalization; confirm
# against the cells that follow.
tensors = tile_df["image"].map(tensor_creation)