# **Skin Lesion Analyzer with Deep Learning**



### **Outline**
Use these links to jump to specific sections of this project.

1. Import Packages
2. Load and Transform the Dataset
3. Model Development
4. Model Training 
5. Prediction and Evaluation

### 1. Import Packages

In [None]:
# python libraries
import os
import glob
import pandas as pd

# tensorflow libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications import DenseNet121

# sklearn libraries
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

### 2. Load and Transform the Dataset

In [None]:
# Sentinel handed to `map(num_parallel_calls=...)` and `prefetch(...)` in
# DatasetBuilder.input_fn so tf.data picks those values itself at runtime.
AUTOTUNE = tf.data.experimental.AUTOTUNE

class DatasetBuilder():
    """Build batched ``tf.data`` pipelines from a metadata CSV and an image folder.

    The CSV is expected to provide an ``image_id`` column (JPEG file name
    without extension) and a ``dx`` column (diagnosis code).  Images are
    searched exactly one directory level below ``base_dir``.

    Attributes:
        base_dir: Directory whose sub-directories hold the ``.jpg`` files.
        csv_file: Path of the metadata CSV.
        test_df: Held-out test rows from the most recent call to
            ``split_and_transform_df``; ``None`` before the first call.
    """

    def __init__(self, base_dir, csv_file):
        self.base_dir = base_dir
        self.csv_file = csv_file
        self.test_df = None

    def split_and_transform_df(self, base_dir, csv_file):
        """Load the metadata, attach image paths / label ids and split it.

        15% of the rows become the validation set; 10% of the remainder is
        held out as a test set and kept on ``self.test_df`` (it used to be
        computed and thrown away).  The splits are random and not seeded.

        Args:
            base_dir: Directory whose sub-directories hold the ``.jpg`` files.
            csv_file: Path of the metadata CSV.

        Returns:
            Tuple ``(train_df, val_df)`` of DataFrames with the added
            ``image_path`` and ``label_id`` columns.
        """
        df = pd.read_csv(csv_file)

        # image id (file name without extension) -> full path of the JPEG
        image_path_dict = {
            os.path.splitext(os.path.basename(path))[0]: path
            for path in glob.glob(os.path.join(base_dir, '*', '*.jpg'))
        }

        # diagnosis code in the `dx` column -> integer class id
        label_dict = {'akiec': 0, 'bcc': 1, 'bkl': 2, 'df': 3,
                      'mel': 4, 'nv': 5, 'vasc': 6}

        df['image_path'] = df['image_id'].map(image_path_dict.get)
        df['label_id'] = df['dx'].map(label_dict.get)

        # A row whose image was not found or whose `dx` code is unknown holds
        # None here and cannot be converted to a tensor later on, so drop it.
        # The cast restores integer labels if the lookup produced NaNs.
        df = (df.dropna(subset=['image_path', 'label_id'])
                .astype({'label_id': 'int64'}))

        train_df, val_df = train_test_split(df, test_size=0.15)
        train_df, test_df = train_test_split(train_df, test_size=0.10)

        # Keep the held-out rows reachable for the final evaluation.
        self.test_df = test_df
        return train_df, val_df

    def decode_image(self, filename, label, image_size=(224, 224)):
        """Read one JPEG and return it as a resized float32 RGB tensor.

        Args:
            filename: Path of the JPEG file (string tensor).
            label: Class id; passed through unchanged.
            image_size: Target ``(height, width)`` for the resize.

        Returns:
            Tuple ``(image, label)``.
        """
        bits = tf.io.read_file(filename)
        image = tf.image.decode_jpeg(bits, channels=3)
        # uint8 -> float32; convert_image_dtype also rescales to [0, 1].
        image = tf.image.convert_image_dtype(image, tf.float32)
        image = tf.image.resize(image, image_size)
        return image, label

    def input_fn(self, dataframe, batch_size=32, shuffle=True):
        """Turn a DataFrame into an endlessly repeating, batched dataset.

        Args:
            dataframe: Frame with ``image_path`` and ``label_id`` columns.
            batch_size: Number of examples per batch.
            shuffle: Shuffle the examples (buffer of ``10 * batch_size``).
                Pass ``False`` for evaluation data so that every pass sees
                the same examples in the same order.

        Returns:
            A ``tf.data.Dataset`` yielding ``(images, labels)`` batches.  It
            repeats forever, so the consumer must bound it with a step count.
        """
        image_list = dataframe.image_path.values
        label_list = dataframe.label_id.values

        ds = (tf.data.Dataset
                .from_tensor_slices((image_list, label_list))
                .map(self.decode_image, num_parallel_calls=AUTOTUNE)
                .cache())
        if shuffle:
            # Shuffle *before* repeat so examples of consecutive epochs are
            # not mixed inside the shuffle buffer.
            ds = ds.shuffle(buffer_size=10 * batch_size)
        return ds.repeat().batch(batch_size).prefetch(AUTOTUNE)

    def create_dataset(self, batch_size=32):
        """Create the training and validation datasets.

        Args:
            batch_size: Number of examples per batch for both datasets.

        Returns:
            Tuple ``(train_ds, val_ds)``.  Only the training data is
            shuffled.  The held-out test rows are left on ``self.test_df``.
        """
        train_df, val_df = self.split_and_transform_df(self.base_dir, self.csv_file)
        train_ds = self.input_fn(train_df, batch_size=batch_size)
        val_ds = self.input_fn(val_df, batch_size=batch_size, shuffle=False)
        return train_ds, val_ds

### 3. Model Development

In [None]:
# Input resolution fed to the network: height, width, RGB channels.
image_shape = (224, 224, 3)
base_learning_rate = 0.0001

# DenseNet121 without its classification head, initialised with the
# 'imagenet' weights; every layer stays trainable (full fine-tuning).
base_model = DenseNet121(input_shape=image_shape,
                         include_top=False,
                         weights='imagenet')
base_model.trainable = True

image_input = keras.Input(shape=image_shape)
x = base_model(image_input)
x = layers.GlobalAveragePooling2D()(x)
# One unit per lesion class.  Each image carries exactly one label, so the
# head is a softmax (probabilities summing to 1), not independent sigmoids.
x = layers.Dense(7, activation='softmax')(x)
model = keras.Model(inputs=image_input, outputs=x)

# The model already emits probabilities, hence from_logits=False; the
# previous sigmoid + from_logits=True pairing fed squashed values to a loss
# that expected raw logits.  `learning_rate` replaces the deprecated `lr`
# argument name.  A tenth of the base rate is used for fine-tuning.
model.compile(optimizer=keras.optimizers.Adam(learning_rate=base_learning_rate / 10),
              loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])

### 4. Model Training

In [None]:
def train_and_evaluate(params):
    """Build the datasets and fit the module-level ``model`` on them.

    Args:
        params: Mapping with the keys
            ``'csv_file'``    - path of the metadata CSV,
            ``'base_dir'``    - directory containing the image folders,
            ``'num_epochs'``  - number of training epochs,
            ``'train_steps'`` - training batches per epoch,
            ``'val_steps'``   - validation batches per evaluation.
            A ``'batch_size'`` entry may be present but is not used here:
            the datasets returned by ``DatasetBuilder.create_dataset`` are
            already batched.

    Returns:
        The history object returned by ``model.fit``.
    """
    builder = DatasetBuilder(params['base_dir'], params['csv_file'])
    train_ds, val_ds = builder.create_dataset()

    # `batch_size` must not be passed to fit() together with a
    # tf.data.Dataset - the dataset defines the batching.  Both datasets
    # repeat forever, so the step counts bound every epoch.
    return model.fit(train_ds,
                     epochs=params['num_epochs'],
                     validation_data=val_ds,
                     steps_per_epoch=params['train_steps'],
                     validation_steps=params['val_steps'])

In [None]:
# Settings consumed by train_and_evaluate: data locations, batch size,
# number of epochs and the per-epoch training / validation step counts.
params = dict(
    csv_file='../input/skin-cancer-mnist-ham10000/HAM10000_metadata.csv',
    base_dir='../input/skin-cancer-mnist-ham10000',
    batch_size=32,
    num_epochs=10,
    train_steps=239,
    val_steps=46,
)

In [None]:
# Start training with the settings in `params`; the call returns the history
# object produced by model.fit.
train_and_evaluate(params)

### 5. Prediction and Evaluation 