# Setup

In [None]:
# Install efficientnet package
!pip install -q efficientnet

In [None]:
# Import packages
import itertools
import math
import random
import re
import time
from functools import partial

import cv2
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
import tensorflow as tf
import tensorflow.keras.backend as K
import efficientnet.tfkeras as efn
from kaggle_datasets import KaggleDatasets
from sklearn.metrics import precision_recall_curve, confusion_matrix, classification_report, roc_curve, auc, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from tqdm import tqdm_notebook as tqdm

In [None]:
# Connect to TPU
# AUTOTUNE does not depend on the accelerator, so it is defined outside the try block.
AUTOTUNE = tf.data.experimental.AUTOTUNE

try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
    print("Running on TPU ", tpu.cluster_spec().as_dict()["worker"])
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
except ValueError:
    # `tpu` must still be bound on this path: compile_model() later branches on
    # `if tpu:` and would otherwise fail with a NameError on CPU/GPU runtimes.
    tpu = None
    print("Not connected to a TPU runtime. Using CPU/GPU strategy")
    strategy = tf.distribute.MirroredStrategy()

# Number of replicas the chosen strategy keeps in sync (used to size the shuffle buffer)
REPLICAS = strategy.num_replicas_in_sync

# Parameters

In [None]:
# Hyperparameters
SEED = 21  # seed for the Python, NumPy and TensorFlow RNGs and for the train/validation file split
DIM = 384  # images are resized to DIM x DIM; also the spatial input size of the model
EPOCHS = 16  # number of epochs for each of the two full-training runs
NUM_CLASSES = 1  # number of units of the sigmoid output layer
BATCH_SIZE = 256  # batch size of the training, validation and test datasets
VERBOSE_LEVEL = 1  # verbosity passed to model.fit and to the plateau/early-stopping callbacks
SAVE_OUTPUT = True  # NOTE(review): not referenced anywhere in the visible notebook
LABEL_SMOOTHING = 0.05  # label smoothing of the binary cross-entropy loss

# LR SCHEDULE 
LR_MAX = 1e-4  # upper learning rate of the cyclical schedule
LR_MIN = 1e-7  # base (lower) learning rate of the cyclical schedule
LR_START = 1e-4  # learning rate the Adam optimizer is created with in compile_model
MODE = "triangular2"  # cyclical-schedule variant: the amplitude is halved with every cycle
STEP_SIZE = 4  # half-cycle length of the schedule; the scheduler is called once per epoch

# The 2019 data may decrease the model performance. More info: https://www.kaggle.com/c/siim-isic-melanoma-classification/discussion/168028 
# When True, only every second file of the external (older) dataset is kept for training.
EXCLUDE_2019 = True

In [None]:
# Config for image augmentation
# The *_ entries parameterise the random affine transform in transform();
# every random factor there is drawn from a standard normal distribution.
AUGMENTATION_CONFIG = {
    'ROT_': 180.0,   # rotation in degrees = ROT_ * N(0, 1)
    'SHR_': 6,       # shear in degrees = SHR_ * N(0, 1)
    'HZOOM_': 10.0,  # height zoom = 1 + N(0, 1) / HZOOM_
    'WZOOM_': 10.0,  # width zoom = 1 + N(0, 1) / WZOOM_
    'HSHIFT_': 10.0, # height shift = HSHIFT_ * N(0, 1)
    'WSHIFT_': 10.0, # width shift = WSHIFT_ * N(0, 1)
    'CROP': 0.90,    # side length of the random/central crop as a fraction of DIM
    'PROBABILITY_DROPOUT': 0.75  # probability that coarse dropout is applied to an image
}

In [None]:
# Seed the three random number generators used in this notebook (Python's
# `random`, NumPy and TensorFlow) so that repeated runs start from the same state
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Data loading & preparation & augmentation

## Data loading

In [None]:
# Load the train and test csv files
df_train = pd.read_csv('../input/siim-isic-melanoma-classification/train.csv')
df_test = pd.read_csv('../input/siim-isic-melanoma-classification/test.csv')

In [None]:
# Counts the items inside a list of tf records
def count_data_items(filenames):
    """Sum the per-file item counts encoded in the TFRecord file names.

    Every name is expected to contain ``-<count>.`` (for example
    ``train00-2071.tfrec``); the digits between the dash and the dot are read
    as the number of items stored in that file.
    """
    count_pattern = re.compile(r"-([0-9]*)\.")
    counts = []
    for name in filenames:
        counts.append(int(count_pattern.search(name).group(1)))
    return np.sum(counts)

In [None]:
# Load the datasets
# Resolve the GCS paths of the two attached Kaggle datasets: the 2020 competition
# data and the external (older ISIC) data -- both 384x384 according to the dataset names
GCS_PATH_2020 = KaggleDatasets().get_gcs_path('melanoma-384x384')
GCS_PATH_OLD = KaggleDatasets().get_gcs_path('isic2019-384x384')

# List the TFRecord files of the 2020 train and test sets
TRAINING_FILENAMES = tf.io.gfile.glob(GCS_PATH_2020 + '/train*.tfrec')
TEST_FILENAMES = tf.io.gfile.glob(GCS_PATH_2020 + '/test*.tfrec')

print("# TRAINING_FILENAMES", len(TRAINING_FILENAMES))
print("# TEST_FILENAMES", len(TEST_FILENAMES))

In [None]:
OLD_COMP_FILENAMES = tf.io.gfile.glob(GCS_PATH_OLD + '/*.tfrec')
print("# OLD_COMP_FILENAMES", len(OLD_COMP_FILENAMES))

In [None]:
# Exclude the 2019 if needed
# Keeps the files at positions 0, 2, 4, ... of the external file list.
# NOTE(review): assumes the listing order places the 2019 files at the odd positions -- confirm
OLD_COMP_FILENAMES_TMP = OLD_COMP_FILENAMES[::2] if EXCLUDE_2019 else []
if EXCLUDE_2019:
    OLD_COMP_FILENAMES = OLD_COMP_FILENAMES_TMP
    print("# OLD_COMP_FILENAMES", len(OLD_COMP_FILENAMES))

In [None]:
# Only use data from 2020 for validation
# The split happens at file level: 20% of the 2020 TFRecord files are held out
TRAINING_FILENAMES, VALIDATION_FILENAMES = train_test_split(TRAINING_FILENAMES, test_size = 0.20, random_state = SEED)
# The external (older) files are added to the training files only
TRAINING_FILENAMES = list(TRAINING_FILENAMES) + list(OLD_COMP_FILENAMES)

# Shuffle both file lists in place (uses the `random` module seeded above)
random.shuffle(TRAINING_FILENAMES)
random.shuffle(VALIDATION_FILENAMES)

In [None]:
# Test if TRAINING and VALIDATION files are valid
# A file that appears in both lists would leak validation data into training.
overlapping_files = set(TRAINING_FILENAMES) & set(VALIDATION_FILENAMES)
if overlapping_files:
    raise Exception(
        "TRAIN AND VALIDATION FILES ARE NOT VALID! Files in both sets: "
        + ", ".join(sorted(overlapping_files))
    )

In [None]:
print("# TRAINING_FILENAMES", len(TRAINING_FILENAMES))
print("# VALIDATION_FILENAMES", len(VALIDATION_FILENAMES))

TRAINING_IMAGES = count_data_items(TRAINING_FILENAMES)
VALIDATION_IMAGES = count_data_items(VALIDATION_FILENAMES)

print("# TRAINING_IMAGES", TRAINING_IMAGES)
print("# VALIDATION_IMAGES", VALIDATION_IMAGES)

In [None]:
# Calculate an initial bias for the output layer based on the number
# of malignant and benign cases of the 2020 train data
class_counts = df_train['benign_malignant'].value_counts()
benign_cases = class_counts.benign
malignant_cases = class_counts.malignant

# log(malignant / benign), kept as a one-element array
initial_bias = np.log([malignant_cases/benign_cases])

print("initial_bias", initial_bias)

## Augmentation

In [None]:
# Here we apply some manual augmentations that cannot be done with tf.image, 
# such as shearing, zooming and translation. Rotation can be done in tf.image but only in factors of 90 degrees, 
# so we do it manually instead.
# Source: https://www.kaggle.com/teyang/melanoma-detection-using-effnet-and-meta-data#5.-Train-and-Evaluate-Model

# Module-level shortcuts for the affine-transform settings; transform() reads these globals
ROT_ = AUGMENTATION_CONFIG['ROT_']
SHR_ = AUGMENTATION_CONFIG['SHR_']
HZOOM_ = AUGMENTATION_CONFIG['HZOOM_']
WZOOM_ = AUGMENTATION_CONFIG['WZOOM_']
HSHIFT_ = AUGMENTATION_CONFIG['HSHIFT_']
WSHIFT_ = AUGMENTATION_CONFIG['WSHIFT_']

def get_mat(rotation, shear, height_zoom, width_zoom, height_shift, width_shift):
    """Build the 3x3 matrix that transform() applies to homogeneous pixel indices.

    The result is the product (rotation @ shear) @ (zoom @ shift).

    Args:
        rotation: rotation angle in degrees (converted to radians below).
        shear: shear angle in degrees (converted to radians below).
        height_zoom, width_zoom: zoom factors; their reciprocals go on the diagonal.
        height_shift, width_shift: translation entries of the shift matrix.
        All arguments are passed by transform() as float32 tensors of shape [1].

    Returns:
        A tensor of shape [3, 3].
    """
    # returns 3x3 transform matrix which transforms indices
        
    # CONVERT DEGREES TO RADIANS
    rotation = math.pi * rotation / 180.
    shear    = math.pi * shear    / 180.

    def get_3x3_mat(lst):
        # Turn a list of nine shape-[1] tensors (row-major order) into a [3, 3] tensor
        return tf.reshape(tf.concat([lst],axis=0), [3,3])
    
    # ROTATION MATRIX
    c1   = tf.math.cos(rotation)
    s1   = tf.math.sin(rotation)
    one  = tf.constant([1],dtype='float32')
    zero = tf.constant([0],dtype='float32')
    
    rotation_matrix = get_3x3_mat([c1,   s1,   zero, 
                                   -s1,  c1,   zero, 
                                   zero, zero, one])    
    # SHEAR MATRIX
    c2 = tf.math.cos(shear)
    s2 = tf.math.sin(shear)    
    
    shear_matrix = get_3x3_mat([one,  s2,   zero, 
                                zero, c2,   zero, 
                                zero, zero, one])        
    # ZOOM MATRIX
    zoom_matrix = get_3x3_mat([one/height_zoom, zero,           zero, 
                               zero,            one/width_zoom, zero, 
                               zero,            zero,           one])    
    # SHIFT MATRIX
    shift_matrix = get_3x3_mat([one,  zero, height_shift, 
                                zero, one,  width_shift, 
                                zero, zero, one])
    
    return K.dot(K.dot(rotation_matrix, shear_matrix), 
                 K.dot(zoom_matrix,     shift_matrix))


def transform(image, DIM=DIM):    
    """Apply a random rotation, shear, zoom and shift to a single image.

    Args:
        image: one image of size [DIM, DIM, 3] (not a batch of [b, DIM, DIM, 3]).
        DIM: side length of the image; the default is the value of the global
            DIM at the time this function is defined.

    Returns:
        The transformed image, reshaped to [DIM, DIM, 3].
    """
    # input image - is one image of size [dim,dim,3] not a batch of [b,dim,dim,3]
    # output - image randomly rotated, sheared, zoomed, and shifted
    XDIM = DIM%2 # 1 for odd sizes (e.g. 331), 0 for even sizes; adjusts the lower clip bound below
    
    # Random transform parameters, each based on a standard-normal sample
    rot = ROT_ * tf.random.normal([1], dtype='float32')
    shr = SHR_ * tf.random.normal([1], dtype='float32') 
    h_zoom = 1.0 + tf.random.normal([1], dtype='float32') / HZOOM_
    w_zoom = 1.0 + tf.random.normal([1], dtype='float32') / WZOOM_
    h_shift = HSHIFT_ * tf.random.normal([1], dtype='float32') 
    w_shift = WSHIFT_ * tf.random.normal([1], dtype='float32') 

    # GET TRANSFORMATION MATRIX
    m = get_mat(rot,shr,h_zoom,w_zoom,h_shift,w_shift) 

    # LIST DESTINATION PIXEL INDICES
    # Coordinates are centred on the image; z = 1 makes them homogeneous so the
    # shift entries of the matrix take effect
    x   = tf.repeat(tf.range(DIM//2, -DIM//2,-1), DIM)
    y   = tf.tile(tf.range(-DIM//2, DIM//2), [DIM])
    z   = tf.ones([DIM*DIM], dtype='int32')
    idx = tf.stack( [x,y,z] )
    
    # ROTATE DESTINATION PIXELS ONTO ORIGIN PIXELS
    # The mapped coordinates are truncated to int32 and clipped so they stay inside the image
    idx2 = K.dot(m, tf.cast(idx, dtype='float32'))
    idx2 = K.cast(idx2, dtype='int32')
    idx2 = K.clip(idx2, -DIM//2+XDIM+1, DIM//2)
    
    # FIND ORIGIN PIXEL VALUES           
    # Convert the centred coordinates back to array indices and gather the source pixels
    idx3 = tf.stack([DIM//2-idx2[0,], DIM//2-1+idx2[1,]])
    d    = tf.gather_nd(image, tf.transpose(idx3))
        
    return tf.reshape(d,[DIM, DIM,3])

In [None]:
def dropout(image, PROBABILITY = AUGMENTATION_CONFIG['PROBABILITY_DROPOUT'], CT = 6, SZ = 0.20):
    """Coarse dropout: zero out up to CT square patches of a single image.

    Args:
        image: one image of size [DIM, DIM, 3] (not a batch); the global DIM is used.
        PROBABILITY: probability that dropout is applied to the image at all.
        CT: number of squares to remove.
        SZ: side length of each square as a fraction of DIM.

    Returns:
        The input image unchanged, or the image with the squares set to zero,
        reshaped to [DIM, DIM, 3].
    """
    # Source: https://www.kaggle.com/cdeotte/tfrecord-experiments-upsample-and-coarse-dropout
    # input image - is one image of size [dim,dim,3] not a batch of [b,dim,dim,3]
    # output - image with CT squares of side size SZ*DIM removed
    # P is 1 when dropout should be applied to this image and 0 otherwise
    P = tf.cast( tf.random.uniform([],0,1)<PROBABILITY, tf.int32)
    # NOTE(review): P is a tensor; inside dataset.map this `if` presumably relies on
    # AutoGraph converting it to a graph conditional -- confirm
    if (P==0)|(CT==0)|(SZ==0): return image
    
    for k in range(CT):
        # CHOOSE RANDOM LOCATION
        x = tf.cast( tf.random.uniform([],0,DIM),tf.int32)
        y = tf.cast( tf.random.uniform([],0,DIM),tf.int32)
        # COMPUTE SQUARE 
        # Multiplying by P gives a zero-width square when dropout is switched off;
        # the bounds are clamped to the image borders
        WIDTH = tf.cast( SZ*DIM,tf.int32) * P
        ya = tf.math.maximum(0,y-WIDTH//2)
        yb = tf.math.minimum(DIM,y+WIDTH//2)
        xa = tf.math.maximum(0,x-WIDTH//2)
        xb = tf.math.minimum(DIM,x+WIDTH//2)
        # DROPOUT IMAGE
        # Rebuild the rows ya:yb as [left part | zeros | right part] and stack them
        # between the untouched rows above and below
        one = image[ya:yb,0:xa,:]
        two = tf.zeros([yb-ya,xb-xa,3]) 
        three = image[ya:yb,xb:DIM,:]
        middle = tf.concat([one,two,three],axis=1)
        image = tf.concat([image[0:ya,:,:],middle,image[yb:DIM,:,:]],axis=0)
            
    # RESHAPE HACK SO TPU COMPILER KNOWS SHAPE OF OUTPUT TENSOR 
    image = tf.reshape(image,[DIM,DIM,3])
    return image

In [None]:
def color(x):
    """Color augmentation

    Randomly changes saturation (factor in [0.7, 1.3]), contrast (factor in
    [0.7, 1.3]) and brightness (delta up to 0.1), in that order.

    Args:
        x: Image

    Returns:
        Augmented image
    """        
    x = tf.image.random_saturation(x, 0.7, 1.3, seed=SEED)
    x = tf.image.random_contrast(x, 0.7, 1.3, seed=SEED)
    x = tf.image.random_brightness(x, 0.1, seed=SEED)
    return x

def flip(x):
    """Flip augmentation

    Randomly flips the image left-right and then, independently, up-down.

    Args:
        x: Image to flip

    Returns:
        Augmented image
    """
    x = tf.image.random_flip_left_right(x, seed=SEED)
    x = tf.image.random_flip_up_down(x, seed=SEED)
    return x

def rotate(x):
    """Rotate augmentation
    Args:
        x: Image to rotate

    Returns:
        Image rotated by a random multiple of 90 degrees
    """
    # Draw the number of quarter turns with a TensorFlow op so that a new value is
    # sampled for every image. np.random.randint would be evaluated only once, when
    # the function is traced inside dataset.map, freezing one rotation for all images.
    k = tf.random.uniform([], minval=0, maxval=4, dtype=tf.int32)
    static_shape = x.shape
    x = tf.image.rot90(x, k=k)
    # With a tensor-valued k the static height/width are lost; a square image keeps
    # its shape under any quarter turn, so the known shape can be restored.
    if (static_shape.rank == 3 and static_shape[0] is not None
            and static_shape[0] == static_shape[1]):
        x.set_shape(static_shape)
    return x

def random_crop(x):
    """Random crop augmentation
    Args:
        x: Image to crop

    Returns:
        A randomly positioned square crop (side = CROP fraction of DIM),
        resized back to [DIM, DIM, 3]
    """
    crop_side = round(DIM*AUGMENTATION_CONFIG['CROP'])
    cropped = tf.image.random_crop(x, size=[crop_side, crop_side, 3], seed=SEED)
    resized = tf.image.resize(cropped, [DIM, DIM])
    return tf.reshape(resized, [DIM, DIM, 3])

def central_crop(x):
    """Central crop augmentation

    Keeps the central region of the image and resizes it back to [DIM, DIM, 3].
    NOTE(review): not part of the augmentation list used by augment_image.

    Args:
        x: Image to crop

    Returns:
        Augmented image
    """
    # The fraction passed to central_crop is CROP expressed via DIM (DIM*CROP / DIM)
    x = tf.image.central_crop(x, DIM*AUGMENTATION_CONFIG['CROP'] / DIM)
    x = tf.image.resize(x, [DIM, DIM])
    x = tf.reshape(x, [DIM, DIM, 3]) 
    return x

In [None]:
# Apply every augmentation function to the image, one after the other
def augment_image(image, augment=True):
    """Run the full augmentation chain on a single image.

    The chain is color -> flip -> rotate -> random_crop -> transform -> dropout.
    With ``augment=False`` the image is returned untouched.
    """
    if not augment:
        return image

    for augmentation in (color, flip, rotate, random_crop, transform, dropout):
        image = augmentation(image)
    return image

## Preparation

In [None]:
# Normalize image pixels to values between 0 and 1
# (applied inside load_dataset via dataset.map, i.e. not part of the model itself)
normalization_layer = tf.keras.layers.experimental.preprocessing.Rescaling(1./255)

# Resize image to given dimensions
# (also applied inside load_dataset via dataset.map)
resizing_layer = tf.keras.layers.experimental.preprocessing.Resizing(DIM, DIM)

# Decode the image so we can use it for training
def decode_image(image):
    """Decode a JPEG-encoded string into a 3-channel float32 image (values not rescaled)."""
    decoded = tf.image.decode_jpeg(image, channels=3)
    return tf.cast(decoded, tf.float32)


def read_tfrecord(example, labeled):
    """Parse one serialized TFRecord example.

    Args:
        example: serialized example.
        labeled: if True the record is read with a "target" feature, otherwise
            with an "image_name" feature.

    Returns:
        (image, label) with an int32 label when ``labeled`` is True,
        otherwise (image, image_name).
    """
    # Both record kinds carry the encoded image; only the second feature differs
    tfrecord_format = {"image": tf.io.FixedLenFeature([], tf.string)}
    if labeled:
        tfrecord_format["target"] = tf.io.FixedLenFeature([], tf.int64)
    else:
        tfrecord_format["image_name"] = tf.io.FixedLenFeature([], tf.string)

    parsed = tf.io.parse_single_example(example, tfrecord_format)
    image = decode_image(parsed['image'])
    if not labeled:
        return image, parsed['image_name']
    return image, tf.cast(parsed['target'], tf.int32)


def load_dataset(filenames, labeled=True, ordered=False):
    """Build an unbatched dataset of decoded, rescaled and resized images.

    Args:
        filenames: TFRecord files to read.
        labeled: if True the elements are (image, label) pairs, otherwise
            (image, image_name) pairs.
        ordered: if False, deterministic element order is switched off for speed.

    Returns:
        A tf.data.Dataset whose images are scaled by 1/255 and resized to DIM x DIM.
    """
    ignore_order = tf.data.Options()
    if not ordered:
        ignore_order.experimental_deterministic = False # disable order, increase speed
    dataset = tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTOTUNE) # automatically interleaves reads from multiple files
    dataset = dataset.with_options(ignore_order) # uses data as soon as it streams in, rather than in its original order
    # The cache sits before parsing, so the serialized records are what gets cached
    dataset = dataset.cache() # cache ds for performance gains
    dataset = dataset.map(partial(read_tfrecord, labeled=labeled), num_parallel_calls=AUTOTUNE)

    # normalize the image so the values are between 0 and 1
    dataset = dataset.map(lambda x, y: (normalization_layer(x), y), num_parallel_calls=AUTOTUNE) 
    
    # resize the images to the same height and width
    dataset = dataset.map(lambda x, y: (resizing_layer(x), y), num_parallel_calls=AUTOTUNE) 

    # returns a dataset of (image, label) pairs if labeled=True or (image, id) pairs if labeled=False
    return dataset


def get_training_dataset(files=TRAINING_FILENAMES, augment=True, shuffle=True):
    """Build the endlessly repeating, batched training dataset.

    Args:
        files: TFRecord files to read. The default is the value TRAINING_FILENAMES
            had when this function was defined.
        augment: if True every image is passed through augment_image.
        shuffle: if True the elements are shuffled and deterministic order is disabled.

    Returns:
        A tf.data.Dataset of (images, labels) batches of exactly BATCH_SIZE elements.
    """
    dataset = load_dataset(files, labeled=True)
    if augment:
        # The augmentation map comes after the cache in load_dataset, so the random
        # augmentations are not frozen by the cache
        dataset = dataset.map(lambda x, y: (augment_image(x, augment=augment), y), num_parallel_calls=AUTOTUNE)
    # repeat() precedes shuffle(), so the shuffle buffer may mix elements of consecutive passes
    dataset = dataset.repeat()
    
    if shuffle: 
        dataset = dataset.shuffle(1024 * REPLICAS, reshuffle_each_iteration=True)
        opt = tf.data.Options()
        opt.experimental_deterministic = False
        dataset = dataset.with_options(opt)
    
    # drop_remainder=True keeps every batch at the full BATCH_SIZE
    dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)
    dataset = dataset.prefetch(AUTOTUNE)
    return dataset


def get_validation_dataset(files=VALIDATION_FILENAMES, ordered=False, repeat=False):
    """Build the batched validation dataset of (images, labels).

    Args:
        files: TFRecord files to read.
        ordered: forwarded to load_dataset; keeps the element order deterministic.
        repeat: if True the dataset repeats endlessly (before batching).
    """
    records = load_dataset(files, labeled=True, ordered=ordered)
    records = records.repeat() if repeat else records
    return records.batch(BATCH_SIZE).prefetch(AUTOTUNE)


def get_test_dataset(ordered=True):
    """Build the batched test dataset of (images, image_names) from TEST_FILENAMES."""
    records = load_dataset(TEST_FILENAMES, labeled=False, ordered=ordered)
    return records.batch(BATCH_SIZE).prefetch(AUTOTUNE)

# Data Validation
Let's look at the data and check whether everything has been done correctly so far.

In [None]:
# Helper function to display a certain number of images from a list of images
def plot_transform(num_images, images, augment = False):
    """Show ``num_images`` images side by side in one row.

    Args:
        num_images: number of panels to draw.
        images: sequence of images; with ``augment=False`` the first
            ``num_images`` entries are shown.
        augment: if True every panel shows a different random augmentation of
            the first image instead.
    """
    plt.figure(figsize=(30,10))
    for position in range(num_images):
        # Start at index 0 so the first image is not skipped and a list of exactly
        # num_images images does not run out of range
        image = images[0] if augment else images[position]
        # Matplotlib subplot indices are 1-based; use exactly num_images columns
        plt.subplot(1, num_images, position + 1)
        plt.axis('off')
        img = augment_image(image) if augment else image
        # Clip to the displayable [0, 1] range
        plt.imshow(np.clip(img, 0, 1))

In [None]:
# Build a non-augmented training dataset and re-batch it to 15 images for inspection
example_dataset = get_training_dataset(files=TRAINING_FILENAMES, augment=False)
example_dataset = example_dataset.unbatch().batch(15)
# Take the first batch of images and labels
example_batch = iter(example_dataset) 
image_batch, label_batch = next(example_batch)

In [None]:
images = [(x) for x in image_batch]

In [None]:
plot_transform(7, images, augment=False)

In [None]:
plot_transform(7, images, augment=True)

In [None]:
# Print the pixel values and the data type
for i in range(10):
    image = image_batch[i]
    print("min:", np.min(image), " -  max:", np.max(image))

print(image.dtype)

# Model

In [None]:
# Number of training steps for each epoch
# (needed because the training dataset repeats endlessly)
steps_per_epoch = TRAINING_IMAGES // BATCH_SIZE

# Number of validation steps for each epoch
# (integer division: a final partial batch is not counted)
validation_steps_per_epoch = VALIDATION_IMAGES // BATCH_SIZE

# Number of steps for each batch execution
# (passed to model.compile as steps_per_execution when running on a TPU)
steps_per_execution = 5

training_dataset = get_training_dataset(augment=True)
# repeat=True because the same dataset object is reused by several model.fit calls
validation_dataset = get_validation_dataset(repeat=True)

print("steps_per_epoch", steps_per_epoch)
print("validation_steps_per_epoch", validation_steps_per_epoch)
print("steps_per_execution", steps_per_execution)

In [None]:
# Helper function to get the model parameters and functions
def get_model_parameters(steps_per_epoch, lr, epochs):
    """Create the loss, metrics and optimizer the model is compiled with.

    Only ``lr`` is used; ``steps_per_epoch`` and ``epochs`` are accepted but ignored.

    Returns:
        Tuple (loss, metrics, optimizer): label-smoothed binary cross-entropy,
        [binary accuracy, AUC] and an Adam optimizer with learning rate ``lr``.
    """
    adam = tf.keras.optimizers.Adam(learning_rate=lr)
    smoothed_bce = tf.keras.losses.BinaryCrossentropy(label_smoothing=LABEL_SMOOTHING)
    accuracy_metric = tf.keras.metrics.BinaryAccuracy(name='accuracy')
    auc_metric = tf.keras.metrics.AUC(name='auc')
    return smoothed_bce, [accuracy_metric, auc_metric], adam

# Helper function to compile the model
def compile_model(model):
    """Compile ``model`` with the notebook's loss, metrics and optimizer and return it.

    The optimizer starts at LR_START. On a TPU (``tpu`` is truthy) the model is
    additionally compiled with ``steps_per_execution``.
    """
    loss, metrics, optimizer = get_model_parameters(steps_per_epoch, LR_START, EPOCHS)
    compile_kwargs = dict(loss=loss, metrics=metrics, optimizer=optimizer)
    if tpu:
        # Reduce python overhead, and maximize the performance of your TPU
        # Anything between 2 and `steps_per_epoch` could help here.
        compile_kwargs["steps_per_execution"] = steps_per_execution
    model.compile(**compile_kwargs)
    return model

In [None]:
# Clear the session - this helps when we are creating multiple models
K.clear_session()

# Creating the model in the strategy scope places the model on the TPU
with strategy.scope():
    # Start the output layer's bias at log(malignant / benign) computed earlier
    output_bias = None
    if initial_bias is not None:
        output_bias = tf.keras.initializers.Constant(initial_bias)

    # EfficientNet-B5 backbone without its classification head
    base_model = efn.EfficientNetB5(
        include_top=False, 
        weights='noisy-student', 
        input_shape=[DIM,DIM,3]
    )

    # Freeze the backbone: the initial training only fits the new head
    base_model.trainable = False
    
    # Head: global average pooling, a small dense layer and a sigmoid output
    model = tf.keras.models.Sequential([
        base_model,
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(8, activation='relu'),
        tf.keras.layers.Dense(NUM_CLASSES, activation='sigmoid', bias_initializer=output_bias)
    ])
    model = compile_model(model)

In [None]:
model.summary()

# Initial Training

In [None]:
history = model.fit(
    training_dataset,
    epochs=3,
    steps_per_epoch=steps_per_epoch,
    validation_data=validation_dataset,
    validation_steps=validation_steps_per_epoch,
    verbose=VERBOSE_LEVEL
)

# Full Training

Train for 16 Epochs with a cyclical learning rate schedule

In [None]:
# NOTE(review): the session is cleared while `model` is still in use -- confirm this is intended
K.clear_session()
with strategy.scope():
    # Make the whole model trainable
    base_model.trainable = True
    # We need to compile the model again after changing the layers
    # (this also creates a fresh optimizer starting at LR_START)
    model = compile_model(model)
    
model.summary()

In [None]:
# Create the cyclical learning rate scheduler

from tensorflow.python.framework import ops
from tensorflow.python.ops import math_ops
from tensorflow.python.eager import context

def cyclic_learning_rate(global_step,
                         learning_rate=0.01,
                         max_lr=0.1,
                         step_size=20.,
                         gamma=0.99994,
                         mode='triangular',
                         name=None):
    """
    Function to define the learning rate schedule
    Source: https://www.pyimagesearch.com/2019/07/29/cyclical-learning-rates-with-keras-and-deep-learning/

    The rate moves linearly from ``learning_rate`` up to ``max_lr`` and back
    down again; one full cycle takes ``2 * step_size`` steps.

    Args:
        global_step: current step (this notebook passes the epoch index). Required.
        learning_rate: base (lowest) learning rate.
        max_lr: highest learning rate of the first cycle.
        step_size: number of steps in half a cycle.
        gamma: decay base, only used with mode 'exp_range'.
        mode: 'triangular' (constant amplitude), 'triangular2' (amplitude divided
            by 2 ** (cycle - 1)) or 'exp_range' (amplitude scaled by gamma ** global_step).
        name: optional name for the final add operation.

    Returns:
        In eager mode a zero-argument callable that computes the learning rate
        tensor; otherwise the learning rate tensor itself.

    Raises:
        ValueError: if ``global_step`` is None.
    """
    if global_step is None:
        raise ValueError("global_step is required for cyclic_learning_rate.")

    learning_rate = ops.convert_to_tensor(
        learning_rate, name="learning_rate")

    # All arithmetic is done in the dtype of the learning rate tensor
    dtype = learning_rate.dtype
    global_step = math_ops.cast(global_step, dtype)
    step_size = math_ops.cast(step_size, dtype)

    def cyclic_lr():
        """Helper to recompute learning rate; most helpful in eager-mode."""
        # computing: cycle = floor( 1 + global_step / ( 2 * step_size ) )
        double_step = math_ops.multiply(2., step_size)
        global_div_double_step = math_ops.divide(global_step, double_step)
        cycle = math_ops.floor(math_ops.add(1., global_div_double_step))
        # computing: x = abs( global_step / step_size - 2 * cycle + 1 )
        double_cycle = math_ops.multiply(2., cycle)
        global_div_step = math_ops.divide(global_step, step_size)
        tmp = math_ops.subtract(global_div_step, double_cycle)
        x = math_ops.abs(math_ops.add(1., tmp))
        # computing: clr = learning_rate + ( max_lr - learning_rate ) * max( 0, 1 - x )
        a1 = math_ops.maximum(0., math_ops.subtract(1., x))
        a2 = math_ops.subtract(max_lr, learning_rate)
        clr = math_ops.multiply(a1, a2)
        if mode == 'triangular2':
            # halve the amplitude with every completed cycle
            clr = math_ops.divide(clr, math_ops.cast(math_ops.pow(2, math_ops.cast(
                cycle-1, tf.int32)), tf.float32))
        if mode == 'exp_range':
            # shrink the amplitude exponentially with the step count
            clr = math_ops.multiply(math_ops.pow(gamma, global_step), clr)
        return math_ops.add(clr, learning_rate, name=name)

    # Outside eager mode the tensor is built right away instead of returning the callable
    if not context.executing_eagerly():
        cyclic_lr = cyclic_lr()

    return cyclic_lr


# Helper function to create a tf callback from the learning rate scheduler
def get_lr_callback(mode, learning_rate, max_lr, step_size):
    """
    Returns a LearningRateScheduler callback that sets the cyclical
    learning rate at the start of every epoch
    """
    def lrfn(epoch):
        # cyclic_learning_rate returns a callable in eager mode; evaluate it and
        # hand Keras a plain Python float
        schedule = cyclic_learning_rate(
            epoch,
            mode=mode,
            learning_rate=learning_rate,
            max_lr=max_lr,
            step_size=step_size,
        )
        return float(schedule().numpy())

    return tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=2)


# Helper function to plot the cyclical learning rate
def plot_clr(mode, learning_rate, max_lr, step_size, epochs):
    """
    Plots the cyclical learning rate of every epoch from 0 to ``epochs - 1``
    """
    rates = [
        cyclic_learning_rate(
            epoch,
            mode=mode,
            learning_rate=learning_rate,
            max_lr=max_lr,
            step_size=step_size,
        )().numpy()
        for epoch in range(0, epochs)
    ]

    plt.xlabel('Iterations (epochs)')
    plt.ylabel('Learning rate')
    plt.plot(range(epochs), rates)

In [None]:
# Helper function to display the training plots
def display_training_curves(training, validation, title, subplot):
    """
    Draws a training and a validation curve into one panel of a shared figure.
    ``subplot`` is a three-digit matplotlib subplot code; a code ending in 1
    starts a new figure.
    Source: https://www.kaggle.com/mgornergoogle/getting-started-with-100-flowers-on-tpu
    """
    starts_new_figure = (subplot % 10 == 1)
    if starts_new_figure:
        # the first panel creates the figure that the later panels are drawn into
        plt.subplots(figsize=(20,15), facecolor='#F0F0F0')
        plt.tight_layout()

    panel = plt.subplot(subplot)
    panel.set_facecolor('#F8F8F8')
    for curve in (training, validation):
        panel.plot(curve)
    panel.set_title('model '+ title)
    panel.set_xlabel('epoch')
    panel.set_ylabel(title)
    panel.legend(['train', 'valid.'])

In [None]:
# Create and plot the cyclical learning rate
lr_callback = get_lr_callback(mode=MODE, learning_rate=LR_MIN, max_lr=LR_MAX, step_size=STEP_SIZE)
plot_clr(MODE, LR_MIN, LR_MAX, STEP_SIZE, EPOCHS)

callbacks = [lr_callback]

In [None]:
history = model.fit(
    training_dataset,
    epochs=EPOCHS,
    callbacks=callbacks,
    steps_per_epoch=steps_per_epoch,
    validation_data=validation_dataset,
    validation_steps=validation_steps_per_epoch,
    verbose=VERBOSE_LEVEL
)

In [None]:
# Let's look at the training process so far
# 311 / 312 are matplotlib subplot codes: 3 rows, 1 column, panel 1 and panel 2
display_training_curves(
    history.history['loss'], 
    history.history['val_loss'], 
    'loss', 
    311
)
display_training_curves(
    history.history['auc'], 
    history.history['val_auc'], 
    'auc', 
    312
)

In [None]:
model.save('model.h5')

Train with a ReduceLROnPlateau schedule for at most 16 epochs, or until early stopping ends the run because the validation loss no longer improves.

In [None]:
earlyStopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', 
    patience=5, 
    verbose=VERBOSE_LEVEL,
    restore_best_weights=True
)

lr_callback = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=1, verbose=VERBOSE_LEVEL)

callbacks = [lr_callback, earlyStopping]

In [None]:
history = model.fit(
    training_dataset,
    epochs=EPOCHS,
    callbacks=callbacks,
    steps_per_epoch=steps_per_epoch,
    validation_data=validation_dataset,
    validation_steps=validation_steps_per_epoch,
    verbose=VERBOSE_LEVEL
)

In [None]:
model.save('model.h5')

# Evaluation

In [None]:
# ordered=True keeps the element order deterministic, since the dataset is read
# once for the labels here and once more for the predictions
val_dataset = get_validation_dataset(repeat=False, ordered=True)
num_images = count_data_items(VALIDATION_FILENAMES)

# Get the images from the dataset 
val_dataset_images = val_dataset.map(lambda image, label: image)

# Get the labels from the dataset
val_dataset_image_name = val_dataset.map(lambda image, label: label).unbatch()
# Pull all labels in one single batch and convert them to plain Python ints
all_labels = next(iter(val_dataset_image_name.batch(num_images)))
labels = [int(x) for x in all_labels.numpy().astype('U')]

In [None]:
# Predict on the images
predictions = model.predict(val_dataset_images, verbose=1, steps=math.ceil(len(labels) / BATCH_SIZE))

In [None]:
# Helper function to calculate the F1 Score
def calc_f1(prec, recall):
    """Return the harmonic mean of precision and recall, or 0 if either one is zero."""
    if not (recall and prec):
        return 0
    return 2*(prec*recall)/(prec+recall)

In [None]:
# Calculate the precision, recall and the thresholds from the labels and predictions
precision, recall, thresholds = precision_recall_curve(labels, predictions)

# Calculate the f1 score for each threshold
f1score = [calc_f1(precision[i], recall[i]) for i in range(len(thresholds))]

# Get the index of the highest f1score
idx = np.argmax(f1score)

# Get the precision, recall, threshold and f1score at that index.
# Note: from here on these names hold single rounded values instead of arrays;
# `threshold` is reused for the confusion matrix below.
precision = round(precision[idx], 4)
recall = round(recall[idx], 4)
threshold = round(thresholds[idx], 4)
f1score = round(f1score[idx], 4)

print('Precision:', precision)
print('Recall:', recall)
print('Threshold:', threshold)
print('F1 Score:', f1score)

In [None]:
# Plot the ROC/AUC
fpr, tpr, thresholds = roc_curve(labels, predictions, pos_label=1)
fig, c_ax = plt.subplots(1, 1, figsize=(8, 8))
c_ax.plot(fpr, tpr, label='%s (AUC:%0.2f)' % ('Target', auc(fpr, tpr)))
c_ax.plot([0, 1], [0, 1], color='navy', lw=1, linestyle='--')
c_ax.legend()
c_ax.set_xlabel('False Positive Rate')
c_ax.set_ylabel('True Positive Rate')

In [None]:
# Plot a confusion matrix
binary_preds = [0 if x < threshold else 1 for x in predictions]
cm = confusion_matrix(labels, binary_preds)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot()

# Grad-Cam

We can visually confirm where our network is looking with Grad-CAM, ensuring that it looks at the right patterns in the image and activates around them.
Source: https://keras.io/examples/vision/grad_cam & https://arxiv.org/abs/1610.02391

In [None]:
%%capture
# Download melanoma image from wikipedia
!wget https://upload.wikimedia.org/wikipedia/commons/thumb/6/6c/Melanoma.jpg/512px-Melanoma.jpg

In [None]:
# Get the CNNs output layer
# Look the EfficientNet backbone up by its layer name; stays False if no layer matches
backbone_candidates = [layer for layer in model.layers if layer.name == "efficientnet-b5"]
efficientnet_model = backbone_candidates[-1] if backbone_candidates else False

In [None]:
IMAGE_PATH = "./512px-Melanoma.jpg"
img = tf.keras.preprocessing.image.load_img(IMAGE_PATH, target_size=(DIM, DIM))
plt.imshow(img)
origin_img = img

In [None]:
# Get the prediction for the image
# The data pipeline rescales pixels to [0, 1] (Rescaling(1./255) in load_dataset) and
# the model has no rescaling layer of its own, so the same scaling is applied here.
img_array = np.expand_dims(np.asarray(img, dtype=np.float32) / 255.0, axis=0)
prediction = model.predict(img_array)
binary_prediction = [0 if x < 0.5 else 1 for x in prediction]
# binary_prediction is a one-element list: compare its element, not the list itself
# (a list never equals 0, which made the output always "Malignant")
print("Prediction: " + ("Benign" if binary_prediction[0] == 0 else "Malignant"))

In [None]:
# First, we create a model that maps the input image to the activations
# of the last conv layer as well as the output of the EfficientNet backbone
grad_model = tf.keras.models.Model(
    [efficientnet_model.inputs], [efficientnet_model.get_layer('top_conv').output, efficientnet_model.output]
)

# Same [0, 1] scaling as in the data pipeline (Rescaling(1./255) in load_dataset)
grad_input = np.expand_dims(np.asarray(img, dtype=np.float32) / 255.0, axis=0)

# Layers of `model` that follow the backbone (pooling and the dense head)
head_layers = model.layers[model.layers.index(efficientnet_model) + 1:]

# Then, we compute the gradient of the model's prediction for our input image
# with respect to the activations of the last conv layer
with tf.GradientTape() as tape:
    last_conv_layer_output, backbone_output = grad_model(grad_input)
    # Run the backbone output through the head so the gradient is taken of the
    # actual prediction and not of an arbitrary slice of the backbone's feature map
    preds = backbone_output
    for head_layer in head_layers:
        preds = head_layer(preds)
    # The model has a single sigmoid output unit
    class_channel = preds[:, 0]

# This is the gradient of the output neuron
# with regard to the output feature map of the last conv layer
grads = tape.gradient(class_channel, last_conv_layer_output)

# This is a vector where each entry is the mean intensity of the gradient
# over a specific feature map channel
pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

# We multiply each channel in the feature map array
# by "how important this channel is" with regard to the prediction
# then sum all the channels to obtain the heatmap class activation
last_conv_layer_output = last_conv_layer_output[0]
heatmap = last_conv_layer_output @ pooled_grads[..., tf.newaxis]
heatmap = tf.squeeze(heatmap)

# For visualization purpose, we will also normalize the heatmap between 0 & 1
heatmap = tf.maximum(heatmap, 0) / tf.math.reduce_max(heatmap)
heatmap = heatmap.numpy()
# Keep the float heatmap under a second name for the colorbar of the final figure
heatmap_array = heatmap

In [None]:
# Display heatmap
plt.matshow(heatmap)
plt.show()

In [None]:
import matplotlib.cm as cm

# Load the original image and convert it to a float array for the blending below
o_img = tf.keras.preprocessing.image.load_img(IMAGE_PATH, target_size=(DIM, DIM))
o_img_array = tf.keras.preprocessing.image.img_to_array(o_img)

# Rescale heatmap to a range 0-255.
# Read from heatmap_array and write to a new name so that re-running this cell
# does not rescale an already rescaled (uint8) heatmap.
heatmap_uint8 = np.uint8(255 * heatmap_array)

# Use jet colormap to colorize heatmap
jet = cm.get_cmap("jet")

# Use RGB values of the colormap
jet_colors = jet(np.arange(256))[:, :3]
jet_heatmap = jet_colors[heatmap_uint8]

# Create an image with RGB colorized heatmap
jet_heatmap = tf.keras.preprocessing.image.array_to_img(jet_heatmap)
jet_heatmap = jet_heatmap.resize((DIM,DIM))
jet_heatmap = tf.keras.preprocessing.image.img_to_array(jet_heatmap)

# Superimpose the heatmap on original image
superimposed_img = jet_heatmap * 0.4 + o_img_array
superimposed_img = tf.keras.preprocessing.image.array_to_img(superimposed_img)

In [None]:
# Plot both images
fig = plt.figure(figsize = (12, 8))

# Create subplot with the grad cam image
# (keep ax1 bound to the Axes; it is needed for the colorbar below)
ax1 = fig.add_subplot(1, 2, 1)
ax1.imshow(superimposed_img)

# Create subplot with the origin image
ax2 = fig.add_subplot(1, 2, 2)
ax2.imshow(origin_img)

# Add a colorbar
# The ScalarMappable is not attached to any Axes, so the Axes to take the space
# from are passed explicitly via `ax`.
cmap = mpl.cm.jet
norm = mpl.colors.Normalize(vmin=np.min(heatmap_array), vmax=np.max(heatmap_array))
fig.colorbar(mpl.cm.ScalarMappable(norm=norm, cmap=cmap), ax=[ax1, ax2], orientation='horizontal', label='Output layer activation')

# Submission

In [None]:
# ordered=True keeps the element order deterministic; the dataset is read once for
# the ids here and once more for the predictions
test_dataset = get_test_dataset(ordered=True)
num_test_images = count_data_items(TEST_FILENAMES)

# Images only (input for model.predict)
test_dataset_images = test_dataset.map(lambda image, image_name: image)
# Image names only; pulled in one single batch and converted to a unicode string array
test_dataset_image_name = test_dataset.map(lambda image, image_name: image_name).unbatch()
test_ids = next(iter(test_dataset_image_name.batch(num_test_images))).numpy().astype('U')

In [None]:
predictions = model.predict(test_dataset_images, verbose=1, steps=math.ceil(len(test_ids) / BATCH_SIZE))

In [None]:
pred_df = pd.DataFrame({'image_name': test_ids, 'target': np.concatenate(predictions)})
pred_df.head()

In [None]:
pd.Series(np.round(pred_df['target'].values)).value_counts()

In [None]:
pred_df.to_csv('submission.csv', index=False)