In [None]:
# Root directory of the dataset; the image folder and ground-truth CSV paths are joined onto it.
root_path = '../input/isic-2019'

In [None]:
import os
import numpy as np
import pandas as pd

# Folder holding the training images; used as the default `directory` of the data generators.
IMAGE_DIR = os.path.join(root_path, 'ISIC_2019_Training_Input/ISIC_2019_Training_Input')
# Ground-truth CSV that is later loaded with pd.read_csv.
panda_path = os.path.join(root_path, 'ISIC_2019_Training_GroundTruth.csv')

In [None]:
# Sanity check: number of directory entries found under IMAGE_DIR.
print(len(os.listdir(IMAGE_DIR)))

In [None]:
# Echo the resolved paths so a wrong dataset location is easy to spot.
print(f'This is the image dir: {IMAGE_DIR}')
print(f'This is the csv filepath: {panda_path}')

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.applications.vgg16 import preprocess_input, VGG16
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.layers import Input, Flatten, Dense, Dropout, GlobalAveragePooling2D, BatchNormalization, Activation
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential

In [None]:
def preprocess(df):
    """Prepare the ground-truth table for the image generators.

    Appends the ``.jpg`` extension to every entry of the ``image`` column and
    removes the ``UNK`` column. The frame is modified in place and the same
    object is returned, so ``df = preprocess(df)`` and a bare
    ``preprocess(df)`` both work.

    Args:
        df: DataFrame with an ``image`` column of file stems and an ``UNK``
            column.

    Returns:
        The same DataFrame object after modification.

    Raises:
        KeyError: if the ``image`` or ``UNK`` column is missing.
    """
    # Whole-column assignment replaces the per-row `df.image[index] = ...`
    # chained assignment, which is not guaranteed to write through to `df`
    # and which used the positional loop counter as an index label (wrong for
    # any frame whose index is not 0..n-1).
    df['image'] = df['image'] + '.jpg'
    df.drop(columns=['UNK'], inplace=True)
    return df

In [None]:
def preprocess_delete(df, n_drop=8500, col_pos=2):
    """Prepare the ground-truth table and under-sample one class.

    Appends ``.jpg`` to every entry of the ``image`` column, removes the
    ``UNK`` column, and deletes the first ``n_drop`` rows whose value in the
    column at position ``col_pos`` equals 1.0. The column edits and the row
    deletion are applied to ``df`` in place; the returned frame is a
    re-indexed (0..n-1) result.

    Args:
        df: DataFrame with an ``image`` column, an ``UNK`` column and a 0/1
            indicator column at position ``col_pos``.
        n_drop: Maximum number of flagged rows to delete (default 8500, the
            value previously hard-coded).
        col_pos: Zero-based position of the indicator column to thin out
            (default 2, the position previously hard-coded).

    Returns:
        DataFrame without the deleted rows and with a fresh 0..n-1 index.

    Raises:
        KeyError: if the ``image`` or ``UNK`` column is missing.
    """
    # Whole-column assignment instead of per-row chained assignment
    # (`df.image[index] = ...`), which may silently fail to update `df`.
    df['image'] = df['image'] + '.jpg'

    # Collect index *labels* of the flagged rows in row order. The original
    # gathered positional counters and passed them to drop() as labels, which
    # is only correct for a default 0..n-1 index.
    flagged = df.index[df.iloc[:, col_pos] == 1.0]

    df.drop(columns=['UNK'], inplace=True)
    df.drop(index=flagged[:n_drop], inplace=True)
    return df.reset_index(drop=True)

In [None]:
def train_val_test_split(df, test_len=1000, val_ratio=0.2, seed=None):
  """Randomly split a frame into disjoint train / validation / test frames.

  A single random permutation of the row positions is cut into three parts,
  so no row appears in more than one split and none is duplicated. The
  previous implementation drew row numbers with replacement (producing
  duplicates and undersized splits), ignored ``test_len`` in favour of a
  literal 1000, and removed rows from the caller's frame in place.

  Args:
    df: Source DataFrame. It is not modified.
    test_len: Number of rows for the test split (capped at ``len(df)``).
    val_ratio: Fraction of the rows remaining after the test split that go
      to validation.
    seed: Optional integer seed for a reproducible split. When ``None`` the
      global NumPy random state is used, as before.

  Returns:
    Tuple ``(train_df, val_df, test_df)``, each with a fresh 0..n-1 index.
    Training rows keep their original relative order.
  """
  n_rows = df.shape[0]
  n_test = max(0, min(int(test_len), n_rows))

  # np.random (global state) and RandomState both expose permutation().
  rng = np.random if seed is None else np.random.RandomState(seed)
  order = rng.permutation(n_rows)

  test_pos = order[:n_test]
  rest_pos = order[n_test:]
  n_val = int(val_ratio * rest_pos.size)
  val_pos = rest_pos[:n_val]
  # Sorting restores the original row order for the training frame.
  train_pos = np.sort(rest_pos[n_val:])

  train_df = df.iloc[train_pos].reset_index(drop=True)
  val_df = df.iloc[val_pos].reset_index(drop=True)
  test_df = df.iloc[test_pos].reset_index(drop=True)
  return train_df, val_df, test_df


In [None]:
# Read the ground-truth CSV, normalise it, then carve out the three splits.
full_df = preprocess(pd.read_csv(panda_path))
train_df, val_df, test_df = train_val_test_split(full_df)

# Every column after the first one is treated as a class label.
labels = train_df.columns[1:].tolist()
print(labels)
train_df.head()

In [None]:
# Per-class total of the label column over the training split.
for class_name in labels:
    class_total = sum(train_df[class_name])
    print(f'{class_name}: {class_total}')


In [None]:
def basic_vgg(input_shape=(224, 224, 3), num_classes=8):
  """Build and compile a full VGG16 network without pretrained weights.

  Args:
    input_shape: Shape passed to the Keras ``Input`` layer.
    num_classes: Value forwarded to VGG16's ``classes`` argument.

  Returns:
    A VGG16 model (``weights=None``) compiled with Adam(0.0001),
    categorical cross-entropy loss and the accuracy metric.
  """
  network = VGG16(
      weights=None,
      input_tensor=Input(shape=input_shape),
      classes=num_classes,
  )
  network.compile(
      optimizer=Adam(0.0001),
      loss='categorical_crossentropy',
      metrics=['accuracy'],
  )
  return network

In [None]:
from keras import backend as K
def focal_loss(gamma=2., alpha=4.):
    """Create a focal-loss function usable as a Keras ``loss``.

    Args:
        gamma: Focusing exponent applied to ``(1 - p)``; cast to float.
        alpha: Constant scale factor applied to the loss; cast to float.

    Returns:
        A callable ``focal_loss_fixed(y_true, y_pred)`` returning a scalar
        tensor.
    """
    gamma = float(gamma)
    alpha = float(alpha)

    def focal_loss_fixed(y_true, y_pred):
        """Focal loss for multi-class classification.

        FL(p_t) = -alpha * (1 - p_t)^gamma * ln(p_t)

        ``y_pred`` must contain probabilities, not logits.
        Reference: "Focal Loss for Dense Object Detection",
        https://arxiv.org/abs/1708.02002

        Arguments:
            y_true {tensor} -- ground truth labels, shape [batch_size, num_cls]
            y_pred {tensor} -- model output, shape [batch_size, num_cls]

        Returns:
            [tensor] -- scalar loss: the per-sample maximum over the class
            axis, averaged over the batch.
        """
        epsilon = 1.e-9
        y_true = tf.convert_to_tensor(y_true, tf.float32)
        y_pred = tf.convert_to_tensor(y_pred, tf.float32)

        # The small offset keeps log() finite when a probability is exactly 0.
        model_out = tf.add(y_pred, epsilon)
        # tf.math.log instead of the standalone-keras backend `K.log`: every
        # other op here is already a tf op, and this removes the dependency on
        # a second Keras package whose legacy backend functions may be absent.
        ce = tf.multiply(y_true, -tf.math.log(model_out))
        # y_true masks the terms so only labelled classes contribute.
        weight = tf.multiply(y_true, tf.pow(tf.subtract(1., model_out), gamma))
        fl = tf.multiply(alpha, tf.multiply(weight, ce))
        reduced_fl = tf.reduce_max(fl, axis=1)
        return tf.reduce_mean(reduced_fl)
    return focal_loss_fixed

In [None]:
# Build the from-scratch VGG16 variant with its default arguments and print its layers.
# NOTE(review): base_model is not referenced again in the visible cells — confirm it is still needed.
base_model = basic_vgg()
base_model.summary()

In [None]:
#sequential API
def vgg_model(input_shape=(224, 224, 3), loss_func='binary_crossentropy',
              num_classes=8, output_activation='sigmoid', learning_rate=1e-4):
  """Build and compile a classifier on top of an ImageNet-pretrained VGG16 base.

  The head is: global average pooling -> Dense(512, relu) -> Dropout(0.25)
  -> Dense(1024) -> BatchNormalization -> relu -> Dropout(0.5)
  -> Dense(num_classes, output_activation).

  Args:
    input_shape: Input image shape given to the VGG16 base.
    loss_func: Loss name or callable passed to ``compile``.
    num_classes: Width of the output layer (default 8, the value previously
      hard-coded).
    output_activation: Activation of the output layer (default 'sigmoid',
      as before). NOTE(review): a loss that only scores the labelled class
      usually expects 'softmax' outputs — confirm for the loss in use.
    learning_rate: Adam learning rate (default 1e-4, as before).

  Returns:
    The compiled Sequential model.
  """
  model = Sequential()
  model.add(VGG16(include_top=False, weights='imagenet', input_shape=input_shape))

  model.add(GlobalAveragePooling2D())

  model.add(Dense(512, activation='relu'))
  model.add(Dropout(0.25))

  # Batch normalisation sits between the linear layer and its activation.
  model.add(Dense(1024))
  model.add(BatchNormalization())
  model.add(Activation('relu'))
  model.add(Dropout(0.5))

  model.add(Dense(num_classes, activation=output_activation))
  model.compile(optimizer=Adam(learning_rate=learning_rate), loss=loss_func,
                metrics=['accuracy'])
  print('Model has compiled')
  return model

In [None]:
# Build the transfer-learning model, swapping the default loss for focal loss with its default gamma/alpha.
vgg16_model = vgg_model(input_shape=(224, 224, 3), loss_func=focal_loss())

In [None]:
# Print the layer-by-layer summary of the compiled model.
vgg16_model.summary()

In [None]:
def get_train_gen(df, img_path=IMAGE_DIR, target_size=(224, 224)):
  """Create the augmented, shuffled training iterator for ``df``.

  Args:
    df: DataFrame whose ``image`` column holds file names; every column
      after the first is used as a target column.
    img_path: Directory the file names are resolved against.
    target_size: Size forwarded to ``flow_from_dataframe``.

  Returns:
    The iterator produced by ``flow_from_dataframe`` (batch size 64,
    ``class_mode='raw'``), with horizontal flips and width/height shifts of
    0.2 applied after VGG16 ``preprocess_input``.
  """
  target_columns = list(df.columns)[1:]
  augmenter = ImageDataGenerator(
      preprocessing_function=preprocess_input,
      horizontal_flip=True,
      width_shift_range=0.2,
      height_shift_range=0.2,
  )
  train_flow = augmenter.flow_from_dataframe(
      dataframe=df,
      directory=img_path,
      x_col='image',
      y_col=target_columns,
      batch_size=64,
      shuffle=True,
      class_mode='raw',
      target_size=target_size,
  )
  return train_flow

def get_val_test_gen(val_df, test_df, img_path=IMAGE_DIR, target_size=(224, 224),
                     batch_size=64):
  """Create the (non-augmented) validation and test iterators.

  Both iterators are created with ``shuffle=False``. Predictions made from
  them are later compared row-by-row against ``generator.labels``, which is
  stored in dataframe order; a shuffled iterator would yield predictions in
  a different order and silently corrupt every metric computed that way.

  Args:
    val_df: Validation DataFrame; ``image`` holds file names and every
      column after the first is used as a target column.
    test_df: Test DataFrame; only its ``image`` column is read.
    img_path: Directory the file names are resolved against.
    target_size: Size forwarded to ``flow_from_dataframe``.
    batch_size: Validation batch size (default 64, the value previously
      hard-coded). The test iterator always uses a batch size of 1.

  Returns:
    Tuple ``(val, test)`` of iterators. ``val`` uses ``class_mode='raw'``;
    ``test`` uses ``class_mode=None``.
  """
  data_gen = ImageDataGenerator(preprocessing_function=preprocess_input)
  val = data_gen.flow_from_dataframe(dataframe=val_df, directory=img_path,
                                     x_col='image', y_col=list(val_df.columns)[1:],
                                     batch_size=batch_size, shuffle=False,
                                     class_mode='raw', target_size=target_size)
  test = data_gen.flow_from_dataframe(dataframe=test_df, directory=img_path,
                                      x_col='image', batch_size=1, shuffle=False,
                                      class_mode=None, target_size=target_size)
  return val, test

In [None]:
# Instantiate the three data iterators from the split frames, using the default image directory and target size.
train_generator = get_train_gen(train_df)
valid_generator, test_generator = get_val_test_gen(val_df, test_df)

In [None]:
#callbacks
from tensorflow.keras.callbacks import EarlyStopping

# Early-stopping callback list: monitors 'val_loss' with a patience of 8.
callbacks = [EarlyStopping(monitor='val_loss', patience=8)]


In [None]:
# Number of whole batches per pass over each iterator (any partial final batch is not counted).
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
STEP_SIZE_VALID = valid_generator.n // valid_generator.batch_size
STEP_SIZE_TEST = test_generator.n // test_generator.batch_size

# Pass the `callbacks` list built in the previous cell: it was created but
# never handed to fit(), so early stopping never took effect.
history = vgg16_model.fit(train_generator, steps_per_epoch=STEP_SIZE_TRAIN,
                          validation_data=valid_generator,
                          validation_steps=STEP_SIZE_VALID, epochs=30,
                          callbacks=callbacks)

In [None]:
#copied from Coursera util package
from keras.preprocessing import image
from sklearn.metrics import roc_auc_score, roc_curve
from tensorflow.compat.v1.logging import INFO, set_verbosity
import cv2

def get_roc_curve(labels, predicted_vals, generator):
    """Plot one ROC curve per class on a shared figure and return the AUCs.

    Args:
        labels: Sequence of class names; position ``i`` names column ``i``.
        predicted_vals: 2-D array of predicted scores, indexed
            ``[:, class_index]``.
        generator: Object whose ``labels`` attribute is a 2-D array of
            ground-truth values, indexed ``[:, class_index]``. Its row order
            must match ``predicted_vals``.

    Returns:
        List of ROC-AUC values for the classes that could be scored, in class
        order. Classes for which scikit-learn raises ``ValueError`` are
        reported with a message and omitted from the list.
    """
    # Local import so the function does not depend on a later cell having
    # imported pyplot before it is called.
    import matplotlib.pyplot as plt

    auc_roc_vals = []
    # Figure and chance diagonal are set up once, not once per class.
    plt.figure(1, figsize=(10, 10))
    plt.plot([0, 1], [0, 1], 'k--')

    for i in range(len(labels)):
        try:
            gt = generator.labels[:, i]
            pred = predicted_vals[:, i]
            auc_roc = roc_auc_score(gt, pred)
            fpr_rf, tpr_rf, _ = roc_curve(gt, pred)
        except ValueError:
            # Only the scoring failure is treated as best-effort; a bare
            # `except:` would also hide unrelated bugs (wrong shapes, missing
            # names) behind this message.
            print(
                f"Error in generating ROC curve for {labels[i]}. "
                f"Dataset lacks enough examples."
            )
            continue
        auc_roc_vals.append(auc_roc)
        plt.plot(fpr_rf, tpr_rf,
                 label=labels[i] + " (" + str(round(auc_roc, 3)) + ")")

    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.title('ROC curve')
    if auc_roc_vals:
        # legend() warns when no labelled curve was drawn.
        plt.legend(loc='best')
    plt.show()
    return auc_roc_vals

In [None]:
# Model.predict accepts generators directly; predict_generator is deprecated,
# and fit() above is already called with a generator in the same way.
preds = vgg16_model.predict(valid_generator)

In [None]:
import matplotlib.pyplot as plt
# Plot per-class ROC curves for the validation predictions and keep the AUC values.
auc_rocs = get_roc_curve(labels, preds, valid_generator)


In [None]:
def tp_fp_tn_fn(y_true, y_pred, class_num):
    """Count one-vs-rest confusion-matrix cells for a single class.

    A row counts as the class when the arg-max of that row equals
    ``class_num``; this is applied to both the ground truth and the
    predictions.

    Args:
        y_true: Iterable of ground-truth rows.
        y_pred: Iterable of prediction rows, paired with ``y_true`` by
            position.
        class_num: Class index to evaluate.

    Returns:
        Tuple ``(tp, fp, tn, fn)`` of integer counts.
    """
    actual_flags = [np.argmax(row) == class_num for row in y_true]
    predicted_flags = [np.argmax(row) == class_num for row in y_pred]

    # Tally keyed by (predicted is class, actual is class).
    tally = {
        (True, True): 0,    # true positive
        (True, False): 0,   # false positive
        (False, False): 0,  # true negative
        (False, True): 0,   # false negative
    }
    for actual, predicted in zip(actual_flags, predicted_flags):
        tally[(bool(predicted), bool(actual))] += 1

    return (
        tally[(True, True)],
        tally[(True, False)],
        tally[(False, False)],
        tally[(False, True)],
    )

In [None]:
# Report the one-vs-rest confusion counts of the validation predictions for every class.
for class_idx, class_name in enumerate(labels):
    tp, fp, tn, fn = tp_fp_tn_fn(valid_generator.labels, preds, class_idx)
    print(f'{class_name}: True Positives = {tp}, False Positive = {fp}, True Negatives = {tn}, False Negatives = {fn}')