In [None]:
%load_ext nb_black
import os
from pathlib import Path

from tensorflow.keras.applications import (
    InceptionV3,
    MobileNetV2,
    NASNetLarge,
    NASNetMobile,
)
from tensorflow.keras.layers import (
    AveragePooling2D,
    Dense,
    GlobalAveragePooling2D,
    Input,
    ZeroPadding2D,
)
from tensorflow.keras.models import Model

In [None]:
# !pip install efficientnet

In [None]:
# from efficientnet.tfkeras import EfficientNetB0, EfficientNetB4, EfficientNetB2

In [None]:
# Dataset selection. (dataset_name, IMAGE_SIZE) pairs used in earlier runs:
#   "santander-customer-satisfaction" -> 640
#   "springleaf-marketing-response"   -> 1408
#   "segment"                         -> 160
#   "rl"                              -> 160
#   "open-payments"                   -> 96
#   "bank-marketing"                  -> 160
#   "bnp-cardif"                      -> 384
#   "albert"                          -> 288
dataset_name = "open-payments"
IMAGE_SIZE = 96

# Folder of the selected dataset, resolved from the current working directory.
DATASET_FOLDER = Path.cwd() / "data" / dataset_name

# Input pipeline settings.
BATCH_SIZE = 32
PREFETCH = 2

ONE_CHANNEL = False

# Side length of the images handed to the pretrained model.
TARGET_SIZE = 224
# Backbone constructor. Alternatives tried: EfficientNetB2, EfficientNetB0, MobileNetV2.
PRETRAINED_MODEL = NASNetMobile
# Index of the first layer made trainable in the second phase (119 was also tried).
FROM_LAYER_RETRAIN = 0
# None to use the backbone output as is; -4 was used only for EfficientNet,
# where include_top did not work.
OUTPUT_LAYER = None

# Epoch budget of the first and second fit, and the early-stopping patience.
epochs_1 = 30
epochs_2 = 30
patience = 2

# Model name => image size, last_block retrain
# NASNetMobile => 224,
# NASNetLarge => 331,
# MobileNetV2 => 96, ... 160..... 224, last layer => 128 (3 blocks), 137(2 blocks), 146(1 block)
# InceptionV3 => 299,
# Xception => 299,

## Import + utilities

In [None]:
import gc
import json
import tensorflow as tf

from tensorflow.keras.callbacks import EarlyStopping, Callback
from tensorflow_addons.optimizers import RectifiedAdam, Lookahead
from tensorflow_addons.activations import mish
from concurrent.futures import ProcessPoolExecutor as PoolExecutor

import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score, accuracy_score

np.random.seed(0)

import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow

%matplotlib inline

In [None]:
# Class names are read from prep_data/<IMAGE_SIZE>/classnames.json.
classnames_path = DATASET_FOLDER / "prep_data" / str(IMAGE_SIZE) / "classnames.json"
with classnames_path.open() as fp:
    classnames = np.array(json.load(fp))
OUTPUT_DIM = len(classnames)

# Two classes -> binary cross-entropy / AUC; otherwise categorical cross-entropy / accuracy.
if OUTPUT_DIM == 2:
    LOSS, METRIC = "binary_crossentropy", "AUC"
else:
    LOSS, METRIC = "categorical_crossentropy", "accuracy"

In [None]:
def plot_metric(history, metric):
    """Plot the training and validation curves of one metric.

    Args:
        history: object whose ``history`` attribute is a dict holding the
            per-epoch values under ``metric`` and ``f"val_{metric}"``
            (the notebook passes the return value of ``model.fit``).
        metric: name of the series to plot, e.g. ``"loss"``.
    """
    plt.plot(history.history[metric])
    plt.plot(history.history[f"val_{metric}"])
    plt.title(f"Model {metric}")
    plt.ylabel(f"{metric}")
    plt.xlabel("Epoch")
    # The second curve is the validation series, not the separate test split.
    plt.legend(["Train", "Validation"], loc="upper left")
    plt.show()

## Preparing datasets

In [None]:
# https://www.tensorflow.org/tutorials/load_data/images

In [None]:
# File paths are read from prep_data/<IMAGE_SIZE>/file_list.json.
file_list_path = DATASET_FOLDER / "prep_data" / str(IMAGE_SIZE) / "file_list.json"
with file_list_path.open() as fp:
    file_list = json.load(fp)
# Peek at the first ten training entries.
file_list["train"][:10]

In [None]:
def build_process_path(classnames):
    """Create the loader that turns a file path into ``(image, label)``.

    Args:
        classnames: class names compared against the name of each file's
            parent directory.

    Returns:
        A function taking a file path and returning the decoded image together
        with the result of comparing the parent directory name to ``classnames``.
    """

    def process_path(file_path):
        # The second-to-last path component is the directory holding the file.
        path_parts = tf.strings.split(file_path, os.path.sep)
        class_dir = path_parts[-2]
        # Read the file, undo the GZIP compression, then decode the JPEG bytes
        # into a 3-channel image.
        compressed = tf.io.read_file(file_path)
        jpeg_bytes = tf.io.decode_compressed(compressed, "GZIP")
        pixels = tf.image.decode_jpeg(jpeg_bytes, channels=3)
        return pixels, class_dir == classnames

    return process_path

In [None]:
classnames

In [None]:
# Bind the class names into the path -> (image, label) loader passed to build_dataset.
process_path = build_process_path(classnames)

In [None]:
AUTOTUNE = tf.data.experimental.AUTOTUNE

def build_dataset(file_list, process_path, *, repeat, batch_size, prefetch):
    """Build the input pipeline for one data split.

    Args:
        file_list: file paths, either as a ``tf.data.Dataset`` or as a plain
            sequence (e.g. a list loaded from JSON), which is wrapped with
            ``tf.data.Dataset.from_tensor_slices``.
        process_path: function mapped over every path to load one example.
        repeat: when true, the paths are repeated indefinitely.
        batch_size: batch size, or ``None`` to leave the dataset unbatched.
        prefetch: prefetch buffer size, or ``None`` to disable prefetching.

    Returns:
        The resulting ``tf.data.Dataset``.
    """
    if isinstance(file_list, tf.data.Dataset):
        dataset = file_list
    else:
        dataset = tf.data.Dataset.from_tensor_slices(file_list)

    # Every step chains on `dataset`, so an earlier step (notably repeat) is
    # never discarded by a later one.
    if repeat:
        dataset = dataset.repeat()

    dataset = dataset.map(process_path, num_parallel_calls=AUTOTUNE)
    if batch_size is not None:
        dataset = dataset.batch(batch_size)

    if prefetch is not None:
        dataset = dataset.prefetch(prefetch)

    return dataset

In [None]:
# Train and validation pipelines are requested with repeat=True and prefetching;
# the test pipeline is a single, non-prefetched pass.
_repeating_options = {"repeat": True, "batch_size": BATCH_SIZE, "prefetch": PREFETCH}

dataset_train = build_dataset(file_list["train"], process_path, **_repeating_options)
dataset_valid = build_dataset(file_list["valid"], process_path, **_repeating_options)
dataset_test = build_dataset(
    file_list["test"],
    process_path,
    repeat=False,
    batch_size=BATCH_SIZE,
    prefetch=None,
)

In [None]:
# Pull a single element from the training pipeline and report its shapes.
for image, label in dataset_train.take(1):
    image_shape = image.numpy().shape
    label_shape = label.numpy().shape
    print("Image shape: ", image_shape)
    print("Label: ", label_shape)

### Images Example

In [None]:
def show_image(image, image_size, one_channel=False):
    """Display one image with matplotlib's ``imshow``.

    Args:
        image: array holding a single image.
        image_size: side length used to reshape a one-channel image to 2-D.
        one_channel: when true, ``image`` is reshaped to
            ``(image_size, image_size)`` before being shown.
    """
    if one_channel:
        # Use the caller-supplied size rather than the notebook-level constant.
        imshow(image.reshape(image_size, image_size))
    else:
        imshow(image)

In [None]:
# dataset_train is built with a batch size, so each element is a whole batch:
# show only the first image of the batch together with its own label.
for images, labels in dataset_train.take(1):
    print("Label: ", labels.numpy()[0])
    show_image(images.numpy()[0], IMAGE_SIZE, ONE_CHANNEL)

In [None]:
# dataset_valid is built with a batch size, so each element is a whole batch:
# show only the first image of the batch together with its own label.
for images, labels in dataset_valid.take(1):
    print("Label: ", labels.numpy()[0])
    show_image(images.numpy()[0], IMAGE_SIZE, ONE_CHANNEL)

## Training model (transfer learning)

In [None]:
# Number of batches needed to cover each split once. np.ceil returns a float,
# so convert to int: Keras documents step counts as integers.
steps_per_epoch = int(np.ceil(len(file_list["train"]) / BATCH_SIZE))
steps_per_epoch_val = int(np.ceil(len(file_list["valid"]) / BATCH_SIZE))

### Creating model, using existing one

In [None]:
# Activation applied by the Dense layers of the classification head.
activation = mish
# Optimizer for the first training phase: Lookahead wrapped around RectifiedAdam.
optimizer = Lookahead(RectifiedAdam(), sync_period=6, slow_step_size=0.5)
# compile the model (should be done *after* setting layers to non-trainable)
# optimizer='rmsprop'

In [None]:
nb_channel = 1 if ONE_CHANNEL else 3
# NOTE(review): the input pipeline decodes 3-channel images and the backbone is
# loaded with ImageNet weights, so ONE_CHANNEL = True looks unsupported — confirm.

# Bring IMAGE_SIZE x IMAGE_SIZE inputs to TARGET_SIZE x TARGET_SIZE:
# average-pool by an integer factor when the image is larger than the target,
# then zero-pad what is missing. When the padding is odd, the extra pixel goes
# to the bottom/right side.
POOLING = int(np.ceil(IMAGE_SIZE / TARGET_SIZE))
PADDING = np.floor(TARGET_SIZE - np.floor(IMAGE_SIZE / POOLING))
PADDING_ASYM = int(PADDING % 2)

PADDING = int(np.floor(PADDING / 2))

inputs = Input(shape=(IMAGE_SIZE, IMAGE_SIZE, nb_channel))
input_layer = inputs
inputs = AveragePooling2D(pool_size=(POOLING, POOLING))(inputs)
inputs = ZeroPadding2D(
    padding=((PADDING, PADDING + PADDING_ASYM), (PADDING, PADDING + PADDING_ASYM),)
)(inputs)

# Create the base pre-trained model on top of the resized input.
base_model = PRETRAINED_MODEL(
    input_tensor=inputs,
    input_shape=(TARGET_SIZE, TARGET_SIZE, nb_channel),
    weights="imagenet",
    include_top=False,
)

# Feature tensor fed to the head: the backbone output, unless OUTPUT_LAYER
# selects an earlier layer by (negative) index.
x = (
    base_model.output
    if OUTPUT_LAYER is None or OUTPUT_LAYER >= -1
    else base_model.layers[OUTPUT_LAYER].output
)
# add a global spatial average pooling layer
x = GlobalAveragePooling2D()(x)
# let's add fully-connected layers
x = Dense(1024, activation=activation, kernel_initializer="he_normal")(x)
x = Dense(512, activation=activation, kernel_initializer="he_normal")(x)
x = Dense(128, activation=activation, kernel_initializer="he_normal")(x)

# x = Dropout(0.3)(x)
# x = Dense(512, activation=activation, kernel_initializer="he_normal")(x)
# x = Dropout(0.3)(x)
# x = Dense(256, activation=activation, kernel_initializer="he_normal")(x)
# x = Dropout(0.3)(x)

# Softmax output layer with one unit per class.
predictions = Dense(OUTPUT_DIM, activation="softmax")(x)

# this is the model we will train
model = Model(inputs=input_layer, outputs=predictions)

# first: train only the top layers (which were randomly initialized)
# i.e. freeze every layer of the pre-trained base model
for layer in base_model.layers:
    layer.trainable = False

# Stop when the validation loss has not improved for `patience` epochs and
# restore the best weights seen.
es = EarlyStopping(
    monitor="val_loss",
    verbose=1,
    mode="min",
    patience=patience,
    restore_best_weights=True,
)
# compile the model (should be done *after* setting layers to non-trainable)
es.set_model(model)
model.compile(optimizer=optimizer, loss=LOSS)  # , metrics=[METRIC])

In [None]:
model.summary()

In [None]:
# Print "<index> <name>" for every layer of the model; the indices help pick
# the layer from which to retrain.
layer_listing = [f"{index} {layer.name}" for index, layer in enumerate(model.layers)]
print("\n".join(layer_listing))

In [None]:
# Five garbage-collection passes in total; the last call stays a bare
# expression so its return value remains the cell output.
for _ in range(4):
    gc.collect()
gc.collect()

### Fit new layers

In [None]:
# train the model on the new data for a few epochs
history_1 = model.fit(
    dataset_train,
    callbacks=[es],
    epochs=epochs_1,
    steps_per_epoch=steps_per_epoch,
    validation_data=dataset_valid,
    validation_steps=steps_per_epoch_val,
)
# at this point, the top layers are well trained and we can start fine-tuning
# convolutional layers from inception V3. We will freeze the bottom N layers
# and train the remaining top layers.

In [None]:
plot_metric(history_1, "loss")

In [None]:
def _labels_as_indices(dataset, last_batch_index=None):
    """Return the class index (argmax over axis 1) of every label in ``dataset``.

    Iteration stops after the batch whose position is ``>= last_batch_index``
    when that bound is given; otherwise the dataset is read to the end.
    """
    collected = []
    for batch_index, (_, batch_labels) in enumerate(dataset):
        collected.append(np.argmax(batch_labels, axis=1))
        if last_batch_index is not None and batch_index >= last_batch_index:
            break
    return np.hstack(collected)


# The test pipeline is finite and is read completely.
truth_test = _labels_as_indices(dataset_test)

# The validation pipeline is read for steps_per_epoch_val batches only.
truth_valid = _labels_as_indices(dataset_valid, steps_per_epoch_val - 1)
truth_valid.shape

In [None]:
# Validation split: predict the same number of batches used for truth_valid.
preds_valid = model.predict(dataset_valid, steps=steps_per_epoch_val)
if OUTPUT_DIM == 2:
    valid_auc = roc_auc_score(truth_valid, preds_valid[:, 1])
    print(f"ROC AUC valid: {valid_auc}")
elif OUTPUT_DIM > 2:
    valid_accuracy = accuracy_score(truth_valid, np.argmax(preds_valid, axis=1))
    print(f"Accuracy valid: {valid_accuracy}")

# Test split: the whole test pipeline is predicted.
preds_test = model.predict(dataset_test)
if OUTPUT_DIM == 2:
    test_auc = roc_auc_score(truth_test, preds_test[:, 1])
    print(f"ROC AUC test: {test_auc}")
elif OUTPUT_DIM > 2:
    test_accuracy = accuracy_score(truth_test, np.argmax(preds_test, axis=1))
    print(f"Accuracy test: {test_accuracy}")

In [None]:
# plot_metric(history_1, METRIC)

In [None]:
# Five garbage-collection passes in total; the last call stays a bare
# expression so its return value remains the cell output.
for _ in range(4):
    gc.collect()
gc.collect()

### Unfreeze and fit more/all layers

In [None]:
# Make the layers from FROM_LAYER_RETRAIN onwards trainable and freeze the
# others (FROM_LAYER_RETRAIN = 0 unfreezes the whole model).
# Earlier slice starts tried: 144, 1019, 293.
retrained_layer_ids = {id(layer) for layer in model.layers[FROM_LAYER_RETRAIN:]}
for layer in model.layers:
    layer.trainable = id(layer) in retrained_layer_ids

# Fresh optimizer for the second phase.
# optimizer=SGD(lr=0.0001, momentum=0.9)
optimizer = Lookahead(RectifiedAdam(), sync_period=6, slow_step_size=0.5)

# Fresh early-stopping callback for the second phase.
early_stopping_options = {
    "monitor": "val_loss",
    "verbose": 1,
    "mode": "min",
    "patience": patience,
    "restore_best_weights": True,
}
es = EarlyStopping(**early_stopping_options)
es.set_model(model)

# The model has to be recompiled for the trainable changes to take effect.
model.compile(optimizer=optimizer, loss=LOSS)  # , metrics=[METRIC])

In [None]:
# Phase 2: train again, this time fine-tuning the unfrozen pre-trained layers
# alongside the top Dense layers.
phase_2_fit_options = dict(
    epochs=epochs_2,
    steps_per_epoch=steps_per_epoch,
    validation_data=dataset_valid,
    validation_steps=steps_per_epoch_val,
    callbacks=[es],
)
history_2 = model.fit(dataset_train, **phase_2_fit_options)

In [None]:
plot_metric(history_2, "loss")

In [None]:
# plot_metric(history_2, METRIC)

## Evaluation

In [None]:
# Validation split: predict the same number of batches used for truth_valid.
preds_valid = model.predict(dataset_valid, steps=steps_per_epoch_val)
if OUTPUT_DIM == 2:
    valid_auc = roc_auc_score(truth_valid, preds_valid[:, 1])
    print(f"ROC AUC valid: {valid_auc}")
elif OUTPUT_DIM > 2:
    valid_accuracy = accuracy_score(truth_valid, np.argmax(preds_valid, axis=1))
    print(f"Accuracy valid: {valid_accuracy}")

# Test split: the whole test pipeline is predicted.
preds_test = model.predict(dataset_test)
if OUTPUT_DIM == 2:
    test_auc = roc_auc_score(truth_test, preds_test[:, 1])
    print(f"ROC AUC test: {test_auc}")
elif OUTPUT_DIM > 2:
    test_accuracy = accuracy_score(truth_test, np.argmax(preds_test, axis=1))
    print(f"Accuracy test: {test_accuracy}")

In [None]:
# EfficientNetB0

# EfficientNetB2 2h => ROC AUC valid: 0.9357130861335854 ROC AUC test: 0.9330937133279599

In [None]:
# "santander-customer-satisfaction"
# 0.8164865598696714 => target size 96, whole re train
# 0.8141824599511267 => target size 224, whole re train
# ROC AUC valid: 0.8268400760249797 => 160 => 160, whole, batch 32, 1layer 1024 -> 512 -> 128, no dropout, early stopping every time

# give me some credit
# 0.8462134942186483 => target size 160, whole train (batch 128, 2layers 1024, 128)

# ROC AUC valid: 0.8348917439829162
# RL
# ROC AUC valid: 0.892118469133795 => 160, whole, batch 64, 1layer 1024
# ROC AUC valid: 0.9233534348199217 => 160 => 160, whole, batch 32, 1layer 1024 -> 512 -> 128, no dropout
# ROC AUC valid: 0.9465346534653467 => 160 => 160, whole, batch 32, 1layer 1024 -> 512 -> 128, no dropout, early stopping every time

# Open payment
# ROC AUC valid: 0.9186458210299415 => 96 =>160, whole, batch 64, 1layer 1024
# ROC AUC valid: 0.8778868370932499 => 96 => 96, whole, batch 64, 1layer 1024
# ROC AUC valid: 0.8917381493730192 => 96 => 224, whole, batch 64, 1layer 1024
# ROC AUC valid: 0.9090374872044725 => 96 => 96, whole, batch 64, 1layer 1024 -> 512 -> 128
# ROC AUC valid: 0.886417393797122 => 96 => 160, whole, batch 64, 1layer 1024 -> 512 -> 128
# ROC AUC valid: 0.9045819676568436 => 96 => 96, whole, batch 64, 1layer 1024 -> 128, no dropout
# ROC AUC valid: 0.9045819676568436 => 96 => 96, whole, batch 64, 1layer 1024 -> 128, no dropout
# ROC AUC valid: 0.9406275221953189 => 96 => 96, whole, batch 32, 1layer 1024 -> 512 -> 128, no dropout
# ROC AUC valid: 0.9399110034154216 => 160 => 160, whole, batch 32, 1layer 1024 -> 512 -> 128, no dropout, early stopping every time
# ROC AUC valid: 0.8811034128677376 => 96 => 96, whole, batch 128, 1layer 1024 -> 512 -> 128, no dropout, early stopping every time

# Bank marketing
# ROC AUC valid: 0.7970734141661526 => 160 => 160, whole, batch 32, 1layer 1024 -> 512 -> 128, no dropout

# Albert
# ROC AUC valid: 0.7500980687987842 => 288 => 160, whole, batch 32, 1layer 1024 -> 512 -> 128, no dropout

# bnp-cardif
# ROC AUC valid: 0.7206667869818926 => 384 => 96, whole, batch 32, 1layer 1024 -> 512 -> 128, no dropout


In [None]:
# "santander-customer-satisfaction"
# 0.833417731838137 => target size 96, whole train
# 0.8170226029745679 => target size 224, whole train
# ROC AUC test: 0.8348549041045967 => 160 => 160, whole, batch 32, 1layer 1024 -> 512 -> 128, no dropout, early stopping every time

# give me some credit
# 0.8453497574694486 => target size 160, whole train (batch 128, 2layers 1024, 128)
# ROC AUC test: 0.8447058873195916

# RL
# ROC AUC test: 0.9051288159651395 => 160, whole, batch 64, 1layer 1024
# ROC AUC test: 0.9128674518211912 => 160 => 160, whole, batch 32, 1layer 1024 -> 512 -> 128, no dropout
# ROC AUC test: 0.9456874816987527 => 160 => 160, whole, batch 32, 1layer 1024 -> 512 -> 128, no dropout, early stopping every time
# ROC AUC test: 0.9399110034154216 => 160 => 160, whole, batch 32, 1layer 1024 -> 512 -> 128, no dropout, early stopping every time

# Open payment
# ROC AUC test: 0.916907759155486 => 96 =>160, whole, batch 64, 1layer 1024
# ROC AUC test: 0.8931101859362467 => 96 => 96, whole, batch 64, 1layer 1024
# ROC AUC test: 0.8938387451368033 => 96 => 224, whole, batch 64, 1layer 1024
# ROC AUC test: 0.9067445823812874 => 96 => 96, whole, batch 64, 1layer 1024 -> 512 -> 128
# ROC AUC test: 0.8954549557710788 => 96 => 160, whole, batch 64, 1layer 1024 -> 512 -> 128
# ROC AUC test: 0.9062895529860363 => 96 => 96, whole, batch 64, 1layer 1024 -> 128, no dropout
# ROC AUC test: 0.9403517762951931 => 96 => 96, whole, batch 32, 1layer 1024 -> 512 -> 128, no dropout

# Bank marketing
# ROC AUC valid: 0.7959000291791145 => 160 => 160, whole, batch 32, 1layer 1024 -> 512 -> 128, no dropout

# Albert
# ROC AUC test: 0.7487660412524685 => 288 => 160, whole, batch 32, 1layer 1024 -> 512 -> 128, no dropout

# bnp-cardif
# ROC AUC test: 0.725934546476426 => 384 => 96, whole, batch 32, 1layer 1024 -> 512 -> 128, no dropout


## Explainability

In [None]:
# https://medium.com/google-developer-experts/interpreting-deep-learning-models-for-computer-vision-f95683e23c1d