# Vision Transformer (ViT)

11 classes of white blood cells:
- neutrophils (segmented) - SNE
- eosinophils - EO
- basophils - BA
- lymphocytes - LY
- monocytes - MO
- immature granulocytes (metamyelocytes, myelocytes, promyelocytes) and band neutrophils - either grouped as IG, or kept separate as MMY, MY, PMY, BNE
- platelets - PLATELET
- erythroblasts - ERB

inspired by this [notebook](https://www.kaggle.com/raufmomin/vision-transformer-vit-fine-tuning).



The approach is transfer learning: we start from a ViT model pre-trained on ImageNet-21k and fine-tuned on ImageNet2012 (the code below loads the `vit_b32` variant). We then fine-tune the last layers of 5 transformer blocks and the head of the model. Attention-map visualisation is used in particular to analyse the model predictions.


In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.cm as cm

import sys, os
from pathlib import Path

import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Model
from tensorflow import keras
import pickle


from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, TerminateOnNaN, EarlyStopping

from tensorflow.keras.utils import Sequence
from collections import Counter
from sklearn.utils.class_weight import compute_class_weight

from vit_keras import vit, utils
from vit_keras import visualize

from keras.models import load_model
from sklearn.metrics import classification_report, confusion_matrix

sys.path.insert(0, str(Path.cwd().parent.parent))
import leukopy_lib as leuko
from importlib import reload
reload(leuko)

<module 'leukopy_lib' from '/home/marie-anne/code/Leukopy/notebooks/leukopy_lib.py'>

# Helper functions

In [8]:
def find_layer(model: Model, layer_name: str) -> tuple:
    '''
    Locate a layer of ``model`` by its exact name.

    Parameters
    ----------
    model : Model
        Object exposing a ``layers`` sequence whose items have a ``name``.
    layer_name : str
        Name to look for (exact, case-sensitive match).

    Returns
    -------
    tuple
        ``(index, layer)`` of the first layer whose name equals ``layer_name``.

    Raises
    ------
    ValueError
        If no layer carries that name. The function previously fell through
        and returned ``None``, which made callers that unpack the result fail
        with an unrelated "cannot unpack" error.
    '''
    for (i, layer) in enumerate(model.layers):
        if layer.name == layer_name:
            return (i, layer)

    available = ', '.join(str(layer.name) for layer in model.layers)
    raise ValueError(
        f'No layer named {layer_name!r} in model; available layers: {available}'
    )
    
def model_surgery(model: Model, submodel_name: str = 'vit-b16') -> Model:
    '''
    Flatten a nested model into a single Functional model.

    The nested backbone is located inside ``model``; every layer that follows
    it in ``model.layers`` is then re-applied, in order, on top of the
    backbone's first output. The result exposes the backbone's internal layers
    directly in ``new_model.layers``.

    Parameters
    ----------
    model : Model
        Outer model containing the backbone followed by extra layers.
    submodel_name : str
        Name of the nested backbone layer. If no layer has this name, the
        first layer that is itself a ``Model`` is used instead, so that other
        backbone variants (the notebook's ``vit_b32`` summary reports the name
        ``vit-b32``) are handled without editing this function.

    Returns
    -------
    Model
        Functional model from the backbone's inputs to the last layer's output.

    Raises
    ------
    ValueError
        If neither a layer named ``submodel_name`` nor any nested ``Model``
        exists in ``model.layers``.
    '''
    matches = [(i, layer) for (i, layer) in enumerate(model.layers)
               if layer.name == submodel_name]
    if not matches:
        # Fall back on "the first nested model" when the name does not match.
        matches = [(i, layer) for (i, layer) in enumerate(model.layers)
                   if isinstance(layer, Model)]
    if not matches:
        raise ValueError(
            f'No layer named {submodel_name!r} and no nested Model found in model.layers'
        )
    submodel_index, submodel = matches[0]

    x = submodel.outputs[0]
    for extracted_layer in model.layers[submodel_index + 1:]:
        print('Surgically appending layer : '+str(extracted_layer))
        x = extracted_layer(x)
    new_model = Model(inputs=submodel.inputs, outputs=[x])
    return new_model

In [10]:
def plot_hist(hist):
    '''
    Draw training curves side by side: accuracy on the left, loss on the right.

    ``hist`` is indexed with the keys 'accuracy', 'val_accuracy', 'loss' and
    'val_loss'. The figure is closed before being returned, so it is only
    rendered when the caller displays the returned object.
    '''
    fig, axes = plt.subplots(1, 2, figsize=(12, 4))

    panels = (('accuracy', 'Accuracy'), ('loss', 'Loss'))
    for axis, (metric, y_label) in zip(axes, panels):
        axis.plot(hist[metric], label='training')
        axis.plot(hist['val_' + metric], label='testing')
        axis.set_xlabel('Epochs')
        axis.set_ylabel(y_label)
        axis.legend()

    plt.close()
    return fig

In [11]:
def plot_confusion_matrix(df, label_map):
    '''
    Show a row-normalised confusion matrix as a heatmap.

    Parameters
    ----------
    df : DataFrame
        Must contain the columns 'true_label' and 'pred_label'.
    label_map : dict
        Maps label name -> class index. Labels are displayed in class-index
        order.

    The matrix is computed from ``df`` (the function used to read the global
    ``df_pred`` and ignore its argument). The same label list is passed to
    ``confusion_matrix`` and used for the ticks, so rows/columns and tick
    labels cannot get out of step.
    '''
    plt.figure(figsize=(10,8))
    labels = sorted(label_map, key=label_map.get)
    matrix = confusion_matrix(df['true_label'], df['pred_label'],
                              labels=labels, normalize='true')
    g = sns.heatmap(matrix,
                    xticklabels=labels, yticklabels=labels,
                    annot=True, cmap='GnBu', square=True, )
    g.set_yticklabels(g.get_yticklabels(), rotation=0)
    plt.title('confusion matrix')
    plt.ylabel('True labels')
    plt.xlabel('Predicted label')
    plt.show()


In [12]:
def plot_attention(img_path: str, model: Model):
    '''
    Plot an image next to the attention map computed for it.

    Parameters
    ----------
    img_path : str
        Path of the image file to load.
    model : Model
        Model handed to ``vit_keras.visualize.attention_map``.

    Returns
    -------
    The matplotlib figure holding both panels.
    '''
    # The image was never loaded (``image`` was an undefined name and
    # ``img_path`` was unused). Load it the same way get_img_array does.
    # NOTE(review): loaded at native resolution, on the assumption that
    # attention_map resizes its input itself - confirm against vit_keras.
    pil_image = tf.keras.preprocessing.image.load_img(Path(img_path))
    image = tf.keras.preprocessing.image.img_to_array(pil_image)

    attention_map = visualize.attention_map(model = model, image = image)

    # Plot results
    fig, (ax1, ax2) = plt.subplots(ncols = 2)
    ax1.axis('off')
    ax2.axis('off')
    ax1.set_title('Original')
    ax2.set_title('Attention Map')
    _ = ax1.imshow(image.astype('uint8'))
    _ = ax2.imshow(attention_map)

    return fig

In [13]:
def get_img_array(img_path: str, dim: tuple) -> tuple:
    '''
    Load an image and return it both as a batch array and as a PIL image.

    Parameters
    ----------
    img_path : str
        Path of the image file.
    dim : tuple
        Target size passed to ``load_img`` as ``target_size``.

    Returns
    -------
    tuple
        ``(array, img)`` where ``array`` is the ``img_to_array`` conversion of
        the image with a leading axis of length 1, and ``img`` is the loaded
        image object.
    '''
    img = tf.keras.preprocessing.image.load_img(Path(img_path), target_size = dim)
    array = tf.keras.preprocessing.image.img_to_array(img)
    # Expand the converted array: the conversion above used to be discarded
    # because the PIL image was expanded instead.
    array = np.expand_dims(array, axis = 0)
    return array, img

def make_gradcam_heatmap(img_array: np.ndarray, model, layer_name: str):
    '''
    Compute a Grad-CAM style relevance vector for the top predicted class.

    Parameters
    ----------
    img_array : np.ndarray
        Input batch fed to the model; the class is chosen from the first
        sample's prediction.
    model :
        Keras model exposing ``layers``, ``inputs`` and ``output``.
    layer_name : str
        Substring of the target layer's name. Layers are scanned from last to
        first and the first match is used.

    Returns
    -------
    np.ndarray
        Non-negative relevance values scaled so that the maximum is 1 (all
        zeros if nothing is positive). The first entry of the squeezed result
        is dropped - presumably the class token; assumes the target layer
        outputs a single (batch, tokens, channels) tensor - TODO confirm.

    Raises
    ------
    ValueError
        If no layer name contains ``layer_name`` (previously an unbound-name
        error).
    '''
    target_layer = next(
        (layer for layer in reversed(model.layers) if layer_name in layer.name),
        None,
    )
    if target_layer is None:
        raise ValueError(f'No layer whose name contains {layer_name!r}')

    # One forward pass returns both the target activations and the predictions.
    grad_model = tf.keras.models.Model(
        [model.inputs],
        [target_layer.output, model.output])

    with tf.GradientTape() as tape:
        layer_output, preds = grad_model(img_array)
        pred_index = tf.argmax(preds[0])
        class_channel = preds[:, pred_index]

    # Gradient of the winning class score w.r.t. the activations, averaged
    # over the first two axes to get one weight per channel.
    grads = tape.gradient(class_channel, layer_output)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1))

    heatmap = layer_output @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)[1:]
    heatmap = tf.maximum(heatmap, 0).numpy()

    # Guard the normalisation: an all-zero map used to become NaN (0 / 0).
    peak = heatmap.max()
    if peak > 0:
        heatmap = heatmap / peak
    return heatmap

def gradcam(img_path: str, img_size: int, model: Model, layer_name: str) -> 'image':
    '''
    Overlay a Grad-CAM heatmap on an image.

    Parameters
    ----------
    img_path : str
        Path of the image file.
    img_size : int
        Side length the image is resized to before being fed to the model.
    model : Model
        Model to explain.
    layer_name : str
        Substring identifying the layer used by ``make_gradcam_heatmap``.

    Returns
    -------
    Image built from ``0.8 * coloured heatmap + original image``.

    Raises
    ------
    ValueError
        If the heatmap length is not a perfect square, so it cannot be laid
        out as a square patch grid.
    '''
    arr, img = get_img_array(img_path, (img_size, img_size))
    img = tf.keras.preprocessing.image.img_to_array(img)

    heatmap = make_gradcam_heatmap(arr, model, layer_name)
    heatmap = np.uint8(255 * heatmap)

    # Derive the grid side from the heatmap instead of assuming 14x14:
    # 196 values still give 14x14, while the 121 patch tokens of the
    # 352px / patch-32 model in this notebook give 11x11.
    side = int(round(np.sqrt(heatmap.size)))
    if side * side != heatmap.size:
        raise ValueError(
            f'Heatmap of length {heatmap.size} cannot be reshaped to a square grid'
        )
    heatmap = np.reshape(heatmap, (side, side))

    # Map the 0-255 intensities to RGB with the "jet" colormap (alpha dropped).
    jet = cm.get_cmap("jet")
    jet_colors = jet(np.arange(256))[..., :-1]
    jet_heatmap = jet_colors[heatmap]

    # Upscale the coloured grid to the image resolution.
    jet_heatmap = keras.preprocessing.image.array_to_img(jet_heatmap)
    jet_heatmap = jet_heatmap.resize((img.shape[1], img.shape[0]))
    jet_heatmap = tf.keras.preprocessing.image.img_to_array(jet_heatmap)

    superimposed_img = jet_heatmap * 0.8 + img
    superimposed_img = keras.preprocessing.image.array_to_img(superimposed_img)

    return superimposed_img

In [14]:
from datetime import datetime
import tempfile

# Run identifier: day, month, year and hour of the run.
t = datetime.now().strftime("%d%m%Y_%H")

#saving folder
save = input('To save enter: yes')

if save == 'yes':
    OUTPUT_PATH = Path(f'results/{t}')
    OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
else:
    # Later cells (checkpoint, model.save, pickles) use OUTPUT_PATH
    # unconditionally, so always define it: without an explicit 'yes' the
    # artefacts go to a throw-away temporary folder instead of raising a
    # NameError further down.
    OUTPUT_PATH = Path(tempfile.mkdtemp(prefix=f'vit_{t}_'))
    print(f'Not saving to results/: outputs go to temporary folder {OUTPUT_PATH}')

To save enter: yes yes


# Set up

## Generate dataframes

In [15]:
# Root folder that holds the three dataset splits as sub-folders.
INPUT_PATH = Path('../input/main-dataset/main_dataset/')

# One dataframe per split, built by the project helper from the split folder.
# The preview below shows (among others) the 'img_path' and 'label' columns
# that the image generators read later on.
df_train = leuko.generate_images_df(INPUT_PATH/'training_set')
df_test = leuko.generate_images_df(INPUT_PATH/'testing_set')
df_valid = leuko.generate_images_df(INPUT_PATH/'validation_set')

# Preview the first rows of the training dataframe.
df_train.head()

Unnamed: 0,img_path,cell_type,label,label_2,label_3
0,../input/main-dataset/main_dataset/training_se...,training_set,BA,BA,BA
1,../input/main-dataset/main_dataset/training_se...,training_set,BNE,IG,BNE
2,../input/main-dataset/main_dataset/training_se...,training_set,MO,MO,MO
3,../input/main-dataset/main_dataset/training_se...,training_set,MY,IG,IG
4,../input/main-dataset/main_dataset/training_se...,training_set,BA,BA,BA


## Set constants
For the ViT model, the image size needs to be a multiple of the patch size.
The original [ViT paper](https://arxiv.org/abs/2010.11929) ("An Image is Worth 16x16 Words") splits images into 16x16 patches; the `vit_b32` variant loaded below uses 32x32 patches.

In [16]:
# Number of images per batch produced by the data generators.
BATCH_SIZE = 32
# Side length (pixels) images are resized to; must be a multiple of the
# patch size (32): 352 = 11 * 32.
IMG_SIZE  = 352

# Upper bound on training epochs (early stopping may end training sooner).
EPOCHS = 100
# Initial learning rate given to the optimizer.
LR = 1e-3

# Number of target classes requested from leuko.choose_classes.
N_CLASSES = 11

## choose classes

In [17]:
# Project helper: returns the class count together with the three dataframes.
# NOTE(review): presumably relabels/filters rows to match the requested number
# of classes - confirm in leukopy_lib.
N_CLASSES, df_train, df_test, df_valid = leuko.choose_classes(df_train, df_test, df_valid, n_classes = N_CLASSES)

In [18]:
# Display the class count returned by leuko.choose_classes.
N_CLASSES

11

In [19]:
#sanity check
# Sanity check: number of training images per label (shows the class imbalance).
df_train["label"].value_counts()

EO     2349
PLT    1739
SNE    1263
BNE    1212
ERB    1162
MO     1068
LY      944
BA      903
MY      833
MMY     734
PMY     460
Name: label, dtype: int64

## Image generator
We import images from a dataframe using column names for feature and target. Shuffle is set to false for the testing set.

In [20]:
# Only the training images are augmented (rotations, flips, light shear);
# validation and test images are passed through unchanged.
# NOTE(review): no rescaling / preprocessing_function is applied to any split,
# so the network receives raw pixel values - confirm this matches what the
# pretrained vit_keras weights expect (see vit.preprocess_inputs).
train_generator = ImageDataGenerator(
    rotation_range = 360,
    horizontal_flip = True,
    vertical_flip = True,
    shear_range = 0.1,
)
valid_generator = ImageDataGenerator()
test_generator = ImageDataGenerator()

# Options shared by the three splits: paths come from 'img_path' and targets
# from 'label'; images are resized to IMG_SIZE and batched.
_flow_options = dict(
    directory = None,      # uses x_col
    x_col = 'img_path',
    y_col = 'label',
    target_size = (IMG_SIZE, IMG_SIZE),
    color_mode = 'rgb',
    classes = None,        # uses y_col
    class_mode = 'categorical',
    batch_size = BATCH_SIZE,
)

training_set = train_generator.flow_from_dataframe(df_train, shuffle = True, **_flow_options)
validation_set = valid_generator.flow_from_dataframe(df_valid, shuffle = True, **_flow_options)
# The test split keeps its order so predictions can be matched back to rows.
testing_set = test_generator.flow_from_dataframe(df_test, shuffle = False, **_flow_options)

# Saving the dictionary of labels (label name -> class index)
label_map = training_set.class_indices
print('\n Label map\n ', label_map)

Found 12667 validated image filenames belonging to 11 classes.
Found 1730 validated image filenames belonging to 11 classes.
Found 2544 validated image filenames belonging to 11 classes.

 Label map
  {'BA': 0, 'BNE': 1, 'EO': 2, 'ERB': 3, 'LY': 4, 'MMY': 5, 'MO': 6, 'MY': 7, 'PLT': 8, 'PMY': 9, 'SNE': 10}


## Model import
We remove the top (dense) layer for fine-tuning and add batch normalisation to limit overfitting.

In [21]:
# ViT-B/32 backbone from vit_keras with pretrained weights and without its top.
# NOTE(review): 'activation' and 'classes' presumably only matter when
# include_top is True - confirm in vit_keras.
vit_model = vit.vit_b32(
    image_size = IMG_SIZE,
    weights = 'imagenet21k+imagenet2012',
    pretrained = True,
    include_top = False,
    pretrained_top = False, #fine tune
    activation = 'softmax',
    classes = N_CLASSES,
)


2021-10-31 15:23:10.455220: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-10-31 15:23:10.554041: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-10-31 15:23:10.554737: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-10-31 15:23:10.556034: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

Downloading data from https://github.com/faustomorales/vit-keras/releases/download/dl/ViT-B_32_imagenet21k+imagenet2012.npz




In [22]:
# Print the layer-by-layer summary of the backbone (output shapes, parameters).
vit_model.summary()

Model: "vit-b32"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 352, 352, 3)]     0         
_________________________________________________________________
embedding (Conv2D)           (None, 11, 11, 768)       2360064   
_________________________________________________________________
reshape (Reshape)            (None, 121, 768)          0         
_________________________________________________________________
class_token (ClassToken)     (None, 122, 768)          768       
_________________________________________________________________
Transformer/posembed_input ( (None, 122, 768)          93696     
_________________________________________________________________
Transformer/encoderblock_0 ( ((None, 122, 768), (None, 7087872   
_________________________________________________________________
Transformer/encoderblock_1 ( ((None, 122, 768), (None, 7087

### adding batch normalisation and dense layers to the model

In [23]:
# Classification head stacked on the backbone:
# batch norm -> Dense(256, GELU) -> batch norm -> softmax over the classes.
temp_model = tf.keras.Sequential([
    vit_model,
    layers.BatchNormalization(),
    layers.Dense(256, activation = tfa.activations.gelu, name = 'dense_1'),
    layers.BatchNormalization(),
    # Output width follows N_CLASSES instead of a hard-coded 11, so it stays
    # in sync with the number of classes selected above.
    layers.Dense(N_CLASSES, activation = 'softmax')
    ])

### Flattening the model

In [24]:
# Flatten the Sequential wrapper so the backbone's internal layers appear
# directly in model.layers, then print the resulting architecture.
model = model_surgery(temp_model)
model.summary()

Surgically appending layer : <keras.layers.normalization.batch_normalization.BatchNormalization object at 0x7f0a8014a290>
Surgically appending layer : <keras.layers.core.Dense object at 0x7f0a8014a690>
Surgically appending layer : <keras.layers.normalization.batch_normalization.BatchNormalization object at 0x7f0a8014a410>
Surgically appending layer : <keras.layers.core.Dense object at 0x7f0a90055910>
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 352, 352, 3)]     0         
_________________________________________________________________
embedding (Conv2D)           (None, 11, 11, 768)       2360064   
_________________________________________________________________
reshape (Reshape)            (None, 121, 768)          0         
_________________________________________________________________
class_token (ClassToken)     (None, 122, 768)        

### Setting trainability of layers 

In [25]:
# Freeze every layer except the last four.
# NOTE(review): the last four layers are the appended head (batch norm, dense,
# batch norm, dense), so no transformer block is fine-tuned here - the
# introduction mentions fine-tuning 5 transformer blocks; confirm the intent.
for layer in model.layers[:-4]:
    layer.trainable = False
    
# Report the trainable flag of each layer, then the updated summary.
for layer in model.layers:
    print(layer, layer.trainable)
model.summary()

<keras.engine.input_layer.InputLayer object at 0x7f0a93e51e90> False
<keras.layers.convolutional.Conv2D object at 0x7f0a93d4f910> False
<keras.layers.core.Reshape object at 0x7f0a9480bed0> False
<vit_keras.layers.ClassToken object at 0x7f0a949f4dd0> False
<vit_keras.layers.AddPositionEmbs object at 0x7f0a949f4510> False
<vit_keras.layers.TransformerBlock object at 0x7f0a93e26f90> False
<vit_keras.layers.TransformerBlock object at 0x7f0a94e59790> False
<vit_keras.layers.TransformerBlock object at 0x7f0a93c2fa10> False
<vit_keras.layers.TransformerBlock object at 0x7f0a93ce1b90> False
<vit_keras.layers.TransformerBlock object at 0x7f0a94ad0590> False
<vit_keras.layers.TransformerBlock object at 0x7f0a94c55d90> False
<vit_keras.layers.TransformerBlock object at 0x7f0a93c9e6d0> False
<vit_keras.layers.TransformerBlock object at 0x7f0a94c551d0> False
<vit_keras.layers.TransformerBlock object at 0x7f0a93755450> False
<vit_keras.layers.TransformerBlock object at 0x7f0a937c4050> False
<vit_ker

In [26]:
# Metrics tracked besides accuracy. The F1 class count follows N_CLASSES
# instead of a hard-coded 11, so it always matches the model's output width.
f1_metric = tfa.metrics.F1Score(num_classes=N_CLASSES, average='macro', name='f1')
precision = tf.keras.metrics.Precision(name='precision')
recall = tf.keras.metrics.Recall(name='recall')

optimizer = tfa.optimizers.RectifiedAdam(learning_rate = LR)

# Categorical cross-entropy with light label smoothing. The order of the
# metrics list is the order of the values returned by model.evaluate after
# the loss.
model.compile(optimizer = optimizer, 
              loss = tf.keras.losses.CategoricalCrossentropy(label_smoothing = 0.05),
              metrics = ['accuracy', f1_metric, precision, recall])

### Adding callbacks

In [27]:
# All three callbacks watch the validation accuracy and treat higher as better.
_monitoring = dict(monitor = 'val_accuracy', mode = 'max')

# Stop after 7 epochs without improvement and roll back to the best weights.
earlystopping = EarlyStopping(min_delta = 1e-4,
                              patience = 7,
                              restore_best_weights = True,
                              verbose = 1,
                              **_monitoring)

# Multiply the learning rate by 0.2 after 2 epochs without improvement,
# never going below 1e-7.
reduce_lr = ReduceLROnPlateau(factor = 0.2,
                              patience = 2,
                              verbose = 1,
                              min_delta = 1e-4,
                              min_lr = 1e-7,
                              **_monitoring)

# Keep only the weights of the best epoch on disk.
checkpointer = ModelCheckpoint(filepath = OUTPUT_PATH/'model.hdf5',
                               verbose = 1,
                               save_best_only = True,
                               save_weights_only = True,
                               **_monitoring)

callbacks_list = [earlystopping, reduce_lr, checkpointer]

## Compute weights 
To compensate for the class imbalance

In [28]:
# Per-class weights from the project helper, keyed by class index (see the
# output below); passed to model.fit as class_weight.
# NOTE(review): the meaning of method = 3 is defined in leukopy_lib - confirm there.
class_weights = leuko.compute_weights(training_set, method = 3)
class_weights

{0: 7.0138427464008855,
 1: 5.225660066006601,
 6: 5.930243445692884,
 7: 7.603241296518607,
 2: 2.696253724989357,
 8: 3.642035652673951,
 10: 5.0146476642913695,
 3: 5.450516351118761,
 4: 6.709216101694915,
 9: 13.768478260869566,
 5: 8.628746594005449}

# Training

In [29]:
# Train with class weighting and the callbacks defined above.
# steps_per_epoch / validation_steps must be integers: len(iterator) is the
# number of batches in one full pass (including the last partial batch),
# whereas n / batch_size produced a float (e.g. 12667 / 32).
training_history = model.fit(x = training_set, 
                             steps_per_epoch = len(training_set),
                             validation_steps = len(validation_set),

                             epochs = EPOCHS,
                             callbacks = callbacks_list,
                             validation_data = validation_set, 
                             class_weight = class_weights
                            )

# Save the full model (as left by training) into the run's output folder.
model.save(OUTPUT_PATH)

2021-10-31 15:23:26.339489: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/100


2021-10-31 15:23:40.235380: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005



Epoch 00001: val_accuracy improved from -inf to 0.42832, saving model to results/31102021_15/model.hdf5
Epoch 2/100

Epoch 00002: val_accuracy improved from 0.42832 to 0.48150, saving model to results/31102021_15/model.hdf5
Epoch 3/100

Epoch 00003: val_accuracy did not improve from 0.48150
Epoch 4/100

Epoch 00004: val_accuracy improved from 0.48150 to 0.50347, saving model to results/31102021_15/model.hdf5
Epoch 5/100

Epoch 00005: val_accuracy improved from 0.50347 to 0.50751, saving model to results/31102021_15/model.hdf5
Epoch 6/100

Epoch 00006: val_accuracy improved from 0.50751 to 0.54798, saving model to results/31102021_15/model.hdf5
Epoch 7/100

Epoch 00007: val_accuracy did not improve from 0.54798
Epoch 8/100

Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.

Epoch 00008: val_accuracy did not improve from 0.54798
Epoch 9/100

Epoch 00009: val_accuracy did not improve from 0.54798
Epoch 10/100

Epoch 00010: ReduceLROnPlateau reducing learnin

2021-10-31 16:54:10.602548: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


# Saving
## History

In [30]:
# Persist the per-epoch metrics dictionary as a pickle in the output folder.
history_file = OUTPUT_PATH/'training_hist_b16'
history_file.write_bytes(pickle.dumps(training_history.history))

## Testing pred and eval

In [31]:
# Evaluate on the test split; returns the loss followed by the compiled
# metrics in order: accuracy, f1, precision, recall.
model.evaluate(testing_set)




[1.5219610929489136,
 0.5585691928863525,
 0.48439711332321167,
 0.8713043332099915,
 0.19693395495414734]

In [32]:
#make predictions on the test set
predictions = model.predict(testing_set)
y_pred = tf.argmax(predictions, axis = 1)

In [33]:
#create df
test_label_map = {v:k for k, v in testing_set.class_indices.items()}
df_pred = pd.DataFrame({'true':testing_set.classes,
                        'true_label':df_test['label'],
                        'pred':y_pred,
                        'img_path':df_test['img_path']})
df_pred['pred_label'] = df_pred['pred'].map(test_label_map)
df_pred.to_pickle(OUTPUT_PATH/'pred_b16.pk')
df_pred

Unnamed: 0,true,true_label,pred,img_path,pred_label
0,4,LY,4,../input/main-dataset/main_dataset/testing_set...,LY
1,10,SNE,4,../input/main-dataset/main_dataset/testing_set...,LY
2,0,BA,5,../input/main-dataset/main_dataset/testing_set...,MMY
3,0,BA,4,../input/main-dataset/main_dataset/testing_set...,LY
4,3,ERB,8,../input/main-dataset/main_dataset/testing_set...,PLT
...,...,...,...,...,...
2539,1,BNE,6,../input/main-dataset/main_dataset/testing_set...,MO
2540,10,SNE,9,../input/main-dataset/main_dataset/testing_set...,PMY
2541,3,ERB,3,../input/main-dataset/main_dataset/testing_set...,ERB
2542,4,LY,6,../input/main-dataset/main_dataset/testing_set...,MO
