# Training Notebook

Date: **26-02-2022**

In 'preprocess', we have prepared our data; now we will train.

[Template Notebook](https://colab.research.google.com/github/lcroffi/CNN/blob/master/CNN_InceptionResNetV2.ipynb#scrollTo=qC04FoyQn_08) and ideas from [2017 kaggle competition winners](https://arxiv.org/pdf/1806.06193.pdf)

## Approach:
-  Start with `Animalia` as a test run; check the results, both without and with augmentation
-  Train with entirety of data; see what accuracy we get
-  Fine-tune with the balanced mini dataset

In [None]:
import pandas as pd
import numpy as np
import os
import keras
import matplotlib.pyplot as plt

from keras.layers import Dense, GlobalAveragePooling2D, Dropout, Flatten
from keras.applications.inception_resnet_v2 import InceptionResNetV2, preprocess_input
from keras.applications.mobilenet_v3 import MobileNetV3Large, preprocess_input
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from keras.callbacks import ModelCheckpoint 
from keras.metrics import TopKCategoricalAccuracy

In [None]:
# Input resolution fed to the network: 224 for MobileNet, 299 for Inception-ResNet
IMG_WIDTH = 224
IMG_HEIGHT = 224

# Number of images per batch
BATCH_SIZE = 32

In [None]:
# Root folder of the training images
IMG_TRAIN_DIR = 'data/train_mini_supercategory/Animalia/'

# Count sub-directories only: a stray file in the folder (e.g. .DS_Store) would
# otherwise inflate the class count and no longer match the number of classes
# the directory-based data generators report.
NUM_CLASSES = sum(
    os.path.isdir(os.path.join(IMG_TRAIN_DIR, entry))
    for entry in os.listdir(IMG_TRAIN_DIR)
)
print(f'There are {NUM_CLASSES} classes in {IMG_TRAIN_DIR}')

# Directory the checkpoint callback writes the best model to
MODEL_SAVEDIR = './models'

In [None]:
# Fraction of every class folder held out for validation. Both generators below
# must use the same value so the training and validation subsets stay disjoint.
VALIDATION_SPLIT = 0.05

# Training datagen: preprocessing plus random zoom / shift augmentation.
# NOTE(review): `preprocess_input` here is the MobileNetV3 one (it is imported
# after, and therefore shadows, the Inception-ResNet one). It is presumably a
# pass-through because MobileNetV3 rescales inside the model — confirm before
# relying on a [-1, 1] input range.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=VALIDATION_SPLIT,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
)

# Validation datagen: same preprocessing and split, but NO augmentation, so the
# validation metrics are measured on unmodified images.
val_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=VALIDATION_SPLIT,
)

# Attach the datagens to the image folder (one sub-directory per class).
# Keras expects target_size as (height, width).
train_generator = train_datagen.flow_from_directory(
    directory=IMG_TRAIN_DIR,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    color_mode='rgb',
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training',
    shuffle=True,
)

val_generator = val_datagen.flow_from_directory(
    directory=IMG_TRAIN_DIR,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    color_mode='rgb',
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation',
    shuffle=False,  # fixed order: every validation pass sees the same batches
)

# Jupyter's hidden checkpoint folder must not have been picked up as a class
assert '.ipynb_checkpoints' not in val_generator.class_indices

In [None]:
# Load the MobileNetV3Large base model with ImageNet weights and without its
# classification head (include_top=False), so a custom head can be attached.
base_model = MobileNetV3Large(
    weights='imagenet',
    include_top=False,
    input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),  # Keras order: (height, width, channels)
)
print(f"Number of layers: {len(base_model.layers)}")

In [None]:
# Inspect the shape of the base model's output tensor
base_model.output.shape

In [None]:
# Classification head: flatten the base model's output and feed it to a single
# softmax layer with one unit per class.
x = Flatten()(base_model.output)
classifier = Dense(NUM_CLASSES, activation='softmax')
output = classifier(x)

# Full network: base model input -> class probabilities
model = Model(inputs=base_model.input, outputs=output)

In [None]:
# Stage 1: mark every layer of the pretrained base as non-trainable; the layers
# added on top of it are left untouched.
for base_layer in base_model.layers:
    base_layer.trainable = False

In [None]:
# Learning-rate schedule: starts at 0.045 and decays with rate 0.94 over a span
# of about two epochs' worth of steps (one step = one gradient update).
lr_schedule = ExponentialDecay(
    initial_learning_rate=0.045,
    decay_steps=(2 * train_generator.samples) // BATCH_SIZE,
    decay_rate=0.94,
)

In [None]:
# Metric: is the true class among the five highest-scoring predictions?
top_k_metric = TopKCategoricalAccuracy(k=5)

# Compile with RMSprop (momentum 0.9) driven by the decaying learning rate
model.compile(
    loss='categorical_crossentropy',
    optimizer=RMSprop(learning_rate=lr_schedule, momentum=0.9),
    metrics=['accuracy', top_k_metric],
)

In [None]:
print(f'Saving models in {MODEL_SAVEDIR}')

# Keep only the model with the highest validation accuracy seen so far
checkpoint = ModelCheckpoint(
    filepath=MODEL_SAVEDIR,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

callbacks_list = [checkpoint]

In [None]:
# Stage 1 training: 20 epochs, using only full batches per epoch
train_steps = train_generator.samples // BATCH_SIZE
val_steps = val_generator.samples // BATCH_SIZE

history = model.fit(
    train_generator,
    epochs=20,
    steps_per_epoch=train_steps,
    validation_data=val_generator,
    validation_steps=val_steps,
    callbacks=callbacks_list,
)


In [None]:
# Evaluate on the validation generator; `score` holds one value per entry in
# `model.metrics_names`.
score = model.evaluate(val_generator)

# The loss is not a fraction, so print it as a plain number; everything else is
# printed as a percentage. Iterating also reports every compiled metric rather
# than just the first two entries.
for metric_name, metric_value in zip(model.metrics_names, score):
    if metric_name == 'loss':
        print(f"{metric_name}: {metric_value:.4f}")
    else:
        print(f"{metric_name}: {metric_value:.2%}")

In [None]:
# Show the raw values returned by `model.evaluate`
print(score)

In [None]:
# unfreeze and train on all layers
for layer in model.layers:
    layer.trainable = True

In [None]:
# unsure about adjusting the learning rate here..; comment out for now and see how accuracy fares
# decay every 2 epochs (2* each step; a step is where each gradient update happens)
# lr_schedule = ExponentialDecay(initial_learning_rate=0.0045, decay_steps=(4*train_generator.samples//BATCH_SIZE) , decay_rate=0.94)
# model.compile(
#     optimizer=RMSprop(momentum=0.9, learning_rate=lr_schedule), 
#     loss='categorical_crossentropy', 
#     metrics=['accuracy', top_k_metric]
# )

In [None]:
# Fine-tune: 100 more epochs, using only full batches per epoch.
# Note that `history` is overwritten with this run's curves.
train_steps = train_generator.samples // BATCH_SIZE
val_steps = val_generator.samples // BATCH_SIZE

history = model.fit(
    train_generator,
    epochs=100,
    steps_per_epoch=train_steps,
    validation_data=val_generator,
    validation_steps=val_steps,
    callbacks=callbacks_list,
)


In [None]:
# Evaluate on the validation generator; `score` holds one value per entry in
# `model.metrics_names`.
score = model.evaluate(val_generator)
print(score)

# The loss is not a fraction, so print it as a plain number; everything else is
# printed as a percentage. Iterating also reports every compiled metric rather
# than just the first two entries.
for metric_name, metric_value in zip(model.metrics_names, score):
    if metric_name == 'loss':
        print(f"{metric_name}: {metric_value:.4f}")
    else:
        print(f"{metric_name}: {metric_value:.2%}")

In [None]:
import matplotlib.pyplot as plt

# One figure per quantity (accuracy first, then loss), each showing the
# training curve and the validation curve from the most recent `fit` call.
for quantity in ('accuracy', 'loss'):
    plt.plot(history.history[quantity])
    plt.plot(history.history['val_' + quantity])
    plt.title('model ' + quantity)
    plt.ylabel(quantity)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

In [None]:
# load best model
import tensorflow as tf

# Read from the same location the checkpoint callback was configured with
# (MODEL_SAVEDIR) instead of repeating the path as a literal, so the two
# cannot drift apart.
model = tf.keras.models.load_model(MODEL_SAVEDIR)

In [None]:
from keras.preprocessing import image

# Load one image from disk and run it through the same preprocessing as the
# training data. Keras expects target_size as (height, width).
test_image = image.load_img('test_starfish.jpg', target_size=(IMG_HEIGHT, IMG_WIDTH))
test_image = image.img_to_array(test_image)
test_image = preprocess_input(test_image)
test_image = np.expand_dims(test_image, axis=0)  # add the batch dimension
result = model.predict(test_image)  # output of the final softmax layer (probabilities, not logits)

# class name -> index as assigned by the generator, and the inverse lookup
label_map = val_generator.class_indices
reverse_label_map = {index: name for name, index in label_map.items()}

In [None]:
# Scores for the single test image (first and only row of the batch)
result = model.predict(test_image, batch_size=1)[0]

# Indices of the five highest scores, best first
top_5_args = np.argsort(result)[-5:][::-1]

print('Top 5 most likely species:')
for class_index in top_5_args:
    percentage = f"{100*result[class_index]:.2f}%"
    print(reverse_label_map[class_index], percentage)

In [None]:
def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):
    """Compute a Grad-CAM heatmap for the first sample in `img_array`.

    Args:
        img_array: Batched model input; it is passed to the model unchanged.
            Only the activations of sample 0 are used for the heatmap.
        model: Keras model that contains a layer named `last_conv_layer_name`.
        last_conv_layer_name: Name of the layer whose activations are weighted
            and summed into the heatmap.
        pred_index: Index of the class to explain. Defaults to the class with
            the highest score for sample 0.

    Returns:
        NumPy array holding the heatmap, clipped at 0 and scaled so its maximum
        is 1. If the map has no positive values it is returned as all zeros
        instead of being divided by zero (which produced NaN).

    NOTE(review): if `model` ends in a softmax, the gradients can presumably
    vanish for very confident predictions and give a flat map; the Keras
    Grad-CAM example removes the final activation first — confirm whether that
    is needed here.
    """
    # Model that maps the input image to the activations of the chosen layer
    # as well as to the output predictions
    grad_model = tf.keras.models.Model(
        model.inputs, [model.get_layer(last_conv_layer_name).output, model.output]
    )

    # Record the forward pass so the score of the chosen class can be
    # differentiated with respect to the layer activations
    with tf.GradientTape() as tape:
        last_conv_layer_output, preds = grad_model(img_array)
        if pred_index is None:
            pred_index = tf.argmax(preds[0])
        class_channel = preds[:, pred_index]

    # Gradient of the class score with regard to the layer's output feature map
    grads = tape.gradient(class_channel, last_conv_layer_output)

    # One weight per channel: the gradient averaged over all other axes
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # Weight each channel of sample 0's feature map by its importance and sum
    # over channels to obtain the class activation map
    last_conv_layer_output = last_conv_layer_output[0]
    heatmap = last_conv_layer_output @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)

    # Keep only positive evidence, then scale to [0, 1]. The division is
    # skipped when the maximum is not positive, because 0 / 0 would turn the
    # whole map into NaN.
    heatmap = tf.maximum(heatmap, 0)
    peak = tf.math.reduce_max(heatmap)
    if peak > 0:
        heatmap = heatmap / peak
    return heatmap.numpy()

In [None]:
# Known issue when this cell was written: the heatmap came back as all NaN.
# NOTE(review): that would be consistent with the heatmap being normalised by a
# maximum of zero — TODO confirm.
# Approach follows https://keras.io/examples/vision/grad_cam/
# 'Conv_1' is the name of the layer whose activations are visualised.
heatmap = make_gradcam_heatmap(test_image, model, 'Conv_1')
heatmap

In [None]:
import matplotlib.cm as cm
from IPython.display import Image, display

def save_and_display_gradcam(img_path, heatmap, cam_path="cam.jpg", alpha=0.4):
    """Overlay a heatmap on an image, save the result and display it.

    Args:
        img_path: Path of the original image; it is loaded at its native size.
        heatmap: 2-D array of heatmap values, expected in [0, 1]. NaN entries
            are treated as 0 and values outside [0, 1] are clipped.
        cam_path: File the superimposed image is written to.
        alpha: Factor the colourised heatmap is multiplied by before it is
            added to the original image.
    """
    # Load the original image as an array
    img = keras.preprocessing.image.load_img(img_path)
    img = keras.preprocessing.image.img_to_array(img)

    # Sanitise before the integer cast: casting NaN or out-of-range values to
    # uint8 gives arbitrary colour indices.
    heatmap = np.clip(np.nan_to_num(heatmap), 0.0, 1.0)

    # Rescale heatmap to a range 0-255 so it can index the colormap table
    heatmap = np.uint8(255 * heatmap)

    # Use jet colormap to colorize heatmap. pyplot.get_cmap is used because
    # matplotlib.cm.get_cmap is deprecated and removed in newer Matplotlib.
    jet = plt.get_cmap("jet")

    # Use RGB values of the colormap (drop the alpha column)
    jet_colors = jet(np.arange(256))[:, :3]
    jet_heatmap = jet_colors[heatmap]

    # Create an image with RGB colorized heatmap, resized to the original image
    jet_heatmap = keras.preprocessing.image.array_to_img(jet_heatmap)
    jet_heatmap = jet_heatmap.resize((img.shape[1], img.shape[0]))
    jet_heatmap = keras.preprocessing.image.img_to_array(jet_heatmap)

    # Superimpose the heatmap on original image
    superimposed_img = jet_heatmap * alpha + img
    superimposed_img = keras.preprocessing.image.array_to_img(superimposed_img)

    # Save the superimposed image
    superimposed_img.save(cam_path)

    # Display Grad CAM
    display(Image(cam_path))


# Overlay the heatmap on the test image; the result is written to the default
# cam_path ("cam.jpg") and displayed.
save_and_display_gradcam('test_starfish.jpg', heatmap)