<a href="https://colab.research.google.com/github/Sicily-F/cagedbirdID/blob/main/9_Ensembling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Ensemble learning and why we should use it


Code inspiration from here: https://medium.com/@muhmd.mustain/product-image-classification-using-ensemble-learning-e5b6d166afc2

In [None]:
import numpy as np	
import PIL.Image as Image
import matplotlib.pylab as plt
import tensorflow as tf
import tensorflow_addons as tfa
import PIL
import pathlib
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
from tensorflow.keras.applications import DenseNet121, MobileNetV2
from tensorflow.keras.applications.inception_v3 import InceptionV3  
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras import layers
from tensorflow import keras
from tensorflow.keras.models import Model, Sequential, load_model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Input, Average, Flatten, Dropout, BatchNormalization
from tensorflow.keras.callbacks import Callback, EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
import os
# os.environ["CUDA_VISIBLE_DEVICES"] = "1" #What does this do- TF default GPU easily runs out of memory? https://stackoverflow.com/questions/57483567/tensorflow-device-cuda0-not-supported-by-xla-service-while-setting-up-xla-gpu



In [None]:
# Dataset splits: one directory each for training, validation and testing.
TEST_DIR = 'all_species_cropped_balanced/test'
VAL_DIR = 'all_species_cropped_balanced/val'
TRAIN_DIR = 'all_species_cropped_balanced/train'

# Every image is resized to IMG_HEIGHT x IMG_WIDTH before it reaches a model.
IMG_HEIGHT = IMG_WIDTH = 224

# Number of images per batch produced by the data generators.
BATCH_SIZE = 16

# Seed handed to the data generators so shuffling is repeatable.
RANDOM_SEED = 1

# We use random erasing with the default parameters from the paper (https://arxiv.org/pdf/1708.04896.pdf), with pixel-level randomisation.

def get_random_eraser(p=0.5, s_l=0.02, s_h=0.4, r_1=0.3, r_2=1/0.3, v_l=0, v_h=255, pixel_level=True):
    """Build a Random Erasing augmentation function (arXiv:1708.04896).

    Args:
        p: Probability that a rectangle is erased in a given image.
        s_l: Lower bound of the erased area, as a fraction of the image area.
        s_h: Upper bound of the erased area, as a fraction of the image area.
        r_1: Lower bound of the rectangle's height / width ratio.
        r_2: Upper bound of the rectangle's height / width ratio.
        v_l: Lower bound of the random fill values.
        v_h: Upper bound of the random fill values.
        pixel_level: If True, every erased pixel gets its own random value;
            otherwise a single random value fills the whole rectangle.

    Returns:
        A function ``eraser(input_img)`` taking a 2-D (H, W) or 3-D (H, W, C)
        NumPy array. The array is modified IN PLACE and also returned.
    """
    def eraser(input_img):
        """Erase one random rectangle of `input_img` with probability `p`."""
        if input_img.ndim == 3:
            img_h, img_w, img_c = input_img.shape
        elif input_img.ndim == 2:
            img_h, img_w = input_img.shape
        else:
            # Fail early with a clear message rather than hitting an
            # UnboundLocalError on img_h / img_w further down.
            raise ValueError(
                "eraser expects a 2-D or 3-D image array, got ndim=%d"
                % input_img.ndim)

        # Leave the image untouched with probability (1 - p).
        if np.random.rand() > p:
            return input_img

        # Rejection sampling: redraw until the rectangle fits in the image.
        while True:
            s = np.random.uniform(s_l, s_h) * img_h * img_w  # target area in pixels
            r = np.random.uniform(r_1, r_2)                  # height / width ratio
            w = int(np.sqrt(s / r))
            h = int(np.sqrt(s * r))
            left = np.random.randint(0, img_w)
            top = np.random.randint(0, img_h)

            if left + w <= img_w and top + h <= img_h:
                break

        if pixel_level:
            # Independent random value for every erased pixel (and channel).
            fill_shape = (h, w, img_c) if input_img.ndim == 3 else (h, w)
            c = np.random.uniform(v_l, v_h, fill_shape)
        else:
            # One random value broadcast over the whole rectangle.
            c = np.random.uniform(v_l, v_h)

        input_img[top:top + h, left:left + w] = c

        return input_img

    return eraser

# Training-time augmentation.
# ImageDataGenerator runs `preprocessing_function` on the augmented image
# while its pixels are still in the 0-255 range and only then multiplies by
# `rescale`. The eraser therefore has to draw its fill values from 0-255;
# drawing from 0-1 would be rescaled to ~0 and produce plain black patches
# instead of pixel-level random noise.
datagen = ImageDataGenerator(
    rescale=1/255,
    preprocessing_function=get_random_eraser(v_l=0, v_h=255, pixel_level=True),
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True)

train_gen = datagen.flow_from_directory(
    TRAIN_DIR,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    seed=RANDOM_SEED)

# Validation and test images are only rescaled, never augmented.
testgen = ImageDataGenerator(
    rescale=1/255)

val_gen = testgen.flow_from_directory(
    VAL_DIR,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    seed=RANDOM_SEED)

# shuffle=False keeps the prediction order aligned with `test_gen.classes`,
# which the evaluation metrics rely on.
test_gen = testgen.flow_from_directory(
    TEST_DIR,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    shuffle=False,
    seed=RANDOM_SEED)


There are two main APIs in Keras (which ships with TensorFlow) that you can use to add layers to existing, pre-trained models: the Sequential and the Functional API. The Functional API offers more flexibility and control over the layers than the Sequential API; for example, it can build models with multiple inputs (i.e. input layers) and multiple outputs (i.e. output layers).
Here we used the Functional API. You can read more [here](https://www.analyticsvidhya.com/blog/2021/07/understanding-sequential-vs-functional-api-in-keras/).


In [None]:
# Number of output classes predicted by every member of the ensemble.
NUM_CLASSES = 37


def _add_classification_head(base_model, num_classes=NUM_CLASSES):
    """Return `base_model` extended with a new classification head.

    The head is: global average pooling -> Dense(1024, relu) ->
    Dense(num_classes, softmax).
    """
    x = GlobalAveragePooling2D()(base_model.output)
    x = Dense(1024, activation='relu')(x)
    predictions = Dense(num_classes, activation='softmax')(x)
    return Model(inputs=base_model.input, outputs=predictions)


# NOTE: layer trainability has to be set before the model is compiled.

# model1: DenseNet121 used as a frozen feature extractor; only the new head trains.
base_model1 = DenseNet121(weights='imagenet', include_top=False)
model1 = _add_classification_head(base_model1)
for layer in base_model1.layers:
    layer.trainable = False

# model2: InceptionV3, partially fine-tuned. The first 249 layers are frozen
# and the remaining ones (including the new head) are trained, following the
# fine-tuning example at https://keras.io/api/applications/
base_model2 = InceptionV3(weights='imagenet', include_top=False)
model2 = _add_classification_head(base_model2)
# Print the layer indices/names so the freeze boundary can be checked.
for i, layer in enumerate(base_model2.layers):
    print(i, layer.name)
for layer in model2.layers[:249]:
    layer.trainable = False
for layer in model2.layers[249:]:
    layer.trainable = True

# model3: MobileNetV2 used as a frozen feature extractor.
# (A VGG16-based model used to be built here under the same `model3` name and
# was immediately overwritten by this one, so it never reached the ensemble;
# it has been removed to avoid loading weights that are never used.)
base_model3 = MobileNetV2(weights='imagenet', include_top=False)
model3 = _add_classification_head(base_model3)
for layer in base_model3.layers:
    layer.trainable = False

# model4: Google's Vision Transformer (ViT-B/32) via the vit_keras package.
# The code was inspired from this example with 11 classes: https://www.kaggle.com/raufmomin/vision-transformer-vit-fine-tuning
from vit_keras import vit

IMAGE_SIZE = 224
base_model4 = vit.vit_b32(
    image_size=IMAGE_SIZE,
    activation='softmax',
    pretrained=True,
    include_top=False,
    pretrained_top=False,
    classes=NUM_CLASSES)

x = base_model4.output
x = Flatten()(x)
x = BatchNormalization()(x)
# NOTE(review): the 11 units look like a leftover from the 11-class Kaggle
# example; confirm whether an 11-unit bottleneck before a 37-class output is
# intended.
x = Dense(11, activation=tfa.activations.gelu)(x)
x = BatchNormalization()(x)
predictions = Dense(NUM_CLASSES, activation='softmax')(x)
model4 = Model(inputs=base_model4.input, outputs=predictions)


In [None]:
# Build a single 'ensemble' model that feeds the same input image to every
# member model and averages their softmax outputs.
models = [model1, model2, model3, model4]
# Keras image tensors are ordered (height, width, channels).
model_input = Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3))
model_outputs = [model(model_input) for model in models]
ensemble_output = Average()(model_outputs)  # Average the output of all models

ensemble_model = Model(inputs=model_input, outputs=ensemble_output, name='ensemble')

# The best model is saved into a 'models' folder relative to the working
# directory; create the folder up front so saving cannot fail on a missing path.
filepath = "models/ensembleaugre16.h5"
os.makedirs(os.path.dirname(filepath), exist_ok=True)

# Keep only the weights with the lowest validation loss seen so far.
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')

# Stop training if the validation loss has not improved for 3 consecutive epochs.
early_stopping_monitor = EarlyStopping(monitor='val_loss', patience=3)
callbacks_list = [checkpoint, early_stopping_monitor]

# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
opt = Adam(learning_rate=0.0001)
ensemble_model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['acc'])


In [None]:
# Upper bound on the number of epochs; early stopping usually ends training sooner.
EPOCHS = 100

# len(generator) is the number of batches needed to cover the whole split,
# including the final partial batch. Using `samples // BATCH_SIZE` instead
# would silently skip up to BATCH_SIZE - 1 images per epoch and evaluates to
# 0 steps when a split holds fewer than BATCH_SIZE images.
history = ensemble_model.fit(
    train_gen,  # Will be faster if you don't have the augmentation
    steps_per_epoch=len(train_gen),
    validation_data=val_gen,
    validation_steps=len(val_gen),
    epochs=EPOCHS,
    shuffle=True,
    callbacks=callbacks_list)

In [None]:
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix

# Predict on the WHOLE test set. `test_gen` was created with shuffle=False, so
# the rows of `Y_pred` line up with `test_gen.classes`. Evaluating on a single
# batch would only cover the first BATCH_SIZE images, which (being unshuffled)
# all come from the first class folder(s).
test_gen.reset()  # make sure iteration starts at the first batch
# `steps` must be passed by keyword: the second positional argument of
# Model.predict is `batch_size`, not the number of steps.
Y_pred = ensemble_model.predict(test_gen, steps=len(test_gen))
print(Y_pred)

# Predicted class index = position of the highest averaged softmax score.
y_pred = np.argmax(Y_pred, axis=1)
print(y_pred)

test_labels = test_gen.classes
test_pred = y_pred

test_acc = np.mean(test_labels == test_pred)
print('Accuracy: %.3f' % test_acc)

# Macro averaging gives every class the same weight regardless of its size.
print(precision_score(test_gen.classes, y_pred, average="macro"))
print(recall_score(test_gen.classes, y_pred, average="macro"))
print(f1_score(test_gen.classes, y_pred, average="macro"))

A full explanation of the model training code and the evaluation metrics is given in file 8, so check that one out first!