This project was done in collaboration with Lars Stegemüller.
The paper can be found [here](https://www.sciencedirect.com/science/article/pii/S0960852424016808).
The code and data can be found [here](https://github.com/stegemlar/microalgae-image/tree/main).

In [None]:
import h5py 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import random
import os
import glob
import plotly
import plotly.express as ex
import plotly.graph_objs as go
import matplotlib.pyplot as plt
%matplotlib inline

import tensorflow as tf
from tensorflow import keras
from keras.preprocessing.image import ImageDataGenerator,load_img
from keras.applications.resnet50 import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input


from tensorflow.keras.optimizers import Adam, SGD
from keras.models import Model,load_model
from tensorflow.keras.layers import Dense, Dropout,Conv2D,Flatten,MaxPooling2D
from keras import backend as K
from tensorflow.keras.layers.experimental import preprocessing
import visualkeras
from keras.utils import plot_model
from keras_tuner.tuners import GridSearch
from keras_tuner import HyperModel
from keras_tuner.tuners import Hyperband

from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
import seaborn as sns

In [2]:
## The input images come in different sizes, so every image is resized to
## a fixed im_size x im_size (64 x 64) resolution when it is loaded.
im_size = 64
# Number of images per batch for the training and test loaders.
BATCH_SIZE = 32
# Number of classes; used as the width of the model's softmax output layer.
NUM_CLASSES = 5

In [3]:
# Image generator with augmentations, used by the data loaders below.
# Pixel values are rescaled by 1/255 and images are randomly flipped
# (horizontally and vertically), rotated and zoomed.
# NOTE(review): Keras documents rotation_range as a range in degrees, so 0.4
# amounts to an almost imperceptible rotation -- confirm this is intended.
augmentation_settings = {
    "rescale": 1/255.,
    "horizontal_flip": True,
    "vertical_flip": True,
    "rotation_range": 0.4,
    "zoom_range": 0.4,
}
gen = ImageDataGenerator(**augmentation_settings)

In [None]:
# Load data here

# Placeholder directories: replace each one with the folder of that split.
# They are valid strings so the cell parses; flow_from_directory will fail
# loudly if a path has not been adjusted.
filepath_train = "path/to/train"  # TODO: define file path for the train set here
filepath_test = "path/to/test"    # TODO: define file path for the test set here
filepath_val = "path/to/val"      # TODO: define file path for the validation set here

# Options shared by all three loaders: resize to im_size x im_size,
# read images as single-channel grayscale, emit one-hot encoded labels.
flow_options = dict(
    target_size=(im_size, im_size),
    color_mode='grayscale',
    class_mode='categorical',
)

# NOTE(review): all three loaders are built from the augmenting generator
# `gen`, so the test and validation images are randomly flipped/rotated/zoomed
# as well and evaluation results can vary between runs -- confirm intended.

# Training batches.
Train_gen = gen.flow_from_directory(
    filepath_train,
    batch_size=BATCH_SIZE,
    **flow_options
)

# Test batches (used as validation_data during training).
Test_gen = gen.flow_from_directory(
    filepath_test,
    batch_size=BATCH_SIZE,
    **flow_options
)

# Validation loader: one image per batch and no shuffling, so the order of
# the yielded images matches Val_gen.classes.
Val_gen = gen.flow_from_directory(
    filepath_val,
    batch_size=1,
    shuffle=False,
    **flow_options
)

In [None]:
# Display the mapping from class name to integer label index used by the training loader.
Train_gen.class_indices

## Basic Model (Updated with optimised parameters)

In [54]:
INPUT_SHAPE = (64, 64, 1)
NUM_CLASSES = 5

# CNN classifier: two blocks of (Conv -> Conv -> MaxPool -> Dropout),
# followed by a dense head with a softmax over NUM_CLASSES outputs.
model = keras.Sequential([
    # Convolutional block 1
    Conv2D(
        filters=32,
        kernel_size=3,
        activation='relu',
        input_shape=INPUT_SHAPE
    ),
    Conv2D(32, 3, activation='relu'),
    MaxPooling2D(pool_size=2),
    Dropout(rate=0.1),
    # Convolutional block 2
    Conv2D(32, 3, activation='relu'),
    Conv2D(64, 3, activation='relu'),
    MaxPooling2D(pool_size=2),
    Dropout(rate=0.4),
    # Classification head
    Flatten(),
    Dense(units=1024, activation='relu'),
    Dropout(rate=0.45),
    Dense(NUM_CLASSES, activation='softmax'),
])

In [55]:
# Compile model
# Adam optimizer with the learning rate found during optimisation.
tuned_adam = keras.optimizers.Adam(learning_rate=0.001113421743399749)

# Categorical cross-entropy loss; accuracy is tracked as the metric.
model.compile(
    optimizer=tuned_adam,
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Only use for optimisation
### Disable the cell with Esc+R (raw) / re-enable it with Esc+Y (code)

# Continue here after model was optimised
### Check that optimal parameters were added

In [None]:
## Training
# Fit on the training batches for 200 epochs; Test_gen is passed as the
# validation data. The returned History object is kept in `hist`.
fit_options = {
    "epochs": 200,
    "validation_data": Test_gen,
}
hist = model.fit(Train_gen, **fit_options)


In [61]:
# Save the model to "best_model.keras" (path relative to the working directory).
model.save("best_model.keras")

In [None]:
# Print Train accuracy
# hist.history maps each logged metric name to its list of per-epoch values.
history_dict = hist.history
print(history_dict.keys())

# The last entry of the 'accuracy' list belongs to the final epoch.
train_acc = history_dict['accuracy']
final_train_accuracy = train_acc[-1]
print(f"Final Training Accuracy: {final_train_accuracy:.4f}")


In [None]:
# Get true labels
# Integer class index of every validation image, in the loader's file order.
true_labels = Val_gen.classes

# Load model
# load_model() needs the path of the saved model; "best_model.keras" is the
# file written by model.save(...) earlier in this notebook.
model = load_model("best_model.keras")  # Adjust file location


In [6]:
# Validation
# Rewind the loader before predicting; Val_gen is unshuffled, and the
# predictions are later compared against Val_gen.classes.
Val_gen.reset()
n_val_batches = len(Val_gen)
predictions = model.predict(Val_gen, steps=n_val_batches)

# Index of the highest-scoring class for every image.
predicted_classes = predictions.argmax(axis=1)



In [None]:
# Visualization of validation (Confusion matrix)

# Class names ordered by their integer index, so tick labels line up with
# the rows/columns of the confusion matrix.
val_class_names = sorted(Val_gen.class_indices, key=Val_gen.class_indices.get)

# Passing `labels` keeps the matrix at one row/column per class even when a
# class never occurs in the true or predicted labels; otherwise the matrix
# would be smaller than the list of tick labels.
cm = confusion_matrix(
    true_labels,
    predicted_classes,
    labels=list(range(len(val_class_names)))
)

# Row-normalise: fraction of each true class assigned to each predicted class.
# Rows without any samples get a divisor of 1 so they stay at 0 instead of NaN.
row_totals = np.sum(cm, axis=1)[:, np.newaxis]
row_totals[row_totals == 0] = 1
cm_percentage = cm / row_totals

# The heatmap is coloured by raw counts; the percentages are written on top.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(cm, annot=False, cmap="viridis",
            xticklabels=val_class_names,
            yticklabels=val_class_names, ax=ax)

# Write the row percentage into the centre of every cell.
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        # Black text on the diagonal, yellow text elsewhere.
        color = "black" if i == j else "#FCE625"
        ax.text(j + 0.5, i + 0.5, format(cm_percentage[i, j] * 100, '.1f'),
                ha="center", va="center", color=color, fontsize=14)

ax.tick_params(axis='both', which='major', labelsize=14)
ax.set_ylabel('True Label', fontsize=14)
ax.set_xlabel('Predicted Label', fontsize=14)

# Save the plot to a file before showing it, so the saved image does not
# depend on what the backend does with the figure once it has been shown.
fig.savefig('CNN_confusion.png', bbox_inches='tight')

plt.show()

In [None]:
# Print Validation accuracy
# Fraction of validation images whose predicted class equals the true label.
val_accuracy = accuracy_score(y_true=true_labels, y_pred=predicted_classes)
print(f"Validation Accuracy: {val_accuracy:.4f}")

In [None]:
# Display model structure
from IPython.display import Image

# Render the architecture diagram to a PNG file, then show that file inline.
structure_png = 'model_structure.png'
plot_model(model, to_file=structure_png, show_shapes=True, show_layer_names=True)

Image(filename=structure_png)

## Explainability (SHAP)

!pip install shap

In [None]:
import json

import numpy as np
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input

import shap


# Load the trained model and one batch of data to explain.
# "best_model.keras" is the file written by model.save(...) earlier in this
# notebook.
model = load_model("best_model.keras")  # Adjust file location

# One batch of test images (X) with their one-hot labels (y).
X, y = next(Test_gen)
print(X.shape)
print(y.shape)

# Convert the one-hot labels to integer class indices.
y_cat = np.argmax(y, axis=1)
print(y_cat)
print(y_cat.shape)

# Class names ordered by label index, so position k names model output k.
class_names = sorted(Test_gen.class_indices, key=Test_gen.class_indices.get)

In [None]:
# python function to get model output; replace this function with your own model function.
def f(x):
    """Return the model's class scores for a batch of images.

    Used as the prediction function of the SHAP explainer.

    Args:
        x: Batch of images to score, as handed over by the explainer
            (presumably a NumPy array shaped like a slice of ``X``).

    Returns:
        The output of ``model.predict`` for ``x``: one row of class scores
        per image.
    """
    # No `steps` argument: it counts batches, not samples, so len(x) would
    # request more batches than an in-memory array provides.
    # verbose=0 avoids printing a progress bar for every explainer call.
    return model.predict(x, verbose=0)

# Maskers that hide partitions of the input image. Two variants are built:
# inpainting and blurring; only the blur masker is used by the explainer below.
image_shape = X[0].shape
masker = shap.maskers.Image("inpaint_telea", image_shape)
masker_blur = shap.maskers.Image("blur(128,128)", image_shape)


# Explainer that combines the model function with the blur masker.
explainer = shap.Explainer(f, masker_blur, output_names=class_names)

# Explain every image of the batch X with max_evals=5000 evaluations of the
# underlying model to estimate the SHAP values. All model outputs are
# requested, ordered via argsort.flip.
ranked_outputs = shap.Explanation.argsort.flip[:]
shap_values = explainer(
    X[:],
    max_evals=5000,
    batch_size=50,
    outputs=ranked_outputs,
)

In [None]:
# Plot the SHAP values of the explained batch.
shap.image_plot(shap_values)

# True class indices of the explained images, followed by the
# class-name -> index mapping needed to read them.
print(y_cat)
print(Train_gen.class_indices)

In [None]:
# Repeat the explanation for the batch at index 0 of the test loader.
first_batch = Test_gen[0]
X_batch, y_batch = first_batch

# Integer class indices from the one-hot labels.
y_cat_batch = np.argmax(y_batch, axis=1)

ranked_outputs_batch = shap.Explanation.argsort.flip[:]
shap_values_batch = explainer(
    X_batch[:],
    max_evals=5000,
    batch_size=50,
    outputs=ranked_outputs_batch,
)

shap.image_plot(shap_values_batch)

# True class indices of the explained images and the name -> index mapping.
print(y_cat_batch)
print(Train_gen.class_indices)