In [None]:
# Imports
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
from tensorflow import keras
from tensorflow.keras import layers, models, optimizers
from sklearn.model_selection import train_test_split

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Input Pipeline

In [None]:
# Root folder of the dataset; every sub-folder is searched for images.
fish_dir = 'Fish_Dataset'

path, label = [], []

# Collect every .png whose containing directory path does not mention 'GT';
# the image's label is the name of the folder that directly contains it.
for dir_name, _, filenames in os.walk(fish_dir):
    if 'GT' in dir_name:
        continue
    for filename in filenames:
        if not filename.endswith('.png'):
            continue
        label.append(dir_name.split(os.sep)[-1])
        path.append(os.path.join(dir_name, filename))

# One row per image: where it lives and which class it belongs to.
data = pd.DataFrame({'path': path, 'label': label})

In [None]:
# Summarize the path/label table: column dtypes, non-null counts, memory use.
data.info()

In [None]:
# Preview the first rows of the path/label table.
data.head()

In [None]:
data['label'].unique()  # Distinct class names found in the "label" column

In [None]:
# Number of images per class, smallest class first.
data['label'].value_counts(ascending=True)

In [None]:
# Show the first image of each class in a 3x3 grid of subplots.
plt.figure(figsize=(15,12))
for position, unique_label in enumerate(data['label'].unique(), start=1):
    # First row of this class, 'path' column (column 0 of `data`).
    first_image_path = data.loc[data['label'] == unique_label, 'path'].iloc[0]
    plt.subplot(3, 3, position)
    plt.imshow(plt.imread(first_image_path))
    plt.title(unique_label)
    plt.axis('off')

Divide the dataset into training, validation, and test sets

In [None]:
# Train / validation / test split.
# Step 1 holds out 30% of all images for testing; step 2 takes 20% of the
# remaining 70% for validation. Both steps use the same fixed seed so the
# split is reproducible.
SPLIT_SEED = 30
x_train_full, x_test = train_test_split(
    data, test_size=0.3, shuffle=True, random_state=SPLIT_SEED
)
x_train, x_val = train_test_split(
    x_train_full, test_size=0.2, random_state=SPLIT_SEED
)

In [None]:
# Report the (rows, columns) shape of each split.
for caption, frame in (
    ("Shape of training data", x_train),
    ("Shape of test data", x_test),
    ("Shape of validation data", x_val),
):
    print(caption, frame.shape)

In [None]:
# Every image is resized to IMG_HEIGHT x IMG_WIDTH pixels when it is loaded.
IMG_HEIGHT, IMG_WIDTH = 224, 224
# Number of images per batch produced by the data iterators.
BATCH_SIZE = 32

In [None]:
# The nine class names, spelled exactly as the dataset's folder names.
labels = [
    'Black Sea Sprat',
    'Gilt-Head Bream',
    'Hourse Mackerel',
    'Red Mullet',
    'Red Sea Bream',
    'Sea Bass',
    'Shrimp',
    'Striped Red Mullet',
    'Trout',
]
# De-duplicated and sorted, so a class's index is its position in this list.
unique_labels = sorted({*labels})

In [None]:
# Training-time generator: scales pixel values by 1/255 and applies random
# geometric and brightness augmentation to every image it yields.
image_data_generator = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
    brightness_range=(0.8, 1.2)
)

# Evaluation-time generator: rescaling only. Validation and test images must
# not be randomly augmented, otherwise the reported metrics change from run to
# run and no longer describe performance on the real images.
eval_data_generator = ImageDataGenerator(rescale=1./255)

# Arguments shared by all three iterators. The fixed class order is passed via
# `classes` (the documented flow_from_dataframe parameter) so that class index
# i always means unique_labels[i] in every split.
flow_kwargs = dict(
    x_col='path',
    y_col='label',
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=BATCH_SIZE,
    classes=unique_labels,
)

# Only the training iterator shuffles; val/test keep dataframe order so that
# predictions can be aligned with the true labels afterwards.
train = image_data_generator.flow_from_dataframe(dataframe=x_train, shuffle=True, **flow_kwargs)
test = eval_data_generator.flow_from_dataframe(dataframe=x_test, shuffle=False, **flow_kwargs)
val = eval_data_generator.flow_from_dataframe(dataframe=x_val, shuffle=False, **flow_kwargs)

# Image classification using Multi Layer Perceptron (MLP)

Create the model

In [None]:
# Fully connected baseline: the image is flattened into one long vector and
# passed through three hidden dense layers before the 9-way softmax output.
mlp_model = tf.keras.models.Sequential([
    # Input layer
    tf.keras.layers.Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    # Flatten layer
    tf.keras.layers.Flatten(),
    # Hidden layer 1 (256 units, relu), followed by dropout to reduce overfitting
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.4),
    # Hidden layers 2 and 3 (256 and 128 units, relu)
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    # Output layer with 9 neurons and softmax activation function
    tf.keras.layers.Dense(9, activation='softmax'),
])


In [None]:
# Print the MLP's layers, output shapes and parameter counts.
mlp_model.summary()

Train the model

In [None]:
# Configure training: RMSprop on categorical cross-entropy (the labels are
# one-hot encoded), tracking accuracy under the metric name 'acc'.
mlp_model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['acc'],
)

# Train for 5 epochs, evaluating on the validation iterator after each epoch.
mlp_model.fit(train, epochs=5, validation_data=val)

Testing result

In [None]:
# evaluate() returns [loss, acc] in the order set up by compile().
results = mlp_model.evaluate(test)
test_loss, test_accuracy = results[0], results[1]
print("Test Loss: {:.5f}".format(test_loss))
print("Test Accuracy: {:.2f}%".format(test_accuracy * 100))

In [None]:
# Class probabilities for every test image, reduced to the index of the most
# probable class per row.
class_probabilities = mlp_model.predict(test)
pred = np.argmax(class_probabilities, axis=1)

In [None]:
# True class indices for the test set, in the order the iterator reads files.
# `test` was created with shuffle=False, so this order lines up with the rows
# returned by mlp_model.predict(test).
# The iterator's `classes` attribute is read directly instead of looping over
# `test`: the iterator yields NumPy batches (which have no `.numpy()` method),
# a plain `for` loop over a Keras image iterator never terminates on its own,
# and a loop variable named `labels` would overwrite the class-name list.
y_true = np.array(test.classes)

In [None]:
# Confusion matrix of the MLP predictions: rows are true classes, columns are
# predicted classes.
cm = confusion_matrix(y_true, pred)

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Tick labels: the distinct class names of the test split, in sorted order.
class_names = sorted(x_test['label'].unique())
disp = ConfusionMatrixDisplay(cm, display_labels=class_names)
disp.plot(cmap=plt.cm.Blues)

plt.title("Confusion Matrix")
# Slant the x tick labels and right-align them so long class names stay legible.
plt.xticks(rotation=45, ha='right')
# Recompute margins so the rotated labels are not clipped.
plt.tight_layout()
plt.show()

In [None]:
from sklearn.metrics import classification_report

# Pair each test image's true class index with the MLP's predicted index.
pred_df = pd.DataFrame({'label': y_true, 'pred': pred})

print("Classification Report:")
# zero_division=0 reports 0 for any metric whose denominator is zero
# (e.g. precision of a class that was never predicted).
report_text = classification_report(
    pred_df['label'],
    pred_df['pred'],
    target_names=class_names,
    zero_division=0,
)
print(report_text)

Flattening the image into a 1D vector discards its spatial structure: a lot of information is lost, and the network cannot relate neighbouring pixel values to each other when it looks for patterns. That is why an MLP gets very poor accuracy on a problem like this.

Why?

1. Loss of information

* When we flatten the image into a 1D vector, the pixel values that represent the fish occupy particular positions in that vector — say, those corresponding to the left side of the image. If a new image shows the same fish at a different location, different neurons have to fire to recognize it, so the network has no idea that it is the same fish. Why, then, does an MLP do better on the MNIST dataset? Because MNIST images are well prepared for this task (the digits are centred and size-normalized). Here, the MLP will not learn the shape of the fish.

2. Very large number of parameters

* Another problem with the MLP is that it is built from fully connected layers, where every node in a layer is connected to all nodes of the previous layer and all nodes of the next layer. Even this simple network has more than 38 million parameters to learn (the first dense layer alone connects 224 × 224 × 3 = 150,528 inputs to 256 units). With a more complex network and larger images we would end up with billions of parameters to train, which is very computationally expensive.

Next, we will use a convolutional neural network to train the classifier.

# Image classification using Convolutional Neural Networks (CNN)

Building the model

In [None]:
# Building the model
cnn_model = tf.keras.models.Sequential()
cnn_model.add(tf.keras.layers.Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3)))

#----------------------------------------------------------------------------------------------
# Five identical convolutional stages that differ only in the number of filters.
# Each stage: Conv2D with (3, 3) filters, strides = 1 and relu activation,
# then batch normalization, then (2, 2) max pooling.
for n_filters in (32, 64, 128, 128, 256):
    cnn_model.add(tf.keras.layers.Conv2D(n_filters, kernel_size=(3, 3), strides=1,
                                        activation='relu'))
    cnn_model.add(tf.keras.layers.BatchNormalization())
    cnn_model.add(tf.keras.layers.MaxPool2D(pool_size=(2,2)))

#----------------------------------------------------------------------------------------------
# Classification head.
head_layers = (
    # Global Average Pooling
    tf.keras.layers.GlobalAveragePooling2D(),
    # Fully connected layer with 256 units and relu activation
    tf.keras.layers.Dense(256, activation='relu'),
    # Dropout layer to lower the overfitting with dropout rate of 0.4
    tf.keras.layers.Dropout(0.4),
    # Fully connected layer with 9 units and softmax activation
    tf.keras.layers.Dense(9, activation='softmax'),
)
for head_layer in head_layers:
    cnn_model.add(head_layer)

In [None]:
# Print the CNN's layers, output shapes and parameter counts.
cnn_model.summary()

In [None]:
# Same training configuration as the MLP: RMSprop on categorical
# cross-entropy, with accuracy tracked under the metric name 'acc'.
cnn_model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['acc'],
)

In [None]:
# Train the CNN for 5 epochs, validating after each one; the returned History
# object holds the per-epoch metrics.
history = cnn_model.fit(train, validation_data=val, epochs=5)

In [None]:
# Per-epoch accuracy recorded by fit(); the keys follow the metric name 'acc'
# passed to compile().
train_acc = history.history['acc']
val_acc = history.history['val_acc']

# Training vs. validation accuracy across epochs.
plt.plot(history.epoch, train_acc, label='Training Accuracy')
plt.plot(history.epoch, val_acc, label='Validation Accuracy')
# Label the axes and title the figure so it can be read on its own.
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('CNN training vs. validation accuracy')
plt.grid(True)
plt.legend()
# Render the figure explicitly so the cell does not echo the Legend repr.
plt.show()

In [None]:
# evaluate() returns [loss, acc] in the order set up by compile().
results = cnn_model.evaluate(test)
cnn_test_loss, cnn_test_accuracy = results[0], results[1]
print("Test Loss: {:.5f}".format(cnn_test_loss))
print("Test Accuracy: {:.2f}%".format(cnn_test_accuracy * 100))

In [None]:
# Class probabilities for every test image, reduced to the index of the most
# probable class per row.
cnn_probabilities = cnn_model.predict(test)
pred = np.argmax(cnn_probabilities, axis=1)

In [None]:
# True class indices for the test set, in the order the iterator reads files.
# `test` was created with shuffle=False, so this order lines up with the rows
# returned by cnn_model.predict(test).
# The iterator's `classes` attribute is read directly instead of looping over
# `test`: the iterator yields NumPy batches (which have no `.numpy()` method),
# a plain `for` loop over a Keras image iterator never terminates on its own,
# and a loop variable named `labels` would overwrite the class-name list.
y_true = np.array(test.classes)

In [None]:
# Confusion matrix of the CNN predictions: rows are true classes, columns are
# predicted classes.
cm = confusion_matrix(y_true, pred)

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Tick labels: the distinct class names of the test split, in sorted order.
class_names = sorted(x_test['label'].unique())
disp = ConfusionMatrixDisplay(cm, display_labels=class_names)
disp.plot(cmap=plt.cm.Blues)

plt.title("Confusion Matrix")
# Slant the x tick labels and right-align them so long class names stay legible.
plt.xticks(rotation=45, ha='right')
# Recompute margins so the rotated labels are not clipped.
plt.tight_layout()
plt.show()

In [None]:
from sklearn.metrics import classification_report

# Pair each test image's true class index with the CNN's predicted index.
pred_df = pd.DataFrame({
    'label': y_true,
    'pred': pred
})

print("Classification Report:")
# zero_division=0 matches the MLP report: a class that is never predicted gets
# a metric of 0 instead of triggering an undefined-metric warning.
print(classification_report(pred_df['label'], pred_df['pred'], target_names=class_names, zero_division=0))