### ***CNN FISH CLASSIFIER V_2***
* The dataset used is [A Large Scale Fish Dataset](https://www.kaggle.com/datasets/crowww/a-large-scale-fish-dataset), uploaded by Oğuzhan Ulucan on Kaggle.



### IMPORTS

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for folder, _, names_in_folder in os.walk('/kaggle/input'):
    for name in names_in_folder:
        full_path = os.path.join(folder, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# preventing unnecessary warnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# TensorFlow and tf.keras
import tensorflow as tf

from pathlib import Path

#import useful module for keras library
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from keras.preprocessing.image import ImageDataGenerator

# get modules from sklearn library
from sklearn.preprocessing import StandardScaler 
from sklearn.model_selection import train_test_split 
from sklearn.metrics import classification_report 

#import libraries
import matplotlib.pyplot as plt
import seaborn as sns
import random

## LOADING DATASET

In [None]:
# Root folder of the dataset; each image sits in a sub-folder whose name is used as its label.
file = Path("../input/a-large-scale-fish-dataset/Fish_Dataset/Fish_Dataset")

# Collect every PNG below the root (recursive search).
png_paths = list(file.glob(r"**/*.png"))

# The label of an image is the name of the directory that directly contains it.
File_Path = pd.Series(png_paths).astype(str)
Labels = pd.Series([png.parent.name for png in png_paths])

# Put paths and labels side by side in one two-column frame.
df = pd.concat([File_Path, Labels], axis=1)
df.columns = ['image', 'label']

# Keep only the rows whose label does not end with "GT", then renumber the index from 0.
keep_row = df["label"].map(lambda name: not name.endswith("GT"))
df = df[keep_row].reset_index(drop=True)

df.head()  # preview the first 5 rows of the dataset

### DATA VISUALIZATION

In [None]:
# Show the first 12 images of the dataset in a 3 x 4 grid, each titled with its label.
no_ticks = {'xticks': [], 'yticks': []}
fig, axes = plt.subplots(3, 4, figsize=(14, 8), subplot_kw=no_ticks)

for i, ax in enumerate(axes.flat):
    # df was re-indexed from 0, so row label i is also the i-th row.
    picture = plt.imread(df.loc[i, 'image'])
    ax.imshow(picture)
    ax.set_title(df.loc[i, 'label'])

plt.tight_layout()
plt.show()

### TRAIN TEST SPLIT

In [None]:
# First cut: hold out 30% of all rows as the test set.
Train_set, test_set = train_test_split(df, random_state=42, test_size=0.3)

# Second cut: reserve 20% of the remaining rows for validation during training.
train_set, val_set = train_test_split(Train_set, random_state=42, test_size=0.2)

# Report (rows, columns) of each subset in the order train, test, validation.
for subset in (train_set, test_set, val_set):
    print(subset.shape)

In [None]:
# Scale pixel values to [0, 1] only. Do not also pass a `preprocessing_function`
# such as mobilenet_v2.preprocess_input: the generator would apply it and then
# multiply by `rescale` as well, normalising the pixels twice and squeezing them
# into a tiny range around zero. The network built below is a plain CNN trained
# from scratch, so a simple 1/255 rescale is all it needs.
img_gen = ImageDataGenerator(rescale=1/255)

# Arguments shared by the three generators. flow_from_dataframe expects a
# DataFrame (not an array) holding file paths and labels.
flow_kwargs = dict(
    x_col='image',               # column holding the image file paths
    y_col='label',               # column holding the target class names
    target_size=(224, 224),      # every image is resized to the model's input size
    color_mode='rgb',
    class_mode='categorical',    # one-hot targets; must match the categorical_crossentropy loss
    batch_size=32,
)

# Training data is reshuffled every epoch so the batches differ between epochs;
# the fixed seed keeps runs reproducible.
train = img_gen.flow_from_dataframe(dataframe=train_set,
                                    shuffle=True,
                                    seed=42,
                                    **flow_kwargs)

# Evaluation data must keep its order so that predictions line up row by row
# with the labels stored in test_set / val_set.
test = img_gen.flow_from_dataframe(dataframe=test_set,
                                   shuffle=False,
                                   **flow_kwargs)

val = img_gen.flow_from_dataframe(dataframe=val_set,
                                  shuffle=False,
                                  **flow_kwargs)

### Building Sequential CNN model 

In [None]:
# Shape of one input image: height, width, colour channels.
input_shape = (224, 224, 3)

# Sequential CNN: one conv/pool stage, then a small dense classifier head.
model = tf.keras.models.Sequential([
    # conv-pool stage 1
    tf.keras.layers.Conv2D(
        filters=32,
        kernel_size=(3, 3),
        strides=(1, 1),
        padding='valid',
        activation='relu',
        input_shape=input_shape,
    ),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),

    # turn the feature maps into a flat vector for the dense layers
    tf.keras.layers.Flatten(),

    # dense layers with dropout in between
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(rate=0.3),
    tf.keras.layers.Dense(64, activation='relu'),

    # output layer: 9 units with softmax because this is a multiclass problem
    # (sigmoid is for binary outputs; relu should not be used in the output layer)
    tf.keras.layers.Dense(9, activation='softmax'),
])

# print the layer-by-layer summary of the model
model.summary()

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss
# (the generators yield 'categorical' targets), and accuracy reported each epoch.
model.compile(
    loss="categorical_crossentropy",
    optimizer='adam',
    metrics=['accuracy'],
)


In [None]:
# Train on the training generator for 15 epochs, evaluating on the validation
# generator after each one; the returned object records the per-epoch metrics.
history = model.fit(
    train,
    epochs=15,
    validation_data=val,
    verbose=1,
)

In [None]:
# Per-epoch metrics recorded by model.fit, for the training and validation sets.
metrics_log = history.history
acc, val_acc = metrics_log['accuracy'], metrics_log['val_accuracy']
loss, val_loss = metrics_log['loss'], metrics_log['val_loss']

# 8 x 8 figure; draw into the first panel of a 2-row, 1-column grid.
plt.figure(figsize=(8, 8))
plt.subplot(2, 1, 1)

# One curve per data split, one point per epoch.
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')

plt.legend(loc='lower right')
plt.ylabel('Accuracy')

# Keep the automatic lower bound but pin the top of the axis at 1.
lowest_shown = min(plt.ylim())
plt.ylim([lowest_shown, 1])
plt.title('Training and Validation Accuracy')

In [None]:
# Plot the training and validation loss recorded for every epoch.
plt.figure(figsize=(8, 8)) # set figure size for the plot generated
plt.subplot(2, 1, 1) # first panel of a 2-row, 1-column grid

plt.plot(loss, label='Training Loss') # loss on the training set per epoch
plt.plot(val_loss, label='Validation Loss') # loss on the validation set per epoch

plt.legend(loc='lower right')
plt.ylabel('Loss') # label name for y axis
# Cross-entropy loss is non-negative but not bounded by 1, so anchor the axis
# at 0 and leave the upper limit automatic. A fixed upper limit of 1 would
# clip the curves (or flip the axis) whenever the loss is larger than 1.
plt.ylim(bottom=0)
plt.title('Training and Validation Loss') # set title for the plot

In [None]:
# The model outputs one probability per class for every test image;
# argmax over the class axis keeps the index of the most probable class.
pred = np.argmax(model.predict(test), axis=1)

# class_indices maps class name -> index; invert it to translate the
# predicted indices back into class names.
labels = {index: name for name, index in train.class_indices.items()}
pred2 = [labels[class_index] for class_index in pred]

In [None]:
from sklearn.metrics import classification_report, confusion_matrix # import metrics for evaluation

# Expected class names for the test images.
y_test = test_set['label']

# Per-class precision / recall / f1 of the predicted names against the expected ones.
report = classification_report(y_test, pred2)
print(report)

In [None]:
# Show the first 15 test images in a 3 x 5 grid, each titled with its true and
# predicted label. The title is blue when the prediction is correct, red otherwise.
fig, axes = plt.subplots(nrows=3, ncols=5, figsize=(15, 10),
                        subplot_kw={'xticks': [], 'yticks': []})

for i, ax in enumerate(axes.flat):
    true_label = test_set.label.iloc[i]
    predicted_label = pred2[i]
    # The colour has to be decided per image, inside the loop; computing it once
    # before the loop would give every title the same colour.
    color = "blue" if predicted_label == true_label else "red"
    ax.imshow(plt.imread(test_set.image.iloc[i]))
    ax.set_title(f"True: {true_label}\nPredicted: {predicted_label}", color=color)

plt.subplots_adjust(hspace = 0.3)
plt.suptitle("Model predictions (blue: correct, red: incorrect)",y=0.98)
plt.tight_layout()
plt.show()