In [1]:
#Importing Libraries
import numpy as np
import pandas as pd
from pathlib import Path
import os.path

from sklearn.model_selection import train_test_split
import tensorflow as tf

In [2]:
#converting this to path object allows us to search inside the directory
image_dir = Path('../input/a-large-scale-fish-dataset/Fish_Dataset/Fish_Dataset')

In [3]:
# Get filepaths and labels
filepaths = list(image_dir.glob(r'**/*.png'))
labels = list(map(lambda x: os.path.split(os.path.split(x)[0])[1], filepaths))

filepaths = pd.Series(filepaths, name='Filepath').astype(str)
labels = pd.Series(labels, name='Label')

# Concatenate filepaths and labels
image_df = pd.concat([filepaths, labels], axis=1)

# Drop GT images
image_df['Label'] = image_df['Label'].apply(lambda x: np.NaN if x[-2:] == 'GT' else x)
image_df = image_df.dropna(axis=0)

#NOT gonna sample as I want higher accuracy

In [4]:
train_df, test_df = train_test_split(image_df, train_size=0.7, shuffle=True, random_state=1)

## Loading the Images

In [5]:
#Using ImageDataGenerator from keras
#we don't have to load all the files at the same time
#Using generator we only load in one batch at a time, train on the batch and then recycle the memory

#We are gonna use a pre trained model MobileNet
#Because of its extensive training, ImageNet has become extremely good at extracting information from any image
train_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input, #we have to make sure that we use the preprocessing function that comes along with mobilenet
    validation_split=0.2
)

test_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input
)

#We are not loading in the images but rather just specifying how the imahes should be loaded

In [6]:
train_images = train_generator.flow_from_dataframe(
    dataframe=train_df,
    x_col='Filepath',
    y_col='Label',
    target_size=(224, 224), #MobileNet was trained on (224,224)
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    seed=42,
    subset='training'
)

val_images = train_generator.flow_from_dataframe(
    dataframe=train_df,
    x_col='Filepath',
    y_col='Label',
    target_size=(224, 224), #MobileNet was trained on (224,224)
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    seed=42,
    subset='validation'
)

test_images = test_generator.flow_from_dataframe(
    dataframe=test_df,
    x_col='Filepath',
    y_col='Label',
    target_size=(224, 224), #MobileNet was trained on (224,224)
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=False
)

Found 5040 validated image filenames belonging to 9 classes.
Found 1260 validated image filenames belonging to 9 classes.
Found 2700 validated image filenames belonging to 9 classes.


In [7]:
pretrained_model = tf.keras.applications.MobileNetV2(
    input_shape=(224,224,3), #this was the shape it was originally trained on (we will keep it that way)
    include_top=False, #this means to include original output layer for ImageNet dataset (we are keeping it false as we want to make our own predictions)
    weights='imagenet',
    pooling='avg' #this will ensure that the output is a 1D vector
)
pretrained_model.trainable = False #we are not gonna train the exisiting weights and rather use them as is

2022-03-21 12:56:58.974573: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-21 12:56:59.063413: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-21 12:56:59.064107: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-21 12:56:59.065365: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5


In [8]:
pretrained_model.summary()

Model: "mobilenetv2_1.00_224"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, 112, 112, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
bn_Conv1 (BatchNormalization)   (None, 112, 112, 32) 128         Conv1[0][0]                      
__________________________________________________________________________________________________
Conv1_relu (ReLU)               (None, 112, 112, 32) 0           bn_Conv1[0][0]                   
_______________________________________________________________________________

In [9]:
train_images.next()[1] #each label is encoded as vector

array([[0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0.

In [10]:
from keras.layers import Dense
from keras.callbacks import ModelCheckpoint,EarlyStopping,ReduceLROnPlateau

In [11]:
inputs = pretrained_model.input

#passing the output of pretrained model to Dense layer to perform classification
x = Dense(128, activation='relu')(pretrained_model.output)
#passing output of previous dense layer to next one
x = Dense(128, activation='relu')(x)
#Finally doing classification
outputs = Dense(9, activation='softmax')(x)


model = tf.keras.Model(inputs=inputs, outputs=outputs)


model.compile(
    optimizer='adam',
    loss='categorical_crossentropy', #as the labels are vectors and not integers
    metrics=['accuracy']
)

#create callback
filepath = 'best_fish_model.h5'

# filepath = 'my_best_model.hdf5'
checkpoint = ModelCheckpoint(filepath=filepath, 
                            monitor='val_loss',
                            verbose=1, 
                            save_best_only=True,
                            mode='min')
earlyStopping = EarlyStopping(monitor='val_loss', patience=3, verbose=0, restore_best_weights=True)
reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=7, verbose=1, min_delta=1e-4, mode='min')
callbacks = [checkpoint,earlyStopping,reduce_lr_loss]


history = model.fit(
    train_images,
    validation_data=val_images,
    epochs=50,
    callbacks=callbacks
)

2022-03-21 12:57:03.918686: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/50


2022-03-21 12:57:07.796980: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005



Epoch 00001: val_loss improved from inf to 0.02530, saving model to best_fish_model.h5




Epoch 2/50

Epoch 00002: val_loss improved from 0.02530 to 0.01211, saving model to best_fish_model.h5
Epoch 3/50

Epoch 00003: val_loss did not improve from 0.01211
Epoch 4/50

Epoch 00004: val_loss improved from 0.01211 to 0.01162, saving model to best_fish_model.h5
Epoch 5/50

Epoch 00005: val_loss improved from 0.01162 to 0.00536, saving model to best_fish_model.h5
Epoch 6/50

Epoch 00006: val_loss improved from 0.00536 to 0.00455, saving model to best_fish_model.h5
Epoch 7/50

Epoch 00007: val_loss improved from 0.00455 to 0.00408, saving model to best_fish_model.h5
Epoch 8/50

Epoch 00008: val_loss did not improve from 0.00408
Epoch 9/50

Epoch 00009: val_loss did not improve from 0.00408
Epoch 10/50

Epoch 00010: val_loss improved from 0.00408 to 0.00373, saving model to best_fish_model.h5
Epoch 11/50

Epoch 00011: val_loss did not improve from 0.00373
Epoch 12/50

Epoch 00012: val_loss did not improve from 0.00373
Epoch 13/50

Epoch 00013: val_loss did not improve from 0.00373


In [12]:
results = model.evaluate(test_images, verbose=0)

print("    Test Loss: {:.5f}".format(results[0]))
print("Test Accuracy: {:.2f}%".format(results[1] * 100))

    Test Loss: 0.00190
Test Accuracy: 99.96%
