### Loading necessary libraries

In [117]:
import os
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.image import imread
import seaborn as sns
from PIL import Image
import glob

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Dropout
from sklearn.metrics import classification_report

from tensorflow.keras.callbacks import EarlyStopping

import joblib

### Defining files' path

In [10]:
data_dir = '../data/'
model_dir = '../model/'

dir_ = os.listdir(data_dir)

model_file = model_dir + 'CNN.joblib'

In [11]:
dir_

['valid', 'train', 'test', 'images2predict']

### Retrieving directories list

In [15]:
all_dir = dict.fromkeys(dir_)

for d in dir_ :
    all_dir[d] = glob.glob(data_dir+d+'/*')

### Retrieving data files

In [80]:
files = dict.fromkeys(dir_)

for key in dict.fromkeys(dir_) :
    files[key] = []
    
    for d in range(len(all_dir[key])) :

        if key != 'images2predict' :
            files[key].extend(glob.glob(all_dir[key][d]+'/*.jpg'))
        else :
            files[key].append(all_dir[key][d])


### Investigate dataset resolution distribution

In [84]:
for key in dict.fromkeys(dir_) :
    # Empty shape list
    shape = []

    # Empty image list
    image_un = []

    for file in files[key] :
        # Read image file
        image = plt.imread(file)

        # Add image to image list
        image_un.append(image)

        shape.append(image.shape)

    # Sum the shape
    shape_sum = tuple(map(sum, tuple(zip(*shape)))) 
    shape_median = tuple(np.percentile(dim, 50) for dim in zip(*shape))
    shape_per25 = tuple(np.percentile(dim, 25) for dim in zip(*shape))
    shape_per75 = tuple(np.percentile(dim, 75) for dim in zip(*shape))
    shape_per90 = tuple(np.percentile(dim, 90) for dim in zip(*shape))
    shape_min = tuple(map(min, tuple(zip(*shape))))
    shape_max = tuple(map(max, tuple(zip(*shape))))

    # Calcualting shape average
    shape_avg = tuple( i // len(shape) for i in shape_sum)

    # Print results
    print(f'The average dimensions for images in the {key} folder is {shape_avg}')
    print(f'The maximum dimensions for images in the {key} folder is {shape_max}')
    print(f'The mimimum dimensions for images in the {key} folder is {shape_min}')
    print(f'The median dimensions for images in the {key} folder is {shape_median}')
    print(f'The 25th percentile dimensions for images in the {key} folder is {shape_per25}')
    print(f'The 75th percentile dimensions for images in the {key} folder is {shape_per75}')
    print(f'The 90th percentile dimensions for images in the {key} folder is {shape_per90}\n\n')

The average dimensions for images in the valid folder is (224, 224, 3)
The maximum dimensions for images in the valid folder is (224, 224, 3)
The mimimum dimensions for images in the valid folder is (224, 224, 3)
The median dimensions for images in the valid folder is (np.float64(224.0), np.float64(224.0), np.float64(3.0))
The 25th percentile dimensions for images in the valid folder is (np.float64(224.0), np.float64(224.0), np.float64(3.0))
The 75th percentile dimensions for images in the valid folder is (np.float64(224.0), np.float64(224.0), np.float64(3.0))
The 90th percentile dimensions for images in the valid folder is (np.float64(224.0), np.float64(224.0), np.float64(3.0))


The average dimensions for images in the train folder is (224, 224, 3)
The maximum dimensions for images in the train folder is (224, 224, 3)
The mimimum dimensions for images in the train folder is (224, 224, 3)
The median dimensions for images in the train folder is (np.float64(224.0), np.float64(224.0), np

### Defining the image resolution

In [85]:
res = (224, 224)

### Image Augmentation and Scaling

In [86]:
img_gen = ImageDataGenerator(rescale=1./255)

### Apply Image augmentation and scaling to dataset

In [87]:
train_data = img_gen.flow_from_directory(data_dir+'train',
                                         target_size=res, 
                                         batch_size=64, 
                                         class_mode='categorical',
                                         shuffle=True)

train_data.class_indices

Found 3208 images belonging to 20 classes.


{'ABBOTTS BABBLER': 0,
 'ABBOTTS BOOBY': 1,
 'ABYSSINIAN GROUND HORNBILL': 2,
 'AFRICAN CROWNED CRANE': 3,
 'AFRICAN EMERALD CUCKOO': 4,
 'AFRICAN FIREFINCH': 5,
 'AFRICAN OYSTER CATCHER': 6,
 'AFRICAN PIED HORNBILL': 7,
 'AFRICAN PYGMY GOOSE': 8,
 'ALBATROSS': 9,
 'ALBERTS TOWHEE': 10,
 'ALEXANDRINE PARAKEET': 11,
 'ALPINE CHOUGH': 12,
 'ALTAMIRA YELLOWTHROAT': 13,
 'AMERICAN AVOCET': 14,
 'AMERICAN BITTERN': 15,
 'AMERICAN COOT': 16,
 'AMERICAN FLAMINGO': 17,
 'AMERICAN GOLDFINCH': 18,
 'AMERICAN KESTREL': 19}

In [112]:
test_data = img_gen.flow_from_directory(data_dir+'test',
                                       target_size=res, 
                                       batch_size=1, 
                                       shuffle=False,
                                       class_mode='categorical')

test_data.class_indices

Found 100 images belonging to 20 classes.


{'ABBOTTS BABBLER': 0,
 'ABBOTTS BOOBY': 1,
 'ABYSSINIAN GROUND HORNBILL': 2,
 'AFRICAN CROWNED CRANE': 3,
 'AFRICAN EMERALD CUCKOO': 4,
 'AFRICAN FIREFINCH': 5,
 'AFRICAN OYSTER CATCHER': 6,
 'AFRICAN PIED HORNBILL': 7,
 'AFRICAN PYGMY GOOSE': 8,
 'ALBATROSS': 9,
 'ALBERTS TOWHEE': 10,
 'ALEXANDRINE PARAKEET': 11,
 'ALPINE CHOUGH': 12,
 'ALTAMIRA YELLOWTHROAT': 13,
 'AMERICAN AVOCET': 14,
 'AMERICAN BITTERN': 15,
 'AMERICAN COOT': 16,
 'AMERICAN FLAMINGO': 17,
 'AMERICAN GOLDFINCH': 18,
 'AMERICAN KESTREL': 19}

In [91]:
test_data = img_gen.flow_from_directory(data_dir+'valid',
                                       target_size=res, 
                                       batch_size=1, 
                                       shuffle=False,
                                       class_mode='categorical')

test_data.class_indices

Found 100 images belonging to 20 classes.


{'ABBOTTS BABBLER': 0,
 'ABBOTTS BOOBY': 1,
 'ABYSSINIAN GROUND HORNBILL': 2,
 'AFRICAN CROWNED CRANE': 3,
 'AFRICAN EMERALD CUCKOO': 4,
 'AFRICAN FIREFINCH': 5,
 'AFRICAN OYSTER CATCHER': 6,
 'AFRICAN PIED HORNBILL': 7,
 'AFRICAN PYGMY GOOSE': 8,
 'ALBATROSS': 9,
 'ALBERTS TOWHEE': 10,
 'ALEXANDRINE PARAKEET': 11,
 'ALPINE CHOUGH': 12,
 'ALTAMIRA YELLOWTHROAT': 13,
 'AMERICAN AVOCET': 14,
 'AMERICAN BITTERN': 15,
 'AMERICAN COOT': 16,
 'AMERICAN FLAMINGO': 17,
 'AMERICAN GOLDFINCH': 18,
 'AMERICAN KESTREL': 19}

In [103]:
pred_data = img_gen.flow_from_directory(data_dir+'images2predict',
                                       target_size=res, 
                                       batch_size=1, 
                                       shuffle=False,
                                       class_mode=None)

pred_data.class_indices

Found 6 images belonging to 1 classes.


{'dummy': 0}

### Load model if it already exists

In [104]:
if os.path.exists(model_file) :
    cnn = joblib.load('../model/CNN.joblib')
    img_gen = joblib.load('../model/CNN_img_gen.joblib')
    print('Model exists')

### Define model if it does not already exist

In [119]:
if not os.path.exists(model_file) :
    print('Model does not exists')

    cnn = Sequential()

    cnn.add(Conv2D(filters=8,kernel_size=(5,5), padding='valid',activation='relu',input_shape=(res[0],res[1],3)))

    cnn.add(MaxPool2D(strides=2))

    cnn.add(Conv2D(filters=16,kernel_size=(3,3), padding='same',activation='relu'))

    cnn.add(MaxPool2D(strides=1))

    cnn.add(Flatten())
    cnn.add(Dense(8, activation='relu'))

    cnn.add(Dropout(rate= 0.1)) 

    cnn.add(Dense(units=len(train_data.class_indices), activation='softmax'))

    cnn.compile(optimizer='adam',metrics=['accuracy'],loss='categorical_crossentropy')
    

### Print model summary

In [120]:
cnn.summary()

### Train the model on the dataset

In [121]:
early_stop = EarlyStopping(monitor='val_loss', patience =1)

cnn.fit(train_data, epochs=10, validation_data=test_data, callbacks=[early_stop])

Epoch 1/10
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 1s/step - accuracy: 0.0570 - loss: 2.9940 - val_accuracy: 0.0500 - val_loss: 2.9960
Epoch 2/10
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 1s/step - accuracy: 0.0601 - loss: 2.9938 - val_accuracy: 0.0500 - val_loss: 2.9961


<keras.src.callbacks.history.History at 0x76d8fc70ddf0>

### Save model

In [118]:
joblib.dump(cnn, "../model/CNN.joblib")
joblib.dump(scaler, "../model/CNN_scaler.joblib")

NameError: name 'scaler' is not defined