In [79]:
import tensorflow as tf
from keras.layers import Dense, Conv2D, BatchNormalization, MaxPooling2D, Input, Flatten, Dropout
import numpy as np
import matplotlib.pyplot as plt
# import opendatasets as od
import os
import cv2
from sklearn.preprocessing import LabelBinarizer
import keras
from keras.models import Sequential
from keras.callbacks import EarlyStopping
import random
from keras.applications import EfficientNetB0

# Load and Process Data

Notes about data:

1) the file PARAKETT AUKLET had a few typos. In the Train and Test sets it had two spaces inbetween words, but only one in the Valid set. It should also be PARAKEET. I manually updated this in my version of the files. 

2) The valid and test datasets only have 5 birds images for each bird

In [8]:
# od.download('https://www.kaggle.com/datasets/gpiosenka/100-bird-species/data')

In [9]:
# Set file path for loading data
train_dir = 'C:/Users/bcbot/COMP_4531/Final/100-bird-species/train'
val_dir = "C:/Users/bcbot/COMP_4531/Final/100-bird-species/valid"
test_dir = "C:/Users/bcbot/COMP_4531/Final/100-bird-species/test"


In [36]:
# This code just randomly selects some number of birds (specificed by the num_birds varaible). 
# This can be adjusted to speed up training, but need to make sure the softmax layer of the model matches the num_birds

num_birds = 50

birds = [i for i in os.listdir(train_dir)]

random.seed(7284)
rand_idxes = random.sample(range(0, len(birds)), num_birds)

include = [birds[i] for i in rand_idxes]

len(include)

50

In [37]:
minB = ['', np.inf]
maxB = ['', 0]
for i in os.listdir(val_dir):
    count = 0
    sub_directory = os.path.join(val_dir, i)
    for j in sub_directory:
        count += 1
    if count < minB[1]:
        minB = [i, count]
    if count > maxB[1]:
        maxB = [i, count]

print(minB)
print(maxB)

['EMU', 57]
['NORTHERN BEARDLESS TYRANNULET', 83]


In [64]:
# Instantiate the containers for holding image and label data
train_data = []
val_data = []

# Load the first 100 files for each bird for training set
for i in include:
    count = 0
    sub_directory = os.path.join(train_dir, i)
    for j in os.listdir(sub_directory):
        count += 1
        if count > 100:
            break
        img = cv2.imread(os.path.join(sub_directory, j))
        train_data.append([img, i])

# Load first 50 files for each bird in validation set
for i in include:
    sub_directory = os.path.join(val_dir, i)
    for j in os.listdir(sub_directory):
        img = cv2.imread(os.path.join(sub_directory, j))
        val_data.append([img, i])

print(len(train_data))
print(len(val_data))

5000
250


In [65]:
# Check if all images are the same shape
# NOTE: In this particular set, all the iamges are (224, 224, 3), but when I loaded files from all 525 birds there were some that were not, 
# and those screw up the model. So we may need to reshape if we use more birds

count_t = 0
count_v = 0

for t in train_data:
    if t[0].shape != (224, 224, 3):
        count_t += 1
    
for v in val_data:
    if v[0].shape != (224, 224, 3):
        count_v += 1

print(f'There are {count_t} images of different shape in the train data.\nThere are {count_v} images of different shape in the validation data.')
    

There are 0 images of different shape in the train data.
There are 0 images of different shape in the validation data.


In [None]:
# # Reshape images of the wrong size

# for x in train_data:
#     if x[0].shape != (224, 224, 3):
#         x[0] = cv2.resize(x[0], (224, 224))

# for x in val_data:
#     if x[0].shape != (224, 224, 3):
#         x[0] = cv2.resize(x[0], (224, 224))


In [66]:
# Shuffle data

np.random.seed(7284)
np.random.shuffle(train_data)
np.random.shuffle(val_data)

In [93]:
# Preprocess training data and validation data
lb = LabelBinarizer()

X_train = []
y_train = []
for x, y in train_data:
    X_train.append(x)
    y_train.append(y)

X_train = np.array(X_train)
y_train = np.array(y_train)

y_train_vect = lb.fit_transform(y_train)

X_val = []
y_val = []
for x, y in val_data:
    X_val.append(x)
    y_val.append(y)

X_val = np.array(X_val)
y_val = np.array(y_val)

y_val_vect = lb.fit_transform(y_val)

# Load Model

I initally tried the pre-trained model, but I don't know how to change the number of outputs in the last layer, and trying to run with all 525 species was taking forever. I put some notes on that run but I think I'm just going to give up on trying to use that one. 

Notes for attempt with all 525 birds on pre-trained model:

I tried running this and it earlystopped at 11 epochs. The val_Accuracy never got better than .12 (on epoch 6) but pretty much stayed in the < .01 range. Still not sure how you're supposed to use this model.

In [None]:
# # Load pre-trained model provided by dataset authors

# model_path = 'C:/Users/bcbot/COMP_4531/Final/100-bird-species/EfficientNetB0-525-(224 X 224)- 98.97.h5'
# bmod= keras.models.load_model(model_path, custom_objects={'F1_score':'F1_score'})

# # Check the weights

# bmod.weights
# # Check performance of model on training data to evaluate provided weights

# bmod.compile(loss= 'categorical_crossentropy', optimizer= 'adam', metrics=['accuracy'])
# bmod.evaluate(X_train, y_train_vect)

# history = bmod.fit(x = X_train, y= y_train_vect, batch_size=32, validation_data= (X_val, y_val_vect), verbose = 1, epochs = 30, callbacks = [callback])

In [74]:
# Set early stopping parameters
callback = EarlyStopping(monitor= 'val_accuracy', patience= 5, start_from_epoch= 5)

# Baseline model

A note on this. The EfficientNet model includes a image processing step that expects the pixels to be in the 0-255 range, I had originally been normalizing my pixels by dividing by 255 and that was resulting in terrible results (like 2% accuracy), as soon as I removed the normalization the accuracy jumped up to 80%. 

First test - 100 Birds from 50 species

Each epoch took about 2:30 minutes with a batch size of 80. I let it run for about 25 minute (11 epochs) and I was hovering around 85% accuracy 

In [100]:
# Try an Elasticnet model for baseline

# Load Model 
enet = EfficientNetB0(include_top= False, input_shape=(224, 224, 3))

layers = enet.layers

for layer in layers[:20]:
    layer.trainable = False

# Add a flatten and softmax layer
model= Sequential([
    enet,
    Flatten(),
    Dense(num_birds, activation='softmax')
])

# Compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [101]:
history = model.fit(x = X_train, y= y_train_vect, batch_size=80, validation_data= (X_val, y_val_vect), verbose = 1, epochs = 30, callbacks = [callback])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30

KeyboardInterrupt: 

Notes for attempt with all 525 birds:

I manually stopped this one after it was running for about 2.5 hours with a batch size of 32. This one never got great accuracy, but was pretty consistently performing between .12 and .14 over 18 epochs. So it's already performing better than the provided model, which is just confusing me more. 

Notes for 100 birds from 50 species:

This took about 20 seconds per epoch with a batch size of 80. Early stopping kicked in at epcoh 13. The model pretty much just hung out at a val accuracy of .60ish for all the epochs. 

In [71]:
# Try a very simple model to see how it performs
model = Sequential([
    Conv2D(32, 3, input_shape = (224, 224, 3), activation= 'relu'),
    MaxPooling2D(),
    Dropout(.2),
    Flatten(),
    Dense(num_birds, activation='softmax')
])





In [72]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])




In [78]:
history2 = model.fit(x = X_train, y= y_train_vect, batch_size=80, validation_data= (X_val, y_val_vect), verbose = 1, epochs = 30, callbacks = [callback])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
