Loading the dataset through Kaggle

In [2]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

!kaggle datasets download -d gpiosenka/100-bird-species

import zipfile
zip_ref = zipfile.ZipFile('/content/100-bird-species.zip', 'r')
zip_ref.extractall('/data')
zip_ref.close()

Downloading 100-bird-species.zip to /content
 99% 1.93G/1.96G [00:12<00:00, 224MB/s]
100% 1.96G/1.96G [00:12<00:00, 163MB/s]


*   setting up data directory path
*   standardizing the image size to fit the EfficientNetB0 default input size (the original EfficientNet model was trained on this size)
*   batch size = 32 (a standard default); TODO: check whether a larger batch size would speed up training




In [3]:
import os
import zipfile
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

# Root directory the dataset archive was extracted into.
data_dir = '/data'
# Side length (in pixels) every image is resized to; chosen to match the
# default EfficientNetB0 input size.
img_size = 224
# Number of images per batch produced by the data generators.
batch_size = 32


data preprocessing and augmentation
*   data augmentation: improves ability to generalize
*   flow_from_directory: generates batches of augmented data from images in a directory


In [4]:
# Random geometric augmentations applied to training images only.
augmentation_options = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Training generator: EfficientNet preprocessing plus augmentation, with 20%
# of the images in the training directory reserved for validation.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=0.2,
    **augmentation_options,
)

# Validation generator: same preprocessing and split fraction, no augmentation.
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input, validation_split=0.2)

# Both subsets are read from the same 'train' directory with identical
# resizing, batching and one-hot label settings.
train_dir = os.path.join(data_dir, 'train')
flow_options = dict(
    target_size=(img_size, img_size),
    batch_size=batch_size,
    class_mode='categorical',
)

train_generator = train_datagen.flow_from_directory(train_dir, subset='training', **flow_options)
val_generator = val_datagen.flow_from_directory(train_dir, subset='validation', **flow_options)


Found 67911 images belonging to 525 classes.
Found 16724 images belonging to 525 classes.


Setting up transfer learning with EfficientNet architecture

In [5]:
# EfficientNetB0 with ImageNet weights and without its classification head
# (include_top=False), sized for the resized input images.
base_model = EfficientNetB0(
    include_top=False,
    weights='imagenet',
    input_shape=(img_size, img_size, 3),
)
# Freeze the pre-trained weights so only the layers added on top are trained.
base_model.trainable = False


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5


Building and compiling the model with the pre-trained network as its base,
and setting up early stopping to prevent overfitting

In [6]:
# Classification head stacked on the frozen backbone:
# global average pooling -> 1024-unit ReLU layer -> 50% dropout -> softmax
# with one unit per class found by the training generator.
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.5)(x)
output = Dense(train_generator.num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=output)

# Categorical cross-entropy matches the one-hot labels produced by
# class_mode='categorical' in the generators.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Stop once validation loss has not improved for 5 epochs and roll back to the
# best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

Model training

In [7]:
# Upper bound on training epochs; the early-stopping callback may end
# training sooner.
epochs = 10
# Train on the augmented training subset and validate on the held-out subset
# after each epoch. `history` keeps the value returned by fit().
history = model.fit(
    train_generator,
    epochs=epochs,
    validation_data=val_generator,
    callbacks=[early_stopping]
)
# Persist the trained model twice under the filesystem root: once with an
# '.h5' suffix and once with no suffix.
# NOTE(review): the on-disk format Keras chooses for each path depends on the
# installed version — confirm both copies are still needed.
model.save('/my_model.h5')
model.save('/my_model')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10


  saving_api.save_model(


SAVING THE TRAINED MODEL AND LOADING THE TEST SET

In [10]:
# Mount Google Drive into the Colab filesystem so the model can be saved to it.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [11]:
# Save the trained model under the mounted Drive folder.
model_save_path = '/content/drive/My Drive/my_model.keras'
model.save(model_save_path)


In [12]:
from google.colab import files

# Download the file written by the preceding save cell. The previously
# hard-coded '/my_model.keras' is not created by any cell in this notebook,
# so the download failed on a fresh Restart & Run All.
files.download(model_save_path)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [13]:
# Test images get the same EfficientNet preprocessing as training images,
# with no augmentation.
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# shuffle=False keeps the image order fixed, so predictions can be compared
# position-by-position with test_generator.classes.
test_dir = os.path.join(data_dir, 'test')
test_generator = test_datagen.flow_from_directory(
    test_dir,
    shuffle=False,
    class_mode='categorical',
    batch_size=batch_size,
    target_size=(img_size, img_size),
)

Found 2625 images belonging to 525 classes.


Model Evaluation

In [17]:
from sklearn.metrics import classification_report

# Per-class probabilities for every test image. The test generator is created
# with shuffle=False, so row i lines up with test_generator.classes[i].
predictions = model.predict(test_generator, verbose=1)
# Most probable class index for each image.
predicted_classes = np.argmax(predictions, axis=1)
true_classes = test_generator.classes
# Human-readable class names, in the order stored in class_indices.
class_labels = list(test_generator.class_indices.keys())

# zero_division=0 reports 0 for precision/recall/F1 when a class has no
# predicted (or no true) samples, without emitting one UndefinedMetricWarning
# per affected average; the reported numbers are the same as with the default.
print(
    classification_report(
        true_classes,
        predicted_classes,
        target_names=class_labels,
        zero_division=0,
    )
)


# from sklearn.metrics import confusion_matrix
# import seaborn as sns

# conf_matrix = confusion_matrix(true_classes, predicted_classes)
# plt.figure(figsize=(20, 20))
# sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=class_labels, yticklabels=class_labels)
# plt.title('Confusion Matrix')
# plt.ylabel('True Class')
# plt.xlabel('Predicted Class')
# plt.show()


                               precision    recall  f1-score   support

              ABBOTTS BABBLER       1.00      0.40      0.57         5
                ABBOTTS BOOBY       1.00      0.20      0.33         5
   ABYSSINIAN GROUND HORNBILL       1.00      1.00      1.00         5
        AFRICAN CROWNED CRANE       1.00      1.00      1.00         5
       AFRICAN EMERALD CUCKOO       1.00      0.80      0.89         5
            AFRICAN FIREFINCH       1.00      1.00      1.00         5
       AFRICAN OYSTER CATCHER       1.00      1.00      1.00         5
        AFRICAN PIED HORNBILL       1.00      0.60      0.75         5
          AFRICAN PYGMY GOOSE       1.00      1.00      1.00         5
                    ALBATROSS       0.71      1.00      0.83         5
               ALBERTS TOWHEE       1.00      1.00      1.00         5
         ALEXANDRINE PARAKEET       1.00      1.00      1.00         5
                ALPINE CHOUGH       1.00      1.00      1.00         5
     

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
