In [71]:
from tensorflow import keras
from tensorflow.keras import layers
import pathlib
from tensorflow.keras.utils import image_dataset_from_directory

import os
import shutil

import numpy as np
import matplotlib.pyplot as plt

In [72]:
# Root folder of the course material; the "train", "validation" and "test"
# image folders are read from directly beneath it.
data_folder = pathlib.Path('G:/My Drive/University of Waterloo Files/Applied Machine Learning and Artificial Intelligence/Term 1/CSCN 8010 - Foundations of Machine Learning')
train_folder = data_folder / "train"

# List the class folders exactly once. os.listdir() returns entries in
# arbitrary order, so every index computed below is resolved against this same
# list instead of a fresh listing. Plain files are skipped because only
# sub-folders can be listed for an image count.
birds = [entry for entry in os.listdir(train_folder) if (train_folder / entry).is_dir()]

# Number of entries inside each class folder, aligned position-by-position with `birds`.
class_counts = np.array([len(os.listdir(train_folder / bird)) for bird in birds])

# NOTE: despite its name, `num_classes` holds the positions that sort the
# classes by ascending count (the name is kept so later cells keep working).
num_classes = class_counts.argsort()
indexes_of_top_three_classes = num_classes[::-1][:3]
top_three_classes = [birds[i] for i in indexes_of_top_three_classes]

# Report the selection using the counts already gathered above.
print("Selected Birds and their Counts:")
for bird, count in zip(top_three_classes, class_counts[indexes_of_top_three_classes]):
    print(bird, count)

Selected Birds and their Counts:
RUFOUS TREPE 263
HOUSE FINCH 248
OVENBIRD 233


In [73]:
# `path` is kept for any later cell that still reads it, but it is now derived
# from `data_folder` so the folder that is listed and the folder that is
# deleted from can never drift apart.
path = data_folder.as_posix()

# An empty selection would make the loop below remove every class folder in
# all three splits, so stop before touching the disk.
if not top_three_classes:
    raise ValueError("top_three_classes is empty; refusing to delete every class folder")

# WARNING: this permanently deletes data. Every sub-folder of train/,
# validation/ and test/ whose name is not in `top_three_classes` is removed.
for split in ("train", "validation", "test"):
    split_folder = data_folder / split
    for species in os.listdir(split_folder):
        species_folder = split_folder / species
        # Only directories are removed: shutil.rmtree() raises on plain files,
        # and stray files are not class folders anyway.
        if species not in top_three_classes and species_folder.is_dir():
            shutil.rmtree(species_folder)

In [74]:
# Every split is loaded with the same image size and batch size, so the
# settings are stated once and reused for each directory.
loader_options = dict(image_size=(224, 224), batch_size=32)

# The generator is consumed left to right, so the folders are read in the
# order train -> validation -> test.
train_dataset, validation_dataset, test_dataset = (
    image_dataset_from_directory(data_folder / split, **loader_options)
    for split in ("train", "validation", "test")
)

Found 744 files belonging to 3 classes.
Found 15 files belonging to 3 classes.
Found 15 files belonging to 3 classes.


In [75]:
# Random image transformations chained into a single model: horizontal flip,
# rotation with factor 0.1, and zoom with factor 0.2.
augmentation_layers = [
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.2),
]
data_augmentation = keras.Sequential(augmentation_layers)

In [76]:
# VGG16 with ImageNet weights and without its classifier top, built for
# 224x224 RGB inputs.
vgg16_module = keras.applications.vgg16
conv_base = vgg16_module.VGG16(
    include_top=False,
    weights="imagenet",
    input_shape=(224, 224, 3),
)

# Print the layer-by-layer summary of the convolutional base.
conv_base.summary()

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

Part 1: Freezing conv_base and training head

In [68]:
# Freeze the whole convolutional base so only the new head is trained here.
conv_base.trainable = False

# Pipeline: augmentation -> VGG16 preprocessing -> frozen base -> dense head.
inputs = keras.Input(shape=(224, 224, 3))
x = data_augmentation(inputs)
x = keras.applications.vgg16.preprocess_input(x)
x = conv_base(x)
x = layers.Flatten()(x)
# NOTE(review): this Dense layer is created without an activation argument,
# so it is linear - confirm that is intended.
x = layers.Dense(256)(x)
x = layers.Dropout(0.5)(x)
# One softmax output per selected bird class.
outputs = layers.Dense(3, activation="softmax")(x)
model = keras.Model(inputs, outputs)

# Sparse categorical cross-entropy is used with integer class labels.
model.compile(loss="sparse_categorical_crossentropy",
              optimizer="rmsprop",
              metrics=["accuracy"])

# Keep only the weights with the lowest validation loss. The checkpoint gets
# its own .keras file inside the submission folder so that another training
# run writing to that folder cannot overwrite it.
callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath="G:/My Drive/University of Waterloo Files/Applied Machine Learning and Artificial Intelligence/Term 1/CSCN 8010 - Foundations of Machine Learning/CSCN8010-lab-submissions/students_submissions/8972394/feature_extraction.keras",
        save_best_only=True,
        monitor="val_loss")
]
history = model.fit(
    train_dataset,
    epochs=50,
    validation_data=validation_dataset,
    callbacks=callbacks)

Epoch 1/50

KeyboardInterrupt: 

In [None]:
# Per-epoch metrics recorded by the most recent model.fit() call.
training_log = history.history
acc, val_acc = training_log["accuracy"], training_log["val_accuracy"]
loss, val_loss = training_log["loss"], training_log["val_loss"]
epochs = range(1, len(acc) + 1)

# One figure per metric: training values as dots, validation values as a line.
curves = (
    (acc, val_acc, "Training accuracy", "Validation accuracy", "Training and validation accuracy"),
    (loss, val_loss, "Training loss", "Validation loss", "Training and validation loss"),
)
for position, (train_values, val_values, train_label, val_label, title) in enumerate(curves):
    if position > 0:
        # Open a fresh figure for every metric after the first one.
        plt.figure()
    plt.plot(epochs, train_values, "bo", label=train_label)
    plt.plot(epochs, val_values, "b", label=val_label)
    plt.title(title)
    plt.legend()
plt.show()

Part 2: Unfreezing last four layers and retraining

In [None]:
# Unfreeze the base, then re-freeze everything except its last four layers so
# only those layers (and the head) are updated during fine-tuning.
conv_base.trainable = True
for layer in conv_base.layers[:-4]:
    layer.trainable = False

# Recompile after changing `trainable`, using RMSprop with a small learning
# rate (1e-5) for the fine-tuning run.
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=keras.optimizers.RMSprop(learning_rate=1e-5),
              metrics=["accuracy"])

# Keep only the weights with the lowest validation loss. The checkpoint gets
# its own .keras file inside the submission folder so that it does not
# overwrite a checkpoint written by another training run.
callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath="G:/My Drive/University of Waterloo Files/Applied Machine Learning and Artificial Intelligence/Term 1/CSCN 8010 - Foundations of Machine Learning/CSCN8010-lab-submissions/students_submissions/8972394/fine_tuning.keras",
        save_best_only=True,
        monitor="val_loss")
]
history = model.fit(
    train_dataset,
    epochs=30,
    validation_data=validation_dataset,
    callbacks=callbacks)

In [None]:
# Per-epoch metrics recorded by the most recent model.fit() call.
fit_metrics = history.history
acc = fit_metrics["accuracy"]
val_acc = fit_metrics["val_accuracy"]
loss = fit_metrics["loss"]
val_loss = fit_metrics["val_loss"]
epochs = range(1, len(acc) + 1)

# Each entry describes one figure: (training series, validation series,
# training label, validation label, figure title).
figure_specs = [
    (acc, val_acc, "Training accuracy", "Validation accuracy", "Training and validation accuracy"),
    (loss, val_loss, "Training loss", "Validation loss", "Training and validation loss"),
]
first_figure = True
for train_series, val_series, train_label, val_label, figure_title in figure_specs:
    if not first_figure:
        # Start a new figure so the metrics are not drawn on the same axes.
        plt.figure()
    first_figure = False
    plt.plot(epochs, train_series, "bo", label=train_label)
    plt.plot(epochs, val_series, "b", label=val_label)
    plt.title(figure_title)
    plt.legend()
plt.show()