# Machine Learning Bird Classification Application
## WGU C964 Capstone


---


Michael Lawrence

mlaw101@wgu.edu

002680987

### Create a Directory to Store the Model

In [None]:
7# Cell 1
!mkdir /content/models/

### Download the Dataset

In [None]:
# Cell 2
# Download the "100-bird-species" dataset archive with the Kaggle CLI.
# The recorded output shows the archive being saved to /content as 100-bird-species.zip.
# NOTE(review): presumably requires Kaggle API credentials to be configured beforehand — confirm.
!kaggle datasets download -d gpiosenka/100-bird-species

Dataset URL: https://www.kaggle.com/datasets/gpiosenka/100-bird-species
License(s): CC0-1.0
Downloading 100-bird-species.zip to /content
100% 1.95G/1.96G [00:23<00:00, 160MB/s]
100% 1.96G/1.96G [00:23<00:00, 89.1MB/s]


In [None]:
# Cell 3
import zipfile
import os

# Archive fetched by the Kaggle CLI and the directory it is unpacked into.
archive_path = '/content/100-bird-species.zip'
extract_root = '/content'

# Entries the archive is expected to produce.
dir1 = '/content/test'
dir2 = '/content/train'
dir3 = '/content/valid'
file1 = '/content/EfficientNetB0-525-(224 X 224)- 98.97.h5'
file2 = '/content/birds.csv'

# file1 is deliberately NOT part of the "already extracted" check: Cell 4 deletes it,
# so checking for it would force a full re-extraction on every re-run of this cell
# (and would bring back the folders that Cell 5 removes).
required_paths = [dir1, dir2, dir3, file2]

if not all(os.path.exists(path) for path in required_paths):
    print("Extracting files...")
    # The context manager closes the archive even if extraction raises.
    with zipfile.ZipFile(archive_path, 'r') as zip_ref:
        zip_ref.extractall(extract_root)
    print("Extraction completed.")
else:
    print("Files already extracted, skipping extraction.")

Extracting files...
Extraction completed.


### Clean the Data of Unused Files

In [None]:
# Cell 4
!rm -rf /content/sample_data/
!rm -rf '/content/EfficientNetB0-525-(224 X 224)- 98.97.h5'

In [None]:
# Cell 5
# Clean the data of joke images added by dataset creator
!rm -rf '/content/test/LOONEY BIRDS'
!rm -rf '/content/train/LOONEY BIRDS'
!rm -rf '/content/valid/LOONEY BIRDS'

### Install and Set up Anvil.works Server

In [None]:
# Cell 6
!pip install anvil-uplink

Collecting anvil-uplink
  Downloading anvil_uplink-0.5.1-py2.py3-none-any.whl.metadata (1.5 kB)
Collecting argparse (from anvil-uplink)
  Downloading argparse-1.4.0-py2.py3-none-any.whl.metadata (2.8 kB)
Collecting ws4py-sslupdate (from anvil-uplink)
  Downloading ws4py_sslupdate-0.5.1b0-py2.py3-none-any.whl.metadata (1.7 kB)
Downloading anvil_uplink-0.5.1-py2.py3-none-any.whl (95 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.9/95.9 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading argparse-1.4.0-py2.py3-none-any.whl (23 kB)
Downloading ws4py_sslupdate-0.5.1b0-py2.py3-none-any.whl (45 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.8/45.8 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ws4py-sslupdate, argparse, anvil-uplink
Successfully installed anvil-uplink-0.5.1 argparse-1.4.0 ws4py-sslupdate-0.5.1b0


### Import frameworks and libraries

In [None]:
# Cell 7
import getpass
import os

import anvil.server

# The Uplink key is a credential and must not be stored in the notebook.
# It is read from the ANVIL_UPLINK_KEY environment variable, falling back to an
# interactive prompt whose input is not echoed or saved in the cell output.
# NOTE(review): the key that used to be hard-coded in this cell has been exposed and
# should be rotated in the Anvil app's Uplink settings.
anvil_uplink_key = os.environ.get("ANVIL_UPLINK_KEY") or getpass.getpass("Anvil Uplink key: ")

anvil.server.connect(anvil_uplink_key)

Connecting to wss://anvil.works/uplink
Anvil websocket open
Connected to "Default Environment" as SERVER


In [None]:
# Cell 8
import tensorflow as tf
import io
import datetime
import random
import tensorflow.keras.backend as K

from tensorflow.keras import Sequential, models, mixed_precision
from tensorflow.keras import utils
from tensorflow.keras.utils import plot_model
from tensorflow.keras.preprocessing import image, image_dataset_from_directory
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.saving import register_keras_serializable
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import (Dense, Conv2D, MaxPool2D, AvgPool2D, GlobalAveragePooling2D,
                                    Flatten, concatenate, BatchNormalization, ReLU,
                                    Dropout, Input, Reshape, Resizing, Rescaling, RandomFlip,
                                    RandomContrast, RandomZoom, RandomRotation)
from PIL import Image

# Report the TensorFlow version in use so the recorded run can be reproduced.
print(f"Tensorflow Version: {tf.__version__}")

Tensorflow Version: 2.17.0


In [None]:
# Cell 9
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

### Define global variables

In [None]:
# Cell 10
# Root directories of the three dataset splits (these are the directories the
# extraction cell checks for under /content). Each is passed to
# image_dataset_from_directory when the datasets are built.
path_to_training_data = '/content/train'
path_to_test_data = '/content/test'
path_to_validating_data = '/content/valid'

In [None]:
# Cell 11
# Shared configuration for the input pipeline and inference helpers.
batch_size = 64                  # images per batch in every dataset split
image_size = (224, 224)          # size every image is resized to when loaded
input_shape = (*image_size, 3)   # image_size plus 3 colour channels -> (224, 224, 3)
num_classes = 524                # matches the "524 classes" reported by the dataset loader

# Seed every random number generator the notebook imports from the single `seed`
# value, so changing it in one place changes it everywhere.
seed = 42
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

### Generate, cache, and prefetch the datasets and extract the class names
Data augmentation is applied to the training dataset only.

In [None]:
# Cell 12
def load_image_split(directory, shuffle):
    """Build a batched image dataset from a directory with one sub-folder per class.

    Args:
        directory: Root directory of the split.
        shuffle: Whether the loader shuffles the files.

    Returns:
        The dataset returned by ``image_dataset_from_directory``, using the
        notebook-wide ``image_size``, ``batch_size`` and ``seed`` and
        categorical labels.
    """
    return image_dataset_from_directory(
        directory,
        image_size=image_size,
        batch_size=batch_size,
        label_mode='categorical',
        seed=seed,
        shuffle=shuffle
    )


raw_training_dataset = load_image_split(path_to_training_data, shuffle=True)

# Read the class names from the loader's own return value, before any
# cache/map/prefetch transformation is chained onto it.
class_names = raw_training_dataset.class_names

data_augmentation_pipeline = Sequential([
        RandomFlip("horizontal_and_vertical"),
        RandomRotation(0.2),
        RandomZoom(0.2),
        RandomContrast(0.2)
    ])

# cache() must come BEFORE the random augmentation. Caching after it stores the
# first epoch's augmented images and replays them unchanged on every later epoch,
# which defeats the augmentation.
# NOTE(review): an in-memory cache of ~84k 224x224x3 images may not fit in RAM;
# consider cache(<filename>) or dropping the cache if memory is tight.
training_dataset = (
    raw_training_dataset
    .cache()
    .map(lambda x, y: (data_augmentation_pipeline(x, training=True), y))
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

# Evaluation splits are not shuffled: metrics do not depend on order, and a fixed
# order keeps labels and predictions aligned when they are collected in separate
# passes (e.g. for a confusion matrix).
valid_dataset = (
    load_image_split(path_to_validating_data, shuffle=False)
    .cache()
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

test_dataset = (
    load_image_split(path_to_test_data, shuffle=False)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

Found 84479 files belonging to 524 classes.
Found 2620 files belonging to 524 classes.
Found 2620 files belonging to 524 classes.


In [None]:
# Cell 13
# Restore the saved classifier from the models directory.
# NOTE(review): no cell in this notebook writes this file — confirm it is uploaded
# (or produced by a separate training run) before this cell is executed.
saved_model_path = '/content/models/bird_classification_densenet.keras'
trained_model = load_model(saved_model_path)

### Create functions for preprocessing images and decoding predictions

In [None]:
# Cell 14
target_size = image_size


def preprocess_image(img_path, target_size):
    """Load one image, resize it and wrap it in a batch of size one.

    Args:
        img_path: Image source forwarded to ``image.load_img``. The notebook
            passes file paths (Cell 16) and an ``io.BytesIO`` stream
            (``classify_image``).
        target_size: Size forwarded to ``image.load_img`` for resizing.

    Returns:
        A numpy array holding the image with an extra leading axis of length 1,
        i.e. a single-image batch ready for ``model.predict``.
    """
    loaded = image.load_img(img_path, target_size=target_size)
    pixels = image.img_to_array(loaded)
    # Add the batch axis in front of the image axes.
    return np.expand_dims(pixels, axis=0)


In [None]:
# Cell 15
def decode_predictions(predictions, class_names, top=5):
    """Translate a batch of score vectors into ranked ``(class_name, score)`` pairs.

    Args:
        predictions: 2D array-like of scores, one row per sample and one column
            per class. Lists are accepted and converted with ``np.asarray``.
        class_names: Sequence mapping a column index to its class name.
        top: Number of highest-scoring classes to keep per sample. ``0`` yields
            an empty list per sample; values above the number of classes return
            every class.

    Returns:
        A list with one entry per sample; each entry is a list of
        ``(class_name, score)`` tuples ordered from highest to lowest score.

    Raises:
        ValueError: If ``predictions`` is not 2D or ``top`` is negative.
    """
    predictions = np.asarray(predictions)
    if predictions.ndim != 2:
        raise ValueError(f"`decode_predictions` expects a 2D array, received shape: {predictions.shape}")
    if top < 0:
        raise ValueError(f"`top` must be non-negative, received: {top}")

    decoded_prediction = []
    for prediction in predictions:
        # argsort is ascending, so reverse first and then slice. Slicing with
        # [-top:] would return *all* classes for top=0 because -0 == 0.
        top_indices = prediction.argsort()[::-1][:top]
        result = [(class_names[i], prediction[i]) for i in top_indices]
        decoded_prediction.append(result)

    return decoded_prediction

### Test model predictions on 5 test images and show the top 3 predictions with their probabilities

In [None]:
# Cell 16
# Five test images, each taken from a different species folder.
image_paths = [
    '/content/test/BALD EAGLE/3.jpg',
    '/content/test/AMERICAN FLAMINGO/4.jpg',
    '/content/test/COMMON FIRECREST/2.jpg',
    '/content/test/CALIFORNIA GULL/4.jpg',
    '/content/test/CROW/5.jpg'
]

# Each preprocess_image call yields a one-image batch; stacking them along the
# first axis gives a single batch containing all five images.
image_batches = [preprocess_image(img_path, target_size) for img_path in image_paths]
images = np.vstack(image_batches)

predictions = trained_model.predict(images)
decoded_preds = decode_predictions(predictions, class_names, top=3)

# Report the three best guesses for every image, in the order the paths were given.
for index, img_path in enumerate(image_paths):
    print(f"Image: {img_path}")
    for class_name, probability in decoded_preds[index]:
        print(f"  Predicted class: {class_name}, Probability: {probability:.4f}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 15s/step
Image: /content/test/BALD EAGLE/3.jpg
  Predicted class: BALD EAGLE, Probability: 0.8920
  Predicted class: WHITE TAILED TROPIC, Probability: 0.0944
  Predicted class: ALBATROSS, Probability: 0.0090
Image: /content/test/AMERICAN FLAMINGO/4.jpg
  Predicted class: AMERICAN FLAMINGO, Probability: 0.9993
  Predicted class: SCARLET IBIS, Probability: 0.0007
  Predicted class: BALD IBIS, Probability: 0.0000
Image: /content/test/COMMON FIRECREST/2.jpg
  Predicted class: COMMON FIRECREST, Probability: 1.0000
  Predicted class: D-ARNAUDS BARBET, Probability: 0.0000
  Predicted class: CAPE MAY WARBLER, Probability: 0.0000
Image: /content/test/CALIFORNIA GULL/4.jpg
  Predicted class: CALIFORNIA GULL, Probability: 0.9634
  Predicted class: NORTHERN FULMAR, Probability: 0.0354
  Predicted class: FAIRY TERN, Probability: 0.0006
Image: /content/test/CROW/5.jpg
  Predicted class: HAMERKOP, Probability: 0.6570
  Predicted class: BL

### Create callable classification function that links to Anvil.Works web front end
[Link to this project's Anvil.Works Front-end implementation](https://educated-oval-iberian-lynx.anvil.app)


In [None]:
# Cell 17
@anvil.server.callable
def classify_image(file):
    """Classify an uploaded image and return the predicted class name.

    Registered with ``anvil.server.callable`` so the Anvil front end can call it.

    Args:
        file: Uploaded media object; its raw bytes are read with ``get_bytes()``.

    Returns:
        A dict with a single ``'result'`` key holding the predicted class name,
        or a short message when no file was supplied or the upload could not be
        decoded as an image.
    """
    if not file:
        return {'result': 'No file uploaded'}

    image_stream = io.BytesIO(file.get_bytes())

    try:
        processed_image = preprocess_image(image_stream, target_size)
    except (OSError, ValueError):
        # Pillow signals unreadable or non-image data with OSError subclasses.
        # Report it to the caller in the same shape as the other results instead
        # of letting the exception propagate out of the callable.
        return {'result': 'Invalid image file'}

    # verbose=0 keeps a progress bar from being printed for every request.
    prediction = trained_model.predict(processed_image, verbose=0)
    # Convert the numpy integer to a plain int before using it as a list index.
    predicted_class = int(np.argmax(prediction, axis=-1)[0])

    label = class_names[predicted_class]

    return {'result': label}

In [None]:
# Cell 18
# Keep this cell running so the callable registered above can serve requests from the
# Anvil front end. The recorded output shows the cell only ends when it is interrupted
# manually (KeyboardInterrupt), so it should stay the last cell of the notebook.
anvil.server.wait_forever()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 12s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 237ms/step


KeyboardInterrupt: 