## Import statements and loading the data

In [None]:
import h5py
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, Flatten, Dense, Dropout, GlobalAveragePooling2D, BatchNormalization, SeparableConv2D, SpatialDropout2D
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
import random
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import CategoricalCrossentropy
from sklearn.metrics import roc_curve, auc, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Open the spectrogram archive read-only. The handle is kept open on purpose:
# later cells read the datasets through `f`.
f = h5py.File('bird_spectrograms.hdf5', mode = 'r')
# top-level keys of the file (later cells treat each key as one species)
list(f.keys())

In [None]:
# Show the array shape stored under every top-level key of the file.
for key in f:
    print(f[key].shape)

In [None]:
# Seed Python's, NumPy's and TensorFlow's random number generators with one
# shared value to ensure reproducibility.
SEED = 5322
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

## Multiclass Classification (All species)

In [None]:
X_list, y_list = [], []

# Walk over every species stored in the file, keeping its spectrogram array
# and emitting one label string per sample.
for species in f.keys():
    spectrograms = np.array(f[species])  # (128, 517, N)
    n_samples = spectrograms.shape[2]    # samples live on the last axis
    labels = n_samples * [species]

    X_list += [spectrograms]
    y_list += labels

In [None]:
# Join all species along the sample axis, bring that axis to the front and
# append a trailing channel axis of size 1.
X = np.concatenate(X_list, axis = 2)     # (128, 517, total_samples)
X = np.moveaxis(X, 2, 0)                 # (samples, 128, 517)
X = np.expand_dims(X, axis = -1)         # (samples, 128, 517, 1)

In [None]:
# normalize spectrograms to [0, 1]
# Dividing by the maximum only lands in [0, 1] when every value is
# non-negative. If the data contains negative values (e.g. dB-scaled
# spectrograms -- not verified here), fall back to min-max scaling so the
# stated range actually holds. For non-negative data the result is unchanged.
x_min = X.min()
x_max = X.max()
if x_min >= 0:
    X = X / x_max
else:
    X = (X - x_min) / (x_max - x_min)

In [None]:
# turn the per-sample label list into a NumPy array
y = np.asarray(y_list)

In [None]:
# Map every species name to an integer code.
le = LabelEncoder()
le.fit(y)
y_encoded = le.transform(y)              # e.g., 'amecro' → 0, 'amerob' → 1, etc.

# reverse lookup: integer code -> species name
class_codes = le.transform(le.classes_)
label_map = {code: name for code, name in zip(class_codes, le.classes_)}

In [None]:
# One-hot encode the integer labels: one column per class known to the encoder.
# Every class occurs in y_encoded (the encoder was fit on the same labels), so
# passing the class count explicitly gives the same width as inferring it.
y_onehot = to_categorical(y_encoded, num_classes = len(le.classes_))     # shape: (samples, 12)

In [None]:
# 80/20 train/test split, stratified on the integer labels so the species
# proportions are kept in both parts.
split_parts = train_test_split(
    X,
    y_onehot,
    stratify = y_encoded,
    test_size = 0.2,
    random_state = 5322,
)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Integer-encoded test labels, used to decode predictions later and for plotting.
# They are read back from the one-hot test labels instead of running a second
# train_test_split: a second split only lines up with X_test / y_test as long as
# both calls keep exactly the same arguments, whereas this is aligned by
# construction.
y_test_raw = np.argmax(y_test, axis = 1)

In [None]:
# Data augmentation by shifting, zooming, and filling. Rotating did not make
# sense for this data, so it is left out.
datagen = ImageDataGenerator(
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    fill_mode='nearest'
)

# NOTE: datagen.fit(X_train) is intentionally not called. fit() only computes
# data statistics for featurewise_center, featurewise_std_normalization and
# zca_whitening; none of those are enabled here, so the call would do no useful
# work on the training array.

In [None]:
# number of distinct species the label encoder has seen
num_classes = le.classes_.shape[0]
num_classes

In [None]:
# first multiclass model:
# five convolution groups (conv -> batch norm -> max pool -> dropout), global
# average pooling and a small dense head ending in a softmax over all classes
model = Sequential([
    # layer group 1
    Conv2D(32, (3, 3), activation = 'relu', input_shape = (128, 517, 1), kernel_regularizer = l2(0.0001), padding = 'same'),
    BatchNormalization(),
    MaxPooling2D(2, 2),
    SpatialDropout2D(0.3),
    # group 2
    Conv2D(64, (3, 3), activation = 'relu', kernel_regularizer = l2(0.0001), padding = 'same'),
    BatchNormalization(),
    MaxPooling2D(2, 2),
    SpatialDropout2D(0.3),
    # group 3
    SeparableConv2D(128, (3, 3), activation = 'relu', kernel_regularizer = l2(0.0001), padding = 'same'),
    BatchNormalization(),
    MaxPooling2D(2, 2),
    Dropout(0.4),
    # group 4
    SeparableConv2D(128, (3, 3), activation = 'relu', kernel_regularizer = l2(0.0001), padding = 'same'),
    BatchNormalization(),
    MaxPooling2D(2, 2),
    Dropout(0.4),
    # group 5
    SeparableConv2D(256, (3, 3), activation = 'relu', kernel_regularizer = l2(0.0001), padding = 'same'),
    BatchNormalization(),
    MaxPooling2D(2, 2),
    Dropout(0.5),
    # global pooling
    GlobalAveragePooling2D(),
    Dense(units = 128, activation = 'relu', kernel_regularizer = l2(0.0001)),
    Dropout(0.5),
    Dense(units = num_classes, activation = 'softmax')
], name = "Multiclass_Model_1")

model.compile(
    optimizer = Adam(learning_rate = 0.0001),
    loss = CategoricalCrossentropy(),
    metrics = ['accuracy']
)

model.summary()

# Hold out 20% of the training data for validation. Early stopping (and the
# weights it restores) must not be selected on the test set, otherwise the test
# accuracy reported below is optimistically biased.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size = 0.2, random_state = 5322,
    stratify = np.argmax(y_train, axis = 1)
)

# stop once val_loss has not improved for 5 epochs and roll back to the best weights
early_stopping = EarlyStopping(monitor = 'val_loss', patience = 5, restore_best_weights = True)

# augmentation is applied to the training batches only; validation data is left untouched
history = model.fit(
    datagen.flow(X_fit, y_fit, batch_size = 16),
    validation_data = (X_val, y_val),
    epochs = 100,
    callbacks = [early_stopping]
)

# Evaluation on the test set, which was not used during training or model selection
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'\nTest accuracy: {test_acc}')

In [None]:
# Predict class probabilities for the test set, then reduce both the
# predictions and the one-hot ground truth to integer class indices.
y_pred_probs = model.predict(X_test)
y_true = y_test.argmax(axis = 1)
y_pred = y_pred_probs.argmax(axis = 1)

# species names ordered by their integer code, so row i of the report is class i
target_names = [label_map[code] for code in sorted(label_map)]

report = classification_report(
    y_true,
    y_pred,
    target_names = target_names,
    digits = 4,
    zero_division = 0,
)
print(report)

In [None]:
# Confusion matrix of true vs. predicted class indices, drawn as an annotated heatmap.
cm = confusion_matrix(y_true, y_pred)

fig, ax = plt.subplots(figsize = (10, 8))
sns.heatmap(cm, annot = True, fmt = 'd', cmap = 'Blues', ax = ax)
ax.set_xlabel("Predicted")
ax.set_ylabel("True")
ax.set_title("Confusion Matrix")
plt.show()

## Multiclass Classification with adjustments
- added label smoothing of 0.05
- increased batch size to 64 from 16

In [None]:
# second multiclass model: plain Conv2D groups, a two-layer dense head,
# label smoothing of 0.05 and a batch size of 64

# (filters, dropout rate) for each conv -> batch norm -> max pool -> dropout group
conv_groups = ((32, 0.2), (64, 0.2), (128, 0.3), (128, 0.3), (256, 0.4))
# (units, dropout rate) for each hidden dense layer after global pooling
dense_head = ((256, 0.3), (128, 0.4))

model_layers = []
for n_filters, drop_rate in conv_groups:
    # only the very first layer declares the input shape
    first_layer_kwargs = {} if model_layers else {'input_shape': (128, 517, 1)}
    model_layers += [
        Conv2D(n_filters, (3, 3), activation = 'relu', kernel_regularizer = l2(0.0001), padding = 'same', **first_layer_kwargs),
        BatchNormalization(),
        MaxPooling2D(2, 2),
        Dropout(drop_rate),
    ]

# global pooling
model_layers.append(GlobalAveragePooling2D())
for n_units, drop_rate in dense_head:
    model_layers += [
        Dense(units = n_units, activation = 'relu', kernel_regularizer = l2(0.0001)),
        Dropout(drop_rate),
    ]
model_layers.append(Dense(units = num_classes, activation = 'softmax'))

model = Sequential(model_layers, name = "Multiclass_Model_2")

model.compile(
    loss = CategoricalCrossentropy(label_smoothing = 0.05),
    optimizer = Adam(learning_rate = 0.0001),
    metrics = ['accuracy'],
)

model.summary()

# stop once val_loss has not improved for 5 epochs and roll back to the best weights
early_stopping = EarlyStopping(monitor = 'val_loss', patience = 5, restore_best_weights = True)

# no augmentation here: validation uses 20% of the training arrays via validation_split
history = model.fit(
    X_train,
    y_train,
    validation_split = 0.2,
    batch_size = 64,
    epochs = 100,
    callbacks = [early_stopping],
)

# Evaluation
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'\nTest accuracy: {test_acc}')

In [None]:
# third multiclass model: same architecture and label smoothing (0.05) as
# "Multiclass_Model_2", but trained with a batch size of 16.
# The model name was a copy-paste of "Multiclass_Model_1"; it is given its own
# name so that summaries and logs of the different runs can be told apart.
model = Sequential([
    # layer group 1
    Conv2D(32, (3, 3), activation = 'relu', input_shape = (128, 517, 1), kernel_regularizer = l2(0.0001), padding = 'same'),
    BatchNormalization(),
    MaxPooling2D(2, 2),
    Dropout(0.2),
    # group 2
    Conv2D(64, (3, 3), activation = 'relu', kernel_regularizer = l2(0.0001), padding = 'same'),
    BatchNormalization(),
    MaxPooling2D(2, 2),
    Dropout(0.2),
    # group 3
    Conv2D(128, (3, 3), activation = 'relu', kernel_regularizer = l2(0.0001), padding = 'same'),
    BatchNormalization(),
    MaxPooling2D(2, 2),
    Dropout(0.3),
    # group 4
    Conv2D(128, (3, 3), activation = 'relu', kernel_regularizer = l2(0.0001), padding = 'same'),
    BatchNormalization(),
    MaxPooling2D(2, 2),
    Dropout(0.3),
    # group 5
    Conv2D(256, (3, 3), activation = 'relu', kernel_regularizer = l2(0.0001), padding = 'same'),
    BatchNormalization(),
    MaxPooling2D(2, 2),
    Dropout(0.4),
    # global pooling
    GlobalAveragePooling2D(),
    Dense(units = 256, activation = 'relu', kernel_regularizer = l2(0.0001)),
    Dropout(0.3),
    Dense(units = 128, activation = 'relu', kernel_regularizer = l2(0.0001)),
    Dropout(0.4),
    Dense(units = num_classes, activation = 'softmax')
], name = "Multiclass_Model_3")

model.compile(
    optimizer = Adam(learning_rate = 0.0001),
    loss = CategoricalCrossentropy(label_smoothing = 0.05),
    metrics = ['accuracy']
)

model.summary()

# stop once val_loss has not improved for 5 epochs and roll back to the best weights
early_stopping = EarlyStopping(monitor = 'val_loss', patience = 5, restore_best_weights = True)

# validation uses 20% of the training arrays via validation_split; the test set is only used below
history = model.fit(
    X_train, y_train,
    epochs = 100,
    batch_size = 16,
    validation_split = 0.2,
    callbacks=[early_stopping]
)

# Evaluation
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'\nTest accuracy: {test_acc}')