# Single Species Classifier

We demonstrate the classification of bird species observed in the Mt Kenya ecosystem leveraging open source models. In particular we demonstrate the use of embeddings obtained from Google's [Bird Vocalization Classifier](https://www.kaggle.com/models/google/bird-vocalization-classifier) to train classifiers for the species observed.

In this notebook we use logistic regression and a multilayer perceptron to classify bird species from the Mt Kenya ecosystem using embeddings extracted from the bird vocalisation classifier as features.

In [1]:
import os
import json
import random
import numpy as np


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from collections import Counter
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

from multilabel_mlp import *


2024-11-18 16:50:16.567275: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-18 16:50:16.697425: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-18 16:50:16.697459: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-18 16:50:16.721574: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-18 16:50:16.775906: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-18 16:50:16.776666: I tensorflow/core/platform/cpu_feature_guard.cc:1

In [2]:
# set the seed for reproducability
np.random.seed(1234)
random.seed(1234)

In [3]:
embeddings_dir = '../embeddings/'

In [4]:
embedding_files = os.listdir(embeddings_dir)

In [5]:
with open('../annotations/single_species_filenames.json', 'r') as fp:
    single_species_filenames = json.load(fp)

## Features and labels

The 1280 dimensional embeddings are extracted from each of the 12 five second segments from the 1
minute long recordings and the mean of these embeddings are used as a feature for the recording.

In [6]:
labels = []
mean_embeddings = np.array([])
for embedding_file in embedding_files:
    labels.append(single_species_filenames[embedding_file.replace('npz', 'mp3')])
    npzfile = np.load(os.path.join(embeddings_dir, embedding_file))
    file_embeddings = npzfile['embeddings']

    if mean_embeddings.size:
        mean_embeddings = np.vstack((mean_embeddings, np.mean(file_embeddings, 0)))
    else:
        mean_embeddings = np.mean(file_embeddings, 0)

In [7]:
label_encoder = LabelEncoder()

In [8]:
label_indices = label_encoder.fit_transform(labels)

We use species with more than 10 recordings

In [9]:
label_counts = Counter(labels)
threshold = 10

filtered_labels = [label for label, count in label_counts.items() if count >= threshold]

In [10]:
filtered_labels

['Brown Woodland Warbler',
 'Yellow-whiskered Greenbul',
 'White-starred Robin',
 'Mountain Yellow Warbler',
 "Hartlaub's Turaco",
 'Cinnamon Bracken Warbler',
 'Red-fronted Parrot',
 'Chestnut-throated Apalis',
 'Montane White-eye']

In [11]:
len(label_counts)

35

In [12]:
new_labels = []
for label in labels:
    if label in filtered_labels:
        new_labels.append(label)
    else:
        new_labels.append('other')
        


In [13]:
Counter(new_labels)

Counter({'Brown Woodland Warbler': 225,
         'other': 65,
         'Chestnut-throated Apalis': 26,
         'Yellow-whiskered Greenbul': 24,
         'White-starred Robin': 16,
         'Red-fronted Parrot': 16,
         'Mountain Yellow Warbler': 14,
         'Cinnamon Bracken Warbler': 14,
         "Hartlaub's Turaco": 12,
         'Montane White-eye': 12})

In [14]:
len(Counter(new_labels))

10

# MLP Example - Figure 8 Confusion Matrix

In [15]:
new_labels = label_encoder.fit_transform(new_labels)
X_train, X_test, y_train, y_test = train_test_split(mean_embeddings, new_labels, test_size=0.2, random_state=12)


X_train = tf.convert_to_tensor(X_train)
X_test = tf.convert_to_tensor(X_test)

y_train = tf.convert_to_tensor(y_train)
y_test = tf.convert_to_tensor(y_test)

2024-11-18 16:50:20.817160: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:274] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


In [16]:
X_train = tf.reshape(X_train, (-1,1, 1280))
X_test = tf.reshape(X_test, (-1,1,1280))

y_train = tf.reshape(y_train, (-1,1))
y_test = tf.reshape(y_test, (-1,1))

train_data = tf.data.Dataset.from_tensor_slices((X_train, y_train))
test_data = tf.data.Dataset.from_tensor_slices((X_test, y_test))

In [17]:
for x,y in train_data:
    print(x.shape,y.shape)

(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) (1,)
(1, 1280) 

In [18]:
y_train.shape

TensorShape([339, 1])

In [19]:
for x, y in train_data:
    print(x.shape, y)

(1, 1280) tf.Tensor([6], shape=(1,), dtype=int64)
(1, 1280) tf.Tensor([0], shape=(1,), dtype=int64)
(1, 1280) tf.Tensor([0], shape=(1,), dtype=int64)
(1, 1280) tf.Tensor([3], shape=(1,), dtype=int64)
(1, 1280) tf.Tensor([0], shape=(1,), dtype=int64)
(1, 1280) tf.Tensor([0], shape=(1,), dtype=int64)
(1, 1280) tf.Tensor([9], shape=(1,), dtype=int64)
(1, 1280) tf.Tensor([0], shape=(1,), dtype=int64)
(1, 1280) tf.Tensor([0], shape=(1,), dtype=int64)
(1, 1280) tf.Tensor([0], shape=(1,), dtype=int64)
(1, 1280) tf.Tensor([0], shape=(1,), dtype=int64)
(1, 1280) tf.Tensor([0], shape=(1,), dtype=int64)
(1, 1280) tf.Tensor([9], shape=(1,), dtype=int64)
(1, 1280) tf.Tensor([3], shape=(1,), dtype=int64)
(1, 1280) tf.Tensor([9], shape=(1,), dtype=int64)
(1, 1280) tf.Tensor([4], shape=(1,), dtype=int64)
(1, 1280) tf.Tensor([9], shape=(1,), dtype=int64)
(1, 1280) tf.Tensor([2], shape=(1,), dtype=int64)
(1, 1280) tf.Tensor([0], shape=(1,), dtype=int64)
(1, 1280) tf.Tensor([1], shape=(1,), dtype=int64)


In [20]:
hidden_layer_1_size = 200
hidden_layer_2_size = 100
output_size = 10

mlp_model = MLP([
    DenseLayer(out_dim=hidden_layer_1_size, activation=tf.nn.relu),
    DenseLayer(out_dim=hidden_layer_2_size, activation=tf.nn.relu),
    DenseLayer(out_dim=output_size)])

In [21]:
train_losses, train_accs, val_losses, val_accs = train_model(mlp_model, train_data, test_data, 
                                                             loss=log_loss, acc=accuracy,
                                                             optimizer=Adam(), epochs=50)

Epoch: 0
Training loss: 0.227, Training accuracy: 0.584
Validation loss: 0.195, Validation accuracy: 0.682
Epoch: 1
Training loss: 0.176, Training accuracy: 0.637
Validation loss: 0.178, Validation accuracy: 0.659
Epoch: 2
Training loss: 0.152, Training accuracy: 0.684
Validation loss: 0.173, Validation accuracy: 0.624
Epoch: 3
Training loss: 0.137, Training accuracy: 0.705
Validation loss: 0.179, Validation accuracy: 0.600
Epoch: 4
Training loss: 0.126, Training accuracy: 0.714
Validation loss: 0.168, Validation accuracy: 0.635
Epoch: 5
Training loss: 0.115, Training accuracy: 0.737
Validation loss: 0.160, Validation accuracy: 0.659
Epoch: 6
Training loss: 0.107, Training accuracy: 0.752
Validation loss: 0.162, Validation accuracy: 0.671
Epoch: 7
Training loss: 0.098, Training accuracy: 0.773
Validation loss: 0.160, Validation accuracy: 0.706
Epoch: 8
Training loss: 0.089, Training accuracy: 0.808
Validation loss: 0.155, Validation accuracy: 0.694
Epoch: 9
Training loss: 0.084, Traini