# Active Learning for Bioacoustics


The anaconda python environment requirements are saved in './requirements.text'. You can use anaconda and this requirements file to create a python environment with the appropriate libraries installed. To do this open a terminal/command prompt in the root directory of this folder and execute the following command.  

conda create --name active_learning --file requirements.txt

Then activate the environment using the following command. 

conda activate active_learning

In [1]:
import os
os.environ["BACKEND"] = "PERCH"
#os.environ["BACKEND"] = "BirdNET_2.4"
#os.environ["BACKEND"] = "PNWCnet"
BACKEND = os.environ.get("BACKEND", "PERCH")

import polars as pl
import numpy as np
import librosa
import pickle

#from typing import List, Dict, Any, Tuple
import soundfile as sf
import tensorflow as tf
import tensorflow_hub as hub

from pathlib import Path
from sklearn.model_selection import train_test_split

from modules import utilities as u
from modules import config as cfg
from modules import classifier as tc
from modules import database as db
from modules import display as dis


2025-05-28 14:27:51.116826: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-28 14:27:51.202308: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748467671.234139  110818 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748467671.245638  110818 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1748467671.320382  110818 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [2]:
# Global Variables 
# Target sound name
sound_name = "NOWA_song"
CLASS_MAP = {
    "NOWA_song": 0,
}
#sound_name = "HFS"
#CLASS_MAP = {
#    "HFS": 0,
#    "NFS": 1,
#    "SFS": 2
#}
# Target path with audio files you'd like to perform active learning over
path_to_target_folder = "./data/wildtrax_wav/"
embeddings_filename = f"{path_to_target_folder}{sound_name}_AL_embeddings.pkl"
Audio_DB_filename = f"{path_to_target_folder}{sound_name}_AL_database.parquet"

In [20]:
import importlib
importlib.reload(u)
importlib.reload(cfg)
importlib.reload(tc)
importlib.reload(db)
importlib.reload(dis)

<module 'modules.display' from '/home/matt/pCloudDrive/PROJECTS/Bioacoustic_Active_Learning/modules/display.py'>

In [3]:
# Setup GPU's so they don't take over my system
gpus = tf.config.list_physical_devices("GPU")
for gpu in gpus:
  print("Name:", gpu.name, "  Type:", gpu.device_type)
if gpus:
  try:
      # Currently, memory growth needs to be the same across GPUs
      for gpu in gpus:
          tf.config.experimental.set_memory_growth(gpu, True)
      logical_gpus = tf.config.list_logical_devices("GPU")
      print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
      # Memory growth must be set before GPUs have been initialized
      print(e) 

Name: /physical_device:GPU:0   Type: GPU
Name: /physical_device:GPU:1   Type: GPU
2 Physical GPUs, 2 Logical GPUs


I0000 00:00:1748467758.911124  110818 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 19939 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4090, pci bus id: 0000:01:00.0, compute capability: 8.9
I0000 00:00:1748467758.911472  110818 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 22272 MB memory:  -> device: 1, name: NVIDIA GeForce RTX 4090, pci bus id: 0000:05:00.0, compute capability: 8.9


In [None]:
# Loading Perch version 8 from Tensorflow hub
#e_model = hub.load(
#    f"https://www.kaggle.com/models/google/bird-vocalization-classifier/frameworks/TensorFlow2/variations/bird-vocalization-classifier/versions/{cfg.PERCH_V}"
#)
#e_model = u.load_embedding_model()

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [4]:
# Load Classifier
#classifier_model = tf.keras.models.load_model(f"./checkpoints/BCE.keras")
#classifier_model = tf.keras.models.load_model(f"./checkpoints/species_level.keras")
classifier_model = tf.keras.models.load_model(f"./checkpoints/BCE_reviewed_w_training_data.keras")
classifier_model = tf.keras.models.load_model(f"./checkpoints/BCE_AL_3.keras")

In [9]:
# Instantiate the audio database and read from disk if present
audio_db = db.Audio_DB()
if Path(Audio_DB_filename).is_file():
  audio_db.load_db(Audio_DB_filename)
files = Path(f"{path_to_target_folder}/audio/").glob("**/*.wav")
files = [str(f) for f in files]


In [6]:
# Embed the data or load embeddings
print(BACKEND)
files = Path(f"{path_to_target_folder}/audio/").glob("**/*.wav")
files = [str(f) for f in files]
print(f"{len(files)} files found in the target folder")
if Path(embeddings_filename).is_file():
  with open(embeddings_filename, "rb") as file:
    embeddings = pickle.load(file)
else:
  embeddings = u.load_and_preprocess(files)
with open(embeddings_filename, "wb") as file:
  pickle.dump(embeddings, file)



PERCH
147 files found in the target folder


In [7]:
files

['data/wildtrax_wav/audio/mp3_6714.wav',
 'data/wildtrax_wav/audio/mp3_6723.wav',
 'data/wildtrax_wav/audio/mp3_6736.wav',
 'data/wildtrax_wav/audio/mp3_6738.wav',
 'data/wildtrax_wav/audio/mp3_6743.wav',
 'data/wildtrax_wav/audio/mp3_6746.wav',
 'data/wildtrax_wav/audio/mp3_6767.wav',
 'data/wildtrax_wav/audio/mp3_6831.wav',
 'data/wildtrax_wav/audio/mp3_6833.wav',
 'data/wildtrax_wav/audio/mp3_6834.wav',
 'data/wildtrax_wav/audio/mp3_6851.wav',
 'data/wildtrax_wav/audio/mp3_6852.wav',
 'data/wildtrax_wav/audio/mp3_6854.wav',
 'data/wildtrax_wav/audio/mp3_6859.wav',
 'data/wildtrax_wav/audio/mp3_7114.wav',
 'data/wildtrax_wav/audio/mp3_7115.wav',
 'data/wildtrax_wav/audio/mp3_7117.wav',
 'data/wildtrax_wav/audio/mp3_7123.wav',
 'data/wildtrax_wav/audio/mp3_7124.wav',
 'data/wildtrax_wav/audio/mp3_7174.wav',
 'data/wildtrax_wav/audio/mp3_7175.wav',
 'data/wildtrax_wav/audio/mp3_7178.wav',
 'data/wildtrax_wav/audio/mp3_7208.wav',
 'data/wildtrax_wav/audio/mp3_7244.wav',
 'data/wildtrax_

Build the reference database using the polars package. 

In [8]:
# Populate the database
for i, embedding in enumerate(files): #enumerate(embeddings):
  n_row = 1#embedding.shape[0]
  f = sf.SoundFile(files[i])
  duration_sec = f.frames / f.samplerate
  #audio = u.load_audio(files[i])
  #duration_sec = len(audio)/cfg.TARGET_SR
  clip_start = 0
  file_name = files[i].split("/")[-1].split(".")[0]
  file_path = files[i]
  for n in range(n_row):
    audio_db.add_clip_row(
      file_name = file_name,
      file_path = file_path,
      duration_sec = duration_sec,
      clip_start = clip_start,
      clip_end = clip_start + cfg.WINDOW, 
      sampling_rate=cfg.TARGET_SR,
    )
    clip_start += cfg.WINDOW

#preds = u.get_classifier_predictions(embeddings, classifier_model, CLASS_MAP, sound_name)
#audio_db.populate_scores(preds)

KeyboardInterrupt: 

In [11]:
dis.annotate(audio_db, 
             black_and_white=False, 
             review_mode = "random", bin_target=50)

VBox(children=(HBox(children=(HTML(value="<h3 style='margin:0px;'>Audio Clip Annotation Tool (Random Mode)</h3…

In [16]:
#Save your work
#audio_db.save_db(Audio_DB_filename)
audio_db.export_wav_clips(f"{path_to_target_folder}/annotated/", sound_name)

Found 0 positive clips and 28 negative clips for export.
Exported 10/28 negative clips...
Exported 20/28 negative clips...
Export complete. Successfully exported 0/0 positive clips and 28/28 negative clips.


(0, 28)

In [18]:
files = Path(f"./data/annotated_reviewed_w_train_3/").glob("**/*.wav")
files = [str(f) for f in files]
labels = [u.get_label(CLASS_MAP, file) for file in files]
train_embeddings = u.load_and_preprocess(files)
X_train, X_test, y_train, y_test = train_test_split(train_embeddings, labels, test_size=0.30, random_state=42)

2025-04-01 14:29:51.110343: W tensorflow/compiler/tf2xla/kernels/assert_op.cc:39] Ignoring Assert operator jax2tf_infer_fn_/assert_equal_1/Assert/AssertGuard/Assert





In [21]:
#files = Path(f"./data/phone_recordings/annotated/").glob("**/*.wav")
#files = [str(f) for f in files]
#labels = [u.get_label(CLASS_MAP, file) for file in files]
#embeddings = list(map(lambda f: load_and_preprocess(f, e_model),  files))
#X_train, X_test, y_train, y_test = train_test_split(embeddings, labels, test_size=0.30, random_state=42)

importlib.reload(tc)
classifier_model, train_losses, val_losses, cmaps = tc.fit_w_tape(
    np.array(X_train).squeeze(),
    np.array(y_train),
    np.array(X_test).squeeze(),
    np.array(y_test),
    10000,
    32,
    0.1,
    2,
    "BCE_AL_3",
    True,
)
classifier_model = tf.keras.models.load_model(f"./checkpoints/BCE_AL_3.keras")

Macro cMAP of best fit 0.18625103030811452 obtained on step: 1 val loss: 0.3153334856033325
Macro cMAP of best fit 0.2695747753770999 obtained on step: 2 val loss: 0.36898940801620483
Macro cMAP of best fit 0.39267066968220016 obtained on step: 3 val loss: 0.3797832131385803
Macro cMAP of best fit 0.5426233948049922 obtained on step: 4 val loss: 0.3611080050468445
Macro cMAP of best fit 0.6181850547151513 obtained on step: 5 val loss: 0.319024920463562
Macro cMAP of best fit 0.6577631956251807 obtained on step: 6 val loss: 0.2699712812900543
Macro cMAP of best fit 0.6837178079763275 obtained on step: 7 val loss: 0.23179514706134796
Macro cMAP of best fit 0.7044540714635565 obtained on step: 8 val loss: 0.1980213224887848
Macro cMAP of best fit 0.7249593851067856 obtained on step: 9 val loss: 0.17121650278568268
Macro cMAP of best fit 0.7522910440739158 obtained on step: 10 val loss: 0.14308057725429535
Macro cMAP of best fit 0.7723169324025059 obtained on step: 11 val loss: 0.127795651

In [10]:
preds = u.get_classifier_predictions(embeddings, classifier_model, CLASS_MAP, sound_name)
audio_db.populate_scores(preds)

In [11]:
#Evalutate focal class performance
annotated_examples = audio_db.df.filter(pl.col("annotation") < 3)
print(f"AUC: {tc.get_AUC(annotated_examples.select("annotation"), annotated_examples.select("score"))['individual'][CLASS_MAP[sound_name]]}")
print(f"AP:  {tc.cmap(annotated_examples.select("annotation"), annotated_examples.select("score"), 0)['individual'][CLASS_MAP[sound_name]]}")

AUC: 0.999645390070922
AP:  0.990909090909091


| Classifier |n clips| AUC | Average precision |
| :- | :-: | :-: | :-: |
| original | 978 | 0.767 | 0.192 |




In [26]:
audio_db.df.write_csv("output.csv")

In [10]:
audio_db.df.head()

file_name,file_path,duration_sec,clip_start,clip_end,sampling_rate,score,annotation,created_at
str,str,f32,f32,f32,i32,f32,i32,datetime[μs]
"""mp3_104761""","""data/wildtrax_wav/audio/mp3_10…",180.062042,0.0,5.0,32000,3e-06,4,2025-03-28 11:20:58.113609
"""mp3_104761""","""data/wildtrax_wav/audio/mp3_10…",180.062042,5.0,10.0,32000,2e-06,4,2025-03-28 11:20:58.114834
"""mp3_104761""","""data/wildtrax_wav/audio/mp3_10…",180.062042,10.0,15.0,32000,3.8e-05,4,2025-03-28 11:20:58.115287
"""mp3_104761""","""data/wildtrax_wav/audio/mp3_10…",180.062042,15.0,20.0,32000,0.663639,4,2025-03-28 11:20:58.115522
"""mp3_104761""","""data/wildtrax_wav/audio/mp3_10…",180.062042,20.0,25.0,32000,1e-05,4,2025-03-28 11:20:58.115711


In [12]:
annotated_examples.filter(pl.col("annotation") == 1)

file_name,file_path,duration_sec,clip_start,clip_end,sampling_rate,score,annotation,created_at
str,str,f32,f32,f32,i32,f32,i32,datetime[μs]
"""Gilchrist_1_035_SMA00727_20240…","""data/eval/audio/SD#035 from AR…",3598.0,200.0,205.0,32000,0.820676,1,2025-03-20 09:59:59.813465
"""Gilchrist_1_035_SMA00727_20240…","""data/eval/audio/SD#035 from AR…",3598.0,240.0,245.0,32000,0.997637,1,2025-03-20 09:59:59.902155
"""Gilchrist_1_035_SMA00727_20240…","""data/eval/audio/SD#035 from AR…",3598.0,1180.0,1185.0,32000,0.999903,1,2025-03-20 10:00:01.727441
"""Gilchrist_1_035_SMA00727_20240…","""data/eval/audio/SD#035 from AR…",3598.0,1210.0,1215.0,32000,0.918792,1,2025-03-20 10:00:01.787008
"""Gilchrist_1_035_SMA00727_20240…","""data/eval/audio/SD#035 from AR…",3598.0,3465.0,3470.0,32000,0.999986,1,2025-03-20 10:00:06.435402
"""Gilchrist_1_035_SMA00727_20240…","""data/eval/audio/SD#035 from AR…",3598.0,1255.0,1260.0,32000,0.266532,1,2025-03-20 10:01:05.744925
"""Gilchrist_1_035_SMA00727_20240…","""data/eval/audio/SD#035 from AR…",3598.0,1525.0,1530.0,32000,0.507328,1,2025-03-20 10:01:06.291665
"""Gilchrist_1_035_SMA00727_20240…","""data/eval/audio/SD#035 from AR…",3598.0,1600.0,1605.0,32000,0.998858,1,2025-03-20 10:01:06.443009
"""Gilchrist_1_035_SMA00727_20240…","""data/eval/audio/SD#035 from AR…",3598.0,1615.0,1620.0,32000,0.99983,1,2025-03-20 10:01:06.473564
"""Gilchrist_1_035_SMA00727_20240…","""data/eval/audio/SD#035 from AR…",3598.0,1635.0,1640.0,32000,0.996057,1,2025-03-20 10:01:06.514035
