# Active Learning for Bioacoustics


In [1]:
import polars as pl
import numpy as np
import librosa

from pathlib import Path
#from typing import List, Dict, Any, Tuple
import soundfile as sf
import tensorflow as tf
import tensorflow_hub as hub

from modules import utilities as u
from modules import config as cfg
from modules import database as db
from modules import display as dis



2025-03-14 15:31:54.851190: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-14 15:31:54.858862: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1741991514.867675  372174 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1741991514.870218  372174 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1741991514.877227  372174 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [2]:
# Reload the project helper modules (`u` and `db`) so that edits made to their
# source files take effect in the running kernel without a restart.
# NOTE: the `importlib` import here is also what a later cell relies on when it
# reloads the display module.
import importlib
importlib.reload(u)
importlib.reload(db)

<module 'modules.database' from '/home/matt/pCloudDrive/PROJECTS/Bioacoustic_Active_Learning/modules/database.py'>

In [3]:
# Set up the GPUs so they don't take over my system: switch every GPU to
# on-demand memory growth.
gpus = tf.config.list_physical_devices("GPU")

# Report each physical GPU that TensorFlow can see.
for gpu in gpus:
    print("Name:", gpu.name, "  Type:", gpu.device_type)

if len(gpus) > 0:
    try:
        # The memory-growth setting currently has to be identical on all GPUs,
        # so it is applied to every device in turn.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.list_logical_devices("GPU")
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as err:
        # Raised when the GPUs were already initialized; memory growth can
        # only be set before that happens. Report the error and carry on.
        print(err)

Name: /physical_device:GPU:0   Type: GPU
Name: /physical_device:GPU:1   Type: GPU
2 Physical GPUs, 2 Logical GPUs


I0000 00:00:1741991654.957254  372174 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 20525 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4090, pci bus id: 0000:01:00.0, compute capability: 8.9
I0000 00:00:1741991654.957573  372174 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 22272 MB memory:  -> device: 1, name: NVIDIA GeForce RTX 4090, pci bus id: 0000:05:00.0, compute capability: 8.9


In [4]:
# Load the Perch model through tensorflow_hub. The model version is taken
# from cfg.PERCH_V rather than being fixed in this cell.
e_model = hub.load(
    "https://www.kaggle.com/models/google/bird-vocalization-classifier/"
    "frameworks/TensorFlow2/variations/bird-vocalization-classifier/"
    f"versions/{cfg.PERCH_V}"
)

In [5]:
# Load the classifier model from its saved Keras checkpoint file.
# (Plain string literal: the path has no placeholders, so no f-string is needed.)
classifier_model = tf.keras.models.load_model("./checkpoints/BCE.keras")

In [6]:
# Directory containing the audio files you'd like to perform active learning over.
audio_to_label_path = "./data/"

# Switch: when True, the database contents are loaded from the parquet file
# below instead of being left as freshly instantiated.
read_from_file = False

# Instantiate the audio database and, if requested, load its saved contents.
audio_db = db.Audio_DB()
if read_from_file:
    audio_db.load_db("audio_embeddings.parquet")


In [7]:
# Embed the data or load embeddings

# Collect every .wav file below the target directory (recursive search).
# The list is sorted because glob() yields entries in a filesystem-dependent
# order; sorting makes the order of the embeddings, and of everything derived
# from them, the same on every run.
files = sorted(str(wav_path) for wav_path in Path(audio_to_label_path).glob("**/*.wav"))

if read_from_file:
    # TODO: load previously saved embeddings here.
    # NOTE(review): on this path `embeddings` is never assigned in this cell,
    # so a later cell that reads it will fail with a NameError unless it is
    # defined elsewhere.
    pass
else:
    # One result per audio file, in the same order as `files`.
    embeddings = [u.load_and_preprocess(wav_path, e_model) for wav_path in files]



I0000 00:00:1741991752.908480  372174 service.cc:152] XLA service 0x47947860 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1741991752.908498  372174 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 4090, Compute Capability 8.9
I0000 00:00:1741991752.908500  372174 service.cc:160]   StreamExecutor device (1): NVIDIA GeForce RTX 4090, Compute Capability 8.9
2025-03-14 15:35:53.038218: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-03-14 15:35:53.044768: W tensorflow/compiler/tf2xla/kernels/assert_op.cc:39] Ignoring Assert operator jax2tf_infer_fn_/assert_equal_1/Assert/AssertGuard/Assert
I0000 00:00:1741991753.433433  372174 cuda_dnn.cc:529] Loaded cuDNN version 90300
















I0000 00:00:1741991759.569287  372174 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once fo

## Populate the database and score the clips

In [9]:
# Populate the database: one row per embedding row of every audio file, with
# consecutive, non-overlapping clip windows of `clip_len_sec` seconds.
clip_len_sec = 5

for file_path, embedding in zip(files, embeddings):
    # These values are the same for every clip of a file, so compute them once
    # per file instead of once per clip.
    audio = u.load_audio(file_path)
    duration_sec = len(audio) / cfg.SR
    # Path.stem drops only the final extension and handles any path separator,
    # so a name such as "site.A.wav" is stored as "site.A" rather than "site".
    file_name = Path(file_path).stem

    for clip_idx in range(embedding.shape[0]):
        clip_start = clip_idx * clip_len_sec
        audio_db.add_clip_row(
            file_name=file_name,
            file_path=file_path,
            duration_sec=duration_sec,
            clip_start=clip_start,
            clip_end=clip_start + clip_len_sec,
            sampling_rate=cfg.SR,
        )

# Run the classifier over the embeddings and store its predictions as scores
# in the database.
preds = u.get_classifier_predictions(embeddings, classifier_model)
audio_db.populate_scores(preds)

In [10]:

# Preview the first rows of the database table as a sanity check on the
# columns and scores that were just populated.
audio_db.df.head()

file_name,file_path,duration_sec,clip_start,clip_end,sampling_rate,score,annotation,created_at
str,str,f32,f32,f32,i32,f32,i32,datetime[μs]
"""2024-07-10_07_31""","""data/2024-07-10_07_31.wav""",61.21653,0.0,5.0,32000,9.7632e-17,4,2025-03-14 15:36:51.168604
"""2024-07-10_07_31""","""data/2024-07-10_07_31.wav""",61.21653,5.0,10.0,32000,0.000183,4,2025-03-14 15:36:51.172510
"""2024-07-10_07_31""","""data/2024-07-10_07_31.wav""",61.21653,10.0,15.0,32000,1.0,4,2025-03-14 15:36:51.172783
"""2024-07-10_07_31""","""data/2024-07-10_07_31.wav""",61.21653,15.0,20.0,32000,2.3387e-15,4,2025-03-14 15:36:51.172954
"""2024-07-10_07_31""","""data/2024-07-10_07_31.wav""",61.21653,20.0,25.0,32000,6.2512e-16,4,2025-03-14 15:36:51.173130


In [11]:
# Re-bind and reload the display module so that edits to its source file take
# effect in the running kernel.
# NOTE: relies on `importlib` having been imported by an earlier cell.
from modules import display as dis
importlib.reload(dis)

<module 'modules.display' from '/home/matt/pCloudDrive/PROJECTS/Bioacoustic_Active_Learning/modules/display.py'>

In [12]:
# Open the annotation widget for the clips held in the audio database.
dis.annotate(audio_db)

VBox(children=(HBox(children=(HTML(value="<h2 style='margin-bottom: 5px;'>Audio Clip Annotation Tool</h2>"), H…

In [36]:
# Scratch alias for the database table. This binds a second name to the same
# object; it does not copy it.
test = audio_db.df

In [13]:
# Export the database table to "output.csv" (path relative to the current
# working directory).
audio_db.df.write_csv("output.csv")