# Analysis of Bioacoustic Data

This notebook provides tools for analyzing data using a custom classifier (developed with `agile_modeling.ipynb`).

In [None]:
#@title Installation. { vertical-output: true }
#@markdown You will likely need to work with `embed_audio.ipynb` and/or
#@markdown `agile_modeling.ipynb` before working with this notebook.
#@markdown
#@markdown Run this notebook in Google Colab by following
#@markdown [this link](https://colab.research.google.com/github/google-research/perch/blob/main/agile_modeling.ipynb).
#@markdown
#@markdown Run this cell to install the project dependencies.
# NOTE(review): the Colab link above opens `agile_modeling.ipynb`, not this
# analysis notebook -- confirm the intended target before publishing.
# The install below is unpinned (no tag or commit after the repository URL),
# so it fetches whatever the repository's default branch currently holds.
# `%pip` (rather than `!pip`) installs into the running kernel's environment.
%pip install git+https://github.com/google-research/perch.git


In [None]:
#@title Imports. { vertical-output: true }

from etils import epath
import numpy as np

from chirp.inference.search import display
from chirp.inference.search import search
from chirp.inference.notebook_helpers.agile_modeling_helpers import AgileModelingConfig, AgileModeling, estimate_call_density, estimate_roc_auc


In [None]:
#@title Basic Configuration. { vertical-output: true }

# Base directory for the project. Every path below, and the `working_dir`
# stored on the config, is derived from this single form value.
working_dir = '/tmp/agile' #@param {type:'string'}

config = AgileModelingConfig(

  data_source = 'filesystem',
  a2o_auth_token = '',

  #@markdown Define the model: Usually perch or birdnet.
  model_choice = 'perch',
  #@markdown Set the base directory for the project.
  # Pass the form value rather than a second hard-coded literal, so that
  # `config.working_dir` always agrees with the labeled-data and classifier
  # paths built from the same variable just below.
  working_dir = working_dir,

  # Set the embedding and labeled data directories.
  labeled_data_path = epath.Path(working_dir) / 'labeled',
  custom_classifier_path = epath.Path(working_dir) / 'custom_classifier',

  # The embeddings_path should be detected automatically, but can be overridden.
  #embeddings_path = ''

)


In [None]:
#@title Load Existing Project State and Models. { vertical-output: true }

# Build the AgileModeling helper from the configuration defined above; the
# inference, validation and display cells all go through this one object.
agile_modelling = AgileModeling(
    config
)

In [None]:
#@title Write classifier inference CSV. { vertical-output: true }

#@markdown This cell writes detections (locations of audio windows where
#@markdown the logit was greater than a threshold) to a CSV file.

# The CSV is placed inside the configured working directory.
output_filepath = epath.Path(config.working_dir) / 'inference.csv'  #@param

#@markdown Set the default detection thresholds, used for all classes.
#@markdown To set per-class detection thresholds, modify the code below.
#@markdown Keep in mind that thresholds are on the logit scale, so 0.0
#@markdown corresponds to a 50% model confidence.
default_threshold = 0.0  #@param

# Per-class overrides of the default threshold, keyed by class name.
class_thresholds = {'my_class': 1.0}  #@param

#@markdown Classes to ignore when counting detections.
exclude_classes = ['unknown']  #@param

#@markdown The `include_classes` list is ignored if empty.
#@markdown If non-empty, only scores for these classes will be written.
include_classes = []  #@param

# Hand every setting above to the helper by keyword.
agile_modelling.run_inference(
    output_filepath=output_filepath,
    default_threshold=default_threshold,
    class_thresholds=class_thresholds,
    include_classes=include_classes,
    exclude_classes=exclude_classes,
)

## Call Density Estimation

See 'All Thresholds Barred': https://arxiv.org/abs/2402.15360

In [None]:
#@title Validation and Call Density. { vertical-output: true }

# Class whose call density is being estimated.
target_class = 'my_class'  #@param {type:'string'}

#@markdown Bin bounds for validation. Should be an ordered list, beginning with
#@markdown 0.0 and ending with 1.0.
bounds = [0.0, 0.9, 0.99, 0.999, 1.0]  #@param

#@markdown Number of validation samples per bin.
samples_per_bin = 25  #@param

# Keyword arguments in a call take the form `name=value`; an annotation such
# as `target_class: str = ...` is only legal in a function signature and makes
# the whole cell fail to parse.
# The result is indexed with 'combined_results' by the display cell and is
# passed on to the validation-log and estimation cells.
binned_validation_examples = agile_modelling.prepare_call_density_estimation(
    target_class=target_class,
    bounds=bounds,
    samples_per_bin=samples_per_bin,
)


In [None]:
#@title Display Results. { vertical-output: true }

# `display` here is the `chirp.inference.search.display` module brought in by
# the Imports cell.

# The sampled validation examples from every bin, gathered for labeling.
combined_results = binned_validation_examples['combined_results']

samples_per_page = 40  #@param
# np.ceil returns a float; convert it so the page count is a plain integer.
num_pages = int(np.ceil(len(combined_results) / samples_per_page))
page_state = display.PageState(num_pages)

# Show the examples page by page with mutually exclusive label checkboxes:
# the target class, its negation, or 'unsure'.
display.display_paged_results(
    search.TopKSearchResults(len(combined_results), combined_results),
    page_state, samples_per_page,
    project_state=agile_modelling.project_state,
    embedding_sample_rate=agile_modelling.project_state.embedding_model.sample_rate,
    exclusive_labels=True,
    checkbox_labels=[target_class, f'not {target_class}', 'unsure'],
)

In [None]:
#@title Collate results and write validation log. { vertical-output: true }

# Pass the binned validation examples to the helper and keep the returned log;
# the call-density and ROC-AUC cells both take it as their second argument.
validation_log = agile_modelling.write_validation_log(
    binned_validation_examples
)

In [None]:
#@title Estimate Call Density. { vertical-output: true }

# Left as the final expression so the notebook displays whatever the helper
# returns.
estimate_call_density(
    binned_validation_examples,
    validation_log,
)


In [None]:
#@title Naive Estimation of ROC-AUC for target class. { vertical-output: true }
#@markdown Computes ROC-AUC from the validation logs, with bin weighting.
#@markdown ROC-AUC is the overall probability that a random positive example
#@markdown has a higher classifier score than a random negative example.

# Takes the same two inputs as the call-density estimate; left as the final
# expression so the notebook displays whatever the helper returns.
estimate_roc_auc(
    binned_validation_examples,
    validation_log,
)