# Agile Modeling for Bioacoustics.

This notebook provides a workflow for creating custom classifiers for target signals, by first **searching** for training data, and then engaging in an **active learning** loop.

We assume that embeddings have been pre-computed using `embed.ipynb`.

## Configuration and Imports.

In [None]:
#@title Installation. { vertical-output: true }
#@markdown Run this notebook in Google Colab by following [this link](https://colab.research.google.com/github/google-research/perch/blob/main/agile_modeling.ipynb).
#@markdown
#@markdown Run this cell to install the project dependencies.
# Installs perch straight from its GitHub repository. No branch, tag or commit
# is specified, so the installed version is not pinned and may change between
# runs. `%pip` (rather than `!pip`) targets the environment of the running kernel.
%pip install git+https://github.com/google-research/perch.git


In [None]:
 #@title Imports. { vertical-output: true }

from etils import epath

from chirp.inference.notebook_helpers.agile_modeling_helpers import AgileModelingConfig, AgileModeling


In [None]:
#@title Basic Configuration. { vertical-output: true }

# Base directory for the project. It is passed to the config below and is also
# used to derive the labeled-data and custom-classifier paths.
working_dir = '/tmp/agile' #@param {type:'string'}

# Collect all settings in a single AgileModelingConfig; the "Load Project State
# and Models" cell hands this object to AgileModeling.
config = AgileModelingConfig(

  #@markdown Choose what data to work with.
  #@markdown * For local data (most cases), choose 'filesystem'.
  #@markdown * For Australian Acoustic Observatory, select 'a2o'.
  #@markdown This will cause many options (like model_choice) to be overridden.
  #@markdown Note that you will need an Authentication Token from:
  #@markdown https://data.acousticobservatory.org/my_account
  data_source = 'filesystem', #@param['filesystem', 'a2o']
  # Token for the 'a2o' data source (see the note above). Avoid saving or
  # sharing the notebook with a real token filled in.
  a2o_auth_token = '', #@param {type:'string'}

  #@markdown Define the model: Usually perch or birdnet.
  model_choice = 'perch',  #@param {type:'string'}
  #@markdown Set the base directory for the project.
  working_dir = working_dir,  

  #@markdown Set the embedding and labeled data directories.
  # Both paths are subdirectories of `working_dir`.
  labeled_data_path = epath.Path(working_dir) / 'labeled',
  custom_classifier_path = epath.Path(working_dir) / 'custom_classifier',

  #@markdown The embeddings_path should be detected automatically, but can be
  #@markdown overridden.
  # Left empty here, i.e. no override is given.
  embeddings_path = '',

  #@markdown OPTIONAL: Set up separation model.
  # NOTE(review): presumably an empty `separation_model_path` means no
  # separation model is used — confirm against AgileModelingConfig.
  separation_model_key = 'separator_model_tf',  #@param {type:'string'}
  separation_model_path = ''  #@param {type:'string'}

)


In [None]:
#@title Load Project State and Models. { vertical-output: true }

# This initializes the project state based on the configuration
# associated with the embeddings.
# Every later cell operates on this object. Note the variable is spelled
# `agile_modelling` (double "l"), unlike the `AgileModeling` class name.
agile_modelling = AgileModeling(config)

## Search Embeddings

### Query Creation

In [None]:
#@title Load query audio. { vertical-output: true }

#@markdown You may specify:
#@markdown * an audio filepath (like `/home/me/audio/example.wav`),
#@markdown * a Xeno-Canto id (like `xc12345`), or
#@markdown * an audio file URL (like
#@markdown https://upload.wikimedia.org/wikipedia/commons/7/7c/Turdus_merula_2.ogg).
audio_path = 'xc692557'  #@param
#@markdown Choose the start time for the audio window within the file.
#@markdown We will focus on the model's `window_size_s` seconds of audio,
#@markdown starting from `start_s` seconds into the file.
start_s = 0  #@param

# NOTE(review): `start_s` is set above but is not passed to `load_query_audio`
# and is not referenced anywhere else in this notebook, so changing it appears
# to have no effect. Confirm whether the helper should receive it.
agile_modelling.load_query_audio(audio_path)

In [None]:
#@title Select the query channel. { vertical-output: true }

#@markdown Choose a name for the class.
# `query_label` is not used in this cell; the "Display results" cell passes it
# to `display_search_results`.
query_label = 'my_class'  #@param
#@markdown If you have applied separation, choose a channel.
#@markdown Ignored if no separation model is being used.
query_channel = 0  #@param

# `search_query` is the input to `do_search` in the "Run Top-K Search" cell.
search_query = agile_modelling.get_query_embedding(query_channel = query_channel)


### Execute Search

In [None]:
#@title Run Top-K Search. { vertical-output: true }

#@markdown Number of search results to capture.
top_k = 50  #@param

#@markdown Target distance for search results.
#@markdown This lets us try to hone in on a 'classifier boundary' instead of
#@markdown just looking at the closest matches.
#@markdown Set to 'None' for raw 'best results' search.
target_score = None  #@param

metric = 'mip'  #@param['euclidean', 'mip', 'cosine']

random_sample = False  #@param

# Search using the query embedding from the previous cell and the form values
# chosen above.
agile_modelling.do_search(
    search_query,
    top_k=top_k,
    target_score=target_score,
    metric=metric,
    random_sample=random_sample,
)

# Plot the histogram for this search, passing the same target score.
agile_modelling.plot_histogram_of_distances(target_score=target_score)

In [None]:
#@title Display results. { vertical-output: true }

# Page size handed to `display_search_results`; `query_label` comes from the
# "Select the query channel" cell.
samples_per_page = 25
agile_modelling.display_search_results(query_label, samples_per_page=samples_per_page)

In [None]:
#@title Write annotated examples. { vertical-output: true }

# Run after reviewing the displayed search results.
# NOTE(review): presumably writes the validated examples under
# `labeled_data_path` from the config — confirm against the helper.
agile_modelling.save_validated_results()

## Active Learning for a Target Class

In [None]:
# @title Load+Embed the Labeled Dataset. { vertical-output: true }

#@markdown Time-pooling strategy for audio longer than the model's window size.
# NOTE(review): only 'mean' is shown here; any other accepted values are
# defined by the helper — confirm before changing.
time_pooling = 'mean'  # @param

# This will load the labeled data from a folder of folders and generate embeddings for it.
agile_modelling.embed_labelled_set(time_pooling=time_pooling)

In [None]:
#@title Train small model over embeddings. { vertical-output: true }

# `metrics` holds whatever `train_custom_classifier` returns; it is not used
# again in this notebook.
metrics = agile_modelling.train_custom_classifier(
  #@markdown Number of random training examples to choose from each class.
  #@markdown Set exactly one of `train_ratio` and `train_examples_per_class`.
  train_ratio = 0.9,  #@param
  train_examples_per_class = None,  #@param

  #@markdown Number of random re-trainings. Allows judging model stability.
  num_seeds = 3,  #@param

  # Classifier training hyperparams.
  # These should be good defaults.
  batch_size = 32,
  num_epochs = 128,
  # NOTE(review): presumably -1 means "no hidden layer" — confirm against the helper.
  num_hiddens = -1,
  learning_rate = 1e-3,
)

In [None]:
#@title Run model on target unlabeled data. { vertical-output: true }

#@markdown Choose the target class to work with.
target_class = 'my_class'  #@param
#@markdown Choose a target logit; will display results close to the target.
#@markdown Set to None to get the highest-logit examples.
target_logit = 0.0  #@param
#@markdown Number of results to display.
num_results = 50  #@param

# Search with the trained classifier, using the form values chosen above.
agile_modelling.do_search_with_model(
    target_class=target_class,
    target_logit=target_logit,
    num_results=num_results,
)

# Plot the histogram for this search; the target logit is passed as the
# target score.
agile_modelling.plot_histogram_of_distances(target_score=target_logit)

In [None]:
#@title Display results for the target label. { vertical-output: true }

# Uses `target_class` from the previous cell. Unlike the earlier "Display
# results" cell, `samples_per_page` is not passed here.
agile_modelling.display_search_results(target_class)

In [None]:
#@title Add selected results to the labeled data. { vertical-output: true }

# Same call as in the "Write annotated examples" cell, run here after reviewing
# the classifier-based results.
# NOTE(review): to include the new labels, presumably re-run the "Load+Embed"
# and training cells afterwards — confirm the intended loop.
agile_modelling.save_validated_results()

In [None]:
#@title Save the Custom Classifier. { vertical-output: true }

# NOTE(review): presumably written under `custom_classifier_path` from the
# config — confirm against the helper.
agile_modelling.save_model()

## Inference

In [None]:
#@title Write classifier inference CSV. { vertical-output: true }

#@markdown This cell writes detections (locations of audio windows where
#@markdown the logit was greater than a threshold) to a CSV file.

output_filepath = epath.Path(config.working_dir) / 'inference.csv'  #@param

#@markdown Set the default detection thresholds, used for all classes.
#@markdown To set per-class detection thresholds, modify the code below.
#@markdown Keep in mind that thresholds are on the logit scale, so 0.0
#@markdown corresponds to a 50% model confidence.
default_threshold = 0.0  #@param

# Add any per-class thresholds here.
class_thresholds = {'my_class': 1.0}  #@param

#@markdown Classes to ignore when counting detections.
exclude_classes = ['unknown']  #@param

#@markdown The `include_classes` list is ignored if empty.
#@markdown If non-empty, only scores for these classes will be written.
include_classes = []  #@param

# Run inference with the output path, thresholds and class filters set above.
agile_modelling.run_inference(
    output_filepath=output_filepath,
    default_threshold=default_threshold,
    class_thresholds=class_thresholds,
    include_classes=include_classes,
    exclude_classes=exclude_classes,
)
