# Create a process to download audio from outside of the habitat of the target species
- this can be done by adding an option to build.py
- Currently all of the audio comes from the OSFL habitat. This has the advantage that the main difference between the present and absent classes is the bird call itself, rather than the presence or absence of environmental noise.
- In order to get a fairer picture of how the model will perform in production, it needs to be tested on audio from outside the habitat of the target species to see how many false positives get thrown up. 
- It might also be beneficial to train the model on these clips to give a wider set of examples from the negative class. 

In [1]:
# imports
from pathlib import Path
import sys
import pandas as pd
# BASE_PATH is the directory two levels above the current working directory
# (presumably the repository root — confirm). It is appended to sys.path so
# that the project's `src` package can be imported below.
BASE_PATH = Path.cwd().parent.parent
sys.path.append(str(BASE_PATH))
# NOTE(review): these project imports appear to rely on the sys.path change
# above, so keep them after it.
from src.utils import display_all, plot_locations, keep_cols
import src.data
from src.data import build

  from tqdm.autonotebook import tqdm


# load the cleaned metadata


In [2]:
# Derive the data folders from BASE_PATH (set in the imports cell) instead of
# recomputing Path.cwd().parent.parent for each path.
data_path = BASE_PATH / "data"
train_df_path = data_path / "interim"
# Load the cleaned metadata table and preview its first two rows.
df_train: pd.DataFrame = pd.read_pickle(train_df_path / "cleaned_metadata.pkl")
df_train.head(2)

Unnamed: 0,organization,project,project_id,location,location_id,recording_date_time,recording_id,task_method,task_id,aru_task_status,...,spectrogram_url,clip_url,sensorId,tasks,status,recording_url,latitude,longitude,location_buffer_m,file_type
1623,BU,Alberta Archetypes,1501,P-E0-1-10,308678,2022-06-05 06:51:00,416962,no_restrictions,596169,Transcribed,...,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,ARU,357,Active,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,52.64404,-115.14051,,flac
1752,BU,Amplitude Quality Testing 2020,293,AM-403-SE2,36043,2017-06-15 04:46:00,92051,no_restrictions,87956,Transcribed,...,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,ARU,174,Published - Private,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,54.607774,-110.681271,,flac


# Download some recordings from other habitats and save them to disk

In [29]:
# Ask the project helper for recordings from other habitats, passing the
# cleaned metadata and the target species code "OSFL".
# NOTE(review): the last argument (1) looks like the number of new clips to
# download (the cell output reports "downloading 1 clips") — confirm against
# build.other_habitat_df.
other_habitats = build.other_habitat_df(df_train, "OSFL", 1)

47223 not downloaded
downloading 1 clips
https://wildtrax-aru.s3.us-west-2.amazonaws.com/6998d3be-f4bd-49ae-b8f4-c0133c64b1c2/371072.flac
skipped 0 previously downloaded files


In [30]:
# Preview the first rows of the table returned by build.other_habitat_df.
other_habitats.head()

Unnamed: 0_level_0,recording_url,task_method,project,detection_time,tag_duration,latitude,longitude,file_type,media_url,individual_order,location_id,filename
relative_path,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
../../data/raw/recordings/OSFL_other_habitats/recording-3569.mp3,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,1SPM,Boreal Wetland Community Monitoring,"[0.0, 2.81, 9.83, 60.01, 60.81, 64.56, 120.04,...","[5.01, 0.39, 0.95, 5.0, 0.3, 1.16, 4.96, 1.05,...",57.45649,-111.183394,mp3,https://portal.wildtrax.ca/home/aru-tasks/reco...,1.0,333,recording-3569.mp3
../../data/raw/recordings/OSFL_other_habitats/recording-3839.flac,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,1SPM,Boreal Wetland Community Monitoring,"[30.19, 36.98, 117.08, 176.43]","[1.99, 1.79, 2.75, 3.54]",57.344772,-111.338258,flac,https://portal.wildtrax.ca/home/aru-tasks/reco...,2.0,372,recording-3839.flac
../../data/raw/recordings/OSFL_other_habitats/recording-3843.mp3,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,1SPM,Boreal Wetland Community Monitoring,"[0.03, 13.28, 60.01, 74.79, 120.01, 135.83]","[5.01, 5.7, 5.0, 5.31, 5.04, 5.8]",57.467946,-111.410932,mp3,https://portal.wildtrax.ca/home/aru-tasks/reco...,1.0,373,recording-3843.mp3
../../data/raw/recordings/OSFL_other_habitats/recording-3869.flac,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,1SPM,Boreal Wetland Community Monitoring,"[0.03, 60.03, 120.06]","[1.49, 1.69, 1.36]",57.339787,-111.347997,flac,https://portal.wildtrax.ca/home/aru-tasks/reco...,1.0,380,recording-3869.flac
../../data/raw/recordings/OSFL_other_habitats/recording-4232.mp3,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,1SPM,Boreal Wetland Community Monitoring,"[0.76, 2.58, 3.41, 4.45, 40.25, 66.88, 72.08, ...","[1.58, 1.53, 1.74, 1.98, 1.28, 1.58, 1.35, 1.9...",57.45649,-111.183394,mp3,https://portal.wildtrax.ca/home/aru-tasks/reco...,2.0,333,recording-4232.mp3


In [31]:
# Label every recording as not containing the target species and drop all
# other columns, leaving a single "present" column of 0.0 on the same index.
other_habitats = other_habitats.assign(present=0.0)[["present"]]

# Split the recordings into 3 s clips with 50% overlap and label them "present" = 0

In [38]:
import opensoundscape as opso
from opensoundscape import SpectrogramPreprocessor

# Wrap the labelled recordings in a dataset that splits each file into clips:
# the preprocessor is built with a 3.0 clip duration and consecutive clips
# overlap by half their length.
other_habitats_ds = opso.AudioSplittingDataset(
    other_habitats,
    preprocessor=SpectrogramPreprocessor(3.0),
    overlap_fraction=0.5,
)

  clips[label_df.columns] = label_df.loc[f]
  clips[label_df.columns] = label_df.loc[f]
  clips[label_df.columns] = label_df.loc[f]
  clips[label_df.columns] = label_df.loc[f]
  clips[label_df.columns] = label_df.loc[f]
  clips[label_df.columns] = label_df.loc[f]
  clips[label_df.columns] = label_df.loc[f]
  clips[label_df.columns] = label_df.loc[f]
  clips[label_df.columns] = label_df.loc[f]
  clips[label_df.columns] = label_df.loc[f]
  clips[label_df.columns] = label_df.loc[f]
  clips[label_df.columns] = label_df.loc[f]
  clips[label_df.columns] = label_df.loc[f]
  clips[label_df.columns] = label_df.loc[f]
  clips[label_df.columns] = label_df.loc[f]
  clips[label_df.columns] = label_df.loc[f]
  clips[label_df.columns] = label_df.loc[f]
  clips[label_df.columns] = label_df.loc[f]
  clips[label_df.columns] = label_df.loc[f]
  clips[label_df.columns] = label_df.loc[f]
  clips[label_df.columns] = label_df.loc[f]
  clips[label_df.columns] = label_df.loc[f]
  clips[label_df.columns] = labe

Save the labelled dataset to disk ready for cleaning. 

In [40]:
# Write the dataset's label table to the interim data folder.
# The target folder is created first because to_pickle does not create
# missing directories and would otherwise fail on a fresh checkout.
label_df_path = data_path / "interim" / "train_and_valid_set" / "other_habitats_label_df.pkl"
label_df_path.parent.mkdir(parents=True, exist_ok=True)
other_habitats_ds.label_df.to_pickle(label_df_path)