# This notebook aims to identify sections of audio that contain the target vocalization, and sections that do not.

__Proxies for target vocalization:__
- Audio from a section of a recording containing the target vocalization, taken from within the timestamp of the tag.
- Audio from a recording with tagging method 'no restrictions' AND taken from within tag timestamp of the target species. 

__Proxies for NOT target vocalization:__
- Audio from a recording with tagging method '1SPM' AND there is no target species tag in the recording
- Audio from a recording with tagging method '1SPM' AND there is a target species tag in the recording AND the sample is taken from before the start of the target species tag.
- Audio from a recording with tagging method 'no restrictions' AND taken from in between tags of the target species.

In [1]:
import sys
from pathlib import Path

# Repository root, taken as two directory levels above the current working
# directory (the notebook is expected to run from a folder two levels below
# the root).
BASE_PATH = Path.cwd().parents[1]

# Make the project's local modules importable by the import statements below.
sys.path.extend(
    [
        str(BASE_PATH / "src" / "data"),  # for clean_csv
        str(BASE_PATH / "src"),  # for utils
    ]
)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import clean_csv
import train_test_split
from utils import *

In [2]:
# Load IPython's autoreload extension and select mode 2, which re-imports all
# loaded modules before each cell runs, so edits to imported modules are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [16]:
# Read the pickled training split from data/processed/train_set.
# NOTE(review): unpickling can execute arbitrary code, so this file must come
# from a trusted source.
df = pd.read_pickle(BASE_PATH.joinpath("data", "processed", "train_set", "train_set.pkl"))

# Reduced view of the full frame. NOTE(review): `keep_cols` is not defined in
# the visible cells; presumably it is a list of column labels supplied by
# `from utils import *` -- confirm.
df_lite = df[keep_cols]

In [24]:
# Number of rows per tagging method; dropna=False keeps rows with a missing
# task_method as their own NaN bucket.
df["task_method"].value_counts(dropna=False)

task_method
1SPT                        201904
1SPM                        178249
NaN                          50291
1SPM Audio/Visual hybrid      2530
Name: count, dtype: int64

### Get info on the individual recordings in the training set. Look at the tags associated with these recordings. 


In [5]:
# All distinct recording ids, in order of first appearance in df.
unique_recordings = df["recording_id"].unique()

# Species tally for the first of those recordings.
df.loc[df["recording_id"] == unique_recordings[0], "species_code"].value_counts()

species_code
OVEN    106
LINO     42
MOWI     36
LIWI      3
Name: count, dtype: int64

In [6]:
# Show every row belonging to one example recording (recording_id 3557).
df[df["recording_id"] == 3557]

Unnamed: 0.1,Unnamed: 0,organization,project,project_id,location,location_id,recording_date_time,recording_id,task_method,task_id,...,spectrogram_url,clip_url,sensorId,tasks,status,recording_url,latitude,longitude,location_buffer_m,file_type
124456,1016821,BU,Boreal Wetland Community Monitoring,41,Y-7-253-NE,301,2018-07-04 02:00:00,3557,1SPM,6290,...,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,ARU,20082,Published - Public,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,57.480754,-111.200676,,mp3
124457,1016823,BU,Boreal Wetland Community Monitoring,41,Y-7-253-NE,301,2018-07-04 02:00:00,3557,1SPM,6290,...,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,ARU,20082,Published - Public,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,57.480754,-111.200676,,mp3
124458,1016827,BU,Boreal Wetland Community Monitoring,41,Y-7-253-NE,301,2018-07-04 02:00:00,3557,1SPM,6290,...,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,ARU,20082,Published - Public,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,57.480754,-111.200676,,mp3


In [7]:
# Per-recording species tally: how many rows each species has in each recording.
df.groupby("recording_id")["species_code"].value_counts()

recording_id  species_code
3557          LIDT            3
3560          NONE            2
              LCSP            1
3562          LIDT            3
              LINO            3
                             ..
826382        WTSP            2
              TEWA            1
826383        WTSP            3
829015        OSFL            1
              WTSP            1
Name: count, Length: 227820, dtype: int64

In [8]:
# Rows whose species code is "OSFL", and the distinct recordings they occur in.
# NOTE(review): OSFL is presumably the target species for this notebook -- confirm.
osfls = df[df["species_code"] == "OSFL"]
osfl_recordings = osfls["recording_id"].unique()

In [9]:
# Display the OSFL rows (pandas truncates the rendering of long frames).
osfls

Unnamed: 0.1,Unnamed: 0,organization,project,project_id,location,location_id,recording_date_time,recording_id,task_method,task_id,...,spectrogram_url,clip_url,sensorId,tasks,status,recording_url,latitude,longitude,location_buffer_m,file_type
2566,14827,BU,Bayne-OSFL-BU-AnyYr 2021,787,OSFL-AB-1-1,98531,2016-07-01 04:57:55,255412,,231598,...,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,ARU,365,Published - Private,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,57.523776,-111.418890,,mp3
2595,14885,BU,Bayne-OSFL-BU-AnyYr 2021,787,OSFL-AB-15-2,98499,2017-06-19 16:00:00,255940,,232126,...,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,ARU,365,Published - Private,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,57.481493,-111.416121,,flac
2596,14888,BU,Bayne-OSFL-BU-AnyYr 2021,787,OSFL-AB-15-2,98499,2017-06-29 13:00:00,255949,,232135,...,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,ARU,365,Published - Private,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,57.481493,-111.416121,,flac
2597,14889,BU,Bayne-OSFL-BU-AnyYr 2021,787,OSFL-AB-15-2,98499,2017-06-29 13:00:00,255949,,232135,...,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,ARU,365,Published - Private,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,57.481493,-111.416121,,flac
2598,14898,BU,Bayne-OSFL-BU-AnyYr 2021,787,OSFL-AB-18-1,98497,2017-06-20 04:00:00,255967,,232153,...,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,ARU,365,Published - Private,https://wildtrax-aru.s3.us-west-2.amazonaws.co...,57.482156,-111.435525,,flac
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
537872,2360371,CWS-NOR,YT High Elevation Monitoring Program CWS North...,828,YTHEMP-YSL-259270-H3,180758,2021-06-08 07:45:00,280093,1SPT,256831,...,https://wildtrax-aru-ca.s3.ca-central-1.amazon...,https://wildtrax-aru-ca.s3.ca-central-1.amazon...,ARU,1128,Published - Public,https://wildtrax-aru-ca.s3.ca-central-1.amazon...,60.595357,-135.069651,,mp3
538266,2360948,CWS-NOR,YT High Elevation Monitoring Program CWS North...,828,YTHEMP-YSL-261611-H2,180780,2021-06-15 04:45:00,282687,1SPT,261611,...,https://wildtrax-aru-ca.s3.ca-central-1.amazon...,https://wildtrax-aru-ca.s3.ca-central-1.amazon...,ARU,1128,Published - Public,https://wildtrax-aru-ca.s3.ca-central-1.amazon...,60.637168,-135.264948,,mp3
538282,2360967,CWS-NOR,YT High Elevation Monitoring Program CWS North...,828,YTHEMP-YSL-261611-H2,180780,2021-06-26 06:15:00,280349,1SPT,257087,...,https://wildtrax-aru-ca.s3.ca-central-1.amazon...,https://wildtrax-aru-ca.s3.ca-central-1.amazon...,ARU,1128,Published - Public,https://wildtrax-aru-ca.s3.ca-central-1.amazon...,60.637168,-135.264948,,mp3
538811,2361652,CWS-NOR,YT High Elevation Monitoring Program CWS North...,828,YTHEMP-YSL-266936-S2,181109,2021-06-19 07:15:00,282767,1SPT,261691,...,https://wildtrax-aru-ca.s3.ca-central-1.amazon...,https://wildtrax-aru-ca.s3.ca-central-1.amazon...,ARU,1128,Published - Public,https://wildtrax-aru-ca.s3.ca-central-1.amazon...,60.763676,-135.518418,,mp3
