### Threshold Noise from large WAVs
- data are linked at https://figshare.com/articles/BirdsongRecognition/3470165
- Part of the function of this notebook is to segment bouts of song from silence, when you have a microphone continuously recording from a bird
- The script reads each wav file, finds periods of increased noise, and segments it out as a bout
  - There is also some basic filtering to ignore noisy wavs in which birds are shuffling around, making wing noises, etc.
  - Params need to be uniquely set to your birds

In [1]:
import numpy as np
import sys
import os
from tqdm import tqdm_notebook as tqdm
from glob import glob
import re 
from datetime import datetime, timedelta
import pandas as pd 
import os
from sklearn.externals.joblib import Parallel, delayed
%matplotlib inline

%load_ext autoreload
%autoreload 2

In [2]:
# import local methods from the source code
from avgn.segment_song.preprocessing import *
import avgn.segment_song.preprocessing as pp

In [3]:
# Where to put the final HDF5 files.
# It's recommended to use an absolute path for the output location.
# NOTE(review): this variable is not referenced by any other cell visible in
# this notebook; the segmented output is written under `save_to_folder` — confirm
# whether this is still needed.
output_location = '../data/BF_wavs/'

### Find Each WAV, and the bird attached to the wav

In [4]:
# Set input_loc to the absolute path of the directory holding the bird data
# (the folder that contains the Bird*/ sub-directories).
# A trailing slash is the safest form: it works whether downstream code joins
# paths with os.path.join or by plain string concatenation.
input_loc = '/mnt/cube/Datasets/BengaleseFinch/Koumura_Okanoya-paper_data/'

In [5]:
# Each entry is (glob pattern matching a dataset's wav files, dataset label).
# os.path.join keeps the pattern valid whether or not input_loc ends in '/'.
dsets = [(os.path.join(input_loc, 'Bird*', 'Wave', '*.wav'), 'BF')]

In [6]:
# Show the (glob pattern, dataset label) pairs as a sanity check.
dsets

[('/mnt/cube/Datasets/BengaleseFinch/Koumura_Okanoya-paper_data/Bird*/Wave/*.wav',
  'BF')]

In [7]:
# Collect every wav path matched by each dataset's glob pattern, together with
# a parallel array holding the dataset label of each wav.
wav_list, dset_list = np.array([]), np.array([])
for pattern, label in tqdm(dsets):
    matches = np.array(glob(pattern))
    wav_list = np.append(wav_list, matches)
    # one label per wav found for this dataset
    dset_list = np.append(dset_list, [label] * len(matches))
# Report the first path and the total number of wavs found.
print(wav_list[0], len(wav_list))

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))


/mnt/cube/Datasets/BengaleseFinch/Koumura_Okanoya-paper_data/Bird4/Wave/412.wav 2965


In [8]:
# The wavs are found with the pattern <input_loc>/Bird*/Wave/*.wav, so the bird
# name is the directory two levels above each wav file. Index from the END of
# the path rather than with a fixed position from the start, so that this keeps
# working when input_loc points somewhere with a different directory depth.
bird_names = [i.split('/')[-3] for i in wav_list]
print(np.unique(bird_names))

['Bird0' 'Bird1' 'Bird10' 'Bird2' 'Bird3' 'Bird4' 'Bird5' 'Bird6' 'Bird7'
 'Bird8' 'Bird9']


#### Extract times for bird / create dataframe of wavs

In [9]:
import xml.etree.ElementTree

In [10]:
# Locate each bird's annotation file (<input_loc>/<bird>/Annotation.xml).
# os.path.join avoids a doubled '/' when input_loc already ends with a slash.
bird_xml_locs = glob(os.path.join(input_loc, '*', 'Annotation.xml'))
bird_xml_locs[:2]

['/mnt/cube/Datasets/BengaleseFinch/Koumura_Okanoya-paper_data/Bird4/Annotation.xml',
 '/mnt/cube/Datasets/BengaleseFinch/Koumura_Okanoya-paper_data/Bird3/Annotation.xml']

#### this part requires some custom parsing of an XML file to get wav time info
- as far as I can tell specific datetime information is not available in the XML

In [11]:
# Parse every bird's Annotation.xml into one row per annotated <Sequence>.
# Rows are gathered in a plain list and turned into a DataFrame once at the end;
# appending to the DataFrame row by row re-allocates it on every insert.
song_columns = ['bird', 'WavLoc', 'WaveFileName', 'Position', 'Length', 'NumNote',
                'NotePositions', 'NoteLengths', 'NoteLabels']
song_records = []

for bird_loc in tqdm(bird_xml_locs):
    bird_xml = xml.etree.ElementTree.parse(bird_loc).getroot()
    # the bird name is the directory that contains Annotation.xml
    bird = bird_loc.split('/')[-2]
    # Iterate over elements directly: Element.getchildren() was removed in
    # Python 3.9. list() keeps a length available for the progress bar.
    for element in tqdm(list(bird_xml), leave=False):
        if element.tag != 'Sequence':
            continue
        # Reset the per-sequence fields so that a sequence with a missing tag
        # cannot silently inherit the value parsed for the previous sequence.
        position = length = WaveFileName = NumNote = None
        notePositions = []
        noteLengths = []
        noteLabels = []
        for seq_element in element:
            if seq_element.tag == 'Position':
                position = seq_element.text
            elif seq_element.tag == 'Length':
                length = seq_element.text
            elif seq_element.tag == 'WaveFileName':
                WaveFileName = seq_element.text
            elif seq_element.tag == 'NumNote':
                NumNote = seq_element.text
            elif seq_element.tag == 'Note':
                # each <Note> contributes one label / position / length
                for note_element in seq_element:
                    if note_element.tag == 'Label':
                        noteLabels.append(note_element.text)
                    elif note_element.tag == 'Position':
                        notePositions.append(note_element.text)
                    elif note_element.tag == 'Length':
                        noteLengths.append(note_element.text)
        # Full path of the wav this sequence refers to (None if the XML gave no name).
        sequence_wav = (os.path.join(input_loc, bird, 'Wave', WaveFileName)
                        if WaveFileName is not None else None)
        song_records.append({
            'bird': bird,
            'WavLoc': sequence_wav,
            'WaveFileName': WaveFileName,
            'Position': position,
            'Length': length,
            'NumNote': NumNote,
            'NotePositions': notePositions,
            'NoteLengths': noteLengths,
            'NoteLabels': noteLabels,
        })

# All values are kept as the raw XML text (strings / lists of strings).
song_df = pd.DataFrame(song_records, columns=song_columns)

HBox(children=(IntProgress(value=0, max=11), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1964), HTML(value='')))

HBox(children=(IntProgress(value=0, max=2110), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1351), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1486), HTML(value='')))

HBox(children=(IntProgress(value=0, max=412), HTML(value='')))

HBox(children=(IntProgress(value=0, max=572), HTML(value='')))

HBox(children=(IntProgress(value=0, max=419), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1854), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1495), HTML(value='')))

HBox(children=(IntProgress(value=0, max=2501), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1238), HTML(value='')))




In [12]:
# we cant parse anything about datetime from these wavs so we just make something up
wav_times = []
wav_loc = wav_list[0]
n_no_date = 0
for wav_file in wav_list:
        # default give up method
        dt = datetime(1900, 1, 1, 0, 0) + timedelta(hours=n_no_date)
        n_no_date+=1
        wav_times.append(dt)    
wav_times = np.array(wav_times)

In [13]:
# Preview the first few placeholder timestamps.
wav_times[:3]

array([datetime.datetime(1900, 1, 1, 0, 0),
       datetime.datetime(1900, 1, 1, 1, 0),
       datetime.datetime(1900, 1, 1, 2, 0)], dtype=object)

In [14]:
# Make a pandas dataframe corresponding to the files and datetimes
wav_df = pd.DataFrame.from_dict({'filename':wav_list,
                                'wav_time': wav_times,
                                'dset': dset_list,
                                'birdname': bird_names})
wav_df[:3]

Unnamed: 0,birdname,dset,filename,wav_time
0,Bird4,BF,/mnt/cube/Datasets/BengaleseFinch/Koumura_Okan...,1900-01-01 00:00:00
1,Bird4,BF,/mnt/cube/Datasets/BengaleseFinch/Koumura_Okan...,1900-01-01 01:00:00
2,Bird4,BF,/mnt/cube/Datasets/BengaleseFinch/Koumura_Okan...,1900-01-01 02:00:00


### Parameters for processing vocalizations
- ***These will almost certainly need to be set based upon species and the quality of your vocalizations***

In [15]:
# Segmentation parameters, keyed by dataset label (the second item of each
# entry in `dsets`). Every dict key must appear only once: a repeated key is
# silently overridden by its last occurrence.
param_dict = {}
param_dict['BF'] = {
    ### Parameters ###
    'lowcut': 50, # Hz # Low cut for our butter bandpass filter
    'highcut': 15000, # Hz # High cut for our butter bandpass filter

    'rms_window':  .01, # seconds # the size of your window
    'rms_stride': .01, # seconds # how big your step size should be for moving the filter
    'noise_thresh': .01, # threshold percent of maximum noise to consider silence
    'segment_padding': 4.0, # seconds to pad waveform extracted
    'rms_padding': 1.0, #5.0, # seconds # how much to pad around vocalizations

    # filtering
    'min_amp_val': 1000, # the minimum value of a wav's amplitude to be considered containing any sound
    'min_segment_length_s': 10., # How long a bout has to be to count
    'max_segment_length_s': 200.,  # If a bout is too long, dont count it
    'min_silence_pct': 0.05,  # measure of noise in wav, by threshing the pct of time that the wav is silent

    # FFT (we create a spectrogram here to filter out noise)
    'num_freq':1024, # how many channels to use in a spectrogram
    'sample_rate':44100, # what rate are your WAVs sampled at?
    'preemphasis':0.97,
    'ref_level_db':20, # reference db for computing spec
    # step size for fft. This key used to be listed twice (2, then 40); the later
    # value of 40 always won, so only that effective value is kept.
    'frame_shift_ms':40,
    'min_level_db': -50,# threshold for spectrograms (lower filters out more noise)
    'max_power_f_min': 1000,# (HZ) If the maximum power of the spectral envelope is below this, call noise
    'frame_length_ms':40, # frame length for fft

    # # Filter based upon power-frequency envelope
    'vocal_freq_min' : 700,
    'vocal_freq_max' : 15000
    }

In [16]:
# print the number of wav files found for each individual
for (dset, bird), group in wav_df.groupby(('dset', 'birdname')):
    print(dset, bird, len(group))

BF Bird0 135
BF Bird1 315
BF Bird10 94
BF Bird2 339
BF Bird3 402
BF Bird4 441
BF Bird5 335
BF Bird6 235
BF Bird7 310
BF Bird8 142
BF Bird9 217


#### Parameters for debugging / speed of preprocessing

In [17]:
# Skip song that has already been processed.
skip_created = True

# Run the algorithm in parallel (across wav files), and with how many threads
# when it does.
parallel = False
n_parallel = 10

# Output spectrogram visualizations to the notebook screen. This is useful for
# setting parameters; you may also want to edit the code to visualize other
# aspects of the algorithm.
visualize = False

# How verbose to make the output of the parallelization
# (higher = more, 0 = none, >50 output is sent to std.out).
verbosity = 1

# Verbose flag handed to the per-wav processing function.
verbose = False

In [18]:
# Root folder that receives the output wavs.
save_to_folder = '../../../data/bf_wav/'

# Whether to also save spectrogram PNGs to save_to_folder, so that the song
# segmentation can be inspected visually.
save_spectrograms = False

In [19]:
#from soundsig.sound import BioSound 
#from soundsig.sound import WavFile

In [20]:
# Loop through all wavs, one bird at a time, and segment each of them.
# Ctrl-C / "interrupt kernel" stops the run cleanly.
try:
    # Group with a LIST of keys: pandas treats a tuple as a single key.
    for (dset, bird), group in wav_df.groupby(['dset', 'birdname']):

        print('processing %s to save at %s' % (bird, save_to_folder))
        print('total wavs: ', len(group))

        # Create a spot to save the data
        bird_folder = save_to_folder+bird+'/'
        for subfolder in ('wavs/', 'csv/'):
            os.makedirs(bird_folder+subfolder, exist_ok=True)

        # Rows are read by column NAME (row.filename, row.wav_time) so that the
        # calls below do not depend on the column order of wav_df.
        if parallel:
            # The worker pool gets its own name; binding it to `parallel` would
            # overwrite the boolean flag that selects this branch.
            with Parallel(n_jobs=n_parallel, verbose=verbosity) as parallel_pool:
                parallel_pool(
                    delayed(pp.process_bird_wav)(
                        bird, row.filename, row.wav_time, param_dict[dset], save_to_folder,
                        visualize=visualize, skip_created=skip_created,
                        save_spectrograms=save_spectrograms, verbose=verbose)
                    for row in tqdm(group.itertuples(), total=len(group)))
        else:
            for row in tqdm(group.itertuples(), total=len(group)):
                pp.process_bird_wav(
                    bird, row.filename, row.wav_time, param_dict[dset], save_to_folder,
                    visualize=visualize, skip_created=skip_created,
                    save_spectrograms=save_spectrograms, verbose=verbose)
except KeyboardInterrupt:
    print('interrrupted')

processing Bird0 to save at ../../../data/bf_wav/
total wavs:  135


HBox(children=(IntProgress(value=0, max=135), HTML(value='')))


processing Bird1 to save at ../../../data/bf_wav/
total wavs:  315


HBox(children=(IntProgress(value=0, max=315), HTML(value='')))


processing Bird10 to save at ../../../data/bf_wav/
total wavs:  94


HBox(children=(IntProgress(value=0, max=94), HTML(value='')))


processing Bird2 to save at ../../../data/bf_wav/
total wavs:  339


HBox(children=(IntProgress(value=0, max=339), HTML(value='')))


processing Bird3 to save at ../../../data/bf_wav/
total wavs:  402


HBox(children=(IntProgress(value=0, max=402), HTML(value='')))


processing Bird4 to save at ../../../data/bf_wav/
total wavs:  441


HBox(children=(IntProgress(value=0, max=441), HTML(value='')))


processing Bird5 to save at ../../../data/bf_wav/
total wavs:  335


HBox(children=(IntProgress(value=0, max=335), HTML(value='')))


processing Bird6 to save at ../../../data/bf_wav/
total wavs:  235


HBox(children=(IntProgress(value=0, max=235), HTML(value='')))


processing Bird7 to save at ../../../data/bf_wav/
total wavs:  310


HBox(children=(IntProgress(value=0, max=310), HTML(value='')))


processing Bird8 to save at ../../../data/bf_wav/
total wavs:  142


HBox(children=(IntProgress(value=0, max=142), HTML(value='')))


processing Bird9 to save at ../../../data/bf_wav/
total wavs:  217


HBox(children=(IntProgress(value=0, max=217), HTML(value='')))


