In [112]:
import os
import sys
import glob
import numpy as np
import librosa
import librosa.display
import pandas as pd
import matplotlib.pyplot as plt
from scipy.io import wavfile
from aubio import onset
from scipy.signal import butter, lfilter
from multiprocessing import Pool, cpu_count

import signalproc
from birdutils import read_labels

%matplotlib inline

In [146]:
def get_acc(features, labels):
    features['overlap'] = False
    labels['overlap'] = False
    filenames = labels['File Name'].unique()

    for filename in filenames:
        df_label = labels[labels['File Name'] == filename]
        df_feat = features[features['File Name'] == filename]
        for index1, row_label in df_label.iterrows():
            for index2, row_feat in df_feat.iterrows():
                feat_start = row_feat['Time Start']
                feat_end = row_feat['Time End']
                label_start = row_label['Time Start']
                label_end = row_label['Time End']
                overlap = (label_start <= feat_end) and (label_end >= feat_start)
                if overlap:
                    labels.at[index1, 'overlap'] = True
                    features.at[index2, 'overlap'] = True
    return features, labels

In [148]:
def find_onsets(path):
    name = os.path.splitext(os.path.basename(path))[0]
    y, sr = librosa.load(path, sr=16000)
    y = signalproc.bandpass_filter(y, sr, 500, 6000)
    onset_env = librosa.onset.onset_strength(y=y, sr=sr, feature=librosa.feature.melspectrogram,
                                         fmax=8000.0, aggregate=np.median, detrend=False, center=False)
    onset_s = librosa.onset.onset_detect(onset_envelope=onset_env, sr=sr, units='time', backtrack=True, hop_length=512)
    onset_s = np.unique(onset_s)
    onset_end_s = onset_s + 0.5
    df = pd.DataFrame(data={'File Name': name, 'Time Start': onset_s, 'Time End': onset_end_s})
    return df

In [2]:
bird_calls_labels_path = '/mnt/data/Birdman/sthelena_labels.xls'
samples_dir = '/mnt/data/Birdman/samples/recordings/'
samples_paths = glob.glob(samples_dir + 'STHELENA-02_20140605_200000*.wav')
labels_dict = read_labels(bird_calls_labels_path)
labels = pd.concat(labels_dict, axis=0, ignore_index=True)

In [151]:
with Pool(cpu_count()) as p:
    onset_all = p.map(find_onsets, samples_paths)

In [152]:
onsets_df = pd.concat(onset_all)
print(len(onsets_df))

2256


In [144]:

    # return sum(labels['overlap']) / len(labels)

In [145]:
feat, lab = get_acc(onsets_df, labels)
sum(lab['overlap']) / len(lab)

0.9974226804123711

In [125]:
lab.head(10)

Unnamed: 0,Date,File Name,Type of Call,Time Start,Time End,Species,Notes,overlap
0,2014-06-05,STHELENA-02_20140605_200000_1,Flight Call,2.905,4.197,Storm Petrel,,True
1,2014-06-05,STHELENA-02_20140605_200000_1,Flight Call,10.008,11.622,Storm Petrel,,True
2,2014-06-05,STHELENA-02_20140605_200000_1,Flight Call,14.527,16.464,Storm Petrel,,True
3,2014-06-05,STHELENA-02_20140605_200000_1,Flight Call,17.11,18.724,Storm Petrel,,True
4,2014-06-05,STHELENA-02_20140605_200000_1,Flight Call,20.338,21.307,Storm Petrel,,True
5,2014-06-05,STHELENA-02_20140605_200000_1,Flight Call,34.22,34.866,Storm Petrel,,False
6,2014-06-05,STHELENA-02_20140605_200000_1,Flight Call,47.134,52.944,Storm Petrel,,True
7,2014-06-05,STHELENA-02_20140605_200000_1,Flight Call,60.692,61.984,Storm Petrel,,False
8,2014-06-05,STHELENA-02_20140605_200000_1,Flight Call,64.889,66.826,Storm Petrel,,True
9,2014-06-05,STHELENA-02_20140605_200000_1,Flight Call,67.472,68.763,Storm Petrel,,False


In [127]:
feat[feat['File Name'] == 'STHELENA-02_20140605_200000_1'].head(40)

Unnamed: 0,File Name,Time Start,Time End,overlap
0,STHELENA-02_20140605_200000_1,2.848,3.348,True
1,STHELENA-02_20140605_200000_1,2.976,3.476,True
2,STHELENA-02_20140605_200000_1,3.168,3.668,True
3,STHELENA-02_20140605_200000_1,3.296,3.796,True
4,STHELENA-02_20140605_200000_1,3.424,3.924,True
5,STHELENA-02_20140605_200000_1,3.552,4.052,True
6,STHELENA-02_20140605_200000_1,3.744,4.244,True
7,STHELENA-02_20140605_200000_1,3.936,4.436,True
8,STHELENA-02_20140605_200000_1,4.16,4.66,True
9,STHELENA-02_20140605_200000_1,8.384,8.884,True


In [130]:
sum(feat['overlap'])/len(feat)

0.9922288943836101

In [111]:
len(onsets_df)

2067

In [3]:
samples_paths

['/mnt/data/Birdman/samples/recordings/STHELENA-02_20140605_200000_11.wav',
 '/mnt/data/Birdman/samples/recordings/STHELENA-02_20140605_200000_5.wav',
 '/mnt/data/Birdman/samples/recordings/STHELENA-02_20140605_200000_3.wav',
 '/mnt/data/Birdman/samples/recordings/STHELENA-02_20140605_200000_10.wav',
 '/mnt/data/Birdman/samples/recordings/STHELENA-02_20140605_200000_2.wav',
 '/mnt/data/Birdman/samples/recordings/STHELENA-02_20140605_200000_7.wav',
 '/mnt/data/Birdman/samples/recordings/STHELENA-02_20140605_200000_13.wav',
 '/mnt/data/Birdman/samples/recordings/STHELENA-02_20140605_200000_8.wav',
 '/mnt/data/Birdman/samples/recordings/STHELENA-02_20140605_200000_9.wav',
 '/mnt/data/Birdman/samples/recordings/STHELENA-02_20140605_200000_6.wav',
 '/mnt/data/Birdman/samples/recordings/STHELENA-02_20140605_200000_1.wav',
 '/mnt/data/Birdman/samples/recordings/STHELENA-02_20140605_200000_4.wav',
 '/mnt/data/Birdman/samples/recordings/STHELENA-02_20140605_200000_12.wav']

In [83]:
y, sr = librosa.load('/mnt/data/Birdman/samples/recordings/STHELENA-02_20140605_200000_1.wav', sr=16000, duration=20)

In [84]:
onset_env = librosa.onset.onset_strength(y=y, sr=sr, feature=librosa.feature.melspectrogram,
                                         fmax=8000.0, aggregate=np.mean, detrend=False, center=False)
onset_s = librosa.onset.onset_detect(onset_envelope=onset_env, sr=sr, units='time', backtrack=True)
pd.DataFrame(data=[])

In [85]:
onset_s + 0.5

array([ 3.348,  3.476,  3.476,  3.476,  3.668,  3.668,  3.796,  3.796,
        3.924,  4.052,  4.244,  4.34 ,  4.436,  4.66 ,  5.428,  8.884,
        8.884,  8.884,  9.012,  9.012,  9.172,  9.172,  9.364,  9.364,
        9.524,  9.524, 10.42 , 10.516, 10.516, 10.804, 10.804, 10.932,
       10.932, 11.188, 11.188, 11.38 , 11.38 , 11.572, 11.572, 13.94 ,
       15.06 , 15.06 , 15.188, 15.348, 15.54 , 15.732, 15.828, 15.924,
       16.02 , 17.364, 17.78 , 17.876, 18.1  , 18.196, 18.196, 18.42 ,
       18.484, 18.484, 18.58 , 18.676, 18.676])

In [78]:
labels.head()

Unnamed: 0,Date,File Name,Type of Call,Time Start,Time End,Species,Notes
0,2014-06-05,STHELENA-02_20140605_200000_1,Flight Call,2.905,4.197,Storm Petrel,
1,2014-06-05,STHELENA-02_20140605_200000_1,Flight Call,10.008,11.622,Storm Petrel,
2,2014-06-05,STHELENA-02_20140605_200000_1,Flight Call,14.527,16.464,Storm Petrel,
3,2014-06-05,STHELENA-02_20140605_200000_1,Flight Call,17.11,18.724,Storm Petrel,
4,2014-06-05,STHELENA-02_20140605_200000_1,Flight Call,20.338,21.307,Storm Petrel,


In [74]:

onset_s

array([ 2.848,  2.976,  2.976,  2.976,  3.168,  3.168,  3.296,  3.296,
        3.424,  3.552,  3.744,  3.84 ,  3.936,  4.16 ,  4.928,  8.384,
        8.384,  8.384,  8.512,  8.512,  8.672,  8.672,  8.864,  8.864,
        9.024,  9.024,  9.92 , 10.016, 10.016, 10.304, 10.304, 10.432,
       10.432, 10.688, 10.688, 10.88 , 10.88 , 11.072, 11.072, 13.44 ,
       14.56 , 14.56 , 14.688, 14.848, 15.04 , 15.232, 15.328, 15.424,
       15.52 , 16.864, 17.28 , 17.376, 17.6  , 17.696, 17.696, 17.92 ,
       17.984, 17.984, 18.08 , 18.176, 18.176])

In [58]:
onset = librosa.onset.onset_detect(onset_envelope=onset_env, sr=sr, units='frames', backtrack=True)
onset_bt = librosa.onset.onset_backtrack(onset, onset_env)

In [60]:
len(onset_bt)

61

In [55]:
onset

array([  2,   2,   9,  29,  38,  40,  49,  51,  56,  60,  64,  70,  72,
        83,  94,  96, 103, 105, 105, 110, 110, 114, 117, 121, 123, 125,
       131, 131, 136, 141, 145, 145, 150, 161, 163, 175, 178, 186, 188,
       201, 207, 216, 222, 222, 231, 243, 250, 255, 264, 268, 271, 273,
       277, 279, 281, 283, 285, 285, 300, 309, 313, 313, 321, 324, 324,
       328, 328, 334, 336, 336, 336, 342, 342, 349, 349, 352, 358, 362,
       366, 374, 385, 393, 400, 410, 413, 417, 420, 426, 439, 439, 443,
       447, 451, 457, 461, 467, 470, 472, 472, 478, 478, 484, 484, 488,
       490, 490, 498, 506, 509, 514, 517, 522, 534, 538, 542, 545, 547,
       549, 552, 555, 559, 562, 565, 567, 567, 571, 580, 591, 597, 600,
       605, 605, 613, 621])

In [49]:
onset_bt

array([ 91,  95,  95,  95, 101, 101, 105, 105, 109, 113, 119, 122, 125,
       132, 156, 264, 264, 264, 268, 268, 273, 273, 279, 279, 284, 284,
       312, 315, 315, 324, 324, 328, 328, 336, 336, 342, 342, 348, 348,
       422, 457, 457, 461, 466, 472, 478, 481, 484, 487, 529, 542, 545,
       552, 555, 555, 562, 564, 564, 567, 570, 570])

In [None]:
times = librosa.frames_to_time(np.arange(len(onset_env)), sr=sr)
times

In [31]:
librosa.frames_to_time(onset, sr=16000)

array([ 2.912,  3.04 ,  3.04 ,  3.04 ,  3.232,  3.232,  3.36 ,  3.36 ,
        3.488,  3.616,  3.808,  3.904,  4.   ,  4.224,  4.992,  8.448,
        8.448,  8.448,  8.576,  8.576,  8.736,  8.736,  8.928,  8.928,
        9.088,  9.088,  9.984, 10.08 , 10.08 , 10.368, 10.368, 10.496,
       10.496, 10.752, 10.752, 10.944, 10.944, 11.136, 11.136, 13.504,
       14.624, 14.624, 14.752, 14.912, 15.104, 15.296, 15.392, 15.488,
       15.584, 16.928, 17.344, 17.44 , 17.664, 17.76 , 17.76 , 17.984,
       18.048, 18.048, 18.144, 18.24 , 18.24 ])

In [None]:
times = librosa.frames_to_time(np.arange(len(onset_env)), sr=sr)
D = np.abs(librosa.stft(y))

In [None]:
onset_times = librosa.onset.onset_detect(onset_envelope=onset_env, sr=sr, units='time')
onset_times

In [None]:
onset_frames = librosa.onset.onset_detect(onset_envelope=onset_env, sr=sr, units='frames')


In [None]:
plt.plot(onset_env)

In [None]:
times[onset_frames]

![sss]("http://latex.codecogs.com/gif.latex?%5Ba%3D%5Cfrac%7B%5Cleft(%20%7B%7B2%7D%5E%7B%5Cfrac%7B1%7D%7B3%7D%7D%7D%5Ccdot%20%5Csqrt%7B3%7D%5Ccdot%20i-%7B%7B2%7D%5E%7B%5Cfrac%7B1%7D%7B3%7D%7D%7D%5Cright)%20%5Ccdot%20%7B%7Bk%7D%5E%7B%5Cfrac%7B1%7D%7B3%7D%7D%7D%7D%7B2%7D%2Ca%3D-%5Cfrac%7B%5Cleft(%20%7B%7B2%7D%5E%7B%5Cfrac%7B1%7D%7B3%7D%7D%7D%2B%7B%7B2%7D%5E%7B%5Cfrac%7B1%7D%7B3%7D%7D%7D%5Ccdot%20%5Csqrt%7B3%7D%5Ccdot%20i%5Cright)%20%5Ccdot%20%7B%7Bk%7D%5E%7B%5Cfrac%7B1%7D%7B3%7D%7D%7D%7D%7B2%7D%2Ca%3D%7B%7B2%7D%5E%7B%5Cfrac%7B1%7D%7B3%7D%7D%7D%5Ccdot%20%7B%7Bk%7D%5E%7B%5Cfrac%7B1%7D%7B3%7D%7D%7D%5D")