In [1]:
'''
A notebook which allows a pretrained detector to be applied to 
any audio file.
'''
%load_ext autoreload
%autoreload 2
%matplotlib inline

# General imports
import os
import sys
import matplotlib.pyplot as plt
import numpy as np
import yaml
import cPickle as pickle
from scipy.ndimage.interpolation import zoom

# Neural network imports
import nolearn
import lasagne

# Custom functions and classes
sys.path.append('../..')
from lib import train_helpers

# Wav file loading and manipulation
import librosa
from librosa.feature import melspectrogram

Using gpu device 0: GeForce GTX 770 (CNMeM is disabled, cuDNN 5110)
  warn("IPython.utils.traitlets has moved to a top-level traitlets package.")


## Testing options

In [2]:
# You can provide here either:
#   A path to a single .wav file, 
# or
#   A path to a folder which contains one or more .wav files
#
# In this example code, we give a folder which contains a single .wav file.
# We could also give the complete path to this file - the result would be the same
# test_file_or_folder = '/home/michael/Dropbox/FairbrassFirmanetal_/models/example_test_data/'
# test_file_or_folder = '/media/michael/Engage/data/audio/alison_data/diversity_labelled_data/'
# test_file_or_folder = '/media/michael/Elements1/Fieldwork_Data/2015/IG62XL/SM2+/'
# test_file_or_folder = '/media/michael/Elements1/Fieldwork_Data/2015/E29RR/250515-010615/SM2+/'
# test_file_or_folder = '/media/michael/Elements/Fieldwork_Data/2013/RM14_3YB/'
test_file_or_folder = '/home/michael/Dropbox/engage/FairbrassFirmanetal_/recordings/W84LA/SM2+_Sliced/'

# specify where the pretrained model is that we want to load
models_dir = '/home/michael/Dropbox/engage/FairbrassFirmanetal_/data/models/anthrop_trained_large/'

# Specify the names of the files we want to load in
# (Keep as they are to load in the model in the dropbox folder)
weights_path = os.path.join(models_dir, 'weights_99.pkl')
options_path = os.path.join(models_dir, 'network_params.yaml')

In [3]:
# Loading the options for network architecture, spectrogram type etc
options = yaml.load(open(options_path))

# Checking the weights exist
assert os.path.exists(weights_path), 'Error - cannot find the weights file at ' + weights_path

## Finding the .wav files to test on

In [4]:
if os.path.isdir(test_file_or_folder):
    
    # User provided a folder - we must get the list of wav files
    wav_dir = test_file_or_folder
    filenames = [xx for xx in os.listdir(wav_dir) if xx.endswith('.wav')]
    assert len(filenames), 'Error - no files found!'

elif os.path.isfile(test_file_or_folder):
    
    # User provided a file - we must deal with the filename appropriately
    wav_dir = os.path.dirname(test_file_or_folder)
    filenames = [os.path.basename(test_file_or_folder)]

else:
    raise Exception("No such file or folder: ", test_file_or_folder)
    
print "We will load %d file(s) from the folder:" % len(filenames)
print wav_dir
print 
print "Files are:"
for filename in filenames:
    print "  -", filename

We will load 9773 file(s) from the folder:
/home/michael/Dropbox/engage/FairbrassFirmanetal_/recordings/W84LA/SM2+_Sliced/

Files are:
  - W84LA-013548_20130628_0624.wav
  - W84LA-013548_20130628_0115.wav
  - W84LA-013548_20130626_0145.wav
  - W84LA-013548_20130626_1413.wav
  - W84LA-013548_20130626_0910.wav
  - W84LA-013548_20130623_1034.wav
  - W84LA-013548_20130627_2325.wav
  - W84LA-013548_20130627_1244.wav
  - W84LA-013548_20130626_0400.wav
  - W84LA-013548_20130626_1405.wav
  - W84LA-013548_20130625_0614.wav
  - W84LA-013548_20130627_1024.wav
  - W84LA-013548_20130625_0754.wav
  - W84LA-013548_20130625_2244.wav
  - W84LA-013548_20130622_2351.wav
  - W84LA-013548_20130628_0342.wav
  - W84LA-013548_20130626_1721.wav
  - W84LA-013548_20130622_0650.wav
  - W84LA-013548_20130626_1751.wav
  - W84LA-013548_20130624_0338.wav
  - W84LA-013548_20130627_1745.wav
  - W84LA-013548_20130627_0736.wav
  - W84LA-013548_20130622_0307.wav
  - W84LA-013548_20130621_1654.wav
  - W84LA-013548_20130626

## Setting up network

In [5]:
# Create the layers of the neural network, with the same options we used in training
net_options = {xx: options[xx] for xx in train_helpers.net_params}
network = train_helpers.create_net(**net_options)

# Create an object which will iterate over the test spectrograms appropriately
test_sampler = train_helpers.SpecSampler(
    4, options['HWW'], False, options['LEARN_LOG'], randomise=0, seed=10)

# Create a nolearn object to contain the network and push data through it
net = nolearn.lasagne.NeuralNet(
    layers=network['prob'], update=lasagne.updates.adam, batch_iterator_test=test_sampler)

# Initialise the network and load in the pretrained parameters
net.initialize()
net.load_params_from(weights_path)

  for input_layer in input_layers]
  inputs = X_inputs + [theano.Param(y_batch, name="y")]


## Generating spectrogram(s)

In [9]:
# We will store spectrograms in a dictionary, where keys are the filenames
from tqdm import tqdm
cache_dir = '/media/michael/Engage/data/audio/alison_data/full_dataset/specs/'
LOAD_FROM_CACHE_IF_POSS = True

specs = {}  

# Loop over each file we want to predict for
for filename in tqdm(filenames):
    
    if filename in specs:
        continue
        
    savepath = cache_dir + filename.replace('.wav', '.pkl')
    
    if os.path.exists(savepath) and LOAD_FROM_CACHE_IF_POSS:
        specs[filename] = pickle.load(open(savepath))
        
    else:

        # Read in the wav file
        wav, sample_rate = librosa.load(os.path.join(wav_dir, filename), 22050)

        # Compute the spectrogram
        spec = melspectrogram(wav, sr=sample_rate, n_fft=options['N_FFT'], 
                              hop_length=options['HOP_LENGTH'], n_mels=options['N_MELS'])

        # Do log conversion:
        spec = np.log(options['A'] + options['B'] * spec)
        spec -= np.median(spec, axis=1, keepdims=True)

        # Add to the dictionary
        specs[filename] = spec.astype(np.float32)
        
        with open(savepath, 'w') as f:
            pickle.dump(spec.astype(np.float32), f, -1)

100%|██████████| 9773/9773 [3:22:11<00:00,  1.23s/it]


## Applying classifier

In [13]:
# Push this list through the network
# test_sampler, defined above, does the hard work here.
preds = {}

for filename in tqdm(filenames[:500]):
    preds[filename] = net.predict_proba([specs[filename]])


  0%|          | 0/500 [00:00<?, ?it/s][A
  0%|          | 1/500 [00:04<38:02,  4.57s/it][A

  0%|          | 2/500 [00:09<38:21,  4.62s/it][A
  1%|          | 3/500 [00:13<37:31,  4.53s/it][A
  1%|          | 4/500 [00:18<38:10,  4.62s/it][A
  1%|          | 5/500 [00:23<38:17,  4.64s/it][A
  1%|          | 6/500 [00:25<31:20,  3.81s/it][A
  1%|▏         | 7/500 [00:29<31:53,  3.88s/it][A
  2%|▏         | 8/500 [00:33<34:11,  4.17s/it][A
  2%|▏         | 9/500 [00:38<35:00,  4.28s/it][A
  2%|▏         | 10/500 [00:43<35:39,  4.37s/it][A
  2%|▏         | 11/500 [00:47<35:56,  4.41s/it][A
  2%|▏         | 12/500 [00:52<36:07,  4.44s/it][A
  3%|▎         | 13/500 [00:54<30:22,  3.74s/it][A
  3%|▎         | 14/500 [00:57<29:00,  3.58s/it][A
  3%|▎         | 15/500 [01:01<31:11,  3.86s/it][A
  3%|▎         | 16/500 [01:06<32:44,  4.06s/it][A
  3%|▎         | 17/500 [01:10<33:59,  4.22s/it][A
  4%|▎         | 18/500 [01:15<34:44,  4.32s/it][A
  4%|▍         | 19/500 [01:

## Saving results to disk

In [None]:
savedir = '/home/michael/Dropbox/engage/FairbrassFirmanetal_/data/predictions/ours_on_all_data/'
for fname, pred in preds.iteritems():
    with open(savedir + fname.replace('.wav', '_' + options['CLASSNAME'] + '.pkl'), 'w') as f:
        pickle.dump(pred, f, -1)