In [1]:
from keras.models import model_from_json
import json
import os
import matplotlib.pyplot as pyplot
import pickle
import numpy as np
import librosa
import librosa.display
import pandas as pd

### Specify the directory for test recordings, the path to the stored model, and the output file path/name

In [2]:
# Recording directory
recording_dir = '/home/gabsoni/Documentos/ENTORNOS/exp1/arbimon2-cnn/test_recordings/'

# CNN model
model_path = '/home/gabsoni/Documentos/ENTORNOS/exp1/arbimon2-cnn/model/ResNet50_test'

# Path to output prediction CSV
output_path = 'prediction_output.csv'


### Run remaining cells to generate prediction CSV

In [3]:
# CNN input sample rate
model_sample_rate = 48000

test_recordings = os.listdir(recording_dir)
print(len(test_recordings))


50


In [4]:
# Load CNN model

model = model_from_json(open(model_path+'.json', 'r').read())
model.load_weights(model_path+'.h5')
class_dict = json.load(open(model_path+'_classes.json', 'r'))
class_dict_rev = {(str(v[0])): k for k, v in class_dict.items()}

print(model_path)
print('Loaded model ')

model_input_shape = model.get_layer(index=0).input_shape[1:]
n_classes = model.get_layer(index=-1).output_shape[1:][0]


/home/gabsoni/Documentos/ENTORNOS/exp1/arbimon2-cnn/model/ResNet50_test
Loaded model 


In [5]:
model_input_shape

(224, 224, 3)

In [6]:
from keras.preprocessing.image import ImageDataGenerator
test_datagen = ImageDataGenerator(rescale=1./255)


In [7]:
def fig2data ( fig ):
    """
    @brief Convert a Matplotlib figure to a 4D np array with RGBA channels and return it
    @param fig a matplotlib figure
    @return a np 3D array of RGBA values
    """
    # draw the renderer
    fig.canvas.draw ( )
 
    # Get the RGBA buffer from the figure
    w,h = fig.canvas.get_width_height()
    buf = np.frombuffer ( fig.canvas.tostring_rgb(), dtype=np.uint8 )
    buf.shape = ( w, h, 3 )
    
    return buf

In [8]:
### Run detections

pixLen = 188 # 188 spectrogram pixels is ~2 seconds
shft = 93 # %50 overlap between 188-length windows

# Matrix of output predictions: rows are recordings, columns are species, 
prediction = np.zeros((len(test_recordings), n_classes))

# Function to break image into frames
def divide_frames(im, w, s): 
    for i in range(0, im.shape[1], s):  
        yield im[:, i:i + w] 

for n, j in enumerate(test_recordings): # loop over recordings
            
    #print('Processing recording ' + str(j+1) + '/' + str(len(test_recordings)) )
    
    audio_data, sampling_rate = librosa.load(recording_dir+j, sr=model_sample_rate)
    
    pxx = librosa.feature.melspectrogram(y = audio_data, 
                                           sr = sampling_rate,
                                           n_fft=2048, 
                                           hop_length=512, 
                                           win_length=1024)
    
    X = []
    for c, jj in enumerate(divide_frames(pxx, pixLen, shft)): # loop over frames
        if jj.shape[1] != pixLen:
            continue
        dpi=100
        fig = pyplot.figure(num=None, figsize=(224/dpi, 224/dpi), dpi=dpi)
        pyplot.subplot(222)
        ax = pyplot.axes()
        ax.set_axis_off()
        librosa.display.specshow(librosa.power_to_db(jj, ref=np.max))
        img = fig2data(fig)
        pyplot.close()
        X.append(img/255.0)
    X = np.stack(X)
    
    p = model.predict(X)
    #print(p)
            
    for i in range(n_classes):
        print(n,i)
        prediction[n, i] = max(p[:,i]) # Max-probability across 2s windows
        #prediction[n, i, 1] = np.mean(np.sort(p[:,i])[-2:]) # Mean probability of top 2 windows

        
            
            

0 0
1 0
2 0
3 0
4 0
5 0
6 0
7 0
8 0
9 0
10 0
11 0
12 0
13 0
14 0
15 0
16 0
17 0
18 0
19 0
20 0
21 0
22 0
23 0
24 0
25 0
26 0
27 0
28 0
29 0
30 0
31 0
32 0
33 0
34 0
35 0
36 0
37 0
38 0
39 0
40 0
41 0
42 0
43 0
44 0
45 0
46 0
47 0
48 0
49 0


In [9]:
# Make dataframe of predictions
prediction = pd.DataFrame(prediction)
prediction.index = test_recordings
prediction.columns = [class_dict[str(i)] for i in range(n_classes)]
print(prediction.columns)
prediction.to_csv(output_path)

Index(['Agelasticus_thilius'], dtype='object')


In [10]:
display(prediction)

Unnamed: 0,Agelasticus_thilius
project_2770_site_22306_2019_11_MON_20191104_090000.p5.flac,0.152143
project_2770_site_22306_2019_11_MON_20191104_090000.p4.flac,0.149813
project_2770_site_22306_2019_11_MON_20191106_080000.p4.flac,0.148559
project_2770_site_22306_2019_11_MON_20191104_160000.p5.flac,0.15124
project_2770_site_22306_2019_11_MON_20191106_200000.p3.flac,0.149824
project_2770_site_22306_2019_11_MON_20191103_170000.p2.flac,0.150176
project_2770_site_22306_2019_11_MON_20191106_100000.p4.flac,0.148101
project_2770_site_22306_2019_11_MON_20191104_110000.p1.flac,0.14983
project_2770_site_22306_2019_11_MON_20191102_170000.p1.flac,0.149601
project_2770_site_22306_2019_11_MON_20191105_080000.p1.flac,0.147969


In [None]:
print(p)

In [None]:
X.shape