In [21]:
from keras.models import model_from_json
import json
import os
import matplotlib.pyplot as pyplot
import pickle
import numpy as np
import librosa
import librosa.display
import pandas as pd

### Specify the directory for test recordings, the path to the stored model, and the output file path/name

In [33]:
# Recording directory
recording_dir = '/home/gabsoni/Documentos/ENTORNOS/exp1/arbimon2-cnn/test_recordings/'

# CNN model
model_path = '/home/gabsoni/Documentos/ENTORNOS/exp1/arbimon2-cnn/model/ResNet50_test'

# Path to output prediction CSV
output_path = 'prediction_output.csv'


### Run remaining cells to generate prediction CSV

In [7]:
# CNN input sample rate
model_sample_rate = 48000

test_recordings = os.listdir(recording_dir)
print(test_recordings)


['project_2764_site_22266_2020_1_20200101_120000.p3.flac', 'project_2764_site_22266_2020_1_20200101_150000.p4.flac', 'project_2764_site_22266_2020_1_20200101_060700.p37.flac', 'project_2764_site_22266_2020_1_20200101_150000.p3.flac', 'project_2764_site_22266_2020_1_20200101_070000.p5.flac', 'project_2764_site_22266_2020_1_20200101_060700.p34.flac', 'project_2764_site_22266_2020_1_20200101_060700.p41.flac', 'project_2764_site_22266_2020_1_20200101_140000.p1.flac', 'project_2764_site_22266_2020_1_20200101_060700.p44.flac', 'project_2764_site_22266_2020_1_20200101_120000.p1.flac', 'project_2764_site_22266_2020_1_20200101_060700.p35.flac', 'project_2764_site_22266_2020_1_20200101_120000.p2.flac', 'project_2764_site_22266_2020_1_20200101_150000.p1.flac']


In [8]:
# Load CNN model

model = model_from_json(open(model_path+'.json', 'r').read())
model.load_weights(model_path+'.h5')
class_dict = json.load(open(model_path+'_classes.json', 'r'))
class_dict_rev = {(str(v[0])): k for k, v in class_dict.items()}

print(model_path)
print('Loaded model ')

model_input_shape = model.get_layer(index=0).input_shape[1:]
n_classes = model.get_layer(index=-1).output_shape[1:][0]


/home/gabsoni/Documentos/ENTORNOS/exp1/arbimon2-cnn/model/ResNet50_test
Loaded model 


In [9]:
model_input_shape

(224, 224, 3)

In [10]:
from keras.preprocessing.image import ImageDataGenerator
test_datagen = ImageDataGenerator(rescale=1./255)


In [11]:
def fig2data ( fig ):
    """
    @brief Convert a Matplotlib figure to a 4D np array with RGBA channels and return it
    @param fig a matplotlib figure
    @return a np 3D array of RGBA values
    """
    # draw the renderer
    fig.canvas.draw ( )
 
    # Get the RGBA buffer from the figure
    w,h = fig.canvas.get_width_height()
    buf = np.frombuffer ( fig.canvas.tostring_rgb(), dtype=np.uint8 )
    buf.shape = ( w, h, 3 )
    
    return buf

In [39]:
### Run detections

pixLen = 188 # 188 spectrogram pixels is ~2 seconds
shft = 93 # %50 overlap between 188-length windows

# Matrix of output predictions: rows are recordings, columns are species, 
prediction = np.zeros((len(test_recordings), n_classes))

# Function to break image into frames
def divide_frames(im, w, s): 
    for i in range(0, im.shape[1], s):  
        yield im[:, i:i + w] 

for n, j in enumerate(test_recordings): # loop over recordings
            
    #print('Processing recording ' + str(j+1) + '/' + str(len(test_recordings)) )
    
    audio_data, sampling_rate = librosa.load(recording_dir+j, sr=model_sample_rate)
    
    pxx = librosa.feature.melspectrogram(y = audio_data, 
                                           sr = sampling_rate,
                                           n_fft=2048, 
                                           hop_length=512, 
                                           win_length=1024)
    
    X = []
    for c, jj in enumerate(divide_frames(pxx, pixLen, shft)): # loop over frames
        if jj.shape[1] != pixLen:
            continue
        dpi=100
        fig = pyplot.figure(num=None, figsize=(224/dpi, 224/dpi), dpi=dpi)
        pyplot.subplot(222)
        ax = pyplot.axes()
        ax.set_axis_off()
        librosa.display.specshow(librosa.power_to_db(jj, ref=np.max))
        img = fig2data(fig)
        pyplot.close()
        X.append(img/255.0)
    X = np.stack(X)
    
    p = model.predict(X)
    print(p)
            
    for i in range(n_classes):
        print(n,i)
        prediction[n, i] = max(p[:,i]) # Max-probability across 2s windows
        #prediction[n, i, 1] = np.mean(np.sort(p[:,i])[-2:]) # Mean probability of top 2 windows

        
            
            

[[0.3872137 ]
 [0.38678804]
 [0.38977957]
 [0.3892624 ]
 [0.39067298]
 [0.39043152]
 [0.39099762]
 [0.39231652]
 [0.39273235]
 [0.38868752]
 [0.38903937]
 [0.39164162]
 [0.39225256]
 [0.3906853 ]
 [0.38884372]
 [0.3898558 ]
 [0.39240545]
 [0.390558  ]
 [0.39261064]
 [0.3931023 ]
 [0.3906175 ]
 [0.39056563]
 [0.38697827]
 [0.389143  ]
 [0.38435224]
 [0.3845362 ]
 [0.38430208]
 [0.38528845]
 [0.3797068 ]
 [0.38457993]
 [0.38426918]
 [0.38508257]
 [0.38784662]
 [0.38996288]
 [0.38862693]
 [0.38773948]
 [0.39071292]
 [0.39060014]
 [0.3906774 ]
 [0.3885218 ]
 [0.3889363 ]
 [0.39017344]
 [0.3901822 ]
 [0.39217728]
 [0.39157897]
 [0.39186478]
 [0.39174008]
 [0.3908967 ]
 [0.39112538]
 [0.3916981 ]
 [0.39120352]
 [0.3911624 ]
 [0.3932023 ]
 [0.3937574 ]
 [0.3869516 ]
 [0.38851506]
 [0.38783708]
 [0.38920036]
 [0.38849598]]
0 0
[[0.38582087]
 [0.38467813]
 [0.38474375]
 [0.38523647]
 [0.38653633]
 [0.38538578]
 [0.3863815 ]
 [0.38592187]
 [0.3857347 ]
 [0.3860224 ]
 [0.38579193]
 [0.3853255 ]
 

[[0.3797877 ]
 [0.3870682 ]
 [0.3857047 ]
 [0.38594735]
 [0.38460827]
 [0.3838671 ]
 [0.38430926]
 [0.38635793]
 [0.38697425]
 [0.38690427]
 [0.38421392]
 [0.38663208]
 [0.3876415 ]
 [0.38414267]
 [0.38465378]
 [0.38586292]
 [0.37872148]
 [0.38526666]
 [0.38392428]
 [0.3841995 ]
 [0.38351035]
 [0.38138896]
 [0.382906  ]
 [0.38375795]
 [0.3828603 ]
 [0.3854537 ]
 [0.38405672]
 [0.38581437]
 [0.38426447]
 [0.38533345]
 [0.38529122]
 [0.385679  ]
 [0.38419554]
 [0.3845977 ]
 [0.38613498]
 [0.38472474]
 [0.3837611 ]
 [0.38419297]
 [0.38290554]
 [0.38614726]
 [0.3846085 ]
 [0.385001  ]
 [0.38496196]
 [0.38513172]
 [0.38488886]
 [0.3845238 ]
 [0.3850038 ]
 [0.38550577]
 [0.38486516]
 [0.38400224]
 [0.3847957 ]
 [0.3834898 ]
 [0.38283485]
 [0.38370264]
 [0.38553652]
 [0.38452536]
 [0.38305563]
 [0.38399723]
 [0.38315606]]
10 0
[[0.38788366]
 [0.38838857]
 [0.3887679 ]
 [0.39043   ]
 [0.39234722]
 [0.3885176 ]
 [0.38763374]
 [0.39087093]
 [0.39327323]
 [0.38680258]
 [0.38586667]
 [0.38406286]


In [37]:
# Make dataframe of predictions
prediction = pd.DataFrame(prediction)
prediction.index = test_recordings
prediction.columns = [class_dict[str(i)] for i in range(n_classes)]
print(prediction.columns)
prediction.to_csv(output_path)

Index(['Sicalis_luteola'], dtype='object')


In [38]:
display(prediction)

Unnamed: 0,Sicalis_luteola
project_2764_site_22266_2020_1_20200101_120000.p3.flac,0.393757
project_2764_site_22266_2020_1_20200101_150000.p4.flac,0.3964
project_2764_site_22266_2020_1_20200101_060700.p37.flac,0.392815
project_2764_site_22266_2020_1_20200101_150000.p3.flac,0.387954
project_2764_site_22266_2020_1_20200101_070000.p5.flac,0.39427
project_2764_site_22266_2020_1_20200101_060700.p34.flac,0.394082
project_2764_site_22266_2020_1_20200101_060700.p41.flac,0.392085
project_2764_site_22266_2020_1_20200101_140000.p1.flac,0.389691
project_2764_site_22266_2020_1_20200101_060700.p44.flac,0.394315
project_2764_site_22266_2020_1_20200101_120000.p1.flac,0.393348
