In [1]:
import os

import warnings
warnings.filterwarnings(action='ignore')

import pandas as pd
import librosa
import numpy as np

from sklearn.utils import shuffle
from PIL import Image
from tqdm import tqdm
import matplotlib.pyplot as plt

from sklearn.metrics import confusion_matrix

import imageio
import glob

import tensorflow as tf

In [2]:
SAMPLE_RATE = 32000
SIGNAL_LENGTH = 5 # seconds
SPEC_SHAPE = (48, 128) # height x width
FMIN = 500
FMAX = 12500
dimension = (48, 128, 1)
SPEC_SHAPE = (48, 128)
random_seed = 123456
#bird_list = ['amerob', 'blujay', 'cangoo', 'gockin', 'norcar', 'rewbla', 'sonspa', 'swaspa']

In [3]:
soundscapes_ids = [28933, 42907]
soundscapes = pd.read_csv('birdclef/train_soundscape_labels.csv',)
soundscapes = soundscapes.query('audio_id in @soundscapes_ids')
names = pd.unique(soundscapes['birds'])
#print(names)
bird_list = []
for i in range (0, len(names)):
    split = names[i].split(' ')
    bird_list = np.append(bird_list, split)
print
bird_list = np.unique(bird_list)
bird_list = list(bird_list)
bird_list.remove('nocall')

In [4]:
def predict_argmax(soundscape_path, model_path='best_model.h5', threshold=0.3):
    
    # Load the best checkpoint
    model = tf.keras.models.load_model(model_path)

    # Open it with librosa
    
    sig = []
    for path in soundscape_path:
        s, rate = librosa.load(path, sr=SAMPLE_RATE)
        sig = np.concatenate((sig, s))
    
    # Store results so that we can analyze them later
    data = {'row_id': [], 'prediction': [], 'score': []}

    # Split signal into 5-second chunks
    # Just like we did before (well, this could actually be a seperate function)
    sig_splits = []
    for i in range(0, len(sig), int(SIGNAL_LENGTH * SAMPLE_RATE)):
        split = sig[i:i + int(SIGNAL_LENGTH * SAMPLE_RATE)]

        # End of signal?
        if len(split) < int(SIGNAL_LENGTH * SAMPLE_RATE):
            break

        sig_splits.append(split)

    # Get the spectrograms and run inference on each of them
    # This should be the exact same process as we used to
    # generate training samples!
    seconds, scnt, counter = 0, 0, 0
    for chunk in sig_splits:

        # Keep track of the end time of each chunk
        if seconds == 600:
            seconds = 0
            counter += 1
        seconds += 5

        # Get the spectrogram
        hop_length = int(SIGNAL_LENGTH * SAMPLE_RATE / (SPEC_SHAPE[1] - 1))
        mel_spec = librosa.feature.melspectrogram(y=chunk, 
                                                  sr=SAMPLE_RATE, 
                                                  n_fft=1024, 
                                                  hop_length=hop_length, 
                                                  n_mels=SPEC_SHAPE[0], 
                                                  fmin=FMIN, 
                                                  fmax=FMAX)

        mel_spec = librosa.power_to_db(mel_spec, ref=np.max) 

        # Normalize to match the value range we used during training.
        # That's something you should always double check!
        mel_spec -= mel_spec.min()
        mel_spec /= mel_spec.max()

        # Add channel axis to 2D array
        mel_spec = np.expand_dims(mel_spec, -1)

        # Add new dimension for batch size
        mel_spec = np.expand_dims(mel_spec, 0)
        # Predict
        p = model.predict(mel_spec)[0]
        # Get highest scoring species
        idx = p.argmax()
        species = bird_list[idx]
        score = p[idx]

        # Prepare submission entry
        data['row_id'].append(soundscape_path[counter].split(os.sep)[-1].rsplit('_', 1)[0] + 
                              '_' + str(seconds))    

        # Decide if it's a "nocall" or a species by applying a threshold
        if score > threshold:
            data['prediction'].append(species)
            scnt += 1
        else:
            data['prediction'].append('nocall')

        # Add the confidence score as well
        data['score'].append(score)

    return data

In [5]:

def prepare_results(results, soundscape_paths):
    # Make a new data frame

    # Merge with ground truth so we can inspect
    gt = pd.read_csv('birdclef/train_soundscape_labels.csv',)
    results = pd.merge(gt, results, on='row_id')

    trained_bird_list = pd.DataFrame(bird_list, columns = ['trained_labels'])
    soundscape_bird_list = pd.DataFrame(pd.unique(results['birds']), columns = ['soundscape_labels'])

    new_soundscape_list = []
    for elem in soundscape_bird_list['soundscape_labels']:
        new_soundscape_list += elem.split(' ')

    soundscape_bird_list = pd.unique(new_soundscape_list)
    new_list = pd.DataFrame(list(trained_bird_list['trained_labels']) + (list(soundscape_bird_list)), 
                            columns=['bird_names'])
    labels = pd.unique(new_list['bird_names'])
    #pd.unique(pd.DataFrame(new_list))
    labels.sort()
    #results.loc[results['birds'] ]
    for i in range(0, len(results)):
        if (results['birds'][i] not in labels):
            results = results.drop([i])

    return results


In [6]:
def confusionMatrix(ground_truth, predictions, labels):
    assert len(ground_truth) == len(predictions)
    zeros = np.zeros((len(labels)+1,len(labels)+2))
    matrix = pd.DataFrame(zeros, columns = np.append(labels, ['not_in_data', 'sum_labels']), 
                          index = np.append(labels, ['sum_predictions']))
    for i in range(0,len(ground_truth)):
        if (predictions[i] in labels):
            matrix[predictions[i]][ground_truth[i]] += 1
        else:
            matrix['not_in_data'][ground_truth[i]] += 1
    diagonal_sum = 0
    
    
    sum_labels = matrix.sum(axis=1)
    sum_predictions = matrix.sum(axis=0)
    matrix['sum_labels'] = sum_labels
    matrix.loc['sum_predictions'] = sum_predictions
    
    for elem in labels[:-1]:
        diagonal_sum += matrix[elem][elem]
    accuracy = diagonal_sum/len(ground_truth)
    
    return (matrix,accuracy)

In [13]:
#predict_argmax(soundscape_path = ['birdclef/train_soundscapes/28933_SSW_20170408.ogg'], 
#                             model_path='models/Bird10_P450_7C_4P_3D.h5', threshold=0.21)

In [8]:
def evaluate_soundscape(soundscape_paths = ['birdclef/train_soundscapes/28933_SSW_20170408.ogg'], 
                        model_path='models/best_model.h5', threshold=0.3):
    results = pd.DataFrame(predict_argmax(soundscape_path=soundscape_paths, model_path=model_path, threshold=threshold),
                           columns = ['row_id', 'prediction', 'score'])
    results = prepare_results(results = results, soundscape_paths = soundscape_paths)
    return confusionMatrix(list(results['birds']), list(results['prediction']), sorted(list(pd.unique(results['birds']))))
    

In [9]:
result = evaluate_soundscape(model_path='best_model.h5')
print(result[1])
result[0]

0.15


Unnamed: 0,amerob,blujay,cangoo,gockin,nocall,norcar,rewbla,sonspa,swaspa,not_in_data,sum_labels
amerob,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,3.0
blujay,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0
cangoo,5.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0
gockin,1.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,3.0
nocall,4.0,0.0,3.0,1.0,10.0,0.0,0.0,0.0,0.0,0.0,18.0
norcar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
rewbla,3.0,0.0,1.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,7.0
sonspa,5.0,0.0,10.0,15.0,8.0,0.0,0.0,0.0,0.0,0.0,38.0
swaspa,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
sum_predictions,20.0,0.0,17.0,17.0,24.0,0.0,0.0,0.0,0.0,2.0,0.0


In [10]:
result = evaluate_soundscape(model_path='models/Bird10_P198_4C_4P_2D.h5')
print(result[1])
result[0]

0.0125


Unnamed: 0,amerob,blujay,cangoo,gockin,nocall,norcar,rewbla,sonspa,swaspa,not_in_data,sum_labels
amerob,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0
blujay,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,2.0
cangoo,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,6.0
gockin,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,3.0
nocall,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,17.0,18.0
norcar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
rewbla,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,7.0
sonspa,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,37.0,38.0
swaspa,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0
sum_predictions,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,76.0,0.0


In [11]:
result = evaluate_soundscape(model_path='models/Bird10_P450_7C_4P_3D.h5')
print(result[1])
result[0]

0.0375


Unnamed: 0,amerob,blujay,cangoo,gockin,nocall,norcar,rewbla,sonspa,swaspa,not_in_data,sum_labels
amerob,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0
blujay,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
cangoo,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0
gockin,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0
nocall,0.0,0.0,0.0,18.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0
norcar,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
rewbla,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0
sonspa,0.0,0.0,0.0,38.0,0.0,0.0,0.0,0.0,0.0,0.0,38.0
swaspa,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
sum_predictions,0.0,0.0,0.0,80.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
result = evaluate_soundscape(soundscape_paths = ['birdclef/train_soundscapes/28933_SSW_20170408.ogg', 
                                                 'birdclef/train_soundscapes/42907_SSW_20170708.ogg',], 
                             model_path='models/distributedModel.h5', threshold=0.21)
print(result[1])
result[0]

0.015


Unnamed: 0,amerob,bkcchi,blujay,cangoo,gockin,nocall,norcar,reevir1,rewbla,sonspa,swaspa,yebsap,not_in_data,sum_labels
amerob,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0
bkcchi,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0
blujay,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
cangoo,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0
gockin,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0
nocall,0.0,0.0,0.0,0.0,93.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,93.0
norcar,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
reevir1,0.0,0.0,0.0,0.0,34.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,34.0
rewbla,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0
sonspa,0.0,0.0,0.0,0.0,38.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,38.0
