In [1]:
# Reload imported modules automatically before each cell runs, so edits to the
# local model / dataloader / metrics files are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
#Load in the libraries we need 
import torch
import numpy as np
import pandas as pd
import argparse
from glob import glob
import librosa
from model import get_model, create_model # this creates our model

from dataloader import get_dataloader, get_dataloader_single_clip # this is the data file
from metrics import LWLRAP, label_ranking_average_precision_score # metrics
from tqdm.auto import tqdm

  'The interface of "soundfile" backend is planned to change in 0.8.0 to '


### Load in the dataset to evaluate or test on

Here we are using Christian's test set

In [3]:
# Read the annotation table for the test set, then preview its first rows.
df = pd.read_csv('../labels_test_CD_20210309.csv')
df.head()

Unnamed: 0,Label,File,Event_ID,X_min,X_max,Y_min,Y_max,Species,EngName,Group,Date,recID,wave,duration
0,AshDro,extr_1670_B09_20190727_060000_All_Day_3h.txt,AshDro_1,3657.588,9824.903,1.788462,4.25,Dicrurus leucophaeus,Ashy Drongo,Birds,27/07/2019,B09,extr_1670_B09_20190727_060000_All_Day_3h.wav,6167.315
1,AshDro,extr_1670_B09_20190727_060000_All_Day_3h.txt,AshDro_2,10622.568,12178.988,2.153846,4.153846,Dicrurus leucophaeus,Ashy Drongo,Birds,27/07/2019,B09,extr_1670_B09_20190727_060000_All_Day_3h.wav,1556.42
2,AshDro,extr_1676_B01_20190718_060000_All_Day_3h.txt,AshDro_3,291.829,2295.72,1.361702,3.593381,Dicrurus leucophaeus,Ashy Drongo,Birds,18/07/2019,B01,extr_1676_B01_20190718_060000_All_Day_3h.wav,2003.891
3,AshDro2,extr_1463_B05_20181212_000009_All_Day.txt,AshDro2_1,5243.02,8965.874,1.73236,2.355231,Dicrurus leucophaeus,Ashy Drongo,Birds,12/12/2018,B05,extr_1463_B05_20181212_000009_All_Day.wav,3722.854
4,AshDro3,extr_1673_B05_20190729_060000_All_Day_3h.txt,AshDro3_1,5700.389,7821.012,1.73494,5.686747,Dicrurus leucophaeus,Ashy Drongo,Birds,29/07/2019,B05,extr_1673_B05_20190729_060000_All_Day_3h.wav,2120.623


In [4]:
# preprocess() and prediction() look files up through a 'filepath' column;
# here it is simply the 'wave' column under that name.
df['filepath'] = df['wave']

## Load the classes
We need to load the classes that the models were trained on; in this case, these are the classes of the 51-class model.

In [5]:
# Read one class name per line with plain file I/O rather than np.loadtxt:
# newer NumPy releases (1.23+) reject a newline as `delimiter`, loadtxt treats
# '#' as the start of a comment, and a one-line file comes back as a 0-d array
# that list() cannot iterate. Blank lines are skipped, as loadtxt did.
with open('class_birds_51.txt', encoding='utf-8') as class_file:
    classes = [line.strip() for line in class_file if line.strip()]

We first preprocess the data, so that it can be used by our dataloaders. 

This includes adding a recording_id so that it may load the correct audio file, and a species_id if you are evaluating some data. There is also the opportunity to drop classes that are not in the classes file to test how well it performs on just these classes.  

The function below outputs the preprocessed table, together with an empty predictions table and (when evaluating) a matching labels table.

In [1]:
def preprocess(df, classes=None, drop_classes=False, mode='test'):
    """Prepare the annotation table and zero-filled score tables.

    The caller's DataFrame is never modified; all changes are made on a copy.

    Parameters
    ----------
    df : pandas.DataFrame
        Annotation table. Needs a ``filepath`` column, and a ``Species`` column
        whenever ``mode`` is not ``'test'``.
    classes : sequence of str, optional
        Class names, in the order used for the score columns. ``None`` (the
        default) means no classes.
    drop_classes : bool, default False
        When true and ``mode`` is not ``'test'``, rows whose ``Species`` is not
        in ``classes`` are removed.
    mode : str, default 'test'
        ``'test'`` skips the label handling and returns three values; any other
        value adds a ``species_id`` column and returns four values.

    Returns
    -------
    tuple
        ``(df, cv_preds, species_cols)`` in ``'test'`` mode, otherwise
        ``(df, cv_preds, species_cols, labels_df)``. ``cv_preds`` has one row per
        distinct ``filepath`` with a float ``0.0`` column per class, and
        ``labels_df`` is an independent copy of it.
    """
    classes = [] if classes is None else list(classes)
    # Copy first so adding species_id below cannot leak into the caller's frame.
    df = df.copy()

    if mode != 'test':
        # Map each class name to its first position (same answer as list.index)
        # with a single dict lookup per row; unknown species get -1.
        # Change `Species` here if you label by another column.
        class_to_id = {}
        for position, name in enumerate(classes):
            class_to_id.setdefault(name, position)
        df['species_id'] = [class_to_id.get(species, -1) for species in df.Species]
        if drop_classes:
            # Change this too if you label by another column.
            df = df[df.Species.isin(classes)]
    df = df.reset_index(drop=True)

    # One score column per class, one row per distinct audio file. The columns are
    # created as floats up front because float scores are accumulated into them.
    species_cols = [f'{name}' for name in classes]
    unique_files = df['filepath'].drop_duplicates().reset_index(drop=True)
    cv_preds = pd.DataFrame(0.0, index=unique_files.index, columns=species_cols)
    cv_preds['filepath'] = unique_files

    if mode == 'test':
        return df, cv_preds, species_cols
    return df, cv_preds, species_cols, cv_preds.copy()

## Evaluation loop

This is the evaluation loop, which loads the model, creates a dataloader and passes the data to the model. It then processes the predictions from the model, adds them to the predictions CSV, and adds the corresponding labels to the labels CSV file. 


In [7]:
def evaluation_loop(paths, species_cols, df, config, cv_preds, labels_df, model, device):
    """Sum the scores of every checkpoint into ``cv_preds`` and record the labels.

    Parameters
    ----------
    paths : iterable of str
        Checkpoint files; each is loaded into ``model`` in turn.
    species_cols : list of str
        Columns of ``cv_preds`` / ``labels_df`` that hold per-class values.
    df, config
        Passed to ``get_dataloader(df, config=config, mode='val')``.
    cv_preds : pandas.DataFrame
        Score table, updated in place. Scores are *added*, so after several
        checkpoints each cell holds a sum, not an average.
    labels_df : pandas.DataFrame
        Label table, updated in place with the labels from the dataloader.
    model : torch.nn.Module
        Model that receives each checkpoint's weights.
    device : torch.device
        Device the batches are moved to and the weights are mapped onto.

    Returns
    -------
    tuple
        ``(cv_preds, labels_df)``, the same objects that were passed in.
    """
    for path in paths:
        print(f'loading model {path}')
        # map_location lets weights saved on a GPU load on a CPU-only machine.
        model.load_state_dict(torch.load(path, map_location=device))
        model.eval()
        # load the data into dataloaders
        dataloader = get_dataloader(df, config=config, mode='val')
        progress = tqdm(dataloader, total=len(dataloader))
        # Rows are addressed by a running counter that restarts for every
        # checkpoint. NOTE(review): this relies on the dataloader yielding samples
        # in the row order of cv_preds; confirm against get_dataloader.
        sub_index = 0
        with torch.no_grad():
            for im, batch_labels in progress:
                im = im.to(device)
                # Run the model on each size-1 slice along dim 1 and keep the
                # element-wise maximum of the raw outputs.
                # (presumably dim 1 indexes the sliding windows of a clip; TODO confirm)
                preds = None
                for x_partial in torch.split(im, 1, dim=1):
                    partial_out = model(x_partial.squeeze(1))
                    preds = partial_out if preds is None else torch.max(preds, partial_out)
                # Sigmoid turns the outputs into per-class confidence scores.
                scores = preds.sigmoid().cpu().numpy()
                for val, ll in zip(scores, batch_labels.cpu().numpy()):
                    cv_preds.loc[sub_index, species_cols] += val
                    labels_df.loc[sub_index, species_cols] = ll
                    sub_index += 1
    return cv_preds, labels_df

## Inference loop
This is similar to the above loop; the only difference is that it only passes a predictions CSV file back.  
This is the loop you would use to predict on data when you don't know what is within the audio files. 

In [8]:
def inference_loop(paths, species_cols, df, config, cv_preds, model, device):
    """Sum the scores of every checkpoint into ``cv_preds`` (no labels involved).

    Parameters
    ----------
    paths : iterable of str
        Checkpoint files; each is loaded into ``model`` in turn.
    species_cols : list of str
        Columns of ``cv_preds`` that hold per-class scores.
    df, config
        Passed to ``get_dataloader(df, config=config, mode='test')``.
    cv_preds : pandas.DataFrame
        Score table, updated in place. Scores are *added*, so after several
        checkpoints each cell holds a sum, not an average.
    model : torch.nn.Module
        Model that receives each checkpoint's weights.
    device : torch.device
        Device the batches are moved to and the weights are mapped onto.

    Returns
    -------
    pandas.DataFrame
        ``cv_preds``, the same object that was passed in.
    """
    for path in paths:
        print(f'loading model {path}')
        # map_location lets weights saved on a GPU load on a CPU-only machine.
        model.load_state_dict(torch.load(path, map_location=device))
        model.eval()
        # load the data into dataloaders
        dataloader = get_dataloader(df, config=config, mode='test')

        progress = tqdm(dataloader, total=len(dataloader))
        # Rows are addressed by a running counter that restarts for every
        # checkpoint. NOTE(review): this relies on the dataloader yielding samples
        # in the row order of cv_preds; confirm against get_dataloader.
        sub_index = 0
        with torch.no_grad():
            for im in progress:
                im = im.to(device)
                # Run the model on each size-1 slice along dim 1 and keep the
                # element-wise maximum of the raw outputs.
                # (presumably dim 1 indexes the sliding windows of a clip; TODO confirm)
                preds = None
                for x_partial in torch.split(im, 1, dim=1):
                    partial_out = model(x_partial.squeeze(1))
                    preds = partial_out if preds is None else torch.max(preds, partial_out)
                # Sigmoid turns the outputs into per-class confidence scores.
                scores = preds.sigmoid().cpu().numpy()
                for val in scores:
                    cv_preds.loc[sub_index, species_cols] += val
                    sub_index += 1
    return cv_preds

## Config
The config controls the batch size (number of images on the GPU), num_workers, the sliding window length, the duration and other parameters that may be changed. It also contains the path to where the model weights reside and the folder that contains the audio files.  

In [9]:
class Config:
    # Settings read by main(), prediction() and the dataloaders, grouped by purpose.

    # --- run control ---
    mode = 'val'  # 'test' makes main() predict only; any other value also scores against labels
    output_csv = 'output.csv'  # file the prediction table is written to

    # --- locations ---
    data_root = '../test_data_cd'  # change this to point at another audio root
    model_path = 'models_51'  # folder searched for *.pth weights; change this to use other models

    # --- loading ---
    batch_size = 8  # number of images on the GPU; lowering it may help with out-of-memory issues
    num_workers = 0  # workers used when processing; more is faster but can be unstable on Windows

    # --- audio framing ---
    sr = 32000  # the sample rate to resample the audio file to
    total_duration = 14.9  # duration of the audio files
    duration = 2.98  # duration of the frame to look at
    sliding_window = 2.98  # sliding window length

    # --- mel spectrogram ---
    nmels = 128  # number of mel buckets to use when creating a mel spectrogram
    fmin = 0  # presumably the lower frequency bound of the spectrogram; confirm in the dataloader
    fmax = 13000  # presumably the upper frequency bound of the spectrogram; confirm in the dataloader

    # --- model ---
    num_classes = len(classes)  # number of classes

In [10]:
def main(df, classes=None, drop_classes=False, config=None):
    """Run every checkpoint in ``config.model_path`` over ``df`` and average the scores.

    Parameters
    ----------
    df : pandas.DataFrame
        Annotation table handed to ``preprocess``.
    classes : sequence of str, optional
        Class names the models were trained on. ``None`` means no classes.
    drop_classes : bool, default False
        Forwarded to ``preprocess``.
    config : Config, optional
        Run settings. ``None`` creates a fresh ``Config()`` for this call, so one
        default instance is not shared between calls.

    Returns
    -------
    pandas.DataFrame or tuple
        ``cv_preds`` when ``config.mode == 'test'``, otherwise
        ``(cv_preds, labels_df)``. The averaged predictions are also written to
        ``config.output_csv``.

    Raises
    ------
    FileNotFoundError
        If ``config.model_path`` contains no ``*.pth`` file. Without this check
        the averaging step would divide by zero.
    """
    if classes is None:
        classes = []
    if config is None:
        config = Config()

    # we preprocess the data
    prepared = preprocess(df, classes=classes, drop_classes=drop_classes, mode=config.mode)
    if config.mode == 'test':
        df, cv_preds, species_cols = prepared
    else:
        df, cv_preds, species_cols, labels_df = prepared

    # use the GPU when one is available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Sorted so the checkpoints are always processed and logged in the same order.
    paths = sorted(glob(f'{config.model_path}/*.pth'))
    if not paths:
        raise FileNotFoundError(f'no .pth checkpoints found in {config.model_path!r}')

    model = create_model(len(classes))
    model.to(device)
    if config.mode != 'test':
        cv_preds, labels_df = evaluation_loop(paths, species_cols, df, config, cv_preds, labels_df, model, device)
    else:
        cv_preds = inference_loop(paths, species_cols, df, config, cv_preds, model, device)

    # The loops sum the scores of all checkpoints; divide to get their mean.
    cv_preds.loc[:, species_cols] /= len(paths)

    print(f'Saving predictions to {config.output_csv}')
    cv_preds.to_csv(config.output_csv, index=False)
    if config.mode == 'test':
        return cv_preds

    # we evaluate the model using LWLRAP and LRAP
    preds = torch.from_numpy(cv_preds.loc[:, species_cols].values.astype(np.float32))
    labels = torch.from_numpy(labels_df.loc[:, species_cols].values.astype(np.float32))

    print(f"Label weighted label ranking average precision: {LWLRAP(preds, labels):.6}")
    preds = preds.numpy()
    labels = labels.numpy()
    print(f'Label ranking average precision:{label_ranking_average_precision_score(labels, preds):.6}')
    return cv_preds, labels_df

This will evaluate on all of the data within the CSV file

In [11]:
# Evaluate on every annotated row; species outside `classes` are kept (labelled -1).
p, l = main(df, classes=classes, drop_classes=False)

loading model models_51\model_0.pth


  0%|          | 0/20 [00:00<?, ?it/s]

loading model models_51\model_1.pth


  0%|          | 0/20 [00:00<?, ?it/s]

loading model models_51\model_2.pth


  0%|          | 0/20 [00:00<?, ?it/s]

loading model models_51\model_3.pth


  0%|          | 0/20 [00:00<?, ?it/s]

loading model models_51\model_4.pth


  0%|          | 0/20 [00:00<?, ?it/s]

Saving predictions to output.csv
Label weighted label ranking average precision: 0.620193
Label ranking average precision:0.624025


This will evaluate on only classes the model has been trained with, and that are within the CSV file

In [12]:
# Evaluate only the rows whose species is one of the trained classes.
p, l = main(df, classes=classes, drop_classes=True)

loading model models_51\model_0.pth


  0%|          | 0/18 [00:00<?, ?it/s]

loading model models_51\model_1.pth


  0%|          | 0/18 [00:00<?, ?it/s]

loading model models_51\model_2.pth


  0%|          | 0/18 [00:00<?, ?it/s]

loading model models_51\model_3.pth


  0%|          | 0/18 [00:00<?, ?it/s]

loading model models_51\model_4.pth


  0%|          | 0/18 [00:00<?, ?it/s]

Saving predictions to output.csv
Label weighted label ranking average precision: 0.65085
Label ranking average precision:0.637668


In [13]:
# Take a fresh config and switch it from the default 'val' mode to 'test',
# in which main() only predicts and does not score against labels.
config = Config()
config.mode = 'test'

This will predict on the audio files that are within the CSV file, it will not evaluate on the CSV file. 

In [14]:
# config.mode is 'test' here, so main() returns only the prediction table.
p = main(df, classes=classes, drop_classes=False, config=config)

loading model models_51\model_0.pth


  0%|          | 0/20 [00:00<?, ?it/s]

loading model models_51\model_1.pth


  0%|          | 0/20 [00:00<?, ?it/s]

loading model models_51\model_2.pth


  0%|          | 0/20 [00:00<?, ?it/s]

loading model models_51\model_3.pth


  0%|          | 0/20 [00:00<?, ?it/s]

loading model models_51\model_4.pth


  0%|          | 0/20 [00:00<?, ?it/s]

Saving predictions to output.csv


In [15]:
## To predict on images that can't fit in memory

In [16]:
def prediction_for_clip(clip: np.ndarray, config, model, device):
    """Return the per-class maximum sigmoid score the model produces over one clip.

    A running maximum is kept, so memory use does not grow with the number of
    batches the loader yields.

    Parameters
    ----------
    clip : np.ndarray
        Audio samples, passed to ``get_dataloader_single_clip`` with ``config``.
    config
        Settings object forwarded to ``get_dataloader_single_clip``.
    model : torch.nn.Module
        Model to evaluate. The caller is expected to have put it in eval mode
        and on ``device``.
    device : torch.device
        Device each batch is moved to.

    Returns
    -------
    np.ndarray
        Maximum over the first axis of all batch outputs (one value per class,
        assuming the model returns ``(batch, n_classes)``; TODO confirm).

    Raises
    ------
    ValueError
        If the loader yields no samples, so there is nothing to take a maximum of.
    """
    dataloader = get_dataloader_single_clip(clip, config)
    best = None
    with torch.no_grad():
        for x in dataloader:
            x = x.to(device).float()
            scores = model(x).sigmoid().cpu().numpy()
            if len(scores) == 0:
                continue
            batch_best = scores.max(axis=0)
            best = batch_best if best is None else np.maximum(best, batch_best)

    if best is None:
        raise ValueError('the clip produced no frames to predict on')
    return best

In [17]:
from collections import defaultdict
def prediction(test_df, classes, config, path):
    """Predict per-class scores for every distinct audio file using one model.

    Each file is loaded whole and scored through ``prediction_for_clip``.

    Parameters
    ----------
    test_df : pandas.DataFrame
        Table with a ``filepath`` column naming files under ``config.data_root``.
    classes : sequence of str
        Class names; one score column is produced per class.
    config
        Settings object; ``data_root`` and ``output_csv`` are read here and the
        object is forwarded to ``prediction_for_clip``.
    path : str
        Checkpoint passed to ``get_model``.

    Returns
    -------
    pandas.DataFrame
        One row per distinct file with its class scores; also written to
        ``config.output_csv``.
    """
    # Labels are never used here, so always ask preprocess for its three-value
    # 'test' form. Forwarding config.mode returned four values whenever the mode
    # was not 'test', which broke this unpacking.
    df, cv_preds, species_cols = preprocess(test_df, classes=classes, drop_classes=False, mode='test')

    model = get_model(path, n_class=len(classes))
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.eval()
    model.to(device)

    # cv_preds has a fresh 0..n-1 index, so the enumerate position is the row label.
    for row, filepath in enumerate(tqdm(cv_preds.filepath)):
        # NOTE(review): sr=None keeps each file's native sample rate (no resampling);
        # confirm get_dataloader_single_clip does not assume config.sr.
        clip, _ = librosa.load(f'{config.data_root}/{filepath}', sr=None,
                               res_type="kaiser_fast")
        cv_preds.loc[row, species_cols] = prediction_for_clip(clip, config, model, device)

    print(f'Saving predictions to {config.output_csv}')
    cv_preds.to_csv(config.output_csv, index=False)
    return cv_preds

In [18]:
# glob returns matches in arbitrary order; sort so the same checkpoint is
# picked on every run and on every platform.
preds = prediction(df, classes, config, sorted(glob(f'{config.model_path}/*.pth'))[0])

  0%|          | 0/153 [00:00<?, ?it/s]

Saving predictions to output.csv


In [19]:
# Display the per-file class score table returned by prediction().
preds

Unnamed: 0,Tawny-bellied screech-owl,Pygmy owl,Silky-tailed nightjar,Barred owl - variable,Laughing falcon,Little chachalaca,Spectacled owl,Chicken,Grey mourner,White-winged potoo,...,Horornis vulcanius,Psilopogon armillaris,Heleia javanica,Alcippe pyrrhoptera,Psilopogon corvinus,long-tailed potoo,Pteruthius aenobarbus,Ficedula hyperythra,Arborophila javanica,filepath
0,0.008259,0.029858,0.009418,0.008108,0.002204,0.002622,0.004517,0.016741,0.002744,0.002361,...,0.005706,0.005329,0.032830,0.031523,0.008304,0.005790,0.143888,0.015439,0.009301,extr_1670_B09_20190727_060000_All_Day_3h.wav
1,0.000369,0.016525,0.001364,0.001553,0.000612,0.002166,0.001092,0.000475,0.001020,0.000520,...,0.052136,0.019826,0.292184,0.282649,0.009682,0.000432,0.032428,0.144607,0.019558,extr_1676_B01_20190718_060000_All_Day_3h.wav
2,0.005511,0.009227,0.004814,0.001872,0.002108,0.006632,0.006717,0.013879,0.003887,0.001219,...,0.001675,0.005838,0.001983,0.029115,0.000848,0.001020,0.012286,0.006302,0.002228,extr_1463_B05_20181212_000009_All_Day.wav
3,0.003901,0.005864,0.003884,0.007020,0.003759,0.004148,0.033579,0.007160,0.002459,0.005101,...,0.072637,0.010719,0.031941,0.088450,0.010597,0.003352,0.168614,0.012370,0.079762,extr_1673_B05_20190729_060000_All_Day_3h.wav
4,0.004797,0.010295,0.005602,0.007318,0.003046,0.002011,0.002527,0.005770,0.003906,0.003106,...,0.028021,0.017732,0.132747,0.031087,0.027683,0.003280,0.023212,0.438726,0.022567,extr_2153_B04_20190226_000231_All_Day.wav
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
148,0.007061,0.012608,0.004447,0.212155,0.009318,0.001985,0.002980,0.010450,0.009558,0.003434,...,0.166098,0.234202,0.006446,0.016063,0.025584,0.006624,0.009194,0.045231,0.012313,extr_1607_B12_20190726_060014_All_Day_3h.wav
149,0.010535,0.021525,0.030080,0.031205,0.005755,0.003748,0.011632,0.004574,0.026140,0.016765,...,0.995829,0.015496,0.020828,0.023169,0.012823,0.002989,0.048845,0.024853,0.006212,extr_5014_B06_20181006_064554_All_Day [-7.2658...
150,0.015084,0.008051,0.007471,0.022827,0.004960,0.004617,0.008907,0.019349,0.027564,0.005640,...,0.132943,0.010238,0.062941,0.050058,0.007866,0.007683,0.052997,0.045417,0.011808,extr_5001_B01_20180925_064600_All_Day [-7.2861...
151,0.196457,0.008152,0.000743,0.010174,0.001055,0.000259,0.001998,0.044363,0.003152,0.001199,...,0.433292,0.020000,0.077400,0.040497,0.129467,0.001289,0.011987,0.028881,0.001424,extr_5003_B02_20180920_063200_All_Day [-7.2733...
