In [1]:
#Load in the libraries we need 
import torch
import numpy as np
import pandas as pd
import argparse
from glob import glob
import librosa
from model import get_model, create_model # this creates our model

from dataloader import get_dataloader # this is the data file
from metrics import LWLRAP, label_ranking_average_precision_score # metrics
from tqdm.auto import tqdm

  'The interface of "soundfile" backend is planned to change in 0.8.0 to '


### Load in the dataset to evaluate or test on

Here we are using Christian's test set

In [33]:
# Read the annotated test-set labels, then show the first rows as the cell output.
df = pd.read_csv('../labels_test_CD_20210309.csv')
df.head()

Unnamed: 0,Label,File,Event_ID,X_min,X_max,Y_min,Y_max,Species,EngName,Group,Date,recID,wave,duration
0,AshDro,extr_1670_B09_20190727_060000_All_Day_3h.txt,AshDro_1,3657.588,9824.903,1.788462,4.25,Dicrurus leucophaeus,Ashy Drongo,Birds,27/07/2019,B09,extr_1670_B09_20190727_060000_All_Day_3h.wav,6167.315
1,AshDro,extr_1670_B09_20190727_060000_All_Day_3h.txt,AshDro_2,10622.568,12178.988,2.153846,4.153846,Dicrurus leucophaeus,Ashy Drongo,Birds,27/07/2019,B09,extr_1670_B09_20190727_060000_All_Day_3h.wav,1556.42
2,AshDro,extr_1676_B01_20190718_060000_All_Day_3h.txt,AshDro_3,291.829,2295.72,1.361702,3.593381,Dicrurus leucophaeus,Ashy Drongo,Birds,18/07/2019,B01,extr_1676_B01_20190718_060000_All_Day_3h.wav,2003.891
3,AshDro2,extr_1463_B05_20181212_000009_All_Day.txt,AshDro2_1,5243.02,8965.874,1.73236,2.355231,Dicrurus leucophaeus,Ashy Drongo,Birds,12/12/2018,B05,extr_1463_B05_20181212_000009_All_Day.wav,3722.854
4,AshDro3,extr_1673_B05_20190729_060000_All_Day_3h.txt,AshDro3_1,5700.389,7821.012,1.73494,5.686747,Dicrurus leucophaeus,Ashy Drongo,Birds,29/07/2019,B05,extr_1673_B05_20190729_060000_All_Day_3h.wav,2120.623


## Load the classes
We need to load the classes that the models were trained on; in this case, those are the classes of the 51-class model.

In [3]:
# Read one class name per line into a plain Python list.
# Plain file iteration replaces np.loadtxt(..., delimiter='\n'): NumPy >= 1.23
# rejects a newline delimiter, and a one-line file would give a 0-d array.
# Blank lines and '#' comment lines are skipped, as np.loadtxt did by default.
with open('class_birds_51.txt', encoding='utf-8') as class_file:
    classes = [
        line.strip()
        for line in class_file
        if line.strip() and not line.lstrip().startswith('#')
    ]

We first preprocess the data, so that it can be used by our dataloaders. 

This includes adding a recording_id so that the correct audio file can be loaded, and a species_id if you are evaluating. There is also the opportunity to drop rows whose species is not in the classes file, to test how well the model performs on just the classes it knows.

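The function below returns the preprocessed DataFrame, a zero-filled predictions DataFrame, the list of species columns and, when not in test mode, a labels DataFrame with the same layout as the predictions.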

In [4]:
def preprocess(df, classes=None, drop_classes=False, mode='test'):
    """Prepare an annotation table for the dataloaders and build empty output tables.

    Parameters
    ----------
    df : pandas.DataFrame
        Annotation table. Needs a ``File`` column and, unless ``mode == 'test'``,
        a ``Species`` column. The caller's frame is not modified; all changes
        are made on a copy, which is returned.
    classes : sequence of str, optional
        Class names; the position of a name in the sequence is its species id.
        ``None`` is treated as an empty list. Any sequence (list, tuple,
        NumPy array) is accepted.
    drop_classes : bool
        Outside test mode, drop the rows whose species is not in ``classes``.
    mode : str
        ``'test'`` skips the labelling step and returns three values; any
        other value returns four.

    Returns
    -------
    tuple
        ``(df, cv_preds, species_cols)`` in test mode, otherwise
        ``(df, cv_preds, species_cols, labels_df)``. ``cv_preds`` has one row
        per distinct ``recording_id`` and one zero-filled column per class;
        ``labels_df`` is a copy of that empty table.
    """
    classes = [] if classes is None else list(classes)
    # Work on a copy so re-running the notebook cell never sees a half-processed frame.
    df = df.copy()

    # recording_id is the file name without its last four characters (e.g. ".txt")
    df['recording_id'] = [f[:-4] for f in df['File']]

    if mode != 'test':
        # First occurrence wins, matching list.index() for repeated class names.
        class_to_id = {}
        for idx, name in enumerate(classes):
            class_to_id.setdefault(name, idx)
        # Species outside the trained classes are labelled as unknown (-1).
        df['species_id'] = [class_to_id.get(s, -1) for s in df['Species']]
        if drop_classes:
            df = df[df['Species'].isin(classes)]
    df = df.reset_index(drop=True)

    # Prepare the output table that will receive the model predictions.
    species_cols = [f'{name}' for name in classes]
    cv_preds = pd.DataFrame(columns=species_cols)
    cv_preds['recording_id'] = df['recording_id'].drop_duplicates()
    cv_preds.loc[:, species_cols] = 0
    cv_preds = cv_preds.reset_index(drop=True)

    if mode == 'test':
        return df, cv_preds, species_cols
    return df, cv_preds, species_cols, cv_preds.copy()

## Evaluation loop

This is the evaluation loop. It loads each model, creates a dataloader and passes the data to the model. It then processes the predictions from the model, adds them to the predictions table, and writes the corresponding labels to the labels table.


In [5]:
def evaluation_loop(paths, species_cols, df, config, cv_preds, labels_df, model, device):
    """Accumulate predictions of every checkpoint in ``paths`` and record the labels.

    Parameters
    ----------
    paths : iterable of str
        Checkpoint files; each is loaded into ``model`` in turn.
    species_cols : list of str
        Columns of ``cv_preds`` / ``labels_df`` that hold per-class values.
    df : pandas.DataFrame
        Annotation table handed to ``get_dataloader`` (mode ``'val'``).
    config : object
        Settings object handed to ``get_dataloader``.
    cv_preds, labels_df : pandas.DataFrame
        Updated in place: sigmoid scores are added to ``cv_preds`` (one
        addition per checkpoint) and labels are written to ``labels_df``.
    model : torch.nn.Module
        Network that receives the checkpoint weights.
    device : torch.device
        Device the batches are moved to and the checkpoints are mapped to.

    Returns
    -------
    tuple
        ``(cv_preds, labels_df)``, the same objects that were passed in.

    NOTE(review): rows are addressed by running sample position, so the
    dataloader must yield exactly one sample per row of ``cv_preds``, in row
    order. Confirm against ``get_dataloader``.
    """
    for path in paths:
        print(f'loading model {path}')
        # map_location keeps checkpoints saved on a GPU loadable on a CPU-only machine.
        model.load_state_dict(torch.load(path, map_location=device))
        model.eval()
        # load the data into dataloaders
        dataloader = get_dataloader(df, config=config, mode='val')

        progress = tqdm(dataloader, total=len(dataloader))
        sub_index = 0  # row position of the next sample; restarts for every checkpoint
        with torch.no_grad():
            for im, l in progress:
                im = im.to(device)
                # Each slice along dim 1 is scored on its own (presumably one
                # sliding window per slice, TODO confirm); keep the element-wise
                # maximum of the model outputs over all slices.
                preds = None
                for window in torch.split(im, 1, dim=1):
                    logits = model(window.squeeze(1))
                    preds = logits if preds is None else torch.max(preds, logits)
                # confidence score of each species for every sample in the batch
                scores = preds.sigmoid().cpu().numpy()
                for val, ll in zip(scores, l.cpu().numpy()):
                    cv_preds.loc[sub_index, species_cols] += val
                    labels_df.loc[sub_index, species_cols] = ll
                    sub_index += 1
    return cv_preds, labels_df

## Inference loop
This is similar to the above loop, the only difference is that it only passes a predictions csv file back.  
This is the loop you would use to predict on data when you don't know what is within the audio files.

In [6]:
def inference_loop(paths, species_cols, df, config, cv_preds, model, device):
    """Accumulate predictions of every checkpoint in ``paths`` for unlabelled data.

    Parameters
    ----------
    paths : iterable of str
        Checkpoint files; each is loaded into ``model`` in turn.
    species_cols : list of str
        Columns of ``cv_preds`` that hold per-class scores.
    df : pandas.DataFrame
        Annotation table handed to ``get_dataloader`` (mode ``'test'``).
    config : object
        Settings object handed to ``get_dataloader``.
    cv_preds : pandas.DataFrame
        Updated in place: sigmoid scores are added once per checkpoint.
    model : torch.nn.Module
        Network that receives the checkpoint weights.
    device : torch.device
        Device the batches are moved to and the checkpoints are mapped to.

    Returns
    -------
    pandas.DataFrame
        ``cv_preds``, the same object that was passed in.

    NOTE(review): rows are addressed by running sample position, so the
    dataloader must yield exactly one sample per row of ``cv_preds``, in row
    order. Confirm against ``get_dataloader``.
    """
    for path in paths:
        print(f'loading model {path}')
        # map_location keeps checkpoints saved on a GPU loadable on a CPU-only machine.
        model.load_state_dict(torch.load(path, map_location=device))
        model.eval()
        # load the data into dataloaders
        dataloader = get_dataloader(df, config=config, mode='test')

        progress = tqdm(dataloader, total=len(dataloader))
        sub_index = 0  # row position of the next sample; restarts for every checkpoint
        with torch.no_grad():
            for im in progress:
                im = im.to(device)
                # Each slice along dim 1 is scored on its own (presumably one
                # sliding window per slice, TODO confirm); keep the element-wise
                # maximum of the model outputs over all slices.
                preds = None
                for window in torch.split(im, 1, dim=1):
                    logits = model(window.squeeze(1))
                    preds = logits if preds is None else torch.max(preds, logits)
                # confidence score of each species for every sample in the batch
                scores = preds.sigmoid().cpu().numpy()
                for val in scores:
                    cv_preds.loc[sub_index, species_cols] += val
                    sub_index += 1
    return cv_preds

## Config
The config controls the batch size (number of images on the GPU), num_workers, the sliding window length, the duration and other parameters that may be changed. It also contains the path to the folder where the model weights reside and the folder that contains the audio files.

In [7]:
class Config:
    """Tunable settings; an instance is passed to main() and on to get_dataloader()."""

    # --- run control ---
    mode = 'val'                    # main() runs inference only when this is 'test'
    model_path = 'models_51'        # folder searched for *.pth weights; change to use other models
    data_root = '../Test_data_CD'   # audio root; point it elsewhere to use other recordings
    output_csv = 'output.csv'       # file the predictions are saved to

    # --- loading ---
    batch_size = 8                  # images on the GPU at once; lower it on out-of-memory errors
    num_workers = 0                 # workers used when processing; more is faster but can be unstable on Windows

    # --- audio framing ---
    sr = 32_000                     # sample rate the audio is resampled to
    duration = 2.98                 # length of the frame that is looked at
    sliding_window = 2.98           # sliding window length
    total_duration = 14.9           # length of the audio files
    nmels = 128                     # mel buckets used when creating a mel spectrogram

    # --- model ---
    num_classes = len(classes)      # number of classes

In [8]:
def main(df, classes=None, drop_classes=False, config=None):
    """Run every checkpoint in ``config.model_path`` over ``df`` and save the averaged scores.

    Parameters
    ----------
    df : pandas.DataFrame
        Annotation table, passed to ``preprocess``.
    classes : sequence of str, optional
        Class names the models were trained on. ``None`` is treated as empty.
    drop_classes : bool
        Forwarded to ``preprocess``.
    config : Config, optional
        Settings object. When omitted, a fresh ``Config()`` is created at call
        time, so re-running the Config cell takes effect without redefining
        this function.

    Returns
    -------
    pandas.DataFrame or tuple
        ``cv_preds`` when ``config.mode == 'test'``; otherwise
        ``(cv_preds, labels_df)`` after printing LWLRAP and LRAP.

    Raises
    ------
    FileNotFoundError
        If no ``*.pth`` file exists in ``config.model_path``. Without this
        check the fold average below would divide by zero.
    """
    classes = [] if classes is None else list(classes)
    if config is None:
        config = Config()

    # Collect the checkpoints first so a wrong model folder fails before any work is done.
    # sorted() gives a stable fold order regardless of the file system.
    paths = sorted(glob(f'{config.model_path}/*.pth'))
    if not paths:
        raise FileNotFoundError(
            f'No model checkpoints (*.pth) found in {config.model_path!r}'
        )

    # we preprocess the data
    processed = preprocess(df, classes=classes, drop_classes=drop_classes, mode=config.mode)
    if config.mode == 'test':
        df, cv_preds, species_cols = processed
    else:
        df, cv_preds, species_cols, labels_df = processed

    # use the GPU when one is available, otherwise fall back to the CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = create_model(len(classes))
    model.to(device)
    if config.mode != 'test':
        cv_preds, labels_df = evaluation_loop(paths, species_cols, df, config, cv_preds, labels_df, model, device)
    else:
        cv_preds = inference_loop(paths, species_cols, df, config, cv_preds, model, device)
    # the loops add one score per checkpoint, so divide by the number of folds
    cv_preds.loc[:, species_cols] /= len(paths)

    print(f'Saving predictions to {config.output_csv}')
    cv_preds.to_csv(config.output_csv, index=False)
    if config.mode == 'test':
        return cv_preds

    # we evaluate the model using LWLRAP and LRAP
    preds = torch.from_numpy(cv_preds.loc[:, species_cols].values.astype(np.float32))
    labels = torch.from_numpy(labels_df.loc[:, species_cols].values.astype(np.float32))

    print(f"Label weighted label ranking average precision: {LWLRAP(preds, labels):.6}")
    preds = preds.numpy()
    labels = labels.numpy()
    print(f'Label ranking average precision:{label_ranking_average_precision_score(labels, preds):.6}')
    return cv_preds, labels_df

This will evaluate on all of the data within the CSV file

In [10]:
# Evaluate every row of the CSV, including species outside the class list.
p, l = main(df, classes=classes, drop_classes=False)

loading model models_51\model_0.pth


  0%|          | 0/20 [00:00<?, ?it/s]

loading model models_51\model_1.pth


  0%|          | 0/20 [00:00<?, ?it/s]

loading model models_51\model_2.pth


  0%|          | 0/20 [00:00<?, ?it/s]

loading model models_51\model_3.pth


  0%|          | 0/20 [00:00<?, ?it/s]

loading model models_51\model_4.pth


  0%|          | 0/20 [00:00<?, ?it/s]

Saving predictions to output.csv
Label weighted label ranking average precision: 0.820846
Label ranking average precision:0.850382


This will evaluate only on the classes that the model was trained on and that are present in the CSV file.

In [None]:
# Evaluate only the rows whose species is in the class list.
p, l = main(df, classes=classes, drop_classes=True)

In [None]:
# Switch a fresh config from its default 'val' mode to 'test' (prediction without evaluation)
config = Config()
config.mode = 'test'

This will predict on the audio files that are listed in the CSV file; it will not evaluate the predictions against the labels in the CSV file.

In [None]:
# Inference only: with the 'test' config, main returns just the predictions table.
p = main(df, classes=classes, drop_classes=False, config=config)