In [69]:
import numpy as np
import pandas as pd
import librosa
import matplotlib.pyplot as plt
import scipy
from tqdm import tqdm

from sklearn.preprocessing import minmax_scale
from sklearn.model_selection import train_test_split

import io

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Plotting graph of duration of overall training data
bird_list = args.train
bird_list_15 = bird_list.loc[bird_list['duration'] < 15]
bird_list_60 = bird_list.loc[bird_list['duration'] > 15]
bird_list_60 = bird_list_60.loc[bird_list_60['duration'] < 60]
bird_list_120 = bird_list.loc[bird_list['duration'] > 60]
bird_list_120 = bird_list_120.loc[bird_list_120['duration'] < 120]
bird_list_other = bird_list.loc[bird_list['duration'] > 120]

X = ['0-15','15-60','60-120','>120']
Y = []
Y.append(len(bird_list_15))
Y.append(len(bird_list_60))
Y.append(len(bird_list_120))
Y.append(len(bird_list_other))

fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
ax.bar(X,Y)
# naming the x axis 
plt.xlabel('Duration') 
# naming the y axis 
plt.ylabel('No. Birds') 

plt.title('Number of training data by duration')
plt.show

In [70]:
class args:
    DIR = os.getcwd()
    ROOT = 'birdsong-recognition/train_audio'
    ROOT_PATH = os.path.join(DIR,ROOT)
    # Sample rate
    sr = 44100
    # Training and test data
    train = pd.read_csv('./birdsong-recognition/train.csv')
    test = pd.read_csv('./birdsong-recognition/test.csv')
    # Birds
    birds = train['ebird_code'].unique()

FileNotFoundError: [Errno 2] File ./birdsong-recognition/sample_submission.csv does not exist: './birdsong-recognition/sample_submission.csv'

In [71]:
def load_audio(path, start=0, duration=None):
    data, sr = librosa.load(path, sr=args.sr, offset=start, duration=duration)
    if duration is None:
        duration = librosa.get_duration(data,sr)
    zeros = np.zeros(int(duration*args.sr), dtype=data.dtype)
    zeros[:len(data)] = data
    data = minmax_scale(zeros-zeros.mean(), feature_range=(-1,1))
    return data

In [72]:
# Source: https://www.kaggle.com/hidehisaarai1213/inference-pytorch-birdcall-resnet-baseline
BIRD_CODE = {
    'aldfly': 0, 'ameavo': 1, 'amebit': 2, 'amecro': 3, 'amegfi': 4,
    'amekes': 5, 'amepip': 6, 'amered': 7, 'amerob': 8, 'amewig': 9,
    'amewoo': 10, 'amtspa': 11, 'annhum': 12, 'astfly': 13, 'baisan': 14,
    'baleag': 15, 'balori': 16, 'banswa': 17, 'barswa': 18, 'bawwar': 19,
    'belkin1': 20, 'belspa2': 21, 'bewwre': 22, 'bkbcuc': 23, 'bkbmag1': 24,
    'bkbwar': 25, 'bkcchi': 26, 'bkchum': 27, 'bkhgro': 28, 'bkpwar': 29,
    'bktspa': 30, 'blkpho': 31, 'blugrb1': 32, 'blujay': 33, 'bnhcow': 34,
    'boboli': 35, 'bongul': 36, 'brdowl': 37, 'brebla': 38, 'brespa': 39,
    'brncre': 40, 'brnthr': 41, 'brthum': 42, 'brwhaw': 43, 'btbwar': 44,
    'btnwar': 45, 'btywar': 46, 'buffle': 47, 'buggna': 48, 'buhvir': 49,
    'bulori': 50, 'bushti': 51, 'buwtea': 52, 'buwwar': 53, 'cacwre': 54,
    'calgul': 55, 'calqua': 56, 'camwar': 57, 'cangoo': 58, 'canwar': 59,
    'canwre': 60, 'carwre': 61, 'casfin': 62, 'caster1': 63, 'casvir': 64,
    'cedwax': 65, 'chispa': 66, 'chiswi': 67, 'chswar': 68, 'chukar': 69,
    'clanut': 70, 'cliswa': 71, 'comgol': 72, 'comgra': 73, 'comloo': 74,
    'commer': 75, 'comnig': 76, 'comrav': 77, 'comred': 78, 'comter': 79,
    'comyel': 80, 'coohaw': 81, 'coshum': 82, 'cowscj1': 83, 'daejun': 84,
    'doccor': 85, 'dowwoo': 86, 'dusfly': 87, 'eargre': 88, 'easblu': 89,
    'easkin': 90, 'easmea': 91, 'easpho': 92, 'eastow': 93, 'eawpew': 94,
    'eucdov': 95, 'eursta': 96, 'evegro': 97, 'fiespa': 98, 'fiscro': 99,
    'foxspa': 100, 'gadwal': 101, 'gcrfin': 102, 'gnttow': 103, 'gnwtea': 104,
    'gockin': 105, 'gocspa': 106, 'goleag': 107, 'grbher3': 108, 'grcfly': 109,
    'greegr': 110, 'greroa': 111, 'greyel': 112, 'grhowl': 113, 'grnher': 114,
    'grtgra': 115, 'grycat': 116, 'gryfly': 117, 'haiwoo': 118, 'hamfly': 119,
    'hergul': 120, 'herthr': 121, 'hoomer': 122, 'hoowar': 123, 'horgre': 124,
    'horlar': 125, 'houfin': 126, 'houspa': 127, 'houwre': 128, 'indbun': 129,
    'juntit1': 130, 'killde': 131, 'labwoo': 132, 'larspa': 133, 'lazbun': 134,
    'leabit': 135, 'leafly': 136, 'leasan': 137, 'lecthr': 138, 'lesgol': 139,
    'lesnig': 140, 'lesyel': 141, 'lewwoo': 142, 'linspa': 143, 'lobcur': 144,
    'lobdow': 145, 'logshr': 146, 'lotduc': 147, 'louwat': 148, 'macwar': 149,
    'magwar': 150, 'mallar3': 151, 'marwre': 152, 'merlin': 153, 'moublu': 154,
    'mouchi': 155, 'moudov': 156, 'norcar': 157, 'norfli': 158, 'norhar2': 159,
    'normoc': 160, 'norpar': 161, 'norpin': 162, 'norsho': 163, 'norwat': 164,
    'nrwswa': 165, 'nutwoo': 166, 'olsfly': 167, 'orcwar': 168, 'osprey': 169,
    'ovenbi1': 170, 'palwar': 171, 'pasfly': 172, 'pecsan': 173, 'perfal': 174,
    'phaino': 175, 'pibgre': 176, 'pilwoo': 177, 'pingro': 178, 'pinjay': 179,
    'pinsis': 180, 'pinwar': 181, 'plsvir': 182, 'prawar': 183, 'purfin': 184,
    'pygnut': 185, 'rebmer': 186, 'rebnut': 187, 'rebsap': 188, 'rebwoo': 189,
    'redcro': 190, 'redhea': 191, 'reevir1': 192, 'renpha': 193, 'reshaw': 194,
    'rethaw': 195, 'rewbla': 196, 'ribgul': 197, 'rinduc': 198, 'robgro': 199,
    'rocpig': 200, 'rocwre': 201, 'rthhum': 202, 'ruckin': 203, 'rudduc': 204,
    'rufgro': 205, 'rufhum': 206, 'rusbla': 207, 'sagspa1': 208, 'sagthr': 209,
    'savspa': 210, 'saypho': 211, 'scatan': 212, 'scoori': 213, 'semplo': 214,
    'semsan': 215, 'sheowl': 216, 'shshaw': 217, 'snobun': 218, 'snogoo': 219,
    'solsan': 220, 'sonspa': 221, 'sora': 222, 'sposan': 223, 'spotow': 224,
    'stejay': 225, 'swahaw': 226, 'swaspa': 227, 'swathr': 228, 'treswa': 229,
    'truswa': 230, 'tuftit': 231, 'tunswa': 232, 'veery': 233, 'vesspa': 234,
    'vigswa': 235, 'warvir': 236, 'wesblu': 237, 'wesgre': 238, 'weskin': 239,
    'wesmea': 240, 'wessan': 241, 'westan': 242, 'wewpew': 243, 'whbnut': 244,
    'whcspa': 245, 'whfibi': 246, 'whtspa': 247, 'whtswi': 248, 'wilfly': 249,
    'wilsni1': 250, 'wiltur': 251, 'winwre3': 252, 'wlswar': 253, 'wooduc': 254,
    'wooscj2': 255, 'woothr': 256, 'y00475': 257, 'yebfly': 258, 'yebsap': 259,
    'yehbla': 260, 'yelwar': 261, 'yerwar': 262, 'yetvir': 263
}

In [73]:
def load_to_mfcc(file_path, start_time=0, duration=None):
    data = load_audio(file_path, start=start_time, duration=duration)
    mfcc = librosa.feature.mfcc(y=data, sr=args.sr, n_mfcc=13, n_fft=int(0.02*args.sr),hop_length=int(0.01*args.sr))
    return mfcc

In [74]:
# Max = maximum number of files from each bird
def get_features(max):
    labels = []
    features = []
    train = args.train
    for label, name in enumerate(tqdm(args.birds)):
        i = 0
        list_birds = train.loc[train['ebird_code'] == name]
        filenames = list_birds['filename']
        for f in filenames:
            if (i >= max):
                break
            file_path = f"{args.ROOT_PATH}/{name}/{f}"
            # Loads in first 15s
            mfcc = load_to_mfcc(file_path, duration=5)
            features.append(mfcc)
            labels.append(label)
            i += 1
    return features, labels

In [75]:
features, labels = get_features(4)
print("\n")
print(f"length of feature = {len(features)}")
print(f"length of labels = {len(labels)}")

TypeError: get_features() got an unexpected keyword argument 'duration_max'

length of feature = 1238


In [76]:
from scipy.spatial.distance import euclidean

from fastdtw import fastdtw

def calc_dtw(mfcc1, mfcc2):
    distance, path = fastdtw(mfcc1,mfcc2, dist=euclidean)
    return distance, path
dist, path = calc_dtw(features[1], features[1])
print(dist)

0.0


In [77]:
# Based on: https://www.c-sharpcorner.com/article/knn-k-nearest-neighbors/
def knn(data, query, k):
    neighbor_distances_and_indices = []
    
    # 3. For each example in the data
    for index, example in enumerate(data):
        # 3.1 Calculate the distance between the query example and the current
        # example from the data.
        distance, path = calc_dtw(example[:-1], query)
        
        # 3.2 Add the distance and the index of the example to an ordered collection
        neighbor_distances_and_indices.append((distance, index))
    
    # 4. Sort the ordered collection of distances and indices from
    # smallest to largest (in ascending order) by the distances
    sorted_neighbor_distances_and_indices = sorted(neighbor_distances_and_indices)
    
    # 5. Pick the first K entries from the sorted collection
    k_nearest_distances_and_indices = sorted_neighbor_distances_and_indices[:k]

    return k_nearest_distances_and_indices

In [78]:
def predict_birds(mfcc):
    predictions = knn(features, mfcc, k=3)
    pred_list = []
    dist0 = predictions[0][0]
    for dist, index in predictions:
        if abs(dist-dist0) < 200:
            pred_list.append(args.birds[labels[index]])
    return pred_list

In [79]:
train_x, test_x, train_y, test_y = train_test_split(features, labels, test_size=0.1)
print("Split into:\n")

print(f"# Training data: {len(train_x)}")
print(f"# Testing data: {len(test_x)}")

Split into:

# Training data: 950
# Testing data: 106


In [49]:
num_correct = 0
for index,val in enumerate(tqdm(test_x)):
    pred = predict_birds(val)[0]
    bird = args.birds[test_y[index]]
    if (pred == bird):
        num_correct += 1
print(f"\naccuracy = {num_correct/len(test_x)}")

100%|██████████| 106/106 [12:34<00:00,  7.12s/it]
accuracy = 1.0

