In [1]:
import pandas as pd
import numpy as np
import glob
import librosa
import torchaudio.transforms
from tqdm.notebook import tqdm
from matplotlib import pyplot as plt
import torch
import torchaudio
import os

In [2]:
DATASET_PATH = '../../data/raw/pam21'

In [3]:
SR = 32000

In [4]:
meta = pd.read_csv(f'{DATASET_PATH}/annotations.csv')
meta.head()

Unnamed: 0,Filename,Start Time (s),End Time (s),Low Freq (Hz),High Freq (Hz),Species eBird Code
0,NES_001_S01_20190914_043000.flac,29.7,33.0,0,3098,compot1
1,NES_001_S01_20190914_043000.flac,53.8,57.0,0,4084,compot1
2,NES_001_S01_20190914_043000.flac,75.4,78.4,0,3662,compot1
3,NES_001_S01_20190914_043000.flac,99.1,102.1,0,3521,compot1
4,NES_001_S01_20190914_043000.flac,122.7,126.4,0,3239,compot1


In [5]:
meta.iloc[:,3].value_counts()

Low Freq (Hz)
0       430
1694    160
1016    155
2033    145
1864    127
       ... 
756       1
2129      1
1707      1
1848      1
5081      1
Name: count, Length: 1074, dtype: int64

# Convert Zenodo dataset to 5sec format
Assumes format from 2023 competition Kenya dataset, which has start=end time

In [6]:
SEG_LEN = 5
def process_file(group: pd.DataFrame):
    last_event = group['Start Time (s)'].max()
    num_segments = int(last_event // SEG_LEN + 1)
    
    
    # create labels list
    label_bins = [list() for _ in range(num_segments)]
    for idx, row in group.iterrows():
        bin_start = int(row.iloc[1] // SEG_LEN)
        bin_end = min(int(row.iloc[2] // SEG_LEN), len(label_bins)-1)
        bird = row.iloc[5]
        if bird == '????':
            continue
        print(bin_start,bin_end, bird, len(label_bins))
        for bin in range(bin_start, bin_end+1):
            if bird not in label_bins[bin]:
                label_bins[bin].append(bird)
        
    # create new dataframe
    filename = group.iloc[0,0]
    offsets = np.arange(0,num_segments)*SEG_LEN
    filenames = [f'{filename[:-5]}_{o}.ogg' for o in offsets]
    labels = [str(l) for l in label_bins]
    
    return pd.DataFrame({
        'filename': filenames,
        'offset': offsets,
        'labels': labels
    })

meta_5s = meta.groupby('Filename').apply(process_file).reset_index(drop=True)
meta_5s.head()

5 6 compot1 633
10 11 compot1 633
15 15 compot1 633
19 20 compot1 633
24 25 compot1 633
30 31 compot1 633
34 35 compot1 633
38 39 compot1 633
43 43 compot1 633
47 48 compot1 633
52 52 compot1 633
57 58 compot1 633
62 62 compot1 633
66 67 compot1 633
71 72 compot1 633
76 77 compot1 633
81 82 compot1 633
87 87 compot1 633
91 92 compot1 633
96 97 compot1 633
101 102 compot1 633
106 107 compot1 633
111 112 compot1 633
118 118 compot1 633
122 123 compot1 633
128 128 compot1 633
136 136 compot1 633
145 146 compot1 633
150 151 compot1 633
155 156 compot1 633
162 162 compot1 633
168 169 compot1 633
174 174 compot1 633
180 181 compot1 633
187 188 compot1 633
202 203 compot1 633
215 215 compot1 633
220 221 compot1 633
227 227 compot1 633
235 236 compot1 633
260 260 compot1 633
317 317 compot1 633
607 607 yebori1 720
626 627 yebori1 720
630 632 yebori1 720
633 636 yebori1 720
637 638 yebori1 720
641 644 yebori1 720
644 645 yebori1 720
647 649 yebori1 720
650 652 yebori1 720
655 655 banana 720
655

  meta_5s = meta.groupby('Filename').apply(process_file).reset_index(drop=True)


Unnamed: 0,filename,offset,labels
0,NES_001_S01_20190914_043000_0.ogg,0,[]
1,NES_001_S01_20190914_043000_5.ogg,5,[]
2,NES_001_S01_20190914_043000_10.ogg,10,[]
3,NES_001_S01_20190914_043000_15.ogg,15,[]
4,NES_001_S01_20190914_043000_20.ogg,20,[]


In [7]:
import ast

# Set the primary label, arbitrarily selects a bird, can be used for splitter
def set_primary(row):
    labels = ast.literal_eval(row['labels'])
    if len(labels) > 0:
        row['primary_label'] = labels[0]
    return row

meta_5s['rating'] = 5
meta_5s['primary_label'] = 'silent'
meta_5s = meta_5s.apply(set_primary, axis=1)
meta_5s.head()

Unnamed: 0,filename,offset,labels,rating,primary_label
0,NES_001_S01_20190914_043000_0.ogg,0,[],5,silent
1,NES_001_S01_20190914_043000_5.ogg,5,[],5,silent
2,NES_001_S01_20190914_043000_10.ogg,10,[],5,silent
3,NES_001_S01_20190914_043000_15.ogg,15,[],5,silent
4,NES_001_S01_20190914_043000_20.ogg,20,[],5,silent


In [11]:
meta_5s[meta_5s['labels'] != '[]'].head(20)

Unnamed: 0,filename,offset,labels,rating,primary_label
5,NES_001_S01_20190914_043000_25.ogg,25,['compot1'],5,compot1
6,NES_001_S01_20190914_043000_30.ogg,30,['compot1'],5,compot1
10,NES_001_S01_20190914_043000_50.ogg,50,['compot1'],5,compot1
11,NES_001_S01_20190914_043000_55.ogg,55,['compot1'],5,compot1
15,NES_001_S01_20190914_043000_75.ogg,75,['compot1'],5,compot1
19,NES_001_S01_20190914_043000_95.ogg,95,['compot1'],5,compot1
20,NES_001_S01_20190914_043000_100.ogg,100,['compot1'],5,compot1
24,NES_001_S01_20190914_043000_120.ogg,120,['compot1'],5,compot1
25,NES_001_S01_20190914_043000_125.ogg,125,['compot1'],5,compot1
30,NES_001_S01_20190914_043000_150.ogg,150,['compot1'],5,compot1


In [9]:
meta_5s.to_csv(f'{DATASET_PATH}/train_metadata.csv', index=False)

# Create 5 sec segments

In [12]:
import soundfile as sf

if not os.path.exists(f'{DATASET_PATH}/train_audio'):
    os.makedirs(f'{DATASET_PATH}/train_audio')

audio_loaded = None
audio_loaded_name = None
for i, row in tqdm(meta_5s.iterrows(), total=len(meta_5s)):
    source_file = '_'.join(row['filename'].split('_')[:-1])
    target_file = f'{DATASET_PATH}/train_audio/{row["filename"]}.ogg'
    if os.path.exists(target_file):
        continue   
    
    if audio_loaded_name != source_file:
        audio_loaded, _  = librosa.load(f'{DATASET_PATH}/soundscape_data/{source_file}.flac', sr=SR)
        audio_loaded_name = source_file
    
    # select segment by offset
    offset = row['offset'] * SR
    audio = audio_loaded[offset:offset+min(len(audio_loaded), SR*SEG_LEN)]
    
    # crop or pad
    if len(audio) > SR * SEG_LEN:
        audio = audio[:SR * SEG_LEN]
    if len(audio) < SR * SEG_LEN:
        audio = np.pad(audio, (0, SR * SEG_LEN - len(audio)))
    
    # save audio to ogg
    sf.write(target_file[:-4], audio, SR, format='ogg')

  0%|          | 0/23887 [00:00<?, ?it/s]