In [1]:
import pandas as pd
import numpy as np
import glob
import librosa
import torchaudio.transforms
from tqdm.notebook import tqdm
from matplotlib import pyplot as plt
import torch
import torchaudio
import os

In [2]:
DATASET_PATH = '../../data/raw/pam22'

In [3]:
SR = 32000

In [4]:
meta = pd.read_csv(f'{DATASET_PATH}/annotations.csv')
meta.head()

Unnamed: 0,Filename,Start Time (s),End Time (s),Low Freq (Hz),High Freq (Hz),Species eBird Code
0,UHH_001_S01_20161121_150000.flac,6.8,8.2,2678,6053,hawama
1,UHH_001_S01_20161121_150000.flac,13.7,14.0,1362,8749,hawama
2,UHH_001_S01_20161121_150000.flac,20.7,21.1,1417,8040,hawama
3,UHH_001_S01_20161121_150000.flac,27.3,27.7,3091,5873,hawama
4,UHH_001_S01_20161121_150000.flac,28.6,28.8,3194,6594,hawama


In [5]:
meta.iloc[:,3].value_counts()

Low Freq (Hz)
0       813
2642    170
2749    163
2194    157
2989    155
       ... 
218       1
1162      1
273       1
701       1
4613      1
Name: count, Length: 4039, dtype: int64

# Convert Zenodo dataset to 5sec format
Assumes format from 2023 competition Kenya dataset, which has start=end time

In [6]:
SEG_LEN = 5
def process_file(group: pd.DataFrame):
    last_event = group['Start Time (s)'].max()
    num_segments = int(last_event // SEG_LEN + 1)
    
    
    # create labels list
    label_bins = [list() for _ in range(num_segments)]
    for idx, row in group.iterrows():
        bin_start = int(row.iloc[1] // SEG_LEN)
        bin_end = min(int(row.iloc[2] // SEG_LEN), len(label_bins)-1)
        bird = row.iloc[5]
        if bird == '????':
            continue
        print(bin_start,bin_end, bird, len(label_bins))
        for bin in range(bin_start, bin_end+1):
            if bird not in label_bins[bin]:
                label_bins[bin].append(bird)
        
    # create new dataframe
    filename = group.iloc[0,0]
    offsets = np.arange(0,num_segments)*SEG_LEN
    filenames = [f'{filename[:-5]}_{o}.ogg' for o in offsets]
    labels = [str(l) for l in label_bins]
    
    return pd.DataFrame({
        'filename': filenames,
        'offset': offsets,
        'labels': labels
    })

meta_5s = meta.groupby('Filename').apply(process_file).reset_index(drop=True)
meta_5s.head()

1 1 hawama 60
2 2 hawama 60
4 4 hawama 60
5 5 hawama 60
5 5 hawama 60
7 7 hawama 60
8 8 hawama 60
8 8 hawama 60
9 9 hawama 60
10 10 hawama 60
13 14 hawama 60
14 14 hawama 60
15 15 hawama 60
16 16 hawama 60
16 16 hawama 60
16 16 hawama 60
17 17 hawama 60
18 18 hawama 60
20 20 hawama 60
21 21 hawama 60
22 22 hawama 60
23 24 hawama 60
27 27 hawama 60
28 28 hawama 60
30 30 hawama 60
31 31 hawama 60
32 32 hawama 60
32 33 hawama 60
33 33 hawama 60
33 34 hawama 60
36 36 hawama 60
38 38 hawama 60
38 38 hawama 60
39 40 hawama 60
40 41 hawama 60
44 44 hawama 60
45 45 hawama 60
48 48 hawama 60
48 48 hawama 60
49 49 hawama 60
49 49 hawama 60
50 50 hawama 60
50 50 hawama 60
54 54 hawama 60
55 55 hawama 60
56 56 hawama 60
47 47 ercfra 60
37 37 jabwar 60
37 37 jabwar 60
37 37 jabwar 60
37 37 jabwar 60
37 37 jabwar 60
37 37 jabwar 60
39 39 jabwar 60
39 39 jabwar 60
39 39 jabwar 60
39 39 jabwar 60
39 39 jabwar 60
39 39 jabwar 60
39 39 jabwar 60
39 39 jabwar 60
39 39 jabwar 60
40 40 jabwar 60
40 40 jabw

  meta_5s = meta.groupby('Filename').apply(process_file).reset_index(drop=True)


Unnamed: 0,filename,offset,labels
0,UHH_001_S01_20161121_150000_0.ogg,0,[]
1,UHH_001_S01_20161121_150000_5.ogg,5,['hawama']
2,UHH_001_S01_20161121_150000_10.ogg,10,['hawama']
3,UHH_001_S01_20161121_150000_15.ogg,15,[]
4,UHH_001_S01_20161121_150000_20.ogg,20,['hawama']


In [7]:
import ast

# Set the primary label, arbitrarily selects a bird, can be used for splitter
def set_primary(row):
    labels = ast.literal_eval(row['labels'])
    if len(labels) > 0:
        row['primary_label'] = labels[0]
    return row

meta_5s['rating'] = 5
meta_5s['primary_label'] = 'silent'
meta_5s = meta_5s.apply(set_primary, axis=1)
meta_5s.head()

Unnamed: 0,filename,offset,labels,rating,primary_label
0,UHH_001_S01_20161121_150000_0.ogg,0,[],5,silent
1,UHH_001_S01_20161121_150000_5.ogg,5,['hawama'],5,hawama
2,UHH_001_S01_20161121_150000_10.ogg,10,['hawama'],5,hawama
3,UHH_001_S01_20161121_150000_15.ogg,15,[],5,silent
4,UHH_001_S01_20161121_150000_20.ogg,20,['hawama'],5,hawama


In [8]:
meta_5s[meta_5s['labels'] != '[]'].head(20)

  meta_5s[meta_5s['labels'] != '[]'][meta_5s['filename'].str.startswith('PER_001_S01_20190116_100007Z')].sort_values('offset').head(20)


Unnamed: 0,filename,offset,labels,rating,primary_label


In [9]:
meta_5s.to_csv(f'{DATASET_PATH}/train_metadata.csv', index=False)

# Create 5 sec segments

In [10]:
import soundfile as sf

if not os.path.exists(f'{DATASET_PATH}/train_audio'):
    os.makedirs(f'{DATASET_PATH}/train_audio')

audio_loaded = None
audio_loaded_name = None
for i, row in tqdm(meta_5s.iterrows(), total=len(meta_5s)):
    source_file = '_'.join(row['filename'].split('_')[:-1])
    target_file = f'{DATASET_PATH}/train_audio/{row["filename"]}.ogg'
    if os.path.exists(target_file):
        continue   
    
    if audio_loaded_name != source_file:
        audio_loaded, _  = librosa.load(f'{DATASET_PATH}/soundscape_data/{source_file}.flac', sr=SR)
        audio_loaded_name = source_file
    
    # select segment by offset
    offset = row['offset'] * SR
    audio = audio_loaded[offset:offset+min(len(audio_loaded), SR*SEG_LEN)]
    
    # crop or pad
    if len(audio) > SR * SEG_LEN:
        audio = audio[:SR * SEG_LEN]
    if len(audio) < SR * SEG_LEN:
        audio = np.pad(audio, (0, SR * SEG_LEN - len(audio)))
    
    # save audio to ogg
    sf.write(target_file[:-4], audio, SR, format='ogg')

  0%|          | 0/34499 [00:00<?, ?it/s]