### Import dependencies

In [2]:
import json
import os
import shutil

import librosa as li
import numpy as np
import opendatasets as od
import pandas as pd
import soundfile as sf
from matplotlib import pyplot as plt
from scipy import signal

### Function definitions

In [None]:
# Generate butterworth highpass coefficients
def butter_highpass(cutoff, fs, order=5):
    """Design a digital Butterworth high-pass filter.

    Args:
        cutoff: Cutoff frequency, in the same units as ``fs``.
        fs: Sampling rate of the signal that will be filtered.
        order: Filter order handed to ``scipy.signal.butter``.

    Returns:
        Tuple ``(b, a)`` with the numerator and denominator coefficients.
    """
    # scipy expects the critical frequency as a fraction of Nyquist (fs / 2)
    nyquist = 0.5 * fs
    coefficients = signal.butter(
        order, cutoff / nyquist, btype='high', analog=False
    )
    numerator, denominator = coefficients
    return numerator, denominator


# Apply filter to signal
def apply_butter_highpass(data, cutoff, fs, order=5):
    """High-pass filter ``data`` with a Butterworth filter.

    Args:
        data: Samples to filter.
        cutoff: Cutoff frequency, in the same units as ``fs``.
        fs: Sampling rate of ``data``.
        order: Order of the Butterworth filter.

    Returns:
        The filtered signal as returned by ``scipy.signal.filtfilt``.
    """
    coefficients = butter_highpass(cutoff, fs, order=order)
    # filtfilt takes (b, a, x); unpack the coefficient pair in that order
    return signal.filtfilt(*coefficients, data)

In [None]:
# Remove sections of silence or low intensity signal
def remove_silence(signal, thresh=18, hop=2048, plot=False):
    """Drop silent / low-intensity sections and return the remaining audio.

    Note: the ``signal`` parameter shadows the ``scipy.signal`` module inside
    this function; the module is not used here.

    Args:
        signal: Audio samples. Assumed to be a 1-D (mono) array — TODO confirm
            whether multi-channel input must be supported.
        thresh: Threshold passed as ``top_db`` to ``librosa.effects.split``.
        hop: Hop length in samples; the analysis frame length is ``2 * hop``.
        plot: If True, plot the signal with the detected interval boundaries
            drawn as red vertical lines.

    Returns:
        np.ndarray with the non-silent intervals concatenated in their
        original order (an empty array when no interval is found).
    """
    splits = li.effects.split(
        y=signal, top_db=thresh, frame_length=(hop * 2), hop_length=hop
    )
    if plot:
        # Use the absolute peak so the markers span the waveform even when the
        # largest excursion is negative
        peak = np.max(np.abs(signal))
        plt.subplots(figsize=(12, 4))
        plt.plot(signal)
        plt.vlines(splits, ymin=-peak, ymax=peak, color='red')
        plt.show()

    if len(splits) == 0:
        # np.concatenate rejects an empty sequence
        return np.asarray([])

    # Concatenate array slices directly instead of extending a Python list
    # sample by sample, which boxes every sample into a Python object
    return np.concatenate([signal[start:end] for start, end in splits])

In [None]:
# Split audio into segments of desired length
def split_audio(signal, target_length, samplerate, plot=False):
    """Cut ``signal`` into consecutive, equally sized segments.

    The last segment is zero-padded at the end when the signal does not fill
    it completely.

    Args:
        signal: 1-D sequence of audio samples.
        target_length: Segment duration in seconds (may be fractional).
        samplerate: Sampling rate of ``signal`` in samples per second.
        plot: If True, overlay all segments in a single plot.

    Returns:
        List of segments, each ``round(target_length * samplerate)`` samples
        long. Empty list for an empty signal.

    Raises:
        ValueError: If the requested segment is shorter than one sample.
    """
    # Work in whole samples so that fractional lengths (e.g. 2.5 s) still
    # produce valid integer slice indices
    segment_len = int(round(target_length * samplerate))
    if segment_len <= 0:
        raise ValueError('target_length * samplerate must be at least one sample')

    n_segments = -(-len(signal) // segment_len)  # ceiling division
    audio_segments = []

    for n in range(n_segments):
        s = signal[n * segment_len: (n + 1) * segment_len]

        if len(s) < segment_len:
            # Only the final segment can be short; pad it with trailing zeros
            s = np.pad(s, (0, segment_len - len(s)), 'constant')

        audio_segments.append(s)

        if plot:
            plt.plot(s, alpha=1 / n_segments)

    if plot:
        plt.show()

    return audio_segments

In [None]:
# Apply all signal processing functions to audio and return segments
def generate_preprocessed_samples(path, sr, length, hp=700):
    """Load an audio file and turn it into cleaned, fixed-length segments.

    Pipeline: load as mono, high-pass filter, remove silent sections, split.

    Args:
        path: Path of the audio file to load.
        sr: Sample rate passed to ``librosa.load``.
        length: Length of each output segment, in seconds.
        hp: Cutoff frequency of the high-pass filter.

    Returns:
        List of audio segments as produced by ``split_audio``.
    """
    # Load audio file (the sample rate reported by librosa is used from here on)
    audio, rate = li.load(path, sr=sr, mono=True)

    # Apply high-pass filter
    filtered = apply_butter_highpass(data=audio, cutoff=hp, fs=rate, order=5)

    # Delete silent sections
    voiced = remove_silence(filtered, thresh=18, hop=2048, plot=False)

    # Split into segments of desired length
    return split_audio(voiced, target_length=length, samplerate=rate)

# Audio dataset download and preparation

Download and preparation of the [British Birdsong Dataset](https://www.kaggle.com/datasets/rtatman/british-birdsong-dataset), a balanced and curated compilation of bird songs from the Xeno-Canto database, consisting of 88 species from the UK. The collection was made by Dan Stowell and shared on Kaggle by Rachel Tatman.

## Download

In [None]:
dataset_url = (
    'https://www.kaggle.com/datasets/rtatman/british-birdsong-dataset?resource=download'
)
# Build the paths with os.path.join so they work on any OS. The trailing ''
# keeps a trailing separator, because other cells append names to these paths
# by plain string concatenation.
dowload_path = os.path.join('..', 'datasets', '')

od.download(dataset_url, data_dir=dowload_path)

dataset_path = os.path.join(dowload_path, 'british-birdsong-dataset', '')

In [None]:
# Parse metadata csv, keeping only the columns used to name the audio files
# (requires `import pandas as pd` in the import cell)
metadata = pd.read_csv(
    os.path.join(dataset_path, 'birdsong_metadata.csv'),
    usecols=['file_id', 'genus', 'species', 'english_cname'],
)

## Preparation

In [None]:
# Length of every generated audio segment, in seconds
SAMPLE_LENGTH = 3

# Sample rate (Hz) that all audio files are resampled to
SAMPLE_RATE = 16000

Read all audio files from the directory, apply the high-pass filter, remove silent sections and split each recording into segments. Save each segment to disk, naming the file after the bird's scientific name (genus and species) taken from the dataset metadata.

In [None]:
# Source folder of the downloaded recordings and output folders for the
# generated segments / spectrograms. The trailing '' keeps a trailing
# separator so the values match the string-concatenated form used elsewhere.
old_path = os.path.join(dataset_path, 'songs', 'songs', '')
audio_path = os.path.join(dataset_path, 'audio', '')
spec_path = os.path.join(dataset_path, 'spectrograms', '')

# Create the output folders; only report "already exists" when that is really
# the case, and let any other error (permissions, bad path) surface
for folder in (audio_path, spec_path):
    if os.path.isdir(folder):
        print(folder + ' already exists\n')
    else:
        os.makedirs(folder)
        print('Created new folder: ' + folder)

for file in os.listdir(old_path):
    stem, extension = os.path.splitext(file)
    if extension.lower() != '.flac':
        continue  # Not a recording

    # Recordings are expected to be named 'xc<file_id>.flac'; remove the
    # prefix explicitly (str.lstrip/rstrip strip character sets, not affixes)
    file_id = stem[2:] if stem.startswith('xc') else stem
    data = metadata.loc[metadata['file_id'] == int(file_id)]
    if len(data) != 1:
        raise ValueError(
            f'Expected exactly one metadata row for {file}, found {len(data)}'
        )

    # Scientific name (genus_species) is used as class folder and file prefix
    name = data['genus'].item() + '_' + data['species'].item()
    subfolder = os.path.join(audio_path, name)
    os.makedirs(subfolder, exist_ok=True)

    # Preprocess audio and get segments of desired length
    audio_segments = generate_preprocessed_samples(
        old_path + file, sr=SAMPLE_RATE, length=SAMPLE_LENGTH
    )

    # Iterate through the audio segments and save each one as a separate flac file
    for i, segment in enumerate(audio_segments):
        new_filename = os.path.join(subfolder, f'{name}_{file_id}_{i}.flac')
        sf.write(new_filename, segment, SAMPLE_RATE, format='flac', subtype='PCM_16')

# The original recordings are no longer needed once all segments are written
shutil.rmtree(os.path.join(dataset_path, 'songs'))

print('Finished generating audio samples!')

# Audio preprocessing and feature extraction

In [None]:
# Paths are built with os.path.join so they are valid on any OS; the trailing
# '' keeps a trailing separator because other code appends names to these
# paths by plain string concatenation.
# Get dataset path
dataset_path = os.path.join('..', 'datasets', 'british-birdsong-dataset', '')
# Get audio path
audio_path = os.path.join(dataset_path, 'audio', '')
# Get spectrogram path
spec_path = os.path.join(dataset_path, 'spectrograms', '')

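Read the samples from disk and store the file paths, encoded labels, label mapping, MFCC data and mel-spectrogram file paths in a JSON file. The mel spectrograms themselves are saved as separate NPY files.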

In [None]:
def preprocess_audio_dataset(
    data_path, json_path=None, mfcc_count=13, hop=512, fft_len=2048, spec_dir=None
):
    """Extract MFCCs and mel spectrograms for every audio file under ``data_path``.

    Each sub-folder of ``data_path`` is treated as one class; its folder name
    is the label. Mel spectrograms are saved as NPY files in a per-label
    sub-folder of ``spec_dir``.

    Args:
        data_path: Root folder containing one sub-folder of audio per label.
        json_path: If given, the collected data is dumped to this JSON file.
        mfcc_count: Number of MFCCs computed per frame.
        hop: Hop length in samples for the MFCC / mel-spectrogram analysis.
        fft_len: FFT window length in samples.
        spec_dir: Root folder for the spectrogram NPY files. Defaults to the
            module-level ``spec_path``.

    Returns:
        Dict with the keys ``label_map`` (label names), ``encoded_labels``
        (index into ``label_map`` per file), ``mfccs`` (transposed MFCC
        matrices as nested lists), ``files`` (audio file paths) and
        ``spectrograms`` (NPY file paths).
    """
    if spec_dir is None:
        spec_dir = spec_path  # Module-level default defined by the notebook

    data_dict = {
        'label_map': [],
        'encoded_labels': [],
        'mfccs': [],
        'files': [],
        'spectrograms': [],
    }

    for path, dirs, files in os.walk(data_path):
        # Sort in place so the traversal, and therefore the label encoding,
        # does not depend on the file system's listing order
        dirs.sort()

        if path == data_path:  # Ignore parent folder
            continue

        # Add unique labels to label_map list
        label = os.path.basename(path)
        if label not in data_dict['label_map']:
            data_dict['label_map'].append(label)
        index = data_dict['label_map'].index(label)

        # Create subfolder for spectrograms (safe to re-run, creates parents)
        label_spec_dir = os.path.join(spec_dir, label)
        os.makedirs(label_spec_dir, exist_ok=True)

        for f in sorted(files):
            audio_file = os.path.join(path, f)

            # Add encoded label and file path to their lists
            data_dict['encoded_labels'].append(index)
            data_dict['files'].append(audio_file)

            # Load audio at its native sample rate and compute MFCCs
            y, sr = li.load(audio_file, sr=None, mono=True)
            mfccs = li.feature.mfcc(y=y, sr=sr, n_mfcc=mfcc_count, hop_length=hop, n_fft=fft_len)

            # Cast np.array to list is needed to save as JSON file
            data_dict['mfccs'].append(mfccs.transpose().tolist())

            # Generate mel spectrogram, add spec path to dict and save to NPY file
            mel_spec = li.feature.melspectrogram(
                y=y, sr=sr, n_fft=fft_len, hop_length=hop, power=0.5, fmin=200, n_mels=128
            ).transpose()  # Transpose may not be needed!

            # Swap only the real extension; str.replace could also alter a
            # '.flac' occurring in the middle of the name
            npy_filename = os.path.join(label_spec_dir, os.path.splitext(f)[0] + '.npy')
            np.save(npy_filename, mel_spec)
            data_dict['spectrograms'].append(npy_filename)

    # Store data dictionary in JSON file
    if json_path:
        with open(json_path, 'w') as jf:
            json.dump(data_dict, jf, indent=4)
        print(f'Successfully saved preprocessed data to {json_path}!')
        file_count = len(data_dict['files'])
        print(f'{file_count} audio samples were processed!')

    return data_dict

In [None]:
# Destination of the JSON summary, next to the audio and spectrogram folders
json_path = f'{dataset_path}preprocessed_data.json'
# Use 25 MFCCs per frame instead of the function's default
preprocess_audio_dataset(data_path=audio_path, json_path=json_path, mfcc_count=25)