In [6]:
import librosa
import tensorflow as tf
import tensorflow_io as tfio

def build_artificial_dataset(num_samples: int):
    """
    Builds a toy dataset made of repeated copies of librosa's 'nutcracker' example.

    The recording is decoded once and the resulting (audio, sampling rate)
    pair is repeated, rather than re-reading the same file on every iteration
    and holding ``num_samples`` copies of the waveform in a single tensor.

    @param num_samples: number of (audio, sampling rate) elements to produce;
        zero or a negative value yields an empty dataset
    @return: a tf.data.Dataset whose elements are (audio data, sampling rate) tuples
    """
    # Loop-invariant: every sample comes from the same file, so load it once.
    y, sr = librosa.load(librosa.ex('nutcracker'))

    # Clamp the count so a negative value keeps meaning "no samples" (as it did
    # with range()) instead of being read by repeat() as "repeat indefinitely".
    count = max(num_samples, 0)

    return tf.data.Dataset.from_tensors((y, sr)).repeat(count)

# Small demo dataset: ten elements produced by build_artificial_dataset.
ds = build_artificial_dataset(num_samples=10)

In [7]:
from audiomentations import Compose, AddGaussianNoise, PitchShift, Shift

# Waveform augmentation chain; each transform below is configured with p=0.5.
augmentations_pipeline = Compose(
    transforms=[
        # Gaussian noise with amplitude bounds 0.001 .. 0.015.
        AddGaussianNoise(p=0.5, min_amplitude=0.001, max_amplitude=0.015),
        # Pitch shift bounded to -4 .. +4 semitones.
        PitchShift(p=0.5, min_semitones=-4, max_semitones=4),
        # Shift bounded to a fraction of -0.5 .. +0.5.
        Shift(p=0.5, min_fraction=-0.5, max_fraction=0.5),
    ],
)

In [8]:
def apply_pipeline(y, sr):
    """
    Runs the module-level augmentations_pipeline on one audio example
    @param y: audio data
    @param sr: sampling rate
    @return: the pipeline's output for the given audio
    """
    return augmentations_pipeline(y, sr)


@tf.function
def tf_apply_pipeline(feature, sr, ):
    """
    Applies the augmentation pipeline to audio files
    @param feature: audio data
    @param sr: sampling rate
    @return: tuple of (augmented audio data, unchanged sampling rate)
    """
    augmented_feature = tf.numpy_function(
        apply_pipeline, inp=[feature, sr], Tout=tf.float32, name="apply_pipeline"
    )
    # tf.numpy_function returns a tensor with no static shape, which leaves
    # downstream ops working on an unknown-rank tensor. Re-attach the input's
    # shape; this assumes the pipeline keeps the number of samples unchanged,
    # and ensure_shape raises at runtime if that assumption is violated.
    # When the input shape itself is unknown this is a no-op.
    augmented_feature = tf.ensure_shape(augmented_feature, feature.shape)

    return augmented_feature, sr


def augment_audio_dataset(dataset: tf.data.Dataset):
    """
    Maps tf_apply_pipeline over every element of the given dataset
    @param dataset: dataset whose elements are passed to tf_apply_pipeline
    @return: the mapped dataset
    """
    return dataset.map(tf_apply_pipeline)

In [9]:
# NOTE: `ds` is rebound in place, so re-running this cell augments the data
# again and appends one more trailing axis to the audio tensor.
ds = augment_audio_dataset(ds).map(
    lambda audio, rate: (tf.expand_dims(audio, axis=-1), rate)
)