In [1]:
import audiomentations as A
import librosa
import soundfile as sf
import os

# Augmentations

The purpose of this function is to augment audio files, expanding the dataset with diverse variations of each recording. Eight types of augmentation are currently supported; more can easily be added later by adding a new index to both the `aug_mapping` dictionary and the `index_to_transformation` function.

In [9]:
# Augmentation index -> name; the name is also used as the output subfolder for that augmentation.
aug_mapping = dict(enumerate([
    'TimeStretch',
    'PitchShift',
    'BackgroundNoise',
    'ShortNoises',
    'TimeMask',
    'SevenBandParametricEQ',
    'Padding',
    'AddGaussianSNR',
]))


def index_to_transformation(index: int, background = None, noise_dir: str = "D:/noises"):
    """Build the audiomentations transform registered under ``index``.

    Args:
        index: Augmentation index. Valid values are 0-7, matching the keys of
            ``aug_mapping``.
        background: Optional file name inside ``noise_dir`` to use as the
            background noise (index 2). When falsy, ``heavy_rain.wav`` is used.
        noise_dir: Directory that holds the noise files. Defaults to the
            location that was previously hard-coded.

    Returns:
        The transform for ``index``, constructed with ``p=1.0`` so it is
        applied on every call.

    Raises:
        ValueError: If ``index`` does not correspond to a known augmentation.
            (Previously the function silently returned ``None``, which only
            failed later when the caller tried to call the result.)
    """
    if background:
        background_noise_path = f"{noise_dir}/{background}" # path to custom background noise
    else:
        background_noise_path = f"{noise_dir}/heavy_rain.wav" # path to default background noise
    short_noise_path = f"{noise_dir}/horn.wav" # path to short noise/noises directory

    # NOTE(review): keyword names such as `fade` and `min_snr_in_db` follow the
    # audiomentations version this notebook was written against — confirm before upgrading.
    if index == 0:
        return A.TimeStretch(min_rate=0.9, max_rate=1.1, p=1.0) # TimeStretch: Changes the speed without changing the pitch
    if index == 1:
        return A.PitchShift(min_semitones=-2, max_semitones=2, p=1.0) # PitchShift: Shifts the pitch up or down without changing the tempo
    if index == 2:
        return A.AddBackgroundNoise(sounds_path=background_noise_path, p=1.0) # AddBackgroundNoise: Mixes in another sound to add background noise
    if index == 3:
        return A.AddShortNoises(sounds_path=short_noise_path, p=1.0) # AddShortNoises: Mixes in various short noise sounds
    if index == 4:
        return A.TimeMask(min_band_part=0.1, max_band_part=0.15, fade=True, p=1.0) # TimeMask: Makes a random part of the audio silent
    if index == 5:
        return A.SevenBandParametricEQ(p=1.0) # SevenBandParametricEQ: Adjusts the volume of 7 frequency bands
    if index == 6:
        return A.Padding(p=1.0) # Padding: Replaces a random part of the beginning or end with padding
    if index == 7:
        return A.AddGaussianSNR(p=1.0, min_snr_in_db=30, max_snr_in_db=90) # AddGaussianSNR: Injects gaussian noise using a randomly chosen signal-to-noise ratio

    # Fail loudly instead of returning None for an index that has no augmentation.
    raise ValueError(f"Unknown augmentation index: {index!r} (expected an integer from 0 to 7)")

# Implementation

To run the procedure, first set the paths of the data directory and the output directory. A subfolder for each augmentation is then created inside the output directory, and the augmented files are stored there under the same subfolder and file names as in the data directory.

In [22]:
data_directory =  "D:/data/b3" # path to your data_directory
output_directory = "D:/augumented_audio/new4" # path to your output directory

# Walk the dataset and, for every file, write one augmented copy per augmentation to
# <output_directory>/<augmentation name>/<folder relative to data_directory>/<filename>.
transforms = {} # index -> transform, filled on first use so each transform is built only once

for dirpath, dirnames, filenames in os.walk(data_directory):
    # Folder path relative to the data root ("." for the root itself). Unlike splitting
    # on "\\", this works with any path separator, keeps nested folders distinct and
    # never leaks the drive/root of data_directory into the output path.
    relative_dir = os.path.relpath(dirpath, data_directory)

    for filename in filenames:
        path = os.path.join(dirpath, filename)
        audio, sr = librosa.load(path, sr=None) # sr=None keeps the file's native sample rate

        for index, aug_name in aug_mapping.items():
            if index not in transforms:
                transforms[index] = index_to_transformation(index)
            augmented_sound = transforms[index](audio, sample_rate=sr)

            # normpath drops the "." component produced for files directly in data_directory
            output_subfolder = os.path.normpath(os.path.join(output_directory, aug_name, relative_dir))
            os.makedirs(output_subfolder, exist_ok=True)

            sf.write(os.path.join(output_subfolder, filename), augmented_sound, sr)

