### Data augmentation

This script is used to enhance the database by augmenting (change pitch and volume randomly) the wav snippets for swallowing sounds (underrepresented class in database).

In [None]:
import librosa
import soundfile as sf
import numpy as np
import os

In [2]:
def random_pitch_volume_augment(input_file, output_file, pitch_range, volume_range):
    """
    Apply random pitch and volume augmentation to a single WAV file.
    
    Args:
        input_file (str): Path to the input WAV file.
        output_file (str): Path to save the augmented WAV file.
        pitch_range (tuple): Range for pitch shift (min_semitones, max_semitones).
        volume_range (tuple): Range for volume adjustment (min_dB, max_dB).
    """
    # Load the audio file
    y, sr = librosa.load(input_file, sr=None)
    
    # Apply random pitch shift, ensuring it's not zero
    while True:
        pitch_shift = np.random.uniform(pitch_range[0], pitch_range[1])
        if pitch_shift != 0:
            break
    y = librosa.effects.pitch_shift(y, sr=sr, n_steps=pitch_shift)
    
    # Apply random volume adjustment, ensuring it's not zero
    while True:
        volume_adjustment = np.random.uniform(volume_range[0], volume_range[1])
        if volume_adjustment != 0:
            break
    y = y * (10 ** (volume_adjustment / 20))
    
    # Save the augmented audio file
    sf.write(output_file, y, sr)

In [8]:
input_dir="/Users/jannisdaiber/Documents/Repos/github/ProjectMedicalWearables/Database/snippet_wavs/participant05/session01",
output_dir = "/Users/jannisdaiber/Documents/Repos/github/ProjectMedicalWearables/Database/snippet_wavs/participant05/session01",
pitch_range=(-1, 1),  # Pitch shift range in semitones
volume_range=(-4, 7)  # Volume adjustment range in dB

os.makedirs(output_dir, exist_ok=True)
for file_name in os.listdir(input_dir):
    if file_name.endswith(".wav") and file_name.startswith("swallowing"):
        input_file = os.path.join(input_dir, file_name)
        base_name = os.path.splitext(file_name)[0] 
        output_file = os.path.join(output_dir, f"{base_name}_aug.wav")
        random_pitch_volume_augment(input_file, output_file, pitch_range, volume_range)


swallowing_water_50.806_51.806_aug.wav:   Pitch shift: -0.6272748873304037, Volume shift: -0.7528225544710994
swallowing_cookie_103.943_104.943_aug.wav:   Pitch shift: 0.6853760956869943, Volume shift: 4.897471679462086
swallowing_water_46.24_47.24_aug.wav:   Pitch shift: -0.3211447602300679, Volume shift: -2.4971086693195046
swallowing_chip_80.823_81.823_aug.wav:   Pitch shift: 0.4485007453371732, Volume shift: 5.98189548426215
swallowing_apple_115.018_116.018_aug.wav:   Pitch shift: -0.8343850923832259, Volume shift: -2.210092602759978
swallowing_carrot_67.126_68.126_aug.wav:   Pitch shift: -0.4071094540471525, Volume shift: -3.4639537549001513
swallowing_yoghurt_87.623_88.623_aug.wav:   Pitch shift: -0.9250210209922975, Volume shift: 5.99545253813147
swallowing_bread_33.029_34.029_aug.wav:   Pitch shift: -0.40250888245466787, Volume shift: 1.288367332209555
swallowing_bread_30.309_31.309_aug.wav:   Pitch shift: 0.16635921892182126, Volume shift: -1.4419428301254964
