# Understanding Audio Data Augmentation from a more fundamental standpoint.

**Helper function for plotting**

In [None]:
import librosa.display
import matplotlib.pyplot as plt


def _plot_signal_and_augmented_signal(signal, augmented_signal, sr):
    """Plot the original and augmented waveforms on two stacked axes.

    Args:
        signal: Samples of the unmodified audio.
        augmented_signal: Samples of the audio after augmentation.
        sr: Sampling rate forwarded to the librosa waveform plotter.
    """
    # Newer librosa releases dropped ``waveplot`` in favour of ``waveshow``.
    # Prefer the new name and fall back so older installs keep working.
    draw_wave = getattr(librosa.display, "waveshow", None)
    if draw_wave is None:
        draw_wave = librosa.display.waveplot

    fig, ax = plt.subplots(nrows=2)
    draw_wave(signal, sr=sr, ax=ax[0])
    ax[0].set(title="Original signal")
    draw_wave(augmented_signal, sr=sr, ax=ax[1])
    ax[1].set(title="Augmented signal")
    # Stop the lower title from colliding with the upper axis labels.
    fig.tight_layout()
    plt.show()

# Functions

***add_white_noise***: Adds Gaussian white noise to the audio signal, scaled by the signal's standard deviation and a noise factor.

***time_stretch***: Changes the speed of the audio without affecting pitch.

***pitch_scale***: Shifts the pitch of the audio without changing its duration.

***random_gain***: Randomly adjusts the volume of the audio.

***invert_polarity***: Flips the audio waveform upside down.

***plot_signal_and_augmented_signal***: Visualizes the original and modified audio signals.

***create_and_save_augmented_audio***: Saves the augmented audio as a new file.


In [None]:
import random
import librosa
import numpy as np
import soundfile as sf
import IPython.display
from IPython.display import Audio  # Import for audio visualization

# Python 3.8
# Install required libraries: numpy, librosa, soundfile, matplotlib (plotting), IPython (audio playback in the notebook)

def add_white_noise(signal, noise_percentage_factor):
    """Return ``signal`` with scaled Gaussian noise added.

    The noise is drawn from a zero-mean normal distribution whose standard
    deviation equals that of ``signal`` and whose shape matches ``signal``;
    it is multiplied by ``noise_percentage_factor`` before being added.
    """
    spread = signal.std()
    gaussian = np.random.normal(loc=0, scale=spread, size=signal.shape)
    return signal + gaussian * noise_percentage_factor

def time_stretch(signal, sr, time_stretch_rate):
    """Return ``signal`` time-stretched by ``time_stretch_rate`` using librosa.

    ``sr`` is accepted but not used by this function.
    """
    return librosa.effects.time_stretch(signal, rate=time_stretch_rate)


def pitch_scale(signal, sr, num_semitones):
    """Return ``signal`` pitch-shifted by ``num_semitones`` steps using librosa.

    ``sr`` is the sampling rate of ``signal`` and is forwarded to librosa.
    """
    return librosa.effects.pitch_shift(signal, sr=sr, n_steps=num_semitones)


def random_gain(signal, min_factor=0.1, max_factor=0.12):
    """Return ``signal`` multiplied by a randomly chosen gain.

    The gain is drawn with ``random.uniform(min_factor, max_factor)``.
    """
    return signal * random.uniform(min_factor, max_factor)

def invert_polarity(signal):
    """Return ``signal`` with its sign flipped (multiplied by -1)."""
    flip = -1
    inverted = signal * flip
    return inverted

def plot_signal_and_augmented_signal(signal, augmented_signal, sr):
    """Draw the original and augmented signals in two stacked matplotlib panels.

    ``sr`` is accepted but not used; the samples are plotted against their
    index.
    """
    import matplotlib.pyplot as plt

    # (subplot position, samples to draw, panel title), top panel first.
    panels = (
        (211, signal, "Original Signal"),
        (212, augmented_signal, "Augmented Signal"),
    )

    plt.figure(figsize=(12, 6))
    for position, samples, heading in panels:
        plt.subplot(position)
        plt.plot(samples)
        plt.title(heading)

    plt.tight_layout()
    plt.show()

def create_and_save_augmented_audio(original_signal, sr, function_name, augmented_signal):
    """Write ``augmented_signal`` to ``augmented_<function_name>.wav``.

    The file is written with soundfile at sampling rate ``sr`` into the
    current working directory. ``original_signal`` is accepted but not used.
    """
    sf.write(
        file=f"augmented_{function_name}.wav",
        data=augmented_signal,
        samplerate=sr,
    )

if __name__ == "__main__":
    # Load the source clip once; every augmentation below receives its own copy.
    signal, sr = librosa.load("/content/scale.wav")

    # Additive noise with a factor of 0.1 -> augmented_noise.wav
    augmented_signal_noise = add_white_noise(
        signal.copy(), noise_percentage_factor=0.1
    )
    create_and_save_augmented_audio(
        original_signal=signal,
        sr=sr,
        function_name="noise",
        augmented_signal=augmented_signal_noise,
    )

    # Time stretch with rate 1.2 -> augmented_stretch.wav
    augmented_signal_stretch = time_stretch(
        signal.copy(), sr, time_stretch_rate=1.2
    )
    create_and_save_augmented_audio(
        original_signal=signal,
        sr=sr,
        function_name="stretch",
        augmented_signal=augmented_signal_stretch,
    )

    # Pitch shift by 3 semitones -> augmented_pitch.wav
    augmented_signal_pitch = pitch_scale(signal.copy(), sr, num_semitones=3)
    create_and_save_augmented_audio(
        original_signal=signal,
        sr=sr,
        function_name="pitch",
        augmented_signal=augmented_signal_pitch,
    )

    # Random gain with the default factor range -> augmented_gain.wav
    augmented_signal_gain = random_gain(signal.copy())
    create_and_save_augmented_audio(
        original_signal=signal,
        sr=sr,
        function_name="gain",
        augmented_signal=augmented_signal_gain,
    )

    # Polarity inversion -> augmented_inverted.wav
    augmented_signal_inverted = invert_polarity(signal.copy())
    create_and_save_augmented_audio(
        original_signal=signal,
        sr=sr,
        function_name="inverted",
        augmented_signal=augmented_signal_inverted,
    )




# Showcasing each generated augmented file

Each cell below plays back one of the WAV files written by the script above.

In [None]:
# Play back the noise-augmented clip.
Audio("augmented_noise.wav")

In [None]:
# Play back the time-stretched clip.
Audio("augmented_stretch.wav")

In [None]:
# Play back the pitch-shifted clip.
Audio("augmented_pitch.wav")

In [None]:
# Play back the gain-adjusted clip.
Audio("augmented_gain.wav")

In [None]:
# Play back the polarity-inverted clip.
Audio("augmented_inverted.wav")