In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so the
# dataset and output folders configured later in this notebook are reachable.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import os

# Input: root directory that is searched recursively for .wav files.
DATA_DIR = "/content/drive/MyDrive/DS340_Final_Project/Audio_Song_Actors_01-24"
# Output: directory that receives the generated spectrogram PNG images.
OUTPUT_DIR = "/content/drive/MyDrive/DS340_Final_Project/augmented_spectrograms"

# Create the output directory up front; exist_ok=True makes re-running this cell safe.
os.makedirs(OUTPUT_DIR, exist_ok=True)


In [None]:
# `global` is a reserved keyword, so `import global` is a SyntaxError that
# prevents this whole cell (and every import in it) from running. The module
# used later for file discovery is `glob`.
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
from tqdm import tqdm
import glob

In [None]:
def add_noise(y, noise_factor=0.02):
    """
    Add random Gaussian white noise to an audio signal.

    Parameters
    ----------
    y : array-like
        Audio samples. Any shape is accepted (for example mono ``(n,)`` or
        multi-channel ``(channels, n)``); one noise value is drawn per sample.
    noise_factor : float
        Controls the intensity of the noise: the unit-variance noise is
        multiplied by this factor before being added to ``y``.

    Returns
    -------
    numpy.ndarray
        ``y + noise_factor * noise``, with the same shape as ``y``.

    Notes
    -----
    Noise is drawn from NumPy's global random state, so call
    ``np.random.seed(...)`` beforehand if reproducible output is needed.
    """
    y = np.asarray(y)
    # Draw noise with y's full shape rather than len(y): len() only covers the
    # first axis, which breaks broadcasting for multi-channel input.
    noise = np.random.standard_normal(y.shape)
    return y + noise_factor * noise

def pitch_shift(y, sr, n_steps=2):
    """
    Shift the pitch of the audio by n_steps semitones.

    Parameters
    ----------
    y : audio samples, passed straight through to librosa.
    sr : sample rate of ``y``.
    n_steps : number of semitones to shift.
        Positive n_steps -> higher pitch, negative -> lower pitch.

    Returns
    -------
    The result of ``librosa.effects.pitch_shift`` for the given arguments.
    """
    # Forward the caller's n_steps (previously a literal 2 was always used,
    # so the parameter had no effect).
    return librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)

def time_stretch(y, rate=0.8):
    """
    Speed up (rate > 1) or slow down (rate < 1) the audio.

    Parameters
    ----------
    y : audio samples, passed straight through to librosa.
    rate : stretch factor forwarded to ``librosa.effects.time_stretch``.

    Returns
    -------
    The result of ``librosa.effects.time_stretch`` for the given arguments.
    """
    # Forward the caller's rate (previously a literal 0.8 was always used,
    # so the parameter had no effect).
    return librosa.effects.time_stretch(y, rate=rate)


In [None]:
def make_and_save_spectrogram(y, sr, save_path):
    """
    Given an audio waveform (y) and sample rate (sr),
    generate and save a Mel-spectrogram image to 'save_path'.

    Parameters
    ----------
    y : audio samples passed to ``librosa.feature.melspectrogram``.
    sr : sample rate of ``y``.
    save_path : destination path handed to ``Figure.savefig``.

    The image is a 3x3 inch figure with axes hidden and no padding, showing
    a 128-band Mel spectrogram converted to dB relative to its maximum.
    Any exception from plotting or saving propagates to the caller.
    """
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
    S_dB = librosa.power_to_db(S, ref=np.max)

    # Use an explicit figure/axes pair instead of pyplot's implicit
    # "current figure" so exactly this figure is drawn on, saved and closed.
    fig, ax = plt.subplots(figsize=(3, 3))
    try:
        librosa.display.specshow(S_dB, sr=sr, x_axis='time', y_axis='mel', ax=ax)
        ax.axis('off')
        fig.savefig(save_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even if plotting or saving fails;
        # otherwise figures accumulate in memory across a large batch.
        plt.close(fig)


In [None]:
def process_audio_file(file_path, output_dir,
                       do_noise=True, do_pitch=True, do_stretch=True,
                       noise_factor=0.5, n_steps=2, rate=0.8):
    """
    Load an audio file, optionally apply various augmentations,
    generate spectrogram images, and save them.

    Parameters
    ----------
    file_path : path of the audio file to load (at its native sample rate).
    output_dir : directory in which the PNG images are written.
    do_noise, do_pitch, do_stretch : toggles for the three augmentations.
    noise_factor : intensity passed to ``add_noise`` (default 0.5).
    n_steps : semitone shift passed to ``pitch_shift`` (default 2).
    rate : stretch factor passed to ``time_stretch`` (default 0.8).

    Output files are named ``<base>_orig.png``, ``<base>_noise.png``,
    ``<base>_pitch.png`` and ``<base>_stretch.png``, where ``<base>`` is the
    input file name without its extension. The original spectrogram is
    always written; each augmented one only when its toggle is true.
    """
    # sr=None keeps the file's own sample rate instead of resampling.
    y, sr = librosa.load(file_path, sr=None)

    base_name = os.path.splitext(os.path.basename(file_path))[0]

    def _save(signal, suffix):
        # Render `signal` and write it as <output_dir>/<base_name>_<suffix>.png.
        save_path = os.path.join(output_dir, f"{base_name}_{suffix}.png")
        make_and_save_spectrogram(signal, sr, save_path)

    _save(y, "orig")

    if do_noise:
        _save(add_noise(y, noise_factor=noise_factor), "noise")

    if do_pitch:
        _save(pitch_shift(y, sr, n_steps=n_steps), "pitch")

    if do_stretch:
        _save(time_stretch(y, rate=rate), "stretch")


In [None]:
import glob

# Recursively collect every .wav file under DATA_DIR. glob returns paths in
# filesystem order, so sort them to make the processing order deterministic.
all_wav_files = sorted(
    glob.glob(os.path.join(DATA_DIR, '**', '*.wav'), recursive=True)
)

print(f"Found {len(all_wav_files)} .wav files to process.")

# Each file yields up to four spectrogram images; show a progress bar since
# the full batch takes a while.
for file_path in tqdm(all_wav_files, desc="Generating spectrograms"):
    process_audio_file(file_path, OUTPUT_DIR)


Found 1012 .wav files to process.
