In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so that the dataset folders under
# /content/drive/MyDrive referenced by the later cells can be read and written.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import librosa
import soundfile as sf
!pip install audiomentations
from audiomentations import Compose, AddGaussianNoise, TimeStretch, PitchShift
from audiomentations.core.transforms_interface import BaseWaveformTransform
import numpy as np
import glob

# Dataset locations on the mounted Drive.
tess_path = '/content/drive/MyDrive/TESS'  # original TESS recordings (input)
augmented_path = '/content/drive/MyDrive/TESS_augmented/'  # augmented copies (output)
sample_rate = 22050  # rate handed to librosa.load when each file is read

# Make sure the output root exists before any file is written into it.
os.makedirs(augmented_path, exist_ok=True)

# Augmentation pipeline: additive Gaussian noise, time stretching and pitch
# shifting. Each transform is configured with p=0.5, so a given file may get
# any combination of the three.
# NOTE(review): no random seed is set in this cell, so the augmented output
# is expected to differ from run to run.
augmentation_steps = [
    AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.5),
    TimeStretch(min_rate=0.8, max_rate=1.2, p=0.5),
    PitchShift(min_semitones=-2.0, max_semitones=2.0, p=0.5),
]
augment = Compose(augmentation_steps)


def augment_and_save(file_path, output_dir, sample_rate=22050):
    """Augment a single audio file and write the result into ``output_dir``.

    The file is loaded with librosa at ``sample_rate``, passed through the
    module-level ``augment`` pipeline, and saved with soundfile under the
    name ``Aug_<original file stem>.wav``.

    Args:
        file_path: Path of the audio file to read.
        output_dir: Directory that receives the augmented file.
        sample_rate: Sampling rate requested from ``librosa.load``.

    Returns:
        None. The augmented audio is written to disk.
    """
    # Build the destination name from the source file's stem.
    stem, _ = os.path.splitext(os.path.basename(file_path))
    destination = os.path.join(output_dir, f"Aug_{stem}.wav")

    # Load, augment and save using the sample rate reported by librosa.
    waveform, rate = librosa.load(file_path, sr=sample_rate)
    sf.write(destination, augment(samples=waveform, sample_rate=rate), rate)

# Collect every .wav file below the TESS root, including nested sub-folders.
wav_files = glob.glob(os.path.join(tess_path, '**', '*.wav'), recursive=True)

print(f"Found {len(wav_files)} audio files in TESS dataset.")

for wav_file in wav_files:
    # Mirror the file's sub-folder (relative to tess_path) under augmented_path
    # so the augmented data keeps the same directory structure.
    relative_path = os.path.relpath(wav_file, tess_path)
    emotion_dir = os.path.dirname(relative_path)
    output_subdir = os.path.join(augmented_path, emotion_dir)
    os.makedirs(output_subdir, exist_ok=True)

    # Augment and save
    augment_and_save(wav_file, output_subdir, sample_rate)

# Report the configured output location instead of a hard-coded, truncated
# path, so the message stays correct if augmented_path is changed.
print(f"Data augmentation complete! Augmented files saved to {augmented_path}")

Found 2775 audio files in TESS dataset.
Data augmentation complete! Augmented files saved to /MyDrive/TESS_augmented/


In [None]:
import shutil

original_path = '/content/drive/MyDrive/TESS/'  # original TESS dataset (input)
augmented_path = '/content/drive/MyDrive/TESS_augmented/'  # augmented files from the previous cell (input)
combined_path = '/content/drive/MyDrive/TESS_combined/'  # combined dataset (output)
os.makedirs(combined_path, exist_ok=True)

# Every directory directly under original_path is treated as one label folder;
# plain files at that level are ignored.
subdirs = []
for entry in os.listdir(original_path):
    if os.path.isdir(os.path.join(original_path, entry)):
        subdirs.append(entry)

def copy_files(source_dir, target_dir):
    """Copy every ``*.wav`` file directly inside ``source_dir`` to ``target_dir``.

    Only files at the top level of ``source_dir`` are considered (no
    recursion). A file already present in ``target_dir`` under the same name
    is overwritten. ``target_dir`` is created if it does not exist yet.

    Args:
        source_dir: Directory to read ``.wav`` files from.
        target_dir: Directory to copy the files into.

    Returns:
        int: Number of files copied (0 when ``source_dir`` has no ``.wav``
        files or does not exist).
    """
    # Without this, shutil.copy2 raises FileNotFoundError on a missing target.
    os.makedirs(target_dir, exist_ok=True)

    wav_files = glob.glob(os.path.join(source_dir, '*.wav'))
    for wav_file in wav_files:
        target_file = os.path.join(target_dir, os.path.basename(wav_file))
        shutil.copy2(wav_file, target_file)
    return len(wav_files)

total = 0
# Build one combined folder per label, holding the original files together
# with their augmented counterparts.
for subdir in subdirs:
    orig_sub = os.path.join(original_path, subdir)
    aug_sub = os.path.join(augmented_path, subdir)
    comb_sub = os.path.join(combined_path, subdir)
    os.makedirs(comb_sub, exist_ok=True)

    # A label with no augmented folder is skipped: nothing is copied for it,
    # although its (empty) combined folder has already been created above.
    if not os.path.exists(aug_sub):
        continue

    # Copy originals first, then augmented files, counting the .wav files
    # found in each source folder.
    for source_sub in (orig_sub, aug_sub):
        copy_files(source_sub, comb_sub)
        total += len(glob.glob(os.path.join(source_sub, '*.wav')))

print(f"Combined {total} files in {combined_path}")

Combined 5550 files in /content/drive/MyDrive/TESS_combined/
