In [1]:
import os
from tqdm import tqdm
import numpy as np
import librosa
import soundfile as sf
import matplotlib.pyplot as plt
import pandas as pd
from audiomentations import Compose, AddGaussianNoise, TimeStretch, PitchShift, Shift, Gain, PolarityInversion, TimeMask, FrequencyMask, SpecFrequencyMask, RoomSimulator, AddBackgroundNoise, Trim, BandStopFilter, Padding

In [9]:
# General-purpose pipeline: additive noise, tempo change, pitch change and a
# time shift. Every transform is applied with probability 0.5, and
# shuffle=True asks Compose to apply them in a random order.
general_transforms = [
    AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.005, p=0.5),
    TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5),
    PitchShift(min_semitones=-3, max_semitones=3, p=0.5),
    Shift(min_fraction=-0.3, max_fraction=0.3, p=0.5),
]
general_aug = Compose(general_transforms, shuffle=True)

# Masking pipeline: a time mask covering 10-20% of the clip (applied with
# probability 0.5) followed by a band-stop filter that is always applied.
masking_transforms = [
    TimeMask(min_band_part=0.1, max_band_part=0.2, p=0.5),
    BandStopFilter(p=1),
]
spec_aug = Compose(masking_transforms)

In [3]:
def parse_emotion(filename):
    """Return the integer emotion code encoded in a dataset file name.

    File names are expected to be dash-separated with the emotion code in the
    third field (a name starting ``03-01-05-...`` yields ``5``).

    Args:
        filename: Base name of the file (no directory part).

    Returns:
        The emotion code as an ``int``, or ``None`` when the name has fewer
        than three dash-separated fields or the third field is not an integer.
    """
    fields = filename.split('-')
    try:
        return int(fields[2])
    except (IndexError, ValueError):
        return None


# Index every labelled clip under the training folder as (path, emotion).
files = []
emo = []
skipped = []  # paths whose names do not follow the expected pattern

for dirpath, dirnames, filenames in os.walk('../Datasets/combined/train/'):
    for filename in tqdm(filenames):
        sound = os.path.join(dirpath, filename)
        emotion = parse_emotion(filename)

        # os.walk also returns stray files (hidden files, checkpoints, ...);
        # skip them instead of aborting the whole scan on a parse error.
        if emotion is None:
            skipped.append(sound)
            continue

        files.append(sound)
        emo.append(emotion)

if skipped:
    print(f'Skipped {len(skipped)} file(s) with unexpected names, e.g. {skipped[0]}')

df = pd.DataFrame({'sound': files, 'emotion': emo})
    

100%|██████████| 864/864 [00:00<00:00, 284493.54it/s]


In [4]:
# Spot-check ten randomly chosen rows to confirm paths and labels line up.
df.sample(n=10)

Unnamed: 0,sound,emotion
56,../Datasets/combined/train/03-01-01-01-02-02-2...,1
3,../Datasets/combined/train/03-01-01-01-01-01-0...,1
75,../Datasets/combined/train/03-01-02-01-01-02-0...,2
552,../Datasets/combined/train/03-01-06-01-02-01-1...,6
288,../Datasets/combined/train/03-01-04-01-01-01-0...,4
814,../Datasets/combined/train/03-01-08-02-01-01-1...,8
108,../Datasets/combined/train/03-01-02-01-02-02-1...,2
803,../Datasets/combined/train/03-01-08-01-02-02-1...,8
849,../Datasets/combined/train/03-01-08-02-02-01-2...,8
421,../Datasets/combined/train/03-01-05-01-01-02-1...,5


In [10]:
# Write two augmented copies of every training clip: one produced by
# `general_aug` and one by `spec_aug`, each saved under the clip's original
# file name in its own output folder.
# NOTE(review): the augmentations are not seeded here, so the generated files
# presumably differ between runs — consider seeding if reproducibility matters.
GEN_AUG_DIR = '../Datasets/combined/train_gen_aug'
SPEC_AUG_DIR = '../Datasets/combined/train_spec_aug'

# Create the output folders up front so sf.write does not fail on a fresh
# checkout where they are missing.
for out_dir in (GEN_AUG_DIR, SPEC_AUG_DIR):
    os.makedirs(out_dir, exist_ok=True)

# iterrows() is a generator with no length; pass `total` so tqdm can show a
# percentage and an ETA instead of a bare iteration counter.
for _, row in tqdm(df.iterrows(), total=len(df)):
    # Load at a fixed 48 kHz rate so every output file shares one sample rate.
    data, sr = librosa.load(row['sound'], sr=48000)
    data_aug_gen = general_aug(samples=data, sample_rate=sr)
    data_aug_spec = spec_aug(samples=data, sample_rate=sr)

    # Keep only the file name; basename works with any path separator.
    # NOTE(review): clips from different sub-folders that share a file name
    # would overwrite each other here — confirm names are unique.
    out_name = os.path.basename(row['sound'])

    sf.write(os.path.join(GEN_AUG_DIR, out_name), data_aug_gen, sr)
    sf.write(os.path.join(SPEC_AUG_DIR, out_name), data_aug_spec, sr)

864it [04:17,  3.35it/s]
