In [22]:
from data_loader import data_loader
from pathlib import Path
import pandas as pd
from tqdm import tqdm
import os
import librosa
import numpy as np
import soundfile as sf
import random
from audiomentations import Compose, AddGaussianNoise, TimeStretch, PitchShift, Shift, Gain, PolarityInversion, TimeMask, FrequencyMask, SpecFrequencyMask, RoomSimulator, AddBackgroundNoise
from IPython.display import Audio

In [23]:
timemask = TimeMask(min_band_part=0.1, max_band_part=1, p=1)
freqmask = FrequencyMask(min_frequency_band=0.1, max_frequency_band=1, p=1)

def noise(data):
    noise_amp = 0.035*np.random.uniform()*np.amax(data)
    data = data + noise_amp*np.random.normal(size=data.shape[0])
    return data

def shift(data):
    shift_range = int(np.random.uniform(low=-5, high = 5)*1000)
    return np.roll(data, shift_range)

def pitch(data, pitch_factor=0.7):
    return librosa.effects.pitch_shift(data, 16000, pitch_factor)

def spec_augment(spec: np.ndarray, num_mask=2, 
                 freq_masking_max_percentage=0.15, time_masking_max_percentage=0.2):

    spec = spec.copy()
    for i in range(num_mask):
        all_frames_num, all_freqs_num = spec.shape
        freq_percentage = random.uniform(0.0, freq_masking_max_percentage)
        
        num_freqs_to_mask = int(freq_percentage * all_freqs_num)
        f0 = np.random.uniform(low=0.0, high=all_freqs_num - num_freqs_to_mask)
        f0 = int(f0)
        spec[:, f0:f0 + num_freqs_to_mask] = 0

        time_percentage = random.uniform(0.0, time_masking_max_percentage)
        
        num_frames_to_mask = int(time_percentage * all_frames_num)
        t0 = np.random.uniform(low=0.0, high=all_frames_num - num_frames_to_mask)
        t0 = int(t0)
        spec[t0:t0 + num_frames_to_mask, :] = 0
    
    return spec


  freqmask = FrequencyMask(min_frequency_band=0.1, max_frequency_band=1, p=1)


In [24]:
dl = data_loader(
  '../Datasets/Crema/',
  '../Datasets/Ravdess/',
  '../Datasets/Savee/',
  '../Datasets/Tess/'
)

In [25]:
tess_df = dl.get_crema_df()
dir = '../Datasets/custom_db/crema'
# Path(dir).mkdir(parents=True, exist_ok=True)
tess_df.shape

(7442, 2)

In [26]:
train, val, test = dl.split_df(tess_df, ratio_train=0.7, ratio_val=0.10, ratio_test=0.20)

In [27]:
train['Emotion'].value_counts(), val['Emotion'].value_counts(), test['Emotion'].value_counts()

(sad        890
 angry      890
 disgust    890
 fear       889
 happy      889
 neutral    760
 Name: Emotion, dtype: int64,
 fear       128
 disgust    127
 angry      127
 sad        127
 happy      127
 neutral    109
 Name: Emotion, dtype: int64,
 happy      255
 disgust    254
 angry      254
 fear       254
 sad        254
 neutral    218
 Name: Emotion, dtype: int64)

In [28]:
def move_files(df: pd.DataFrame, folder):
    dest_folder = dir + '/' + folder + '/'
    Path(dest_folder).mkdir(parents=True, exist_ok=True)
    for i, row in tqdm(df.iterrows(), total=df.shape[0]):
        data, sr = librosa.load(row['File_Path'])
        data, _ = librosa.effects.trim(data, top_db=25)
        if sr != 16000:
            data = librosa.resample(data, orig_sr=sr, target_sr=16000)
        # src = os.path.abspath(row['File_Path'])
        dest = os.path.abspath(dest_folder + row['Emotion'] + '_' + str(i) + '.wav')
        sf.write(dest, data, 16000)
        # os.system(f'cp {src} {dest}')

In [29]:
move_files(train, 'train')
move_files(val, 'val')
move_files(test, 'test')

100%|██████████| 5208/5208 [12:49<00:00,  6.76it/s]
100%|██████████| 745/745 [01:50<00:00,  6.76it/s]
100%|██████████| 1489/1489 [03:48<00:00,  6.51it/s]


## Augmentation

In [30]:
audio = []
emo = []

for dirs, _, files in os.walk(dir + '/train'):
    for file in files:
        path = os.path.join(dirs, file)
        audio.append(path)
        emo.append(file.split('_')[0])

aug_df = pd.DataFrame({'File_Path': audio, 'Emotion': emo})
aug_df

Unnamed: 0,File_Path,Emotion
0,../Datasets/custom_db/crema/train/happy_3199.wav,happy
1,../Datasets/custom_db/crema/train/sad_2089.wav,sad
2,../Datasets/custom_db/crema/train/disgust_4755...,disgust
3,../Datasets/custom_db/crema/train/happy_356.wav,happy
4,../Datasets/custom_db/crema/train/fear_248.wav,fear
...,...,...
5203,../Datasets/custom_db/crema/train/sad_1230.wav,sad
5204,../Datasets/custom_db/crema/train/happy_1079.wav,happy
5205,../Datasets/custom_db/crema/train/sad_6397.wav,sad
5206,../Datasets/custom_db/crema/train/neutral_5964...,neutral


In [31]:
Path(dir + '/augment').mkdir(parents=True, exist_ok=True)

for i, row in tqdm(aug_df.iterrows(), total=aug_df.shape[0]):
    data, sr = librosa.load(row['File_Path'], sr=None)

    if sr != 16000:
        Audio(data, rate=sr)
        raise Exception(f'Sample rate is {sr} Hz of file: ' + row['File_Path'])

    data_noise = noise(data)
    sf.write(dir + '/augment' + '/' + row['Emotion'] + '_' + str(i) + '_' + 'noise', data_noise, sr, format='wav')

    data_ts = shift(data)
    sf.write(dir + '/augment' + '/' + row['Emotion'] + '_' + str(i) + '_' + 'timeshift', data_ts, sr, format='wav')

    data_ps = pitch(data)
    sf.write(dir + '/augment' + '/' + row['Emotion'] + '_' + str(i) + '_' + 'pitchshift', data_ps, sr, format='wav')

    try:
        data_tm = timemask(data, sr)
        sf.write(dir + '/augment' + '/' + row['Emotion'] + '_' + str(i) + '_' + 'timemask', data_tm, sr, format='wav')
    except Exception as e:
        print('Error while applying timemask of', row['File_Path'], 'Error:', e)

    try:
        data_fm = freqmask(data, sr)
        sf.write(dir + '/augment' + '/' + row['Emotion'] + '_' + str(i) + '_' + 'freqmask', data_fm, sr, format='wav')
    except Exception as e:
        print('Error while applying freqmask of', row['File_Path'], 'Error:', e)



  return librosa.effects.pitch_shift(data, 16000, pitch_factor)
 15%|█▍        | 758/5208 [04:01<22:17,  3.33it/s]

Error while applying freqmask of ../Datasets/custom_db/crema/train/disgust_542.wav Error: empty range for randrange() (16, 4, -12)


 18%|█▊        | 961/5208 [05:08<21:06,  3.35it/s]

Error while applying freqmask of ../Datasets/custom_db/crema/train/neutral_95.wav Error: empty range for randrange() (16, 7, -9)


 25%|██▍       | 1276/5208 [06:48<21:43,  3.02it/s]

Error while applying freqmask of ../Datasets/custom_db/crema/train/fear_3688.wav Error: empty range for randrange() (16, 10, -6)


 27%|██▋       | 1387/5208 [07:23<18:26,  3.45it/s]

Error while applying freqmask of ../Datasets/custom_db/crema/train/neutral_6978.wav Error: empty range for randrange() (16, 10, -6)


 29%|██▉       | 1508/5208 [08:02<18:47,  3.28it/s]

Error while applying freqmask of ../Datasets/custom_db/crema/train/sad_5888.wav Error: empty range for randrange() (16, 1, -15)


 34%|███▍      | 1769/5208 [09:28<17:20,  3.31it/s]

Error while applying freqmask of ../Datasets/custom_db/crema/train/fear_1974.wav Error: empty range for randrange() (16, 16, 0)


 41%|████      | 2122/5208 [11:23<15:47,  3.26it/s]

Error while applying freqmask of ../Datasets/custom_db/crema/train/happy_1667.wav Error: empty range for randrange() (16, 3, -13)


 47%|████▋     | 2462/5208 [13:13<13:54,  3.29it/s]

Error while applying freqmask of ../Datasets/custom_db/crema/train/disgust_2968.wav Error: empty range for randrange() (16, 13, -3)


 52%|█████▏    | 2695/5208 [14:29<12:43,  3.29it/s]

Error while applying freqmask of ../Datasets/custom_db/crema/train/sad_5390.wav Error: empty range for randrange() (16, 1, -15)


 68%|██████▊   | 3531/5208 [19:03<08:47,  3.18it/s]

Error while applying freqmask of ../Datasets/custom_db/crema/train/angry_4610.wav Error: empty range for randrange() (16, 4, -12)


 75%|███████▍  | 3887/5208 [20:59<06:38,  3.31it/s]

Error while applying freqmask of ../Datasets/custom_db/crema/train/sad_2401.wav Error: empty range for randrange() (16, 7, -9)


 80%|███████▉  | 4150/5208 [22:25<05:27,  3.23it/s]

Error while applying freqmask of ../Datasets/custom_db/crema/train/neutral_676.wav Error: empty range for randrange() (16, 12, -4)


100%|██████████| 5208/5208 [28:05<00:00,  3.09it/s]
