# Training on emotions from audio recordings

In [None]:
import re
import os
import sys
import librosa
import random
import numpy as np
import pandas as pd

# Extracting features

## Extracting features from public datasets

* RAVDESS
* * 1440 files; 24 actors (12 males, 12 females)
* TESS
* * "2800 files from TESS. A set of 200 target words were spoken in the carrier phrase 'Say the word _____' by two actresses (aged 26 and 64 years) and recordings were made of the set portraying each of seven emotions (anger, disgust, fear, happiness, pleasant surprise, sadness, and neutral). There are 2800 stimuli in total. Two actresses were recruited from the Toronto area. Both actresses speak English as their first language, are university educated, and have musical training. Audiometric testing indicated that both actresses have thresholds within the normal range."
* CREMA-D
* * "7,442 original clips from 91 actors. These clips were from 48 male and 43 female actors between the ages of 20 and 74 coming from a variety of races and ethnicities (African American, Asian, Caucasian, Hispanic, and Unspecified). Actors spoke from a selection of 12 sentences. The sentences were presented using one of six different emotions (Anger, Disgust, Fear, Happy, Neutral, and Sad) and four different emotion levels (Low, Medium, High, and Unspecified)."
* SAVEE
* * 4 speakers; all males; anger, disgust, fear, happiness, sadness and surprise

In [82]:
def create_librosa_features(dir_path, return_names=False, prefix='features_'):
    '''Extract a librosa feature matrix for every ``.wav`` file in a directory.

    For each recording a (144 features x N time windows) matrix is built by
    stacking, in this order: 40 MFCCs, 40 MFCC deltas, 40 MFCC delta-deltas,
    11 polynomial coefficients (order 10), spectral bandwidth, spectral
    centroid, 7 spectral-contrast rows, spectral flatness, spectral rolloff,
    RMS and zero-crossing rate.

    The matrix is written to the same directory as
    ``<prefix><recording name>.csv.gz``.

    WARNING: the source ``.wav`` file is DELETED once its features have been
    saved, so afterwards the directory no longer contains the audio.

    Input:
    dir_path: Path of directory containing audio files
    return_names: when True, return the ordered list of the 144 row names
    prefix: string prepended to every output file name

    Output: list of feature names if return_names is True, otherwise None
    '''
    files = [
        name for name in os.listdir(dir_path)
        if os.path.isfile(os.path.join(dir_path, name))
        and name.endswith('.wav')
    ]
    # Row labels, listed in exactly the order the rows are stacked below.
    feature_names = (
        ['mfcc' + str(i) for i in range(40)]
        + ['delta_mfcc' + str(i) for i in range(40)]
        + ['delta_delta_mfcc' + str(i) for i in range(40)]
        + ['poly' + str(i) for i in range(11)]
        + ['spectral_bw', 'spectral_centroid']
        + ['spectral_cnst' + str(i) for i in range(7)]
        + ['spectral_flat', 'spectral_rolloff', 'rms', 'zcr']
    )

    for count, recordingname in enumerate(files, start=1):
        # progress indicator: one line per 100 recordings
        if count % 100 == 0:
            print(count)
        filename_read = os.path.join(dir_path, recordingname)
        y, sr = librosa.load(filename_read)
        # Pass the loaded sampling rate explicitly so the sr-dependent
        # features always agree with the rate the audio was loaded at.
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
        delta_mfcc = librosa.feature.delta(mfcc)
        features = np.vstack([
            mfcc,  # 40 rows
            delta_mfcc,  # 40 rows
            librosa.feature.delta(delta_mfcc),  # 40 rows
            librosa.feature.poly_features(y=y, sr=sr, order=10),  # 11 rows
            librosa.feature.spectral_bandwidth(y=y, sr=sr),  # 1 row
            librosa.feature.spectral_centroid(y=y, sr=sr),  # 1 row
            librosa.feature.spectral_contrast(y=y, sr=sr),  # 7 rows
            librosa.feature.spectral_flatness(y=y),  # 1 row
            librosa.feature.spectral_rolloff(y=y, sr=sr),  # 1 row
            librosa.feature.rms(y=y),  # 1 row
            librosa.feature.zero_crossing_rate(y=y),  # 1 row
        ])
        np.savetxt(os.path.join(dir_path,
                                prefix + recordingname + '.csv.gz'),
                   features,
                   delimiter=',')
        # The audio is removed only after its features were written.
        os.remove(filename_read)
    if return_names:
        return feature_names

In [None]:
# SAVEE is processed first and also supplies the ordered feature names
feature_names = create_librosa_features('../data/SAVEE/', return_names=True)
# the remaining datasets only need their feature files written
for dataset_dir in ('../data/TESS', '../data/RAVDESS', '../data/CREMA_D'):
    create_librosa_features(dataset_dir)

## Metadata

Data: https://github.com/marcogdepinto/emotion-classification-from-audio-files
1440 samples, 24 people (50/50 M:F) at different intensities

In [65]:
# load the RAVDESS metadata table
metadata_ravdess = pd.read_csv('../data/ravdess_metadata.csv')

In [66]:
# Lookup table translating the emotion codes found in the datasets'
# file names into one shared set of emotion labels.
emotion_dict = dict(
    # upper-case three-letter codes
    ANG='angry',
    HAP='happy',
    SAD='sad',
    NEU='neutral',
    DIS='disgust',
    FEA='fearful',
    CAL='calm',
    SUR='surprise',
    # lower-case label variants
    ps='surprise',
    fear='fearful',
    # lower-case one- and two-letter codes
    f='fearful',
    h='happy',
    n='neutral',
    sa='sad',
    su='surprise',
    a='angry',
    d='disgust',
)

### RAVDESS

In [67]:
# load the RAVDESS metadata table
metadata_ravdess = pd.read_csv('../data/ravdess_metadata.csv')

# Sex comes from the last dash-separated field of the file name (taken
# before the first dot): an even number means "Female", an odd one "Male"
metadata_ravdess['Sex'] = [
    "Female" if int(fname.split('-')[-1].split('.')[0]) % 2 == 0 else "Male"
    for fname in metadata_ravdess.Filename
]

# name of the feature file that belongs to each recording
metadata_ravdess['feature_filename'] = [
    'features0622_' + fname + '.csv.gz'
    for fname in metadata_ravdess['Filename']
]

# folder in which the feature files are looked up
metadata_ravdess['directory'] = '../data/RAVDESS/'

# lowercase emotion labels
metadata_ravdess['Emotion'] = [
    label.lower() for label in metadata_ravdess['Emotion']
]

### CREMA_D 

In [73]:
# per-actor table for CREMA_D (joined onto the per-file table further down)
metadata_crema = pd.read_csv('../data/crema_metadata.csv')

# every entry of the CREMA_D folder is treated as one recording
# NOTE(review): this cell reads '../datasets/CREMA_D/' while the feature
# extraction call uses '../data/CREMA_D' - confirm which path is intended
files = os.listdir('../datasets/CREMA_D/')

# intensity code taken from the file name -> readable intensity label
intensity_dict = {
    'LO': 'Low',
    'MD': 'Normal',
    'HI': 'Strong',
    'XX': 'Unspecified',
    'X': 'Unspecified',
}

Actor, Statement, Emotion, Intensity = [], [], [], []
for fname in files:
    # a name must split into exactly five '_'-separated fields:
    # <ignored>_<actor>_<sentence>_<emotion code>_<intensity code>.<ext>
    _, pid, sentence, mood, ity = fname.split('_')
    Actor.append(pid)
    Statement.append(sentence)
    Emotion.append(emotion_dict[mood])
    Intensity.append(intensity_dict[ity.split('.')[0]])

# one row per file, built from the fields parsed above
metadata_crema2 = pd.DataFrame(
    data={
        'Actor': Actor,
        'Statement': Statement,
        'Emotion': Emotion,
        'Intensity': Intensity,
        'feature_filename': files
    })
metadata_crema2['directory'] = '../datasets/CREMA_D/'

# the actor ids parsed from file names are strings, so the key column of
# the csv table is converted to str before the join
metadata_crema['ActorID'] = pd.Series(metadata_crema['ActorID'], dtype=str)
metadata_crema = metadata_crema2.merge(
    metadata_crema,
    left_on='Actor',
    right_on='ActorID',
)

### SAVEE 

In [68]:
# Build the SAVEE metadata table from the names of the entries in its folder.
Actor = []
Emotion = []
Statement = []
dir_path = '../data/SAVEE/'
files = os.listdir(dir_path)
for i in files:
    # the first '_'-separated field is dropped; what remains is read as
    # <actor>_<emotion letters><statement digits>.<ext>
    split_i = i.split('_')[1:]
    stem = split_i[1].split('.')[0]
    Actor += [split_i[0]]
    # raw strings keep '\d' a regex escape instead of an invalid string escape
    Emotion += [emotion_dict[re.sub(r'\d+', '', stem)]]
    Statement += [re.sub(r'[a-zA-Z]', '', stem)]

metadata_savee = pd.DataFrame(data={'Actor': Actor,
                                    'Emotion': Emotion,
                                    'directory': dir_path,
                                    'Statement': Statement,
                                    'feature_filename': files})
metadata_savee['Sex'] = 'Male'

### TESS

In [72]:
## TESS Dataset
dir_path = '../data/TESS/'
# only regular files are considered; sub-directories are skipped
files = [
    entry for entry in os.listdir(dir_path)
    if os.path.isfile(os.path.join(dir_path, entry))
]

Emotion, Actor, Statement = [], [], []
for fname in files:
    # '_'-separated fields: index 1 = actor, 2 = statement, 3 = emotion
    fields = fname.split('_')
    Statement.append(fields[2])
    Actor.append(fields[1])
    label = fields[3].split('.')[0]
    # labels without an entry in emotion_dict are kept unchanged
    Emotion.append(emotion_dict.get(label, label))

metadata_tess = pd.DataFrame(data={
    'Actor': Actor,
    'Emotion': Emotion,
    'Statement': Statement,
    'directory': dir_path,
    'feature_filename': files,
})
metadata_tess['Sex'] = 'Female'

In [74]:
# stack the four per-dataset tables, then discard the columns that are not
# kept in the combined metadata
df_metadata = pd.concat(
    [metadata_crema, metadata_ravdess, metadata_tess, metadata_savee]
).drop(columns=['Filename', 'Modality', 'Channel', 'Repetition', 'ActorID'])

In [76]:
# save the combined metadata table without its index column
df_metadata.to_csv(
    '../data/trainingdata_metadata0622.csv',
    index=False,
)

In [None]:
# read metadata CSV for 4 training datasets
# df_metadata = pd.read_csv('../data/trainingdata_metadata0622.csv')

# Loading arrays 

In [77]:
def create_summary_matrix(df,
                          feature_names,
                          feature_filename='feature_filename',
                          directory='directory'):
    '''Returns a 2-D summary statistics matrix built from per-file feature matrices

    Each row of df points to one comma-separated feature file holding a
    (features x time windows) matrix. Every feature row of that matrix is
    reduced over time to its median, min, max and variance.

    df: dataframe containing the filenames of the feature matrices
    feature_names: ordered names of the feature rows stored in every file
    feature_filename: df column name where the feature filenames are stored
    directory: df column name for the directory path where the feature files are stored

    merged_df: returned dataframe with one row per file; its columns are
        median_<name>..., min_<name>..., max_<name>..., var_<name>...
        followed by a column called like the feature_filename argument

    Raises ValueError when the number of feature rows in the files does not
    match len(feature_names).'''
    all_feature_names = [
        '_'.join([stat, name])
        for stat in ['median', 'min', 'max', 'var']
        for name in feature_names
    ]

    median_rows = []
    min_rows = []
    max_rows = []
    var_rows = []
    fnames = []
    for ii, (_, row) in enumerate(df.iterrows()):
        # progress indicator: one line per 3000 files
        if ii % 3000 == 0:
            print(ii)
        fnames.append(row[feature_filename])
        recordingname = os.path.join(row[directory], row[feature_filename])
        # ndmin=2 keeps a file with a single time window two-dimensional
        x = np.loadtxt(recordingname, delimiter=',', ndmin=2)
        median_rows.append(np.median(x, axis=1))
        min_rows.append(np.min(x, axis=1))
        max_rows.append(np.max(x, axis=1))
        var_rows.append(np.var(x, axis=1))

    if not fnames:
        # nothing to summarise: return an empty table with the usual columns
        return pd.DataFrame(columns=all_feature_names + [feature_filename])

    # stack once at the end instead of growing the arrays on every iteration
    merged_df = pd.DataFrame(
        np.hstack([
            np.vstack(median_rows),
            np.vstack(min_rows),
            np.vstack(max_rows),
            np.vstack(var_rows),
        ]))

    if merged_df.shape[1] != len(all_feature_names):
        raise ValueError(
            'feature files hold %d feature rows but %d feature names were given'
            % (merged_df.shape[1] // 4, len(feature_names)))
    merged_df.columns = all_feature_names
    merged_df[feature_filename] = fnames
    return merged_df

In [78]:
# summarise every feature file listed in the combined metadata;
# feature_names gives the order of the feature rows inside each file
merged_df = create_summary_matrix(df=df_metadata,
                                  feature_names=feature_names)

0
3000
6000
9000
12000


In [79]:
# attach the metadata columns to the summary features, matching rows by
# their feature file name
training_sum_df = pd.merge(
    merged_df,
    df_metadata,
    left_on='feature_filename',
    right_on='feature_filename',
)

In [80]:
# save the training table (summary features + metadata) without its index
training_sum_df.to_csv(
    '../data/training_summary_data0622.csv',
    index=False,
)