In [2]:
from os import listdir
from os.path import isdir, join

import pandas as pd

# TESS Data Set

In [3]:
# Get a list of all the audio files in the data set.
# Only sub-directories are descended into and only .wav entries are kept, so
# stray files in the data folders (e.g. .DS_Store) neither raise
# NotADirectoryError nor end up in the file list. Both levels are sorted
# because listdir() returns entries in arbitrary order.
TESS_DIR = '../Data/tess'
folders = sorted(
    entry for entry in listdir(TESS_DIR) if isdir(join(TESS_DIR, entry))
)

af = []
for folder in folders:
    temp = sorted(listdir(join(TESS_DIR, folder)))
    af += [file for file in temp if file.lower().endswith('.wav')]

# Ryerson Data Set

In [32]:
# Get a list of all the audio files in the data set.
# Only sub-directories are descended into and only .wav entries are kept:
# any other entry would either make listdir() fail (NotADirectoryError) or
# reach the file-name recoding with a name that does not follow the expected
# layout. Both levels are sorted because listdir() returns entries in
# arbitrary order, which would otherwise make the exported row order depend
# on the machine.
RYERSON_DIR = '../Data/ryerson'
folders = sorted(
    entry for entry in listdir(RYERSON_DIR) if isdir(join(RYERSON_DIR, entry))
)

af = []
for folder in folders:
    temp = sorted(listdir(join(RYERSON_DIR, folder)))
    af += [file for file in temp if file.lower().endswith('.wav')]

In [33]:
# Lookup tables for recoding the two-character codes found in the file names
emotion_dict = {
    '01': 'neutral',
    '02': 'calm',       # not in emot_samp, so dropped from the subset below
    '03': 'happy',
    '04': 'sad',
    '05': 'angry',
    '06': 'fearful',
    '07': 'disgust',
    '08': 'surprised'   # not in emot_samp, so dropped from the subset below
}

intensity_dict = {
    '01': 'medium',
    '02': 'high'
}

statement_dict = {
    '01': 'Kids are talking by the door',
    '02': 'Dogs are sitting by the door'
}

# Get meta data from the file name: every field is a two-character code at a
# fixed offset (characters 0-5 of the name are not used here).
# The spelling 'repitition' is kept so the exported column name is unchanged.
emotion = [file[6:8] for file in af]
intensity = [file[9:11] for file in af]
statement = [file[12:14] for file in af]
repitition = [file[15:17] for file in af]
actor = [file[18:20] for file in af]

# Replace the codes with descriptive strings. A code missing from the lookup
# table raises KeyError. Repetition and actor stay as their raw codes.
emotion = [emotion_dict[code] for code in emotion]
intensity = [intensity_dict[code] for code in intensity]
statement = [statement_dict[code] for code in statement]

# Create a data dictionary: one entry per column, one list item per file
data = {
    'file': af,
    'emotion': emotion,
    'intensity': intensity,
    'statement': statement,
    'repitition': repitition,
    'actor': actor
}

# Create the data frame holding every file
df = pd.DataFrame(data=data)

# Export the full table
df.to_csv('ryerson_meta_all.csv', index=False)

# Subset the data to common emotions, first repetition, and high intensity.
emot_samp = ['neutral', 'sad', 'disgust', 'happy', 'angry', 'fearful']
in_subset = df['emotion'].isin(emot_samp) & (df['repitition'] == '01')
# Take an explicit copy: the .loc assignment below must work on an independent
# frame, not on a slice of the full table (avoids SettingWithCopyWarning).
df = df[in_subset].copy()
# Recode neutral to high intensity so the neutral samples survive the
# intensity filter that follows.
df.loc[df['emotion'] == 'neutral', 'intensity'] = 'high'
# Drop everything that isn't high intensity
df = df[df['intensity'] == 'high']

df.to_csv('ryerson_meta_sub.csv', index=False)

# CREMA-D Data Set

In [73]:
# List the audio files in the data set. Only .wav entries are kept so a stray
# file in the folder does not hit the lookup tables below with a KeyError, and
# the list is sorted because listdir() returns entries in arbitrary order.
af = sorted(
    file for file in listdir('../Data/crema-d/AudioWAV')
    if file.lower().endswith('.wav')
)

# Get meta data from file names: each field sits at a fixed offset
actor_id = [file[0:4] for file in af]
statement = [file[5:8] for file in af]
emotion = [file[9:12] for file in af]
intensity = [file[13:15] for file in af]

# Create dictionaries to recode data
statement_dict = {
    'IEO': "It's eleven o'clock",
    'TIE': "That is exactly what happened",
    'IOM': "I'm on my way to the meeting",
    'IWW': "I wonder what this is about",
    'TAI': "The airplane is almost full",
    'MTI': "Maybe tomorrow it will be cold",
    'IWL': "I would like a new alarm clock",
    'ITH': "I think I have a doctor's appointment",
    'DFA': "Don't forget a jacket",
    'ITS': "I think I've seen this before",
    'TSI': "The surface is slick",
    'WSI': "We'll stop in a couple of minutes",
}

# 'ANG' maps to 'angry' (not 'anger') so the label is identical to the one the
# Ryerson recoding in this notebook produces for the same emotion.
emotion_dict = {
    'ANG': 'angry',
    'DIS': 'disgust',
    'FEA': 'fearful',
    'HAP': 'happy',
    'NEU': 'neutral',
    'SAD': 'sad'
}

# NOTE(review): the 'X.' key presumably catches a file whose intensity field
# is a single 'X', so the two-character slice runs into the '.' of the
# extension - confirm against the data.
intensity_dict = {
    'LO': 'low',
    'MD': 'medium',
    'HI': 'high',
    'XX': 'unspecified',
    'X.': 'unspecified'
}

# Replace the codes with descriptive strings. A code missing from the lookup
# table raises KeyError.
emotion = [emotion_dict[code] for code in emotion]
intensity = [intensity_dict[code] for code in intensity]
statement = [statement_dict[code] for code in statement]

# Create a data dictionary: one entry per column, one list item per file
data = {
    'file': af,
    'emotion': emotion,
    'intensity': intensity,
    'statement': statement,
    'actor': actor_id
}

# Create the data frame holding every file
df = pd.DataFrame(data=data)

# Export the full table
df.to_csv('cremad_meta_all.csv', index=False)

# Drop low and medium intensity so that each neutral statement per actor only
# has one counterpart per emotion
df = df[df['intensity'].isin(['high', 'unspecified'])]
df.to_csv('cremad_meta_sub.csv', index=False)