In [1]:
import pandas as pd
import numpy as np

import os
import sys

import plotly.express as px

import warnings
# Silence every warning unless the interpreter was started with explicit -W
# options (sys.warnoptions is non-empty in that case).
# NOTE(review): this also hides pandas FutureWarnings raised by later cells.
if not sys.warnoptions:
    warnings.simplefilter("ignore")
# DeprecationWarning is ignored unconditionally, even when -W options were given.
warnings.filterwarnings("ignore", category=DeprecationWarning) 

In [2]:
# Root folders of the four speech-emotion corpora, relative to the working directory.
# Each path keeps its trailing "/" because the loading cells build file paths
# by plain string concatenation (e.g. CREMA_DIR_PATH + file).
RAVDESS_DIR_PATH = "./data/Ravdess/audio_speech_actors_01-24/"
CREMA_DIR_PATH = "./data/Crema/"
TESS_DIR_PATH = "./data/Tess/"
SAVEE_DIR_PATH = "./data/Savee/"

##### Ravdess Dataframe

This portion of the RAVDESS contains 1440 files: 60 trials per actor x 24 actors = 1440. The RAVDESS contains 24 professional actors (12 female, 12 male), vocalizing two lexically-matched statements in a neutral North American accent. Speech emotions include calm, happy, sad, angry, fearful, surprise, and disgust expressions. Each expression is produced at two levels of emotional intensity (normal, strong), with an additional neutral expression.

File naming convention

Each of the 1440 files has a unique filename. The filename consists of a 7-part numerical identifier (e.g., 03-01-06-01-02-01-12.wav). These identifiers define the stimulus characteristics:

Filename identifiers as per the official RAVDESS website:

* Modality (01 = full-AV, 02 = video-only, 03 = audio-only).
* Vocal channel (01 = speech, 02 = song).
* Emotion (01 = neutral, 02 = calm, 03 = happy, 04 = sad, 05 = angry, 06 = fearful, 07 = disgust, 08 = surprised).
* Emotional intensity (01 = normal, 02 = strong). NOTE: There is no strong intensity for the 'neutral' emotion.
* Statement (01 = "Kids are talking by the door", 02 = "Dogs are sitting by the door").
* Repetition (01 = 1st repetition, 02 = 2nd repetition).
* Actor (01 to 24. Odd numbered actors are male, even numbered actors are female).

In [3]:
# Emotion codes (third field of a RAVDESS file name) and the labels used in this notebook.
ravdess_emotion_labels = {1:'neutral', 2:'calm', 3:'happy', 4:'sad', 5:'angry', 6:'fear', 7:'disgust', 8:'surprise'}

# Sorted so the row order does not depend on the arbitrary order os.listdir returns.
ravdess_directory_list = sorted(os.listdir(RAVDESS_DIR_PATH))

file_emotion = []
file_path = []
for actor_dir in ravdess_directory_list:
    # the dataset folder holds one sub-directory per actor (24 in total),
    # so the files have to be collected actor by actor.
    actor = sorted(os.listdir(RAVDESS_DIR_PATH + actor_dir))
    for file in actor:
        part = file.split('.')[0]
        part = part.split('-')
        # third part in each file represents the emotion associated to that file.
        file_emotion.append(int(part[2]))
        file_path.append(RAVDESS_DIR_PATH + actor_dir + '/' + file)

# dataframe for emotion of files
emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])

# dataframe for path of files.
path_df = pd.DataFrame(file_path, columns=['Path'])
Ravdess_df = pd.concat([emotion_df, path_df], axis=1)

# changing integers to actual emotions.
# The result is assigned back to the column on purpose: calling
# replace(..., inplace=True) on `Ravdess_df.Emotions` is chained assignment and,
# under pandas Copy-on-Write, only changes a temporary Series, leaving the
# integer codes in the DataFrame.
Ravdess_df['Emotions'] = Ravdess_df['Emotions'].replace(ravdess_emotion_labels)
Ravdess_df.head()

Unnamed: 0,Emotions,Path
0,neutral,./data/Ravdess/audio_speech_actors_01-24/Actor...
1,neutral,./data/Ravdess/audio_speech_actors_01-24/Actor...
2,neutral,./data/Ravdess/audio_speech_actors_01-24/Actor...
3,neutral,./data/Ravdess/audio_speech_actors_01-24/Actor...
4,calm,./data/Ravdess/audio_speech_actors_01-24/Actor...


#### Crema

CREMA-D is a data set of 7,442 original clips from 91 actors. These clips were from 48 male and 43 female actors between the ages of 20 and 74 coming from a variety of races and ethnicities (African American, Asian, Caucasian, Hispanic, and Unspecified).

Actors spoke from a selection of 12 sentences. The sentences were presented using one of six different emotions (Anger, Disgust, Fear, Happy, Neutral and Sad) and four different emotion levels (Low, Medium, High and Unspecified).

Filename identifiers:

* the third underscore-separated value of the file name holds the emotion code we're looking for (e.g. `ANG` in `1001_DFA_ANG_XX.wav`)

In [4]:
crema_directory_list = os.listdir(CREMA_DIR_PATH)

# emotion code found in the file name -> label used in this notebook
crema_code_to_label = {
    'SAD': 'sad',
    'ANG': 'angry',
    'DIS': 'disgust',
    'FEA': 'fear',
    'HAP': 'happy',
    'NEU': 'neutral',
}

file_emotion = []
file_path = []

for clip_name in crema_directory_list:
    # full path of the clip
    file_path.append(CREMA_DIR_PATH + clip_name)
    # the emotion code is the third underscore-separated field of the file name;
    # codes missing from the table are labelled 'Unknown'
    emotion_code = clip_name.split('_')[2]
    file_emotion.append(crema_code_to_label.get(emotion_code, 'Unknown'))

# one single-column frame per list, glued together side by side
emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])
path_df = pd.DataFrame(file_path, columns=['Path'])
Crema_df = pd.concat([emotion_df, path_df], axis=1)
Crema_df.head()

Unnamed: 0,Emotions,Path
0,angry,./data/Crema/1001_DFA_ANG_XX.wav
1,disgust,./data/Crema/1001_DFA_DIS_XX.wav
2,fear,./data/Crema/1001_DFA_FEA_XX.wav
3,happy,./data/Crema/1001_DFA_HAP_XX.wav
4,neutral,./data/Crema/1001_DFA_NEU_XX.wav


#### Tess

A set of 200 target words was spoken in the carrier phrase "Say the word _" by two actresses (aged 26 and 64 years), and recordings were made of the set portraying each of seven emotions (anger, disgust, fear, happiness, pleasant surprise, sadness, and neutral). There are 2800 data points (audio files) in total.

* each dir contains audio files for each emotion.
* emotion is the third underscore-separated value of the file name (e.g. `angry` in `OAF_back_angry.wav`).

In [5]:
tess_directory_list = os.listdir(TESS_DIR_PATH)

file_emotion = []
file_path = []

for emotion_dir in tess_directory_list:
    directories = os.listdir(TESS_DIR_PATH + emotion_dir)
    for wav_name in directories:
        # drop the extension, then take the third underscore-separated field,
        # e.g. "OAF_back_angry.wav" -> "angry"
        stem = wav_name.split('.')[0]
        label = stem.split('_')[2]
        # 'ps' is stored as 'surprise'; every other label is used as-is
        file_emotion.append('surprise' if label == 'ps' else label)
        file_path.append(TESS_DIR_PATH + emotion_dir + '/' + wav_name)

# one single-column frame per list, glued together side by side
emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])
path_df = pd.DataFrame(file_path, columns=['Path'])
Tess_df = pd.concat([emotion_df, path_df], axis=1)
Tess_df.head()

Unnamed: 0,Emotions,Path
0,angry,./data/Tess/OAF_angry/OAF_back_angry.wav
1,angry,./data/Tess/OAF_angry/OAF_bar_angry.wav
2,angry,./data/Tess/OAF_angry/OAF_base_angry.wav
3,angry,./data/Tess/OAF_angry/OAF_bath_angry.wav
4,angry,./data/Tess/OAF_angry/OAF_bean_angry.wav


#### Savee

The SAVEE recordings comprise a total of 120 utterances per speaker, for example:

Common: She had your dark suit in greasy wash water all year.<br>
Anger: Who authorized the unlimited expense account?<br>
Disgust: Please take this dirty table cloth to the cleaners for me.<br>
Fear: Call an ambulance for medical assistance.<br>
Happiness: Those musicians harmonize marvelously.<br>
Sadness: The prospect of cutting back spending is an unpleasant one for any governor.<br>
Surprise: The carpet cleaners shampooed our oriental rug.<br>
Neutral: The best way to learn is to solve extra problems.<br>

In [6]:
savee_directory_list = os.listdir(SAVEE_DIR_PATH)

# letter prefix of the utterance id -> label; any other prefix counts as surprise
savee_prefix_to_label = {
    'a': 'angry',
    'd': 'disgust',
    'f': 'fear',
    'h': 'happy',
    'n': 'neutral',
    'sa': 'sad',
}

file_emotion = []
file_path = []

for wav_name in savee_directory_list:
    file_path.append(SAVEE_DIR_PATH + wav_name)
    # e.g. "DC_a01.wav" -> "a01.wav" -> "a" (the last six characters are cut off)
    utterance = wav_name.split('_')[1]
    prefix = utterance[:-6]
    file_emotion.append(savee_prefix_to_label.get(prefix, 'surprise'))

# one single-column frame per list, glued together side by side
emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])
path_df = pd.DataFrame(file_path, columns=['Path'])
Savee_df = pd.concat([emotion_df, path_df], axis=1)
Savee_df.head()

Unnamed: 0,Emotions,Path
0,angry,./data/Savee/DC_a01.wav
1,angry,./data/Savee/DC_a02.wav
2,angry,./data/Savee/DC_a03.wav
3,angry,./data/Savee/DC_a04.wav
4,angry,./data/Savee/DC_a05.wav


##### Preparation 
Prepare the final dataset for analysis and preprocessing

In [7]:
# creating Dataframe using all the 4 dataframes we created so far.
# ignore_index=True renumbers the rows 0..n-1; without it each source frame keeps
# its own 0-based index and the combined frame ends up with duplicate labels,
# so dataset.loc[i] would return one row per corpus instead of a single row.
dataset = pd.concat([Ravdess_df, Crema_df, Tess_df, Savee_df], axis=0, ignore_index=True)
# index=False: only the Emotions and Path columns are written to the CSV.
dataset.to_csv("./data/dataset.csv", index=False)
dataset.head()

Unnamed: 0,Emotions,Path
0,neutral,./data/Ravdess/audio_speech_actors_01-24/Actor...
1,neutral,./data/Ravdess/audio_speech_actors_01-24/Actor...
2,neutral,./data/Ravdess/audio_speech_actors_01-24/Actor...
3,neutral,./data/Ravdess/audio_speech_actors_01-24/Actor...
4,calm,./data/Ravdess/audio_speech_actors_01-24/Actor...
