# 1. Imports

In [6]:
import os
import pandas as pd
import numpy as np

# 2. Data Collection

In [7]:
RAVDESS = "./data/ravdess-emotional-speech-audio/"
CREMA = "./data/cremad/"
TESS = "./data/toronto-emotional-speech-set-tess/"
SAVEE = "./data/surrey-audiovisual-expressed-emotion-savee/"

## 2.1. Ravdess dataframe

In [8]:
ravdess_actors_list = os.listdir(RAVDESS)
audio_emotion = []
audio_path = []
audio_gender = []
audio_intensity = []

for file in ravdess_actors_list:
    actor = os.listdir(os.path.join(RAVDESS,file))
    for audio in actor:
        list_audio = audio.split('.')[0] # splitting by the '.' into '**-**-**' . 'wav' and grab the first element
        list_audio = list_audio.split('-') # splitting by the dash
        gender_code = int(list_audio[6])
        
        audio_emotion.append(int(list_audio[2])) # the third element describes the emotion class
        audio_gender.append('female' if gender_code & 1 == 0 else 'male') # the sixth element describes the voice gender 
        audio_intensity.append('normal' if int(list_audio[3]) == 1 else 'high')
        audio_path.append(os.path.join(RAVDESS,file,audio))

ravdess_df = pd.DataFrame({'emotions': audio_emotion, 'genders': audio_gender, 'audio_intensity': audio_intensity, 'audio_path': audio_path,})

# Mapping the values to emotions
emotion_dict = {
    1: 'neutral',
    2: 'calm',
    3: 'happy',
    4: 'sad',
    5: 'angry',
    6: 'fearful',
    7: 'disgust',
    8: 'surprised',
}

# Replace the values in the column with emotions
ravdess_df['emotions'] = ravdess_df['emotions'] .replace(emotion_dict)
ravdess_df['dataset'] = 'ravdess'
ravdess_df.head()

Unnamed: 0,emotions,genders,audio_intensity,audio_path,dataset
0,neutral,male,normal,./data/ravdess-emotional-speech-audio/Actor_01...,ravdess
1,neutral,male,normal,./data/ravdess-emotional-speech-audio/Actor_01...,ravdess
2,neutral,male,normal,./data/ravdess-emotional-speech-audio/Actor_01...,ravdess
3,neutral,male,normal,./data/ravdess-emotional-speech-audio/Actor_01...,ravdess
4,calm,male,normal,./data/ravdess-emotional-speech-audio/Actor_01...,ravdess


## 2.2. Crema dataframe

In [9]:
crema_list = os.listdir(CREMA)
audio_emotion = []
audio_path = []
audio_intensity = []
gender_list = []

female_id_list = [
    '1002', '1003', '1004', '1006', '1007', '1008', '1009', '1010', '1012', '1013', '1018', 
    '1020', '1021', '1024', '1025', '1028', '1029', '1030', '1037', '1043', '1046', '1047', 
    '1049', '1052', '1053', '1054', '1055', '1056', '1058', '1060', '1061', '1063', '1072', 
    '1073', '1074', '1075', '1076', '1078', '1079', '1082', '1084', '1089', '1091',
]

emotion_dict = {
    'HAP' : 'happy',
    'NEU' : 'neutral',
    'SAD' : 'sad',
    'ANG' : 'angry',
    'FEA' : 'fear',
    'DIS' : 'disgust',
}

intensity_dict = {
    'XX' : 'normal',
    'X' : 'normal',
    'LO' : 'low',
    'MD' : 'normal',
    'HI': 'high'
}

for audio in crema_list:
    list_audio = audio.split('.')[0] # splitting by the '.' into '**-**-**' . 'wav' and grab the first element
    list_audio = list_audio.split('_') # splitting by the underline
    audio_emotion.append(list_audio[2])
    audio_intensity.append(list_audio[3])
    audio_path.append(os.path.join(CREMA,audio))
    gender_list.append('female' if list_audio[0] in female_id_list else 'male')
    

crema_df = pd.DataFrame({'emotions': audio_emotion, 'genders': gender_list, 'audio_intensity': audio_intensity, 'audio_path': audio_path})
crema_df['emotions'] = crema_df['emotions'].replace(emotion_dict)
crema_df['audio_intensity'] = crema_df['audio_intensity'].replace(intensity_dict)
crema_df['dataset'] = 'crema'
crema_df.head()

Unnamed: 0,emotions,genders,audio_intensity,audio_path,dataset
0,angry,male,normal,./data/cremad/1001_DFA_ANG_XX.wav,crema
1,disgust,male,normal,./data/cremad/1001_DFA_DIS_XX.wav,crema
2,fear,male,normal,./data/cremad/1001_DFA_FEA_XX.wav,crema
3,happy,male,normal,./data/cremad/1001_DFA_HAP_XX.wav,crema
4,neutral,male,normal,./data/cremad/1001_DFA_NEU_XX.wav,crema


## 2.3. TESS dataframe

In [10]:
tess_dir_list = os.listdir(TESS)
path_list = []
gender_list = []
emotion_list = [] 
audio_intensity = []

emotion_dict = {
    'happy'   : 'happy',
    'neutral' : 'neutral',
    'sad'     : 'sad',
    'ps'     : 'surprised',
    'angry'   : 'angry',
    'fear'    : 'fear',
    'disgust'  : 'disgust',
}

for directory in tess_dir_list:
    audio_files = os.listdir(os.path.join(TESS, directory))
    for audio_file in audio_files:
        part = audio_file.split('.')[0]
        path_list.append(os.path.join(TESS,directory,audio_file))
        gender_list.append('female') # female only dataset
        audio_intensity.append('normal') # normal only dataset
        emotion_list.append(part.split('_')[2])
            
tess_df = pd.DataFrame({'emotions': emotion_list, 'genders': gender_list, 'audio_intensity': audio_intensity, 'audio_path': path_list})
tess_df['emotions'] = tess_df['emotions'].replace(emotion_dict)
tess_df['dataset'] = 'tess'
tess_df.head()

Unnamed: 0,emotions,genders,audio_intensity,audio_path,dataset
0,angry,female,normal,./data/toronto-emotional-speech-set-tess/OAF_a...,tess
1,angry,female,normal,./data/toronto-emotional-speech-set-tess/OAF_a...,tess
2,angry,female,normal,./data/toronto-emotional-speech-set-tess/OAF_a...,tess
3,angry,female,normal,./data/toronto-emotional-speech-set-tess/OAF_a...,tess
4,angry,female,normal,./data/toronto-emotional-speech-set-tess/OAF_a...,tess


## 2.3. SAVEE dataframe

In [11]:
savee_dir_list = os.listdir(SAVEE)
path_list = []
gender_list = []
emotion_list = []
audio_intensity = []

emotion_dict = {
    'h'  : 'happy',
    'n'  : 'neutral',
    'sa' : 'sad',
    'a'  : 'angry',
    'f'  : 'fear',
    'd'  : 'disgust',
    'su' : 'surprised'
}

for audio_file in savee_dir_list:
    part = audio_file.split('_')[1]
    path_list.append(os.path.join(SAVEE,audio_file))
    gender_list.append('male') # male only dataset
    emotion_list.append(part[:-6])
    audio_intensity.append('normal') # normal only dataset
        
savee_df = pd.DataFrame({'emotions': emotion_list, 'genders': gender_list, 'audio_intensity': audio_intensity, 'audio_path': path_list})
savee_df['emotions'] = savee_df['emotions'].replace(emotion_dict)
savee_df['dataset'] = 'savee'

savee_df.head()

Unnamed: 0,emotions,genders,audio_intensity,audio_path,dataset
0,angry,male,normal,./data/surrey-audiovisual-expressed-emotion-sa...,savee
1,angry,male,normal,./data/surrey-audiovisual-expressed-emotion-sa...,savee
2,angry,male,normal,./data/surrey-audiovisual-expressed-emotion-sa...,savee
3,angry,male,normal,./data/surrey-audiovisual-expressed-emotion-sa...,savee
4,angry,male,normal,./data/surrey-audiovisual-expressed-emotion-sa...,savee


In [12]:
merged_df = pd.concat([ravdess_df, crema_df, tess_df, savee_df])

In [13]:
merged_df

Unnamed: 0,emotions,genders,audio_intensity,audio_path,dataset
0,neutral,male,normal,./data/ravdess-emotional-speech-audio/Actor_01...,ravdess
1,neutral,male,normal,./data/ravdess-emotional-speech-audio/Actor_01...,ravdess
2,neutral,male,normal,./data/ravdess-emotional-speech-audio/Actor_01...,ravdess
3,neutral,male,normal,./data/ravdess-emotional-speech-audio/Actor_01...,ravdess
4,calm,male,normal,./data/ravdess-emotional-speech-audio/Actor_01...,ravdess
...,...,...,...,...,...
475,surprised,male,normal,./data/surrey-audiovisual-expressed-emotion-sa...,savee
476,surprised,male,normal,./data/surrey-audiovisual-expressed-emotion-sa...,savee
477,surprised,male,normal,./data/surrey-audiovisual-expressed-emotion-sa...,savee
478,surprised,male,normal,./data/surrey-audiovisual-expressed-emotion-sa...,savee


# 3. EDA