In [6]:
# import libraries
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from matplotlib.pyplot import specgram
import pandas as pd
import glob
from sklearn.metrics import confusion_matrix
import IPython.display as ipd # for audio
import os 
import sys
import warnings
# Silence every warning category, but only when no warning options were
# supplied to the interpreter (sys.warnoptions is empty in that case).
if not sys.warnoptions:
    warnings.simplefilter("ignore")
# Hide DeprecationWarning unconditionally; this is redundant whenever the
# blanket "ignore" filter above has been installed.
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [19]:
# load data

# Root folder that holds the four corpora. Set the SER_DATA_ROOT environment
# variable to run the notebook on another machine; when it is unset (or empty)
# the original author's local layout is used, so existing setups keep working.
DATA_ROOT = (
    os.environ.get("SER_DATA_ROOT")
    or "/home/bukya/Desktop/Speech_Emotion_Recognition/"
).rstrip("/") + "/"

# Every dataset path keeps its trailing "/" because later cells build file
# paths by plain string concatenation (e.g. SAVEE + filename).
TESS = DATA_ROOT + "toronto-emotional-speech-set-tess/tess toronto emotional speech set data/TESS Toronto emotional speech set data/"

RAV = DATA_ROOT + "ravdess-emotional-speech-audio/audio_speech_actors_01-24/"
SAVEE = DATA_ROOT + "ALL/"
CREMA = DATA_ROOT + "AudioWAV/"


# Sanity-check the TESS path: list the entries of the TESS folder and preview
# the first five (os.listdir returns them in arbitrary order).
dir_list = os.listdir(TESS)
dir_list[0:5]

['OAF_disgust',
 'YAF_pleasant_surprised',
 'OAF_neutral',
 'YAF_angry',
 'YAF_sad']

# Extracting Labels and Path

# 1. SAVEE dataset (male speakers)

In [42]:
# Peek at one SAVEE file name to see where the emotion code sits
dir_list_savee = os.listdir(SAVEE)
x = dir_list_savee[1]  # second entry of the listing
(x[-8:-6], x)  # the two characters in front of the 2-digit number, and the full name

('_f', 'JK_f05.wav')

In [43]:
# SAVEE puts a one- or two-letter emotion code right before the two-digit
# utterance number: 'JK_f05.wav' is fear, 'JK_sa01.wav' is sad, 'JK_su09.wav'
# is surprise. Slicing name[-8:-6] therefore gives '_a', '_d', ... for the
# single-letter codes and 'sa' / 'su' for the two-letter ones.
savee_codes = {
    '_a': 'male_angry',
    '_d': 'male_disgust',
    '_f': 'male_fear',
    '_h': 'male_happy',
    '_n': 'male_neutral',
    'sa': 'male_sad',
    'su': 'male_surprise',
}

# Get the data location for SAVEE
dir_list = os.listdir(SAVEE)

# one label and one full path per file; names that match no code are tagged 'male_error'
emotion = [savee_codes.get(name[-8:-6], 'male_error') for name in dir_list]
path = [SAVEE + name for name in dir_list]  # e.g. SAVEE + 'JK_f05.wav'

# assemble the labels / source / path table
SAVEE_df = pd.DataFrame({'labels': emotion})
SAVEE_df['source'] = 'SAVEE'  # source dataset
SAVEE_df['path'] = path
SAVEE_df.labels.value_counts()

male_neutral     120
male_sad          60
male_angry        60
male_surprise     60
male_happy        60
male_fear         60
male_disgust      60
Name: labels, dtype: int64

In [45]:
# explore data: preview the first five rows of the SAVEE table
SAVEE_df.head()

Unnamed: 0,labels,source,path
0,male_neutral,SAVEE,/home/bukya/Desktop/Speech_Emotion_Recognition...
1,male_fear,SAVEE,/home/bukya/Desktop/Speech_Emotion_Recognition...
2,male_surprise,SAVEE,/home/bukya/Desktop/Speech_Emotion_Recognition...
3,male_sad,SAVEE,/home/bukya/Desktop/Speech_Emotion_Recognition...
4,male_disgust,SAVEE,/home/bukya/Desktop/Speech_Emotion_Recognition...


In [47]:
# (rows, columns) of the SAVEE table
SAVEE_df.shape

(480, 3)

In [53]:
# Disabled demo cell: the whole snippet is wrapped in a string literal, so
# nothing is loaded, plotted or played -- the cell only echoes the string back.
# NOTE(review): librosa.display.waveplot is reported as removed in newer librosa
# releases (waveshow being the replacement) -- confirm before re-enabling.
'''# Lets play a happy track
fname = SAVEE + 'DC_h11.wav'  
data, sampling_rate = librosa.load(fname)
plt.figure(figsize=(15, 5))
librosa.display.waveplot(data, sr=sampling_rate)

# Lets play the audio 
ipd.Audio(fname)'''

"# Lets play a happy track\nfname = SAVEE + 'DC_h11.wav'  \ndata, sampling_rate = librosa.load(fname)\nplt.figure(figsize=(15, 5))\nlibrosa.display.waveplot(data, sr=sampling_rate)\n\n# Lets play the audio \nipd.Audio(fname)"

# 2. RAVDESS dataset(Actors)


Actor (01 to 24): odd-numbered actors are male, even-numbered actors are female.

Emotion codes: 01 = neutral, 02 = calm, 03 = happy, 04 = sad, 05 = angry, 06 = fearful, 07 = disgust, 08 = surprised. Note that the parsing code in this section maps both 01 (neutral) and 02 (calm) to `neutral`.


File name pattern example: `02-01-06-01-02-01-12.wav`.
The hyphen-separated fields give the metadata of the audio file, in this order:
- Video-only (02)
- Speech (01)
- Fearful (06)
- Normal intensity (01)
- Statement "dogs" (02)
- 1st repetition (01)
- 12th actor (12), female (the actor ID number is even)

In [56]:
# list the per-actor folders of RAVDESS in alphabetical order and preview three
dir_list = sorted(os.listdir(RAV))
dir_list[:3]

['Actor_01', 'Actor_02', 'Actor_03']

In [68]:
# RAVDESS emotion code -> name; 'calm' (2) is folded into 'neutral'
ravdess_emotions = {1:'neutral', 2:'neutral', 3:'happy', 4:'sad', 5:'angry', 6:'fear', 7:'disgust', 8:'surprise'}

# actor folders, alphabetically
dir_list = sorted(os.listdir(RAV))

# collect emotion code, speaker gender and file path for every recording
emotion = []
gender = []
path = []
for actor_dir in dir_list:
    for wav_name in os.listdir(RAV + actor_dir):
        # '02-01-06-01-02-01-12.wav' -> ['02', '01', '06', '01', '02', '01', '12']
        fields = wav_name.split('.')[0].split('-')
        emotion.append(int(fields[2]))  # third field is the emotion code
        # last field is the actor id: even -> female, odd -> male
        gender.append("female" if int(fields[6]) % 2 == 0 else "male")
        path.append(RAV + actor_dir + '/' + wav_name)  # RAV + Actor_01 + / + wav file

# build the table: translate codes to names, prefix them with the gender,
# then keep only the labels / source / path columns
RAV_df = pd.DataFrame({'gender': gender, 'emotion': emotion})
RAV_df['emotion'] = RAV_df['emotion'].replace(ravdess_emotions)
RAV_df['labels'] = RAV_df['gender'] + '_' + RAV_df['emotion']
RAV_df['source'] = 'RAVDESS'
RAV_df['path'] = path
RAV_df = RAV_df[['labels', 'source', 'path']]
RAV_df.labels.value_counts()

female_neutral     144
male_neutral       144
female_fear         96
male_fear           96
female_disgust      96
female_angry        96
male_happy          96
female_happy        96
female_surprise     96
male_sad            96
male_angry          96
male_surprise       96
female_sad          96
male_disgust        96
Name: labels, dtype: int64

In [70]:
# preview the first five rows of the RAVDESS table
RAV_df.head()

Unnamed: 0,labels,source,path
0,male_angry,RAVDESS,/home/bukya/Desktop/Speech_Emotion_Recognition...
1,male_disgust,RAVDESS,/home/bukya/Desktop/Speech_Emotion_Recognition...
2,male_happy,RAVDESS,/home/bukya/Desktop/Speech_Emotion_Recognition...
3,male_happy,RAVDESS,/home/bukya/Desktop/Speech_Emotion_Recognition...
4,male_neutral,RAVDESS,/home/bukya/Desktop/Speech_Emotion_Recognition...


# 3. TESS dataset (2 speakers: a young female and an older female)


In [79]:
# TESS folder names, sorted; show the first five and entries 8-11
dir_list = sorted(os.listdir(TESS))
(dir_list[:5], dir_list[8:12])

(['OAF_Fear', 'OAF_Pleasant_surprise', 'OAF_Sad', 'OAF_angry', 'OAF_disgust'],
 ['YAF_disgust', 'YAF_fear', 'YAF_happy', 'YAF_neutral'])

In [80]:
# TESS folder name -> label. The folder names are spelled and capitalised
# inconsistently, so every accepted variant is listed verbatim.
tess_labels = {
    'OAF_angry': 'female_angry',
    'YAF_angry': 'female_angry',
    'OAF_disgust': 'female_disgust',
    'YAF_disgust': 'female_disgust',
    'OAF_Fear': 'female_fear',
    'YAF_fear': 'female_fear',
    'OAF_happy': 'female_happy',
    'YAF_happy': 'female_happy',
    'OAF_neutral': 'female_neutral',
    'YAF_neutral': 'female_neutral',
    'OAF_Pleasant_surprise': 'female_surprise',
    'YAF_pleasant_surprised': 'female_surprise',
    'OAF_Sad': 'female_sad',
    'YAF_sad': 'female_sad',
}

# emotion folders, alphabetically
dir_list = sorted(os.listdir(TESS))

path = []
emotion = []
for folder in dir_list:
    # every file in a folder shares the folder's label; unlisted folders give 'Unknown'
    folder_label = tess_labels.get(folder, 'Unknown')
    for wav_name in os.listdir(TESS + folder):
        emotion.append(folder_label)
        path.append(TESS + folder + "/" + wav_name)

# assemble the labels / source / path table
TESS_df = pd.DataFrame({'labels': emotion})
TESS_df['source'] = 'TESS'
TESS_df['path'] = path
TESS_df.labels.value_counts()

female_disgust     400
female_happy       400
female_sad         400
female_fear        400
female_neutral     400
female_angry       400
female_surprise    400
Name: labels, dtype: int64

In [82]:
# preview the first five rows of the TESS table
TESS_df.head()

Unnamed: 0,labels,source,path
0,female_fear,TESS,/home/bukya/Desktop/Speech_Emotion_Recognition...
1,female_fear,TESS,/home/bukya/Desktop/Speech_Emotion_Recognition...
2,female_fear,TESS,/home/bukya/Desktop/Speech_Emotion_Recognition...
3,female_fear,TESS,/home/bukya/Desktop/Speech_Emotion_Recognition...
4,female_fear,TESS,/home/bukya/Desktop/Speech_Emotion_Recognition...


# 4. CREMA-D dataset (different speakers from movies)


In [87]:
# sorted CREMA-D file names; preview three of them and the '_'-separated parts of the first
dir_list = sorted(os.listdir(CREMA))
(dir_list[:3], dir_list[0].split('_'))

(['1001_DFA_ANG_XX.wav', '1001_DFA_DIS_XX.wav', '1001_DFA_FEA_XX.wav'],
 ['1001', 'DFA', 'ANG', 'XX.wav'])

In [88]:
# sorted CREMA-D file names
dir_list = sorted(os.listdir(CREMA))

# speaker ids treated as female; every other id is labelled male
female = [1002,1003,1004,1006,1007,1008,1009,1010,1012,1013,1018,1020,1021,1024,1025,1028,1029,1030,1037,1043,1046,1047,1049,
          1052,1053,1054,1055,1056,1058,1060,1061,1063,1072,1073,1074,1075,1076,1078,1079,1082,1084,1089,1091]

# three-letter emotion code (third filename field) -> label suffix
crema_emotions = {
    'SAD': 'sad',
    'ANG': 'angry',
    'DIS': 'disgust',
    'FEA': 'fear',
    'HAP': 'happy',
    'NEU': 'neutral',
}

gender = []
emotion = []
path = []
for wav_name in dir_list:
    # '1001_DFA_ANG_XX.wav' -> ['1001', 'DFA', 'ANG', 'XX.wav']
    fields = wav_name.split('_')

    # first field is the speaker id, which decides the gender
    speaker_gender = 'female' if int(fields[0]) in female else 'male'
    gender.append(speaker_gender)

    # label is '<gender>_<emotion>'; unrecognised codes become 'Unknown'
    code = fields[2]
    if code in crema_emotions:
        emotion.append(speaker_gender + '_' + crema_emotions[code])
    else:
        emotion.append('Unknown')
    path.append(CREMA + wav_name)

# assemble the labels / source / path table
CREMA_df = pd.DataFrame({'labels': emotion})
CREMA_df['source'] = 'CREMA'
CREMA_df['path'] = path
CREMA_df.labels.value_counts()

male_fear         671
male_sad          671
male_angry        671
male_happy        671
male_disgust      671
female_disgust    600
female_sad        600
female_fear       600
female_angry      600
female_happy      600
male_neutral      575
female_neutral    512
Name: labels, dtype: int64

In [89]:
# preview the first five rows of the CREMA-D table
CREMA_df.head()

Unnamed: 0,labels,source,path
0,male_angry,CREMA,/home/bukya/Desktop/Speech_Emotion_Recognition...
1,male_disgust,CREMA,/home/bukya/Desktop/Speech_Emotion_Recognition...
2,male_fear,CREMA,/home/bukya/Desktop/Speech_Emotion_Recognition...
3,male_happy,CREMA,/home/bukya/Desktop/Speech_Emotion_Recognition...
4,male_neutral,CREMA,/home/bukya/Desktop/Speech_Emotion_Recognition...


# Combine the Above Data Frames

In [92]:
# all data frames: stack the four per-dataset tables into one.
# ignore_index=True gives every row a unique 0..N-1 label. Without it each
# source frame keeps its own 0-based index, so labels repeat across datasets
# and a label-based lookup such as df.loc[0] returns several rows.
df = pd.concat([SAVEE_df, RAV_df, TESS_df, CREMA_df], axis = 0, ignore_index=True)
print(df.labels.value_counts())
df.head()
#df.to_csv("All_Data_Frames.csv",index=False)

female_sad         1096
female_fear        1096
female_happy       1096
female_disgust     1096
female_angry       1096
female_neutral     1056
male_neutral        839
male_sad            827
male_happy          827
male_fear           827
male_angry          827
male_disgust        827
female_surprise     496
male_surprise       156
Name: labels, dtype: int64


Unnamed: 0,labels,source,path
0,male_neutral,SAVEE,/home/bukya/Desktop/Speech_Emotion_Recognition...
1,male_fear,SAVEE,/home/bukya/Desktop/Speech_Emotion_Recognition...
2,male_surprise,SAVEE,/home/bukya/Desktop/Speech_Emotion_Recognition...
3,male_sad,SAVEE,/home/bukya/Desktop/Speech_Emotion_Recognition...
4,male_disgust,SAVEE,/home/bukya/Desktop/Speech_Emotion_Recognition...


In [94]:
# rows contributed by each dataset, plus the (rows, columns) of the combined table
df.source.value_counts(), df.shape

(CREMA      7442
 TESS       2800
 RAVDESS    1440
 SAVEE       480
 Name: source, dtype: int64, (12162, 3))