# In this notebook we will import raw data, augment it, and extract features from it

## Process the raw data into dataframes

In [21]:
# Import libraries
import os
import pandas as pd
import numpy as np
import librosa
import IPython.display as ipd

In [8]:
# Define dataset paths
# Root folders of the four raw corpora, given relative to the directory the
# notebook is run from. Every path ends with "/" so that it can be used directly
# as a prefix in front of a sub-folder or file name.
Ravdess = "../data/raw/ravdess/audio_speech_actors_01-24/"
Crema = "../data/raw/crema_d/AudioWAV/"
Tess = "../data/raw/tess/TESS Toronto emotional speech set data/"
Savee = "../data/raw/savee/ALL/"

In [23]:
# Check the sound of an example file
# Sanity check: decode one RAVDESS clip and render an inline audio player for it.
example_file = os.path.join(Ravdess, "Actor_01/03-01-01-01-01-01-01.wav")
# librosa.load returns the waveform together with the sampling rate it was loaded at.
# NOTE(review): no `sr` argument is passed, so librosa's default target rate
# applies - confirm that this is the rate wanted for feature extraction.
data,sr = librosa.load(example_file)
# Has to stay the last expression of the cell, otherwise the player is not displayed.
ipd.Audio(data, rate=sr)

In [9]:
# Ravdess Dataset
# Walks <Ravdess>/<actor folder>/<clip>.wav and collects, for every clip, the
# integer emotion code taken from the file name and the path of the clip.

# os.listdir() returns entries in arbitrary order, so both directory levels are
# sorted to keep the row order of the resulting dataframe reproducible.
ravdess_directory_list = sorted(os.listdir(Ravdess))

file_emotion = []
file_path = []
for i in ravdess_directory_list:
    # skip stray files that sit next to the actor folders
    if not os.path.isdir(Ravdess + i):
        continue
    # as there are 24 different actors in our previous directory we need to extract files for each actor.
    # Only .wav clips are kept: entries such as .DS_Store would otherwise crash
    # the int() conversion below.
    actor = sorted(name for name in os.listdir(Ravdess + i) if name.lower().endswith('.wav'))
    for f in actor:
        part = f.split('.')[0].split('-')
        # third part in each file represents the emotion associated to that file.
        file_emotion.append(int(part[2]))
        file_path.append(Ravdess + i + '/' + f)

# Fail with a readable message instead of an IndexError/NameError in the prints below.
assert file_path, "no .wav files found under " + Ravdess

# Spot check of the parsing: first clip of the last actor folder, first
# file-name field, first collected path, and emotion code / name of the last clip.
print(actor[0])
print(part[0])
print(file_path[0])
print(int(part[2]))
print(f)

03-01-01-01-01-01-24.wav
03
../data/raw/ravdess/audio_speech_actors_01-24/Actor_01/03-01-01-01-01-01-01.wav
8
03-01-08-02-02-02-24.wav


In [10]:
# Single-column dataframe holding the integer emotion code of every RAVDESS file
# collected in `file_emotion` (same row order as `file_path`).
emotion_df = pd.DataFrame({'Emotions': file_emotion})

In [11]:
# dataframe for path of files.
path_df = pd.DataFrame(file_path, columns=['Path'])
# Put emotion codes and paths side by side; both frames share the same row order.
Ravdess_df = pd.concat([emotion_df, path_df], axis=1)
# changing integers to actual emotions.
# The result is assigned back to the column instead of calling
# `Ravdess_df.Emotions.replace(..., inplace=True)`: an in-place method on a column
# accessed through the dataframe is a chained assignment, which pandas 2.2 warns
# about and which, with Copy-on-Write enabled, only changes a temporary Series and
# leaves the integer codes in the dataframe.
# Codes 1 and 2 are deliberately folded into the single label 'neutral'.
# NOTE(review): code 2 is presumably "calm" in the RAVDESS naming scheme - confirm
# that merging it into 'neutral' is intended.
Ravdess_df['Emotions'] = Ravdess_df['Emotions'].replace(
    {1:'neutral', 2:'neutral', 3:'happy', 4:'sad', 5:'angry', 6:'fear', 7:'disgust',
     8:'surprise'}
)
print(Ravdess_df.head())
print("______________________________________________")
print(Ravdess_df.tail())
print("_______________________________________________")
print(Ravdess_df.Emotions.value_counts())

  Emotions                                               Path
0  neutral  ../data/raw/ravdess/audio_speech_actors_01-24/...
1  neutral  ../data/raw/ravdess/audio_speech_actors_01-24/...
2  neutral  ../data/raw/ravdess/audio_speech_actors_01-24/...
3  neutral  ../data/raw/ravdess/audio_speech_actors_01-24/...
4  neutral  ../data/raw/ravdess/audio_speech_actors_01-24/...
______________________________________________
      Emotions                                               Path
1435  surprise  ../data/raw/ravdess/audio_speech_actors_01-24/...
1436  surprise  ../data/raw/ravdess/audio_speech_actors_01-24/...
1437  surprise  ../data/raw/ravdess/audio_speech_actors_01-24/...
1438  surprise  ../data/raw/ravdess/audio_speech_actors_01-24/...
1439  surprise  ../data/raw/ravdess/audio_speech_actors_01-24/...
_______________________________________________
Emotions
neutral     288
happy       192
sad         192
angry       192
fear        192
disgust     192
surprise    192
Name: count, dtype: int64

In [12]:
# Crema Dataset
# Builds Crema_df (columns 'Emotions', 'Path') from the flat CREMA-D folder.

# File names are split on '_'; the third field is the emotion code.
crema_emotion_codes = {
    'SAD': 'sad',
    'ANG': 'angry',
    'DIS': 'disgust',
    'FEA': 'fear',
    'HAP': 'happy',
    'NEU': 'neutral',
}

# os.listdir() returns entries in arbitrary order; sorting keeps the row order
# reproducible, and non-.wav entries (e.g. .DS_Store) are left out.
crema_directory_list = sorted(
    name for name in os.listdir(Crema) if name.lower().endswith('.wav')
)

file_emotion = []
file_path = []

for file in crema_directory_list:
    # storing file paths
    file_path.append(Crema + file)
    # storing file emotions
    part = file.split('_')
    # A name with fewer than three fields has no emotion code; it is labelled
    # 'Unknown' like any unrecognised code instead of raising an IndexError.
    code = part[2] if len(part) > 2 else None
    file_emotion.append(crema_emotion_codes.get(code, 'Unknown'))

# dataframe for emotion of files
emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])

# dataframe for path of files.
path_df = pd.DataFrame(file_path, columns=['Path'])
Crema_df = pd.concat([emotion_df, path_df], axis=1)
# Class balance of the CREMA-D part; any 'Unknown' row shows up here.
print(Crema_df.Emotions.value_counts())

Emotions
angry      1271
disgust    1271
fear       1271
happy      1271
sad        1271
neutral    1087
Name: count, dtype: int64


In [14]:
# Tess Dataset
# Builds Tess_df (columns 'Emotions', 'Path') from <Tess>/<folder>/<clip>.wav.

# os.listdir() returns entries in arbitrary order; sorting both levels keeps the
# row order reproducible.
tess_directory_list = sorted(os.listdir(Tess))

file_emotion = []
file_path = []

# The loop variable is called `folder`, not `dir`: a module-level variable named
# `dir` would shadow the builtin dir() for the rest of the notebook.
for folder in tess_directory_list:
    # skip stray files that sit next to the emotion folders
    if not os.path.isdir(Tess + folder):
        continue
    directories = sorted(os.listdir(Tess + folder))
    for file in directories:
        # ignore anything that is not an audio clip (e.g. .DS_Store)
        if not file.lower().endswith('.wav'):
            continue
        # the third '_'-separated field of the file name (extension removed) is the emotion
        part = file.split('.')[0]
        part = part.split('_')[2]
        # 'ps' is renamed to 'surprise' so the label matches the other datasets
        if part=='ps':
            file_emotion.append('surprise')
        else:
            file_emotion.append(part)
        file_path.append(Tess + folder + '/' + file)

# dataframe for emotion of files
emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])

# dataframe for path of files.
path_df = pd.DataFrame(file_path, columns=['Path'])
Tess_df = pd.concat([emotion_df, path_df], axis=1)
# Class balance of the TESS part.
print(Tess_df.Emotions.value_counts())

Emotions
angry       400
disgust     400
fear        400
happy       400
neutral     400
surprise    400
sad         400
Name: count, dtype: int64


In [15]:
# SAVEE Dataset
# Builds Savee_df (columns 'Emotions', 'Path') from the flat SAVEE folder.

# Emotion prefix found in the second '_'-separated field of the file name.
savee_emotion_codes = {
    'a': 'angry',
    'd': 'disgust',
    'f': 'fear',
    'h': 'happy',
    'n': 'neutral',
    'sa': 'sad',
    'su': 'surprise',
}

# os.listdir() returns entries in arbitrary order; sorting keeps the row order
# reproducible, and non-.wav entries are left out.
savee_directory_list = sorted(
    name for name in os.listdir(Savee) if name.lower().endswith('.wav')
)

file_emotion = []
file_path = []

for file in savee_directory_list:
    file_path.append(Savee + file)
    part = file.split('_')
    # Dropping the last six characters of the second field leaves the emotion
    # prefix. Assumes that field looks like "sa01.wav" (two digits + ".wav") -
    # TODO confirm against the dataset.
    ele = part[1][:-6] if len(part) > 1 else ''
    # Unrecognised prefixes are labelled 'Unknown'; the previous catch-all branch
    # silently counted every such file as 'surprise'.
    file_emotion.append(savee_emotion_codes.get(ele, 'Unknown'))

# dataframe for emotion of files
emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])

# dataframe for path of files.
path_df = pd.DataFrame(file_path, columns=['Path'])
Savee_df = pd.concat([emotion_df, path_df], axis=1)
# Class balance of the SAVEE part; any 'Unknown' row shows up here.
print(Savee_df.Emotions.value_counts())

Emotions
neutral     120
angry        60
disgust      60
fear         60
happy        60
sad          60
surprise     60
Name: count, dtype: int64


In [16]:
# creating Dataframe using all the 4 dataframes we created so far.
# ignore_index=True gives the combined frame one continuous 0..n-1 index. Without
# it each source frame keeps its own 0-based labels, so a label such as 0 would
# refer to four different rows.
data_path = pd.concat([Ravdess_df, Crema_df, Tess_df, Savee_df], axis = 0, ignore_index=True)
data_path.head()

Unnamed: 0,Emotions,Path
0,neutral,../data/raw/ravdess/audio_speech_actors_01-24/...
1,neutral,../data/raw/ravdess/audio_speech_actors_01-24/...
2,neutral,../data/raw/ravdess/audio_speech_actors_01-24/...
3,neutral,../data/raw/ravdess/audio_speech_actors_01-24/...
4,neutral,../data/raw/ravdess/audio_speech_actors_01-24/...


In [17]:
# number of rows in the combined dataframe
data_path.shape[0]

12162

In [18]:
# per-emotion row counts over the combined dataframe
print(data_path['Emotions'].value_counts())

Emotions
happy       1923
sad         1923
angry       1923
fear        1923
disgust     1923
neutral     1895
surprise     652
Name: count, dtype: int64
