### Install dependencies

In [53]:
import os
import pandas as pd

### Extract components (audio speech details) from the filename

In [54]:
# Define base directory
base_dir = "audio_speech"

# Go through files and extract names
data = []
file_names = []

for actor in os.listdir(base_dir): # loops through each actor's folder
    actor_path = os.path.join(base_dir, actor) # define the path name (audio_speech\Actor_01, etc)

    if os.path.isdir(actor_path):
        for audio_file in os.listdir(actor_path):
            if audio_file.endswith(".wav"):
                # remove the extension and split at "-" to extract each component of the name
                parts = audio_file.replace(".wav", "").split("-")
                # append to data
                data.append(parts)  
                file_names.append(audio_file)
    
data

[['03', '01', '01', '01', '01', '01', '01'],
 ['03', '01', '01', '01', '01', '02', '01'],
 ['03', '01', '01', '01', '02', '01', '01'],
 ['03', '01', '01', '01', '02', '02', '01'],
 ['03', '01', '02', '01', '01', '01', '01'],
 ['03', '01', '02', '01', '01', '02', '01'],
 ['03', '01', '02', '01', '02', '01', '01'],
 ['03', '01', '02', '01', '02', '02', '01'],
 ['03', '01', '02', '02', '01', '01', '01'],
 ['03', '01', '02', '02', '01', '02', '01'],
 ['03', '01', '02', '02', '02', '01', '01'],
 ['03', '01', '02', '02', '02', '02', '01'],
 ['03', '01', '03', '01', '01', '01', '01'],
 ['03', '01', '03', '01', '01', '02', '01'],
 ['03', '01', '03', '01', '02', '01', '01'],
 ['03', '01', '03', '01', '02', '02', '01'],
 ['03', '01', '03', '02', '01', '01', '01'],
 ['03', '01', '03', '02', '01', '02', '01'],
 ['03', '01', '03', '02', '02', '01', '01'],
 ['03', '01', '03', '02', '02', '02', '01'],
 ['03', '01', '04', '01', '01', '01', '01'],
 ['03', '01', '04', '01', '01', '02', '01'],
 ['03', '0

### Convert the data extracted to a dataframe

In [46]:
# Define column names
columns = [
    "Modality",
    "Vocal_channel",
    "Emotion",
    "Emotional_intensity",
    "Statement",
    "Repetition",
    "Actor"
]

# Create dataframe
df = pd.DataFrame(data, columns=columns)
df = df.astype(int)
df.insert(0, "file_name", file_names)
df.head(10)

Unnamed: 0,file_name,Modality,Vocal_channel,Emotion,Emotional_intensity,Statement,Repetition,Actor
0,03-01-01-01-01-01-01.wav,3,1,1,1,1,1,1
1,03-01-01-01-01-02-01.wav,3,1,1,1,1,2,1
2,03-01-01-01-02-01-01.wav,3,1,1,1,2,1,1
3,03-01-01-01-02-02-01.wav,3,1,1,1,2,2,1
4,03-01-02-01-01-01-01.wav,3,1,2,1,1,1,1
5,03-01-02-01-01-02-01.wav,3,1,2,1,1,2,1
6,03-01-02-01-02-01-01.wav,3,1,2,1,2,1,1
7,03-01-02-01-02-02-01.wav,3,1,2,1,2,2,1
8,03-01-02-02-01-01-01.wav,3,1,2,2,1,1,1
9,03-01-02-02-01-02-01.wav,3,1,2,2,1,2,1


### EDA

In [47]:
# Looking at Actor 1 recordings
df[df["Actor"] == 1]

Unnamed: 0,file_name,Modality,Vocal_channel,Emotion,Emotional_intensity,Statement,Repetition,Actor
0,03-01-01-01-01-01-01.wav,3,1,1,1,1,1,1
1,03-01-01-01-01-02-01.wav,3,1,1,1,1,2,1
2,03-01-01-01-02-01-01.wav,3,1,1,1,2,1,1
3,03-01-01-01-02-02-01.wav,3,1,1,1,2,2,1
4,03-01-02-01-01-01-01.wav,3,1,2,1,1,1,1
5,03-01-02-01-01-02-01.wav,3,1,2,1,1,2,1
6,03-01-02-01-02-01-01.wav,3,1,2,1,2,1,1
7,03-01-02-01-02-02-01.wav,3,1,2,1,2,2,1
8,03-01-02-02-01-01-01.wav,3,1,2,2,1,1,1
9,03-01-02-02-01-02-01.wav,3,1,2,2,1,2,1


In [48]:
# Checking number of audio recordings per Actor
df.groupby("Actor").count()

# Looks like no missing recordings (based on file count alone)

Unnamed: 0_level_0,file_name,Modality,Vocal_channel,Emotion,Emotional_intensity,Statement,Repetition
Actor,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,60,60,60,60,60,60,60
2,60,60,60,60,60,60,60
3,60,60,60,60,60,60,60
4,60,60,60,60,60,60,60
5,60,60,60,60,60,60,60
6,60,60,60,60,60,60,60
7,60,60,60,60,60,60,60
8,60,60,60,60,60,60,60
9,60,60,60,60,60,60,60
10,60,60,60,60,60,60,60


In [49]:
# Ensure it is all audio
df.groupby("Actor")["Modality"].unique()

Actor
1     [3]
2     [3]
3     [3]
4     [3]
5     [3]
6     [3]
7     [3]
8     [3]
9     [3]
10    [3]
11    [3]
12    [3]
13    [3]
14    [3]
15    [3]
16    [3]
17    [3]
18    [3]
19    [3]
20    [3]
21    [3]
22    [3]
23    [3]
24    [3]
Name: Modality, dtype: object

In [50]:
# Ensure it is all speech
df.groupby("Actor")["Vocal_channel"].unique()

Actor
1     [1]
2     [1]
3     [1]
4     [1]
5     [1]
6     [1]
7     [1]
8     [1]
9     [1]
10    [1]
11    [1]
12    [1]
13    [1]
14    [1]
15    [1]
16    [1]
17    [1]
18    [1]
19    [1]
20    [1]
21    [1]
22    [1]
23    [1]
24    [1]
Name: Vocal_channel, dtype: object

### Adding human readable tags

In [51]:
emotion_map = {
    1: "neutral",
    2: "calm",
    3: "happy",
    4: "sad",
    5: "angry",
    6: "fearful",
    7: "disgust",
    8: "surprised"
}

intensity_map = {1: "normal", 2: "strong"}
statement_map = {1: "Kids are talking by the door", 2: "Dogs are sitting by the door"}

df["Emotion_label"] = df["Emotion"].map(emotion_map)
df["Intensity_label"] = df["Emotional_intensity"].map(intensity_map)
df["Statement_label"] = df["Statement"].map(statement_map)
df["Actor_gender"] = df["Actor"].apply(lambda x: "male" if x % 2 else "female")

df.head()

Unnamed: 0,file_name,Modality,Vocal_channel,Emotion,Emotional_intensity,Statement,Repetition,Actor,Emotion_label,Intensity_label,Statement_label,Actor_gender
0,03-01-01-01-01-01-01.wav,3,1,1,1,1,1,1,neutral,normal,Kids are talking by the door,male
1,03-01-01-01-01-02-01.wav,3,1,1,1,1,2,1,neutral,normal,Kids are talking by the door,male
2,03-01-01-01-02-01-01.wav,3,1,1,1,2,1,1,neutral,normal,Dogs are sitting by the door,male
3,03-01-01-01-02-02-01.wav,3,1,1,1,2,2,1,neutral,normal,Dogs are sitting by the door,male
4,03-01-02-01-01-01-01.wav,3,1,2,1,1,1,1,calm,normal,Kids are talking by the door,male


### Export file as csv

In [52]:
df.to_csv("ravdess_labels.csv", index=False)