Extract features from EmoDB dataset

In [1]:
import librosa
import os
import numpy as np
import pandas as pd

# DataFrame with Input Samples

Create Pandas DataFrame with filename and label

In [2]:
label2name = {
    "dis": "disgusto",
    "neu": "neutro",
    "gio": "gioia",
    "pau": "paura",
    "tri": "tristezza",
    "rab": "rabbia",
    "sor": "sorpresa",
}

dir_path = '/datasets/nicolas_facchinetti/EMOVO/data'
data = []
for actor in os.listdir(dir_path):
    path = dir_path+f"/{actor}"
    for filename in os.listdir(path):
        filepath = os.path.join(path, filename)
        info = filename.split("-")
        samples, sr = librosa.load(filepath, sr=22050)
        
        length = len(samples)/sr
        cat = label2name[filename[0:3]]
        gender = actor[0]
        data.append([filepath, filename, cat, actor, gender, length])
    
    
df_info = pd.DataFrame(data, columns=['path', 'filename', 'label', 'actor', 'gender', 'length'])
df_info


Unnamed: 0,path,filename,label,actor,gender,length
0,/datasets/nicolas_facchinetti/EMOVO/data/m3/gi...,gio-m3-l1.wav,gioia,m3,m,5.205351
1,/datasets/nicolas_facchinetti/EMOVO/data/m3/gi...,gio-m3-b3.wav,gioia,m3,m,1.877370
2,/datasets/nicolas_facchinetti/EMOVO/data/m3/pa...,pau-m3-n5.wav,paura,m3,m,4.394694
3,/datasets/nicolas_facchinetti/EMOVO/data/m3/tr...,tri-m3-b2.wav,tristezza,m3,m,2.773333
4,/datasets/nicolas_facchinetti/EMOVO/data/m3/tr...,tri-m3-l4.wav,tristezza,m3,m,3.370703
...,...,...,...,...,...,...
583,/datasets/nicolas_facchinetti/EMOVO/data/m2/di...,dis-m2-b3.wav,disgusto,m2,m,2.474694
584,/datasets/nicolas_facchinetti/EMOVO/data/m2/so...,sor-m2-d1.wav,sorpresa,m2,m,2.090703
585,/datasets/nicolas_facchinetti/EMOVO/data/m2/pa...,pau-m2-b3.wav,paura,m2,m,1.962676
586,/datasets/nicolas_facchinetti/EMOVO/data/m2/tr...,tri-m2-n2.wav,tristezza,m2,m,2.368027


Sampes length analysis

In [3]:
df_info["length"].describe()

count    588.000000
mean       3.121005
std        1.357110
min        1.290703
25%        2.133333
50%        2.773333
75%        3.840000
max       13.994694
Name: length, dtype: float64

In [4]:
df_info.label.value_counts()

gioia        84
paura        84
tristezza    84
sorpresa     84
disgusto     84
neutro       84
rabbia       84
Name: label, dtype: int64

In [5]:
df_info.actor.value_counts()

m3    98
f1    98
f2    98
f3    98
m1    98
m2    98
Name: actor, dtype: int64

Save metadata

In [6]:
df_info.to_csv("/datasets/nicolas_facchinetti/processed_data/EMOVO/processed_metadata.csv", index=False)