In [1]:
import pandas as pd
import numpy as np
import pathlib
from tqdm.notebook import tqdm
import librosa
import awkward as ak
import matplotlib.pyplot as plt

# Local Import

In [2]:
# Folder that is scanned for the .wav recordings used by the rest of the notebook.
dataset_folder = pathlib.Path("../Ravdess Audio file")  # change this to the location of your copy of the dataset

In [3]:
# Collect every .wav file in the dataset folder, sorted by path.
# The list is built with a filter rather than by popping entries while
# iterating: removing items from a list during iteration skips the element
# that follows each removed one, so adjacent non-.wav entries would survive.
file_list = sorted(
    path for path in dataset_folder.iterdir() if path.suffix == ".wav"
)
file_list

[WindowsPath('../Ravdess Audio file/03-01-01-01-01-01-01.wav'),
 WindowsPath('../Ravdess Audio file/03-01-01-01-01-01-02.wav'),
 WindowsPath('../Ravdess Audio file/03-01-01-01-01-01-03.wav'),
 WindowsPath('../Ravdess Audio file/03-01-01-01-01-01-04.wav'),
 WindowsPath('../Ravdess Audio file/03-01-01-01-01-01-05.wav'),
 WindowsPath('../Ravdess Audio file/03-01-01-01-01-01-06.wav'),
 WindowsPath('../Ravdess Audio file/03-01-01-01-01-01-07.wav'),
 WindowsPath('../Ravdess Audio file/03-01-01-01-01-01-08.wav'),
 WindowsPath('../Ravdess Audio file/03-01-01-01-01-01-09.wav'),
 WindowsPath('../Ravdess Audio file/03-01-01-01-01-01-10.wav'),
 WindowsPath('../Ravdess Audio file/03-01-01-01-01-01-11.wav'),
 WindowsPath('../Ravdess Audio file/03-01-01-01-01-01-12.wav'),
 WindowsPath('../Ravdess Audio file/03-01-01-01-01-01-13.wav'),
 WindowsPath('../Ravdess Audio file/03-01-01-01-01-01-14.wav'),
 WindowsPath('../Ravdess Audio file/03-01-01-01-01-01-15.wav'),
 WindowsPath('../Ravdess Audio file/03-0

In [4]:
# Load every recording with sr=None and store its samples wrapped in a
# one-element list, one entry per file.
# NOTE(review): `array` is reassigned from the ArrayBuilder snapshot in a
# later cell, so this pass over the files looks redundant -- confirm before
# removing it.
array = []
for path in tqdm(file_list):
    samples, sr = librosa.load(path, sr=None)
    array.append([samples])

  0%|          | 0/2462 [00:00<?, ?it/s]

In [5]:
# Build a jagged awkward array with one entry per file; each entry wraps a
# single inner list that holds the samples of that file.
builder = ak.ArrayBuilder()
for path in tqdm(file_list):
    samples, sr = librosa.load(path, sr=None)
    # Outer list = the per-file entry, inner list = the time series itself.
    # The context managers emit the matching begin_list/end_list calls.
    with builder.list(), builder.list():
        for sample in samples:
            builder.real(sample)
array = builder.snapshot()  # one row per file, each row wrapping one time series

  0%|          | 0/2462 [00:00<?, ?it/s]

In [14]:
# Maps each categorical feature name to a {two-digit code: readable label}
# table. The key order matters: it is used as the column order when the
# dash-separated file-name fields are turned into a DataFrame.
CATEGORICAL_FEATURES_NAMES = {
    "modality": {
        "01": "full-AV",
        "02": "video-only",
        "03": "audio-only",
    },
    "vocal_channel": {
        "01": "speech",
        "02": "song",
    },
    "emotion": {
        "01": "neutral",
        "02": "calm",
        "03": "happy",
        "04": "sad",
        "05": "angry",
        "06": "fearful",
        "07": "disgust",
        "08": "surprised",
    },
    "emotional_intensity": {
        "01": "normal",
        "02": "strong",
    },
    "statement": {
        "01": "Kids are talking by the door",
        "02": "Dogs are sitting by the door",
    },
    "repetition": {
        "01": "1st",
        "02": "2nd",
    },
    # Actor codes "01".."24" map to themselves (identity table).
    "actor": {code: code for code in (f"{n:02d}" for n in range(1, 25))},
}

In [16]:
# Turn the dash-separated stem of every file name into one row of categorical
# codes (one column per feature), remembering the full file names alongside.
file_names = [path.name for path in file_list]
df = pd.DataFrame(
    [path.stem.split("-") for path in file_list],
    columns=list(CATEGORICAL_FEATURES_NAMES),
)

# Translate the codes into readable labels. The nested mapping
# {column: {code: label}} is applied to all columns by a single replace call,
# so no per-column loop is needed.
df = df.replace(CATEGORICAL_FEATURES_NAMES)



In [17]:
# Preview the first rows after the codes have been replaced by labels.
df.head()

Unnamed: 0,modality,vocal_channel,emotion,emotional_intensity,statement,repetition,actor
0,audio-only,speech,neutral,normal,Kids are talking by the door,1st,1
1,audio-only,speech,neutral,normal,Kids are talking by the door,1st,2
2,audio-only,speech,neutral,normal,Kids are talking by the door,1st,3
3,audio-only,speech,neutral,normal,Kids are talking by the door,1st,4
4,audio-only,speech,neutral,normal,Kids are talking by the door,1st,5


In [18]:
# Normalize the two malformed actor codes '19(1)' and '21(1)'.
# Series.replace is used instead of element-wise chained assignment, which
# triggers SettingWithCopyWarning and does not write through to the frame
# when pandas copy-on-write is enabled.
df["actor"] = df["actor"].replace({"19(1)": "19", "21(1)": "21"})

# Even actor ids are labelled 'F', odd ids 'M'.
df["sex"] = df["actor"].astype(int).mod(2).map({0: "F", 1: "M"})
df["filename"] = file_names

In [19]:
# Preview the frame with the added `sex` and `filename` columns.
df.head()

Unnamed: 0,modality,vocal_channel,emotion,emotional_intensity,statement,repetition,actor,sex,filename
0,audio-only,speech,neutral,normal,Kids are talking by the door,1st,1,M,03-01-01-01-01-01-01.wav
1,audio-only,speech,neutral,normal,Kids are talking by the door,1st,2,F,03-01-01-01-01-01-02.wav
2,audio-only,speech,neutral,normal,Kids are talking by the door,1st,3,M,03-01-01-01-01-01-03.wav
3,audio-only,speech,neutral,normal,Kids are talking by the door,1st,4,F,03-01-01-01-01-01-04.wav
4,audio-only,speech,neutral,normal,Kids are talking by the door,1st,5,M,03-01-01-01-01-01-05.wav


In [20]:
# Order the metadata by actor and, within an actor, by file name. The row
# labels of `df` are preserved by the sort, so they still identify the
# position of each recording in the original (file-sorted) order.
sort_keys = ["actor", "filename"]
df_sorted_actor = df.sort_values(by=sort_keys)
idxs_sorted = df_sorted_actor.index.tolist()

In [24]:
# Preview the actor-sorted frame; the index keeps the pre-sort row labels.
df_sorted_actor.head()

Unnamed: 0,modality,vocal_channel,emotion,emotional_intensity,statement,repetition,actor,sex,filename
0,audio-only,speech,neutral,normal,Kids are talking by the door,1st,1,M,03-01-01-01-01-01-01.wav
24,audio-only,speech,neutral,normal,Kids are talking by the door,2nd,1,M,03-01-01-01-01-02-01.wav
48,audio-only,speech,neutral,normal,Dogs are sitting by the door,1st,1,M,03-01-01-01-02-01-01.wav
72,audio-only,speech,neutral,normal,Dogs are sitting by the door,2nd,1,M,03-01-01-01-02-02-01.wav
96,audio-only,speech,calm,normal,Kids are talking by the door,1st,1,M,03-01-02-01-01-01-01.wav


In [25]:
# Actor-wise split: row labels of actors with id below 19 go to the training
# set, those of actors with id 19 or above go to the test set.
actor_ids = df_sorted_actor["actor"].astype(int)
idx_train = df_sorted_actor[actor_ids < 19].index.tolist()
idx_test = df_sorted_actor[actor_ids >= 19].index.tolist()

In [39]:
# X_* hold the time series (continuous sample values) selected from `array`,
# Y_* hold the matching categorical metadata rows. Actors with id below 19
# form the training split, the remaining actors form the test split.
is_train_actor = df_sorted_actor.actor.astype(int) < 19
X_train = array[idx_train]
Y_train = df_sorted_actor[is_train_actor]
X_test = array[idx_test]
Y_test = df_sorted_actor[~is_train_actor]

### TO NUMPY

In [46]:
def pad_X(X, m_max, nan_value=0):
    """Pad or clip the lists at ``axis=2`` of ``X`` to exactly ``m_max`` items.

    ``ak.pad_none`` is called with ``clip=True``, so lists shorter than
    ``m_max`` are extended with missing values and longer lists are cut to
    ``m_max``. The missing values are then replaced via ``ak.fill_none``.

    Parameters
    ----------
    X : awkward array with at least three levels of nesting (axis 2 is padded).
    m_max : target length of the lists at ``axis=2``.
    nan_value : value substituted for missing entries (default ``0``).

    Returns
    -------
    The padded array returned by ``ak.fill_none``.
    """
    padded = ak.pad_none(X, m_max, axis=2, clip=True)
    return ak.fill_none(padded, value=nan_value)

In [47]:
# find the max length of X_train
# Length (in samples) of the longest series in X_train; 0 if X_train is empty.
maximum = max((len(np.asarray(np.ravel(ts))) for ts in X_train), default=0)
maximum

304304

In [48]:
%%time
X_train = np.squeeze(np.array(pad_X(X_train, maximum, np.nan)))
X_test = np.squeeze(np.array(pad_X(X_test, maximum, np.nan)))

CPU times: total: 32.6 s
Wall time: 33.6 s


In [51]:
# Print the shape of each split, in the order X_train, Y_train, X_test, Y_test.
# X_* contain the continuous time-series values, Y_* the categorical metadata.
for split in (X_train, Y_train, X_test, Y_test):
    print(split.shape)

(1828, 304304)
(1828, 9)
(634, 304304)
(634, 9)


In [52]:
# Save the continuous-valued time series to two .npy arrays
#np.save("RavdessAudioOnlyNumpy__X_train.npy", X_train)
#np.save("RavdessAudioOnlyNumpy__X_test.npy", X_test)

In [53]:
# Save the categorical metadata of the time series to two CSV files
#Y_train.to_csv("RavdessAudioOnlyNumpy__Y_train.csv", index=False)
#Y_test.to_csv("RavdessAudioOnlyNumpy__Y_test.csv", index=False)

### FROM NUMPY

In [54]:
# Reload the padded time series from the .npy files in the working directory;
# this replaces the in-memory X_train / X_test computed earlier.
# NOTE(review): the np.save calls that would write these files are commented
# out in an earlier cell, so the files must already exist -- confirm.
X_train = np.load("RavdessAudioOnlyNumpy__X_train.npy")
X_test = np.load("RavdessAudioOnlyNumpy__X_test.npy")
print(X_train.shape, X_test.shape)

(1828, 304304) (634, 304304)


In [55]:
# Display the reloaded training array.
X_train

array([[ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
                    nan,             nan,             nan],
       [ 0.00000000e+00,  0.00000000e+00, -3.05175781e-05, ...,
                    nan,             nan,             nan],
       [ 3.05175781e-05,  3.05175781e-05,  0.00000000e+00, ...,
                    nan,             nan,             nan],
       ...,
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
                    nan,             nan,             nan],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
                    nan,             nan,             nan],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
                    nan,             nan,             nan]])

In [56]:
# Display the reloaded test array.
X_test

array([[ 0.        ,  0.        ,  0.        , ...,         nan,
                nan,         nan],
       [ 0.        ,  0.        ,  0.        , ...,         nan,
                nan,         nan],
       [-0.00018311, -0.00018311, -0.00018311, ...,         nan,
                nan,         nan],
       ...,
       [ 0.        ,  0.        ,  0.        , ...,         nan,
                nan,         nan],
       [ 0.        ,  0.        ,  0.        , ...,         nan,
                nan,         nan],
       [ 0.        ,  0.        ,  0.        , ...,         nan,
                nan,         nan]])

In [61]:
# Count the missing values in each column of the training metadata.
Y_train.isnull().sum()

modality               0
vocal_channel          0
emotion                0
emotional_intensity    0
statement              0
repetition             0
actor                  0
sex                    0
filename               0
dtype: int64

In [62]:
# Count the missing values in each column of the test metadata.
Y_test.isnull().sum()

modality               0
vocal_channel          0
emotion                0
emotional_intensity    0
statement              0
repetition             0
actor                  0
sex                    0
filename               0
dtype: int64

In [63]:
# Display the training metadata frame.
Y_train

Unnamed: 0,modality,vocal_channel,emotion,emotional_intensity,statement,repetition,actor,sex,filename
0,audio-only,speech,neutral,normal,Kids are talking by the door,1st,01,M,03-01-01-01-01-01-01.wav
24,audio-only,speech,neutral,normal,Kids are talking by the door,2nd,01,M,03-01-01-01-01-02-01.wav
48,audio-only,speech,neutral,normal,Dogs are sitting by the door,1st,01,M,03-01-01-01-02-01-01.wav
72,audio-only,speech,neutral,normal,Dogs are sitting by the door,2nd,01,M,03-01-01-01-02-02-01.wav
96,audio-only,speech,calm,normal,Kids are talking by the door,1st,01,M,03-01-02-01-01-01-01.wav
...,...,...,...,...,...,...,...,...,...
1343,audio-only,speech,surprised,normal,Dogs are sitting by the door,2nd,18,F,03-01-08-01-02-02-18.wav
1367,audio-only,speech,surprised,strong,Kids are talking by the door,1st,18,F,03-01-08-02-01-01-18.wav
1391,audio-only,speech,surprised,strong,Kids are talking by the door,2nd,18,F,03-01-08-02-01-02-18.wav
1415,audio-only,speech,surprised,strong,Dogs are sitting by the door,1st,18,F,03-01-08-02-02-01-18.wav
