In [1]:
import audb
import pandas as pd
import os
import glob
import re

In [2]:
# Catalogue of every database/version that audb can see.
datasets = audb.available()

# Corpora this notebook is interested in.
targets = ["ravdess", "emozionalmente", "tess", "savee"]

# Show only the target rows: name ascending, version descending.
(
    datasets
    .loc[datasets.index.isin(targets)]
    .sort_values(by=["name", "version"], ascending=[True, False])
)

Unnamed: 0_level_0,backend,host,repository,version
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
emozionalmente,s3,s3.dualstack.eu-north-1.amazonaws.com,audb-public,1.0.0
ravdess,s3,s3.dualstack.eu-north-1.amazonaws.com,audb-public,1.1.3
ravdess,s3,s3.dualstack.eu-north-1.amazonaws.com,audb-public,1.1.2
ravdess,s3,s3.dualstack.eu-north-1.amazonaws.com,audb-public,1.1.1
ravdess,s3,s3.dualstack.eu-north-1.amazonaws.com,audb-public,1.1.0
ravdess,s3,s3.dualstack.eu-north-1.amazonaws.com,audb-public,1.0.1
ravdess,s3,s3.dualstack.eu-north-1.amazonaws.com,audb-public,1.0.0


In [3]:
class audb_dataset:
    def __init__(self, name: str, version: str | None = None):
        self.name = name
        self.version = version

## RAVDESS

In [None]:
# Pin the RAVDESS release used throughout this section.
ravdess = audb_dataset(name="ravdess", version="1.1.3")

# Request WAV at sampling_rate=16000 with mixdown enabled, and use the
# current directory as the cache root.
# NOTE(review): the cells below read from 'RAVDESS/...' while `name` is
# "ravdess" and cache_root is "./" - confirm the on-disk folder name.
ravdess_db = audb.load(
    ravdess.name,
    version=ravdess.version,
    format="wav",
    sampling_rate=16000,
    mixdown=True,
    pickle_tables=False,
    cache_root="./",
)

In [4]:
# Per-file table of the RAVDESS 1.1.3 download (file, speaker, duration,
# transcription, vocal channel); merged onto the split tables below via 'file'.
tmp_files = pd.read_csv('RAVDESS/1.1.3/fe182b91/db.files.csv')
tmp_files

Unnamed: 0,file,speaker,duration,transcription,vocal channel
0,songs/actor_06/03-02-04-02-02-01-06.wav,6,4304291667,Dogs are sitting by the door,song
1,songs/actor_06/03-02-02-02-01-01-06.wav,6,5605583333,Kids are talking by the door,song
2,songs/actor_06/03-02-03-02-01-02-06.wav,6,4637979167,Kids are talking by the door,song
3,songs/actor_06/03-02-06-01-02-01-06.wav,6,4437770833,Dogs are sitting by the door,song
4,songs/actor_06/03-02-06-02-01-02-06.wav,6,4204208333,Kids are talking by the door,song
...,...,...,...,...,...
2447,speech/actor_08/03-01-06-01-01-01-08.wav,8,3970645833,Kids are talking by the door,speech
2448,speech/actor_08/03-01-01-01-01-02-08.wav,8,3636979167,Kids are talking by the door,speech
2449,speech/actor_08/03-01-03-01-01-01-08.wav,8,3503499999,Kids are talking by the door,speech
2450,speech/actor_08/03-01-05-02-01-02-08.wav,8,3770437500,Kids are talking by the door,speech


### Train files

In [5]:
# Read the song and speech emotion tables of the train split.
tmp_song_train, tmp_speech_train = (
    pd.read_csv(csv_path)
    for csv_path in (
        'RAVDESS/1.1.3/fe182b91/db.emotion.song.train.csv',
        'RAVDESS/1.1.3/fe182b91/db.emotion.speech.train.csv',
    )
)

# Stack both tables (song rows first) under a fresh 0..n-1 index.
tmp_train = pd.concat((tmp_song_train, tmp_speech_train), ignore_index=True)
tmp_train

Unnamed: 0,file,emotion,emotional intensity
0,songs/actor_06/03-02-04-02-02-01-06.wav,sadness,strong
1,songs/actor_06/03-02-02-02-01-01-06.wav,calm,strong
2,songs/actor_06/03-02-03-02-01-02-06.wav,happiness,strong
3,songs/actor_06/03-02-06-01-02-01-06.wav,fear,normal
4,songs/actor_06/03-02-06-02-01-02-06.wav,fear,strong
...,...,...,...
1615,speech/actor_08/03-01-06-01-01-01-08.wav,fear,normal
1616,speech/actor_08/03-01-01-01-01-02-08.wav,neutral,normal
1617,speech/actor_08/03-01-03-01-01-01-08.wav,happiness,normal
1618,speech/actor_08/03-01-05-02-01-02-08.wav,anger,strong


In [6]:
# Left join: keep every labelled train row and attach the per-file columns
# from tmp_files, matching on 'file'.
train_files = pd.merge(tmp_train, tmp_files, on='file', how='left')
train_files

Unnamed: 0,file,emotion,emotional intensity,speaker,duration,transcription,vocal channel
0,songs/actor_06/03-02-04-02-02-01-06.wav,sadness,strong,6,4304291667,Dogs are sitting by the door,song
1,songs/actor_06/03-02-02-02-01-01-06.wav,calm,strong,6,5605583333,Kids are talking by the door,song
2,songs/actor_06/03-02-03-02-01-02-06.wav,happiness,strong,6,4637979167,Kids are talking by the door,song
3,songs/actor_06/03-02-06-01-02-01-06.wav,fear,normal,6,4437770833,Dogs are sitting by the door,song
4,songs/actor_06/03-02-06-02-01-02-06.wav,fear,strong,6,4204208333,Kids are talking by the door,song
...,...,...,...,...,...,...,...
1615,speech/actor_08/03-01-06-01-01-01-08.wav,fear,normal,8,3970645833,Kids are talking by the door,speech
1616,speech/actor_08/03-01-01-01-01-02-08.wav,neutral,normal,8,3636979167,Kids are talking by the door,speech
1617,speech/actor_08/03-01-03-01-01-01-08.wav,happiness,normal,8,3503499999,Kids are talking by the door,speech
1618,speech/actor_08/03-01-05-02-01-02-08.wav,anger,strong,8,3770437500,Kids are talking by the door,speech


In [7]:
# Keep only the columns used from here on. `.copy()` makes the selection an
# independent frame, so the column assignments in the following cells do not
# write into a selection of the merged table (SettingWithCopyWarning).
train_files = train_files[['file', 'speaker', 'emotion']].copy()
train_files

Unnamed: 0,file,speaker,emotion
0,songs/actor_06/03-02-04-02-02-01-06.wav,6,sadness
1,songs/actor_06/03-02-02-02-01-01-06.wav,6,calm
2,songs/actor_06/03-02-03-02-01-02-06.wav,6,happiness
3,songs/actor_06/03-02-06-01-02-01-06.wav,6,fear
4,songs/actor_06/03-02-06-02-01-02-06.wav,6,fear
...,...,...,...
1615,speech/actor_08/03-01-06-01-01-01-08.wav,8,fear
1616,speech/actor_08/03-01-01-01-01-02-08.wav,8,neutral
1617,speech/actor_08/03-01-03-01-01-01-08.wav,8,happiness
1618,speech/actor_08/03-01-05-02-01-02-08.wav,8,anger


In [8]:
# Folder that the relative paths of the manifest are joined onto.
ravdess_root = os.path.join('RAVDESS', 'data')

# Build a new frame instead of mutating a column selection in place:
#   * the original relative path is kept as 'old_path',
#   * 'split' tags every row as train,
#   * 'file' becomes the path prefixed with ravdess_root.
# Resulting column order: old_path, speaker, emotion, split, file.
train_files = (
    train_files
    .rename(columns={'file': 'old_path'})
    .assign(
        split='train',
        file=lambda frame: frame['old_path'].map(
            lambda rel_path: os.path.join(ravdess_root, rel_path)
        ),
    )
)
train_files

Unnamed: 0,old_path,speaker,emotion,split,file
0,songs/actor_06/03-02-04-02-02-01-06.wav,6,sadness,train,RAVDESS/data/songs/actor_06/03-02-04-02-02-01-...
1,songs/actor_06/03-02-02-02-01-01-06.wav,6,calm,train,RAVDESS/data/songs/actor_06/03-02-02-02-01-01-...
2,songs/actor_06/03-02-03-02-01-02-06.wav,6,happiness,train,RAVDESS/data/songs/actor_06/03-02-03-02-01-02-...
3,songs/actor_06/03-02-06-01-02-01-06.wav,6,fear,train,RAVDESS/data/songs/actor_06/03-02-06-01-02-01-...
4,songs/actor_06/03-02-06-02-01-02-06.wav,6,fear,train,RAVDESS/data/songs/actor_06/03-02-06-02-01-02-...
...,...,...,...,...,...
1615,speech/actor_08/03-01-06-01-01-01-08.wav,8,fear,train,RAVDESS/data/speech/actor_08/03-01-06-01-01-01...
1616,speech/actor_08/03-01-01-01-01-02-08.wav,8,neutral,train,RAVDESS/data/speech/actor_08/03-01-01-01-01-02...
1617,speech/actor_08/03-01-03-01-01-01-08.wav,8,happiness,train,RAVDESS/data/speech/actor_08/03-01-03-01-01-01...
1618,speech/actor_08/03-01-05-02-01-02-08.wav,8,anger,train,RAVDESS/data/speech/actor_08/03-01-05-02-01-02...


In [9]:
# Check the dtype of 'speaker'; the next cell formats it with an integer
# format spec (`:03d`).
train_files['speaker'].dtype

dtype('int64')

In [10]:
# Tag the corpus of origin.
train_files['source'] = 'ravdess'

# Turn the integer speaker id into a zero-padded, corpus-prefixed string
# (6 -> 'ravdess_006').
train_files['speaker'] = train_files['speaker'].map(
    lambda speaker_id: f'ravdess_{speaker_id:03d}'
)
train_files

Unnamed: 0,old_path,speaker,emotion,split,file,source
0,songs/actor_06/03-02-04-02-02-01-06.wav,ravdess_006,sadness,train,RAVDESS/data/songs/actor_06/03-02-04-02-02-01-...,ravdess
1,songs/actor_06/03-02-02-02-01-01-06.wav,ravdess_006,calm,train,RAVDESS/data/songs/actor_06/03-02-02-02-01-01-...,ravdess
2,songs/actor_06/03-02-03-02-01-02-06.wav,ravdess_006,happiness,train,RAVDESS/data/songs/actor_06/03-02-03-02-01-02-...,ravdess
3,songs/actor_06/03-02-06-01-02-01-06.wav,ravdess_006,fear,train,RAVDESS/data/songs/actor_06/03-02-06-01-02-01-...,ravdess
4,songs/actor_06/03-02-06-02-01-02-06.wav,ravdess_006,fear,train,RAVDESS/data/songs/actor_06/03-02-06-02-01-02-...,ravdess
...,...,...,...,...,...,...
1615,speech/actor_08/03-01-06-01-01-01-08.wav,ravdess_008,fear,train,RAVDESS/data/speech/actor_08/03-01-06-01-01-01...,ravdess
1616,speech/actor_08/03-01-01-01-01-02-08.wav,ravdess_008,neutral,train,RAVDESS/data/speech/actor_08/03-01-01-01-01-02...,ravdess
1617,speech/actor_08/03-01-03-01-01-01-08.wav,ravdess_008,happiness,train,RAVDESS/data/speech/actor_08/03-01-03-01-01-01...,ravdess
1618,speech/actor_08/03-01-05-02-01-02-08.wav,ravdess_008,anger,train,RAVDESS/data/speech/actor_08/03-01-05-02-01-02...,ravdess


In [11]:
# Final column layout of the train manifest; 'old_path' is dropped here.
train_files = train_files.loc[:, ['file', 'speaker', 'emotion', 'source', 'split']]
train_files

Unnamed: 0,file,speaker,emotion,source,split
0,RAVDESS/data/songs/actor_06/03-02-04-02-02-01-...,ravdess_006,sadness,ravdess,train
1,RAVDESS/data/songs/actor_06/03-02-02-02-01-01-...,ravdess_006,calm,ravdess,train
2,RAVDESS/data/songs/actor_06/03-02-03-02-01-02-...,ravdess_006,happiness,ravdess,train
3,RAVDESS/data/songs/actor_06/03-02-06-01-02-01-...,ravdess_006,fear,ravdess,train
4,RAVDESS/data/songs/actor_06/03-02-06-02-01-02-...,ravdess_006,fear,ravdess,train
...,...,...,...,...,...
1615,RAVDESS/data/speech/actor_08/03-01-06-01-01-01...,ravdess_008,fear,ravdess,train
1616,RAVDESS/data/speech/actor_08/03-01-01-01-01-02...,ravdess_008,neutral,ravdess,train
1617,RAVDESS/data/speech/actor_08/03-01-03-01-01-01...,ravdess_008,happiness,ravdess,train
1618,RAVDESS/data/speech/actor_08/03-01-05-02-01-02...,ravdess_008,anger,ravdess,train


In [12]:
# Write the train manifest once; an existing CSV is left untouched.
if not os.path.exists('RAVDESS/ravdess_train_files.csv'):
    train_files.to_csv('RAVDESS/ravdess_train_files.csv', index=False)
    print("File saved.")
else:
    print("File already exists.")

File saved.


### Dev files

In [13]:
# Read the song and speech emotion tables of the dev split.
tmp_song_dev, tmp_speech_dev = (
    pd.read_csv(csv_path)
    for csv_path in (
        'RAVDESS/1.1.3/fe182b91/db.emotion.song.dev.csv',
        'RAVDESS/1.1.3/fe182b91/db.emotion.speech.dev.csv',
    )
)

# Stack both tables (song rows first) under a fresh 0..n-1 index.
tmp_dev = pd.concat((tmp_song_dev, tmp_speech_dev), ignore_index=True)
tmp_dev

Unnamed: 0,file,emotion,emotional intensity
0,songs/actor_16/03-02-03-01-01-01-16.wav,happiness,normal
1,songs/actor_16/03-02-06-01-01-02-16.wav,fear,normal
2,songs/actor_16/03-02-03-02-02-02-16.wav,happiness,strong
3,songs/actor_16/03-02-02-02-02-01-16.wav,calm,strong
4,songs/actor_16/03-02-02-01-02-01-16.wav,calm,normal
...,...,...,...
411,speech/actor_19/03-01-02-02-01-01-19.wav,calm,strong
412,speech/actor_19/03-01-07-01-02-01-19.wav,disgust,normal
413,speech/actor_19/03-01-03-02-01-02-19.wav,happiness,strong
414,speech/actor_19/03-01-03-02-02-02-19.wav,happiness,strong


In [14]:
# Left join: keep every labelled dev row and attach the per-file columns
# from tmp_files, matching on 'file'.
dev_files = pd.merge(tmp_dev, tmp_files, on='file', how='left')
dev_files

Unnamed: 0,file,emotion,emotional intensity,speaker,duration,transcription,vocal channel
0,songs/actor_16/03-02-03-01-01-01-16.wav,happiness,normal,16,4871541667,Kids are talking by the door,song
1,songs/actor_16/03-02-06-01-01-02-16.wav,fear,normal,16,4704708333,Kids are talking by the door,song
2,songs/actor_16/03-02-03-02-02-02-16.wav,happiness,strong,16,4671333333,Dogs are sitting by the door,song
3,songs/actor_16/03-02-02-02-02-01-16.wav,calm,strong,16,5205208333,Dogs are sitting by the door,song
4,songs/actor_16/03-02-02-01-02-01-16.wav,calm,normal,16,5071729167,Dogs are sitting by the door,song
...,...,...,...,...,...,...,...
411,speech/actor_19/03-01-02-02-01-01-19.wav,calm,strong,19,4471145833,Kids are talking by the door,speech
412,speech/actor_19/03-01-07-01-02-01-19.wav,disgust,normal,19,4170833333,Dogs are sitting by the door,speech
413,speech/actor_19/03-01-03-02-01-02-19.wav,happiness,strong,19,3803791667,Kids are talking by the door,speech
414,speech/actor_19/03-01-03-02-02-02-19.wav,happiness,strong,19,4004000000,Dogs are sitting by the door,speech


In [15]:
# Keep only the columns used from here on. `.copy()` makes the selection an
# independent frame, so the column assignments in the following cells do not
# write into a selection of the merged table (SettingWithCopyWarning).
dev_files = dev_files[['file', 'speaker', 'emotion']].copy()
dev_files

Unnamed: 0,file,speaker,emotion
0,songs/actor_16/03-02-03-01-01-01-16.wav,16,happiness
1,songs/actor_16/03-02-06-01-01-02-16.wav,16,fear
2,songs/actor_16/03-02-03-02-02-02-16.wav,16,happiness
3,songs/actor_16/03-02-02-02-02-01-16.wav,16,calm
4,songs/actor_16/03-02-02-01-02-01-16.wav,16,calm
...,...,...,...
411,speech/actor_19/03-01-02-02-01-01-19.wav,19,calm
412,speech/actor_19/03-01-07-01-02-01-19.wav,19,disgust
413,speech/actor_19/03-01-03-02-01-02-19.wav,19,happiness
414,speech/actor_19/03-01-03-02-02-02-19.wav,19,happiness


In [16]:
# Folder that the relative paths of the manifest are joined onto.
ravdess_root = os.path.join('RAVDESS', 'data')

# Build a new frame instead of mutating a column selection in place:
#   * the original relative path is kept as 'old_path',
#   * 'split' tags every row as dev,
#   * 'file' becomes the path prefixed with ravdess_root.
# Resulting column order: old_path, speaker, emotion, split, file.
dev_files = (
    dev_files
    .rename(columns={'file': 'old_path'})
    .assign(
        split='dev',
        file=lambda frame: frame['old_path'].map(
            lambda rel_path: os.path.join(ravdess_root, rel_path)
        ),
    )
)
dev_files

Unnamed: 0,old_path,speaker,emotion,split,file
0,songs/actor_16/03-02-03-01-01-01-16.wav,16,happiness,dev,RAVDESS/data/songs/actor_16/03-02-03-01-01-01-...
1,songs/actor_16/03-02-06-01-01-02-16.wav,16,fear,dev,RAVDESS/data/songs/actor_16/03-02-06-01-01-02-...
2,songs/actor_16/03-02-03-02-02-02-16.wav,16,happiness,dev,RAVDESS/data/songs/actor_16/03-02-03-02-02-02-...
3,songs/actor_16/03-02-02-02-02-01-16.wav,16,calm,dev,RAVDESS/data/songs/actor_16/03-02-02-02-02-01-...
4,songs/actor_16/03-02-02-01-02-01-16.wav,16,calm,dev,RAVDESS/data/songs/actor_16/03-02-02-01-02-01-...
...,...,...,...,...,...
411,speech/actor_19/03-01-02-02-01-01-19.wav,19,calm,dev,RAVDESS/data/speech/actor_19/03-01-02-02-01-01...
412,speech/actor_19/03-01-07-01-02-01-19.wav,19,disgust,dev,RAVDESS/data/speech/actor_19/03-01-07-01-02-01...
413,speech/actor_19/03-01-03-02-01-02-19.wav,19,happiness,dev,RAVDESS/data/speech/actor_19/03-01-03-02-01-02...
414,speech/actor_19/03-01-03-02-02-02-19.wav,19,happiness,dev,RAVDESS/data/speech/actor_19/03-01-03-02-02-02...


In [17]:
# Tag the corpus of origin.
dev_files['source'] = 'ravdess'

# Turn the integer speaker id into a zero-padded, corpus-prefixed string
# (16 -> 'ravdess_016').
dev_files['speaker'] = dev_files['speaker'].map(
    lambda speaker_id: f'ravdess_{speaker_id:03d}'
)
dev_files

Unnamed: 0,old_path,speaker,emotion,split,file,source
0,songs/actor_16/03-02-03-01-01-01-16.wav,ravdess_016,happiness,dev,RAVDESS/data/songs/actor_16/03-02-03-01-01-01-...,ravdess
1,songs/actor_16/03-02-06-01-01-02-16.wav,ravdess_016,fear,dev,RAVDESS/data/songs/actor_16/03-02-06-01-01-02-...,ravdess
2,songs/actor_16/03-02-03-02-02-02-16.wav,ravdess_016,happiness,dev,RAVDESS/data/songs/actor_16/03-02-03-02-02-02-...,ravdess
3,songs/actor_16/03-02-02-02-02-01-16.wav,ravdess_016,calm,dev,RAVDESS/data/songs/actor_16/03-02-02-02-02-01-...,ravdess
4,songs/actor_16/03-02-02-01-02-01-16.wav,ravdess_016,calm,dev,RAVDESS/data/songs/actor_16/03-02-02-01-02-01-...,ravdess
...,...,...,...,...,...,...
411,speech/actor_19/03-01-02-02-01-01-19.wav,ravdess_019,calm,dev,RAVDESS/data/speech/actor_19/03-01-02-02-01-01...,ravdess
412,speech/actor_19/03-01-07-01-02-01-19.wav,ravdess_019,disgust,dev,RAVDESS/data/speech/actor_19/03-01-07-01-02-01...,ravdess
413,speech/actor_19/03-01-03-02-01-02-19.wav,ravdess_019,happiness,dev,RAVDESS/data/speech/actor_19/03-01-03-02-01-02...,ravdess
414,speech/actor_19/03-01-03-02-02-02-19.wav,ravdess_019,happiness,dev,RAVDESS/data/speech/actor_19/03-01-03-02-02-02...,ravdess


In [18]:
# Final column layout of the dev manifest; 'old_path' is dropped here.
dev_files = dev_files.loc[:, ['file', 'speaker', 'emotion', 'source', 'split']]
dev_files

Unnamed: 0,file,speaker,emotion,source,split
0,RAVDESS/data/songs/actor_16/03-02-03-01-01-01-...,ravdess_016,happiness,ravdess,dev
1,RAVDESS/data/songs/actor_16/03-02-06-01-01-02-...,ravdess_016,fear,ravdess,dev
2,RAVDESS/data/songs/actor_16/03-02-03-02-02-02-...,ravdess_016,happiness,ravdess,dev
3,RAVDESS/data/songs/actor_16/03-02-02-02-02-01-...,ravdess_016,calm,ravdess,dev
4,RAVDESS/data/songs/actor_16/03-02-02-01-02-01-...,ravdess_016,calm,ravdess,dev
...,...,...,...,...,...
411,RAVDESS/data/speech/actor_19/03-01-02-02-01-01...,ravdess_019,calm,ravdess,dev
412,RAVDESS/data/speech/actor_19/03-01-07-01-02-01...,ravdess_019,disgust,ravdess,dev
413,RAVDESS/data/speech/actor_19/03-01-03-02-01-02...,ravdess_019,happiness,ravdess,dev
414,RAVDESS/data/speech/actor_19/03-01-03-02-02-02...,ravdess_019,happiness,ravdess,dev


In [19]:
# Write the dev manifest once; an existing CSV is left untouched.
if not os.path.exists('RAVDESS/ravdess_dev_files.csv'):
    dev_files.to_csv('RAVDESS/ravdess_dev_files.csv', index=False)
    print("File saved.")
else:
    print("File already exists.")

File saved.


### Test files

In [29]:
# Read the song and speech emotion tables of the test split.
tmp_song_test, tmp_speech_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        'RAVDESS/1.1.3/fe182b91/db.emotion.song.test.csv',
        'RAVDESS/1.1.3/fe182b91/db.emotion.speech.test.csv',
    )
)

# Stack both tables (song rows first) under a fresh 0..n-1 index.
tmp_test = pd.concat((tmp_song_test, tmp_speech_test), ignore_index=True)
tmp_test

Unnamed: 0,file,emotion,emotional intensity
0,songs/actor_24/03-02-02-01-01-01-24.wav,calm,normal
1,songs/actor_24/03-02-03-02-02-02-24.wav,happiness,strong
2,songs/actor_24/03-02-06-02-01-01-24.wav,fear,strong
3,songs/actor_24/03-02-03-01-02-01-24.wav,happiness,normal
4,songs/actor_24/03-02-02-01-01-02-24.wav,calm,normal
...,...,...,...
411,speech/actor_22/03-01-06-02-02-01-22.wav,fear,strong
412,speech/actor_22/03-01-07-02-02-02-22.wav,disgust,strong
413,speech/actor_22/03-01-07-01-02-02-22.wav,disgust,normal
414,speech/actor_22/03-01-05-01-02-02-22.wav,anger,normal


In [30]:
# Left join: keep every labelled test row and attach the per-file columns
# from tmp_files, matching on 'file'.
test_files = pd.merge(tmp_test, tmp_files, on='file', how='left')
test_files

Unnamed: 0,file,emotion,emotional intensity,speaker,duration,transcription,vocal channel
0,songs/actor_24/03-02-02-01-01-01-24.wav,calm,normal,24,5538875000,Kids are talking by the door,song
1,songs/actor_24/03-02-03-02-02-02-24.wav,happiness,strong,24,4671333333,Dogs are sitting by the door,song
2,songs/actor_24/03-02-06-02-01-01-24.wav,fear,strong,24,4537854167,Kids are talking by the door,song
3,songs/actor_24/03-02-03-01-02-01-24.wav,happiness,normal,24,4270937500,Dogs are sitting by the door,song
4,songs/actor_24/03-02-02-01-01-02-24.wav,calm,normal,24,5772416667,Kids are talking by the door,song
...,...,...,...,...,...,...,...
411,speech/actor_22/03-01-06-02-02-01-22.wav,fear,strong,22,3670333333,Dogs are sitting by the door,speech
412,speech/actor_22/03-01-07-02-02-02-22.wav,disgust,strong,22,4104083333,Dogs are sitting by the door,speech
413,speech/actor_22/03-01-07-01-02-02-22.wav,disgust,normal,22,3837166667,Dogs are sitting by the door,speech
414,speech/actor_22/03-01-05-01-02-02-22.wav,anger,normal,22,4037354167,Dogs are sitting by the door,speech


In [31]:
# Keep only the columns used from here on. `.copy()` makes the selection an
# independent frame, so the column assignments in the following cells do not
# write into a selection of the merged table (SettingWithCopyWarning).
test_files = test_files[['file', 'speaker', 'emotion']].copy()
test_files

Unnamed: 0,file,speaker,emotion
0,songs/actor_24/03-02-02-01-01-01-24.wav,24,calm
1,songs/actor_24/03-02-03-02-02-02-24.wav,24,happiness
2,songs/actor_24/03-02-06-02-01-01-24.wav,24,fear
3,songs/actor_24/03-02-03-01-02-01-24.wav,24,happiness
4,songs/actor_24/03-02-02-01-01-02-24.wav,24,calm
...,...,...,...
411,speech/actor_22/03-01-06-02-02-01-22.wav,22,fear
412,speech/actor_22/03-01-07-02-02-02-22.wav,22,disgust
413,speech/actor_22/03-01-07-01-02-02-22.wav,22,disgust
414,speech/actor_22/03-01-05-01-02-02-22.wav,22,anger


In [32]:
# Folder that the relative paths of the manifest are joined onto.
ravdess_root = os.path.join('RAVDESS', 'data')

# Build a new frame instead of mutating a column selection in place:
#   * the original relative path is kept as 'old_path',
#   * 'split' tags every row as test,
#   * 'file' becomes the path prefixed with ravdess_root.
# Resulting column order: old_path, speaker, emotion, split, file.
test_files = (
    test_files
    .rename(columns={'file': 'old_path'})
    .assign(
        split='test',
        file=lambda frame: frame['old_path'].map(
            lambda rel_path: os.path.join(ravdess_root, rel_path)
        ),
    )
)
test_files

Unnamed: 0,old_path,speaker,emotion,split,file
0,songs/actor_24/03-02-02-01-01-01-24.wav,24,calm,test,RAVDESS/data/songs/actor_24/03-02-02-01-01-01-...
1,songs/actor_24/03-02-03-02-02-02-24.wav,24,happiness,test,RAVDESS/data/songs/actor_24/03-02-03-02-02-02-...
2,songs/actor_24/03-02-06-02-01-01-24.wav,24,fear,test,RAVDESS/data/songs/actor_24/03-02-06-02-01-01-...
3,songs/actor_24/03-02-03-01-02-01-24.wav,24,happiness,test,RAVDESS/data/songs/actor_24/03-02-03-01-02-01-...
4,songs/actor_24/03-02-02-01-01-02-24.wav,24,calm,test,RAVDESS/data/songs/actor_24/03-02-02-01-01-02-...
...,...,...,...,...,...
411,speech/actor_22/03-01-06-02-02-01-22.wav,22,fear,test,RAVDESS/data/speech/actor_22/03-01-06-02-02-01...
412,speech/actor_22/03-01-07-02-02-02-22.wav,22,disgust,test,RAVDESS/data/speech/actor_22/03-01-07-02-02-02...
413,speech/actor_22/03-01-07-01-02-02-22.wav,22,disgust,test,RAVDESS/data/speech/actor_22/03-01-07-01-02-02...
414,speech/actor_22/03-01-05-01-02-02-22.wav,22,anger,test,RAVDESS/data/speech/actor_22/03-01-05-01-02-02...


In [33]:
# Tag the corpus of origin.
test_files['source'] = 'ravdess'

# Turn the integer speaker id into a zero-padded, corpus-prefixed string
# (24 -> 'ravdess_024').
test_files['speaker'] = test_files['speaker'].map(
    lambda speaker_id: f'ravdess_{speaker_id:03d}'
)
test_files

Unnamed: 0,old_path,speaker,emotion,split,file,source
0,songs/actor_24/03-02-02-01-01-01-24.wav,ravdess_024,calm,test,RAVDESS/data/songs/actor_24/03-02-02-01-01-01-...,ravdess
1,songs/actor_24/03-02-03-02-02-02-24.wav,ravdess_024,happiness,test,RAVDESS/data/songs/actor_24/03-02-03-02-02-02-...,ravdess
2,songs/actor_24/03-02-06-02-01-01-24.wav,ravdess_024,fear,test,RAVDESS/data/songs/actor_24/03-02-06-02-01-01-...,ravdess
3,songs/actor_24/03-02-03-01-02-01-24.wav,ravdess_024,happiness,test,RAVDESS/data/songs/actor_24/03-02-03-01-02-01-...,ravdess
4,songs/actor_24/03-02-02-01-01-02-24.wav,ravdess_024,calm,test,RAVDESS/data/songs/actor_24/03-02-02-01-01-02-...,ravdess
...,...,...,...,...,...,...
411,speech/actor_22/03-01-06-02-02-01-22.wav,ravdess_022,fear,test,RAVDESS/data/speech/actor_22/03-01-06-02-02-01...,ravdess
412,speech/actor_22/03-01-07-02-02-02-22.wav,ravdess_022,disgust,test,RAVDESS/data/speech/actor_22/03-01-07-02-02-02...,ravdess
413,speech/actor_22/03-01-07-01-02-02-22.wav,ravdess_022,disgust,test,RAVDESS/data/speech/actor_22/03-01-07-01-02-02...,ravdess
414,speech/actor_22/03-01-05-01-02-02-22.wav,ravdess_022,anger,test,RAVDESS/data/speech/actor_22/03-01-05-01-02-02...,ravdess


In [34]:
# Final column layout of the test manifest; 'old_path' is dropped here.
test_files = test_files.loc[:, ['file', 'speaker', 'emotion', 'source', 'split']]
test_files

Unnamed: 0,file,speaker,emotion,source,split
0,RAVDESS/data/songs/actor_24/03-02-02-01-01-01-...,ravdess_024,calm,ravdess,test
1,RAVDESS/data/songs/actor_24/03-02-03-02-02-02-...,ravdess_024,happiness,ravdess,test
2,RAVDESS/data/songs/actor_24/03-02-06-02-01-01-...,ravdess_024,fear,ravdess,test
3,RAVDESS/data/songs/actor_24/03-02-03-01-02-01-...,ravdess_024,happiness,ravdess,test
4,RAVDESS/data/songs/actor_24/03-02-02-01-01-02-...,ravdess_024,calm,ravdess,test
...,...,...,...,...,...
411,RAVDESS/data/speech/actor_22/03-01-06-02-02-01...,ravdess_022,fear,ravdess,test
412,RAVDESS/data/speech/actor_22/03-01-07-02-02-02...,ravdess_022,disgust,ravdess,test
413,RAVDESS/data/speech/actor_22/03-01-07-01-02-02...,ravdess_022,disgust,ravdess,test
414,RAVDESS/data/speech/actor_22/03-01-05-01-02-02...,ravdess_022,anger,ravdess,test


In [36]:
# Write the test manifest once; an existing CSV is left untouched.
if not os.path.exists('RAVDESS/ravdess_test_files.csv'):
    test_files.to_csv('RAVDESS/ravdess_test_files.csv', index=False)
    print("File saved.")
else:
    print("File already exists.")

File saved.


### Complete files 

In [38]:
# Stack the three split manifests (train, dev, test - in that order) into one
# table with a fresh 0..n-1 index.
ravdess_complete_files = pd.concat(
    (train_files, dev_files, test_files),
    ignore_index=True,
)
ravdess_complete_files

Unnamed: 0,file,speaker,emotion,source,split
0,RAVDESS/data/songs/actor_06/03-02-04-02-02-01-...,ravdess_006,sadness,ravdess,train
1,RAVDESS/data/songs/actor_06/03-02-02-02-01-01-...,ravdess_006,calm,ravdess,train
2,RAVDESS/data/songs/actor_06/03-02-03-02-01-02-...,ravdess_006,happiness,ravdess,train
3,RAVDESS/data/songs/actor_06/03-02-06-01-02-01-...,ravdess_006,fear,ravdess,train
4,RAVDESS/data/songs/actor_06/03-02-06-02-01-02-...,ravdess_006,fear,ravdess,train
...,...,...,...,...,...
2447,RAVDESS/data/speech/actor_22/03-01-06-02-02-01...,ravdess_022,fear,ravdess,test
2448,RAVDESS/data/speech/actor_22/03-01-07-02-02-02...,ravdess_022,disgust,ravdess,test
2449,RAVDESS/data/speech/actor_22/03-01-07-01-02-02...,ravdess_022,disgust,ravdess,test
2450,RAVDESS/data/speech/actor_22/03-01-05-01-02-02...,ravdess_022,anger,ravdess,test


In [39]:
# Write the combined manifest without the index column. Unlike the per-split
# saves above, this call has no existence check and overwrites the file.
ravdess_complete_files.to_csv('RAVDESS/ravdess_complete_files.csv', index=False)

## SAVEE

In [None]:
# Folder holding the SAVEE .wav files.
savee_root = "SAVEE/data"

# glob returns matches in arbitrary (filesystem) order; sort them so the row
# order of the manifest is the same on every run and machine.
file_paths = sorted(glob.glob(os.path.join(savee_root, "*.wav")))

# One row per audio file; label columns are added in the following cells.
savee_complete_df = pd.DataFrame(file_paths, columns=["file"])
print(savee_complete_df.shape)
savee_complete_df

In [None]:
EMOTION_MAPPING_SAVEE = {
    'a': 'anger', 
    'd': 'disgust',
    'f': 'fear',
    'h': 'happy',
    'n': 'neutral',
    'sa': 'sadness',
    'su': 'surprise'
}

pattern_savee = r"(?P<speaker>[a-zA-Z]+)_(?P<emotion>[a-z]+)\d+\.wav"

def ext_speaker(file_path: str) -> str:
    base_name = os.path.basename(file_path)
    match = re.match(pattern_savee, base_name)
    if match:
        return match.group("speaker")
    return "unknown"

def ext_emotion(file_path: str) -> str:
    base_name = os.path.basename(file_path)
    match = re.match(pattern_savee, base_name)
    if match:
        return EMOTION_MAPPING_SAVEE.get(match.group("emotion"), "unknown")
    return "unknown"


In [None]:
# Derive the label columns from each file path with the extractors defined in
# the previous cell, then tag the corpus of origin.
savee_complete_df["emotion"] = savee_complete_df["file"].map(ext_emotion)
savee_complete_df["speaker"] = savee_complete_df["file"].map(ext_speaker)
savee_complete_df["source"] = "savee"
savee_complete_df

In [None]:
# Write the SAVEE manifest once; an existing CSV is left untouched.
if not os.path.exists("SAVEE/savee_complete_files.csv"):
    savee_complete_df.to_csv("SAVEE/savee_complete_files.csv", index=False)
    print("File saved successfully.")
else:
    print("File already exists. Skipping save to avoid overwriting.")

## TESS

In [None]:
# Parse the space-separated TESS manifest (no header row). The first field is
# kept as 'file'; the other three are placeholders dropped in the next cell.
tess_complete_df = pd.read_csv(
    "TESS/MANIFEST.TXT",
    sep=" ",
    header=None,
    names=["file", "unused1", "unused2", "unused3"],
)
tess_complete_df


In [None]:
# Folder that the manifest paths are joined onto.
tess_root = os.path.join("TESS", "data")

# Drop the placeholder columns and prefix every path in one expression that
# returns a new frame, instead of an `inplace=True` drop followed by a column
# overwrite on the same object.
tess_complete_df = (
    tess_complete_df
    .drop(columns=["unused1", "unused2", "unused3"])
    .assign(
        file=lambda frame: frame["file"].map(
            lambda rel_path: os.path.join(tess_root, rel_path)
        )
    )
)
tess_complete_df

In [None]:
# <UPPER-CASE speaker>_<lower-case token>_<lower-case emotion>.wav
# Stored under its own name so the SAVEE regex (`pattern_savee`) is not
# overwritten by the TESS one.
pattern_tess = r"(?P<speaker>[A-Z]+)_(?P<expression>[a-z]+)_(?P<emotion>[a-z]+)\.wav"


def _tess_field(file_path: str, field: str) -> str:
    """Return the named regex group parsed from the file's base name, or "unknown" if it does not match."""
    match = re.match(pattern_tess, os.path.basename(file_path))
    return match.group(field) if match else "unknown"


# NOTE: `ext_speaker` / `ext_emotion` below replace the SAVEE functions of the
# same names defined in an earlier cell; re-run that cell before processing
# SAVEE files again.
def ext_speaker(file_path: str) -> str:
    """Extract the speaker id from a TESS file path.

    Args:
        file_path: path to a TESS audio file; only the base name is parsed.

    Returns:
        The speaker part of the file name, or "unknown" when the base name
        does not match `pattern_tess`.
    """
    return _tess_field(file_path, "speaker")


def ext_emotion(file_path: str) -> str:
    """Extract the emotion token from a TESS file path.

    Args:
        file_path: path to a TESS audio file; only the base name is parsed.

    Returns:
        The emotion part of the file name exactly as written there, or
        "unknown" when the base name does not match `pattern_tess`.
    """
    # NOTE(review): the token is returned as-is and is not mapped onto the
    # label names used for the other corpora - confirm whether that is
    # normalised downstream.
    return _tess_field(file_path, "emotion")



In [None]:
# Derive the label columns from each file path with the extractors defined in
# the previous cell.
tess_complete_df["speaker"] = tess_complete_df["file"].apply(ext_speaker)
tess_complete_df["emotion"] = tess_complete_df["file"].apply(ext_emotion)

# Tag the corpus of origin, as the RAVDESS and SAVEE tables do with their
# 'source' column.
tess_complete_df["source"] = "tess"
tess_complete_df

In [None]:
# Write the TESS manifest once; an existing CSV is left untouched.
if not os.path.exists("TESS/tess_complete_files.csv"):
    tess_complete_df.to_csv("TESS/tess_complete_files.csv", index=False)
    print("File saved successfully.")
else:
    print("File already exists. Skipping save to avoid overwriting.")

## EMOITA

In [40]:
# Re-query the audb catalogue and narrow the lookup to emozionalmente.
datasets = audb.available()

targets = ["emozionalmente"]

# Show only the target rows: name ascending, version descending.
(
    datasets
    .loc[datasets.index.isin(targets)]
    .sort_values(by=["name", "version"], ascending=[True, False])
)

Unnamed: 0_level_0,backend,host,repository,version
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
emozionalmente,s3,s3.dualstack.eu-north-1.amazonaws.com,audb-public,1.0.0


In [41]:
class audb_dataset:
    def __init__(self, name: str, version: str | None = None):
        self.name = name
        self.version = version
        
emoita = audb_dataset(name = "emozionalmente", version = "1.0.0")

In [None]:
# Load the dataset described by `emoita` into the current directory,
# with the conversion options listed below.
emoita_load_options = {
    "version": emoita.version,
    "format": "wav",
    "sampling_rate": 16000,
    "mixdown": True,
    "pickle_tables": False,
    "cache_root": "./",
}
emoita_db = audb.load(emoita.name, **emoita_load_options)

### Train files

In [42]:
# Emotion labels (file, emotion) for the train split.
train_labels_csv = 'emozionalmente/1.0.0/fe182b91/db.emotion.categories.train.desired.csv'
tmp_train = pd.read_csv(train_labels_csv)
tmp_train

Unnamed: 0,file,emotion
0,audio/1613671614352.wav,anger
1,audio/1613658275427.wav,anger
2,audio/1613324357435.wav,anger
3,audio/1614274086698.wav,anger
4,audio/1612982146424.wav,anger
...,...,...
3983,audio/1616505528024.wav,surprise
3984,audio/1614627012378.wav,surprise
3985,audio/1613310701161.wav,surprise
3986,audio/1613403675624.wav,surprise


In [43]:
# Number of train files per emotion label.
tmp_train["emotion"].value_counts()

emotion
happiness    588
neutral      588
surprise     571
fear         566
disgust      564
anger        559
sadness      552
Name: count, dtype: int64

In [44]:
# Per-file metadata (file, speaker, transcription) for the whole dataset.
emoita_files_csv = 'emozionalmente/1.0.0/fe182b91/db.files.csv'
emoita_raw_files = pd.read_csv(emoita_files_csv)
emoita_raw_files

Unnamed: 0,file,speaker,transcription
0,audio/1614951631538.wav,d4bbc5043503b9aed309ede00854ee48937684b57a1cc0...,s0
1,audio/1613671614352.wav,b830cbf6e79aeeeec1fbf2ef3be741628ffc574c9b15f3...,s1
2,audio/1613994293936.wav,acdd987f9c1700b25898d3bd30c201df5ad4f34b3ca5eb...,s2
3,audio/1617135480050.wav,28043d2516f2d956b81ce66cc01fbd427ac54ff1eb3a07...,s0
4,audio/1613658275427.wav,5b8476bb94a4a1a17df32b417ec035029f48e4487c9055...,s3
...,...,...,...
6897,audio/1613310701161.wav,1ae2a40d6e3a1b8c6041da953f650dc017fea8c646648b...,s0
6898,audio/1613061195435.wav,f4d0c9178a221556efe45a4ea3d2bb77b470626409af7f...,s17
6899,audio/1615231434303.wav,7ce215563f7b620fdd32c6618a95215100b4fbede0eb4f...,s14
6900,audio/1613403675624.wav,1dab1d691c3cb60f475163ff79b4fa2f58bac807098245...,s4


In [45]:
# Attach the speaker to every train file; the transcription is not needed.
emoita_train = pd.merge(
    tmp_train,
    emoita_raw_files,
    how="left",
    on="file",
).drop(columns=["transcription"])
emoita_train

Unnamed: 0,file,emotion,speaker
0,audio/1613671614352.wav,anger,b830cbf6e79aeeeec1fbf2ef3be741628ffc574c9b15f3...
1,audio/1613658275427.wav,anger,5b8476bb94a4a1a17df32b417ec035029f48e4487c9055...
2,audio/1613324357435.wav,anger,f78d765ec69ed2306cf544ae70b352f27ac00ced795ad5...
3,audio/1614274086698.wav,anger,33fae0227059f08853c11257fe73f4976d4527614588b9...
4,audio/1612982146424.wav,anger,b3c7c357c77848765caaded035be8800169fb4dc53996f...
...,...,...,...
3983,audio/1616505528024.wav,surprise,f78d765ec69ed2306cf544ae70b352f27ac00ced795ad5...
3984,audio/1614627012378.wav,surprise,8cd4289dc47d0ea3a3deca73495829e5c4a9c96b1c4616...
3985,audio/1613310701161.wav,surprise,1ae2a40d6e3a1b8c6041da953f650dc017fea8c646648b...
3986,audio/1613403675624.wav,surprise,1dab1d691c3cb60f475163ff79b4fa2f58bac807098245...


In [46]:
# Keep the original relative path as "old_filename" and point "file" at the
# local copy under EMOITA/data (same base name).
emoita_root = os.path.join("EMOITA", "data")

# Rename only once: re-running an unconditional rename would leave two
# "old_filename" columns and make the lookup below fail.
if "old_filename" not in emoita_train.columns:
    emoita_train = emoita_train.rename(columns={"file": "old_filename"})

emoita_train["file"] = emoita_train["old_filename"].apply(
    lambda x: os.path.join(emoita_root, os.path.basename(x))
)
emoita_train



Unnamed: 0,old_filename,emotion,speaker,file
0,audio/1613671614352.wav,anger,b830cbf6e79aeeeec1fbf2ef3be741628ffc574c9b15f3...,EMOITA/data/1613671614352.wav
1,audio/1613658275427.wav,anger,5b8476bb94a4a1a17df32b417ec035029f48e4487c9055...,EMOITA/data/1613658275427.wav
2,audio/1613324357435.wav,anger,f78d765ec69ed2306cf544ae70b352f27ac00ced795ad5...,EMOITA/data/1613324357435.wav
3,audio/1614274086698.wav,anger,33fae0227059f08853c11257fe73f4976d4527614588b9...,EMOITA/data/1614274086698.wav
4,audio/1612982146424.wav,anger,b3c7c357c77848765caaded035be8800169fb4dc53996f...,EMOITA/data/1612982146424.wav
...,...,...,...,...
3983,audio/1616505528024.wav,surprise,f78d765ec69ed2306cf544ae70b352f27ac00ced795ad5...,EMOITA/data/1616505528024.wav
3984,audio/1614627012378.wav,surprise,8cd4289dc47d0ea3a3deca73495829e5c4a9c96b1c4616...,EMOITA/data/1614627012378.wav
3985,audio/1613310701161.wav,surprise,1ae2a40d6e3a1b8c6041da953f650dc017fea8c646648b...,EMOITA/data/1613310701161.wav
3986,audio/1613403675624.wav,surprise,1dab1d691c3cb60f475163ff79b4fa2f58bac807098245...,EMOITA/data/1613403675624.wav


In [47]:
# Speaker metadata table (speaker hash, age, gender, mother_tongue).
emoita_speakers_csv = "emozionalmente/1.0.0/fe182b91/db.speaker.csv"
emoita_speakers = pd.read_csv(emoita_speakers_csv)
emoita_speakers

Unnamed: 0,speaker,age,gender,mother_tongue
0,f04bb0e6361c05acba4d5185a2d372177bdb77898c1a21...,30,female,italian
1,b395b3d82da20e930e20220a5ab4de9adb9a12aaa7cecf...,26,male,italian
2,69a6f8fa8a7d2e7338bddab3f872ba3532c914f62ebfeb...,26,female,italian
3,5b1d678a1b4936c172e8c239785f89a2693f7603890c41...,54,female,italian
4,1718aa7e2d0821e3ade5783b4e045ab5930ec22054a0fe...,11,female,italian
...,...,...,...,...
426,90faa565f21422b948417c0e69766d0df3331b1b347f88...,33,female,italian
427,faf6ce1ea46bca3ee95197b53c7137840d63e6caddc447...,27,male,italian
428,7da9f2113516c871e275194f8b2e7521f0c3fb77728ca9...,27,male,italian
429,39bf573469551ecb760d59bd0e38e18c2727456e62a732...,22,male,italian


In [48]:
# Show one full (untruncated) speaker identifier.
emoita_speakers["speaker"].iloc[0]

'f04bb0e6361c05acba4d5185a2d372177bdb77898c1a213f5dfe5c1d60331ea1'

In [51]:
# Replace the long speaker hashes with short sequential ids
# ("emoita_001", "emoita_002", ...) in order of first appearance.
unique_hashes = emoita_speakers['speaker'].unique()

speaker_map = {
    speaker_hash: f"emoita_{position:03d}"
    for position, speaker_hash in enumerate(unique_hashes, start=1)
}

emoita_speakers["speaker_new_id"] = emoita_speakers["speaker"].map(speaker_map)
emoita_speakers

Unnamed: 0,speaker,age,gender,mother_tongue,speaker_new_id
0,f04bb0e6361c05acba4d5185a2d372177bdb77898c1a21...,30,female,italian,emoita_001
1,b395b3d82da20e930e20220a5ab4de9adb9a12aaa7cecf...,26,male,italian,emoita_002
2,69a6f8fa8a7d2e7338bddab3f872ba3532c914f62ebfeb...,26,female,italian,emoita_003
3,5b1d678a1b4936c172e8c239785f89a2693f7603890c41...,54,female,italian,emoita_004
4,1718aa7e2d0821e3ade5783b4e045ab5930ec22054a0fe...,11,female,italian,emoita_005
...,...,...,...,...,...
426,90faa565f21422b948417c0e69766d0df3331b1b347f88...,33,female,italian,emoita_427
427,faf6ce1ea46bca3ee95197b53c7137840d63e6caddc447...,27,male,italian,emoita_428
428,7da9f2113516c871e275194f8b2e7521f0c3fb77728ca9...,27,male,italian,emoita_429
429,39bf573469551ecb760d59bd0e38e18c2727456e62a732...,22,male,italian,emoita_430


In [50]:
# Write the speaker table once; an existing CSV is never overwritten.
speakers_csv_path = "EMOITA/speakers.csv"
if not os.path.exists(speakers_csv_path):
    emoita_speakers.to_csv(speakers_csv_path, index=False)
    print("File saved successfully.")
else:
    print("File already exists. Skipping save to avoid overwriting.")

File already exists. Skipping save to avoid overwriting.


In [53]:
# Swap the speaker hashes for the short ids defined in `speaker_map`.
emoita_train = emoita_train.replace({"speaker": speaker_map})
emoita_train

Unnamed: 0,old_filename,emotion,speaker,file
0,audio/1613671614352.wav,anger,emoita_321,EMOITA/data/1613671614352.wav
1,audio/1613658275427.wav,anger,emoita_303,EMOITA/data/1613658275427.wav
2,audio/1613324357435.wav,anger,emoita_314,EMOITA/data/1613324357435.wav
3,audio/1614274086698.wav,anger,emoita_109,EMOITA/data/1614274086698.wav
4,audio/1612982146424.wav,anger,emoita_179,EMOITA/data/1612982146424.wav
...,...,...,...,...
3983,audio/1616505528024.wav,surprise,emoita_314,EMOITA/data/1616505528024.wav
3984,audio/1614627012378.wav,surprise,emoita_079,EMOITA/data/1614627012378.wav
3985,audio/1613310701161.wav,surprise,emoita_206,EMOITA/data/1613310701161.wav
3986,audio/1613403675624.wav,surprise,emoita_258,EMOITA/data/1613403675624.wav


In [54]:
# Keep the final columns and tag every row with its source and split.
# `.assign` builds a new frame, so the constant columns are not written onto
# a column selection of the previous frame (which raises SettingWithCopyWarning).
emoita_train = emoita_train[["file", "speaker", "emotion"]].assign(
    source="emoita",
    split="train",
)
emoita_train

Unnamed: 0,file,speaker,emotion,source,split
0,EMOITA/data/1613671614352.wav,emoita_321,anger,emoita,train
1,EMOITA/data/1613658275427.wav,emoita_303,anger,emoita,train
2,EMOITA/data/1613324357435.wav,emoita_314,anger,emoita,train
3,EMOITA/data/1614274086698.wav,emoita_109,anger,emoita,train
4,EMOITA/data/1612982146424.wav,emoita_179,anger,emoita,train
...,...,...,...,...,...
3983,EMOITA/data/1616505528024.wav,emoita_314,surprise,emoita,train
3984,EMOITA/data/1614627012378.wav,emoita_079,surprise,emoita,train
3985,EMOITA/data/1613310701161.wav,emoita_206,surprise,emoita,train
3986,EMOITA/data/1613403675624.wav,emoita_258,surprise,emoita,train


In [56]:
# Write the train split once; an existing CSV is never overwritten.
train_csv_path = "EMOITA/emoita_train_files.csv"
if not os.path.exists(train_csv_path):
    emoita_train.to_csv(train_csv_path, index=False)
    print("File saved successfully.")
else:
    print("File already exists. Skipping save to avoid overwriting.")

File already exists. Skipping save to avoid overwriting.


### Dev files

In [57]:
# Emotion labels (file, emotion) for the dev split.
dev_labels_csv = 'emozionalmente/1.0.0/fe182b91/db.emotion.categories.dev.desired.csv'
tmp_dev = pd.read_csv(dev_labels_csv)
tmp_dev

Unnamed: 0,file,emotion
0,audio/1614951631538.wav,anger
1,audio/1613994293936.wav,anger
2,audio/1613562322650.wav,anger
3,audio/1614266094803.wav,anger
4,audio/1615229375102.wav,anger
...,...,...
1681,audio/1613210989194.wav,surprise
1682,audio/1613849744545.wav,surprise
1683,audio/1615239203008.wav,surprise
1684,audio/1615228349855.wav,surprise


In [58]:
# Attach the speaker to every dev file; the transcription is not needed.
dev_files = pd.merge(
    tmp_dev,
    emoita_raw_files,
    how="left",
    on="file",
).drop(columns=["transcription"])
dev_files

Unnamed: 0,file,emotion,speaker
0,audio/1614951631538.wav,anger,d4bbc5043503b9aed309ede00854ee48937684b57a1cc0...
1,audio/1613994293936.wav,anger,acdd987f9c1700b25898d3bd30c201df5ad4f34b3ca5eb...
2,audio/1613562322650.wav,anger,4627bdb3ed141168a5e8663c86bfae205566c5bd27779a...
3,audio/1614266094803.wav,anger,07f763295ba2764d034d200456b42eb8bf498d341c586a...
4,audio/1615229375102.wav,anger,253387e8620ee1896c76809782406e139bce35860aaa0d...
...,...,...,...
1681,audio/1613210989194.wav,surprise,de0fdef2b5dea85416a04115cf8c9c1a868dc5ca712ae2...
1682,audio/1613849744545.wav,surprise,d4bbc5043503b9aed309ede00854ee48937684b57a1cc0...
1683,audio/1615239203008.wav,surprise,00a0efc0faaeb5a288481a25b5d0b7d4c0b070120c6d61...
1684,audio/1615228349855.wav,surprise,8e0c2c89852e802de022aa0220dbc78053cfa0a9d8abe2...


In [59]:
# Swap the speaker hashes for the short ids defined in `speaker_map`.
dev_files = dev_files.replace({"speaker": speaker_map})
dev_files

Unnamed: 0,file,emotion,speaker
0,audio/1614951631538.wav,anger,emoita_032
1,audio/1613994293936.wav,anger,emoita_139
2,audio/1613562322650.wav,anger,emoita_219
3,audio/1614266094803.wav,anger,emoita_169
4,audio/1615229375102.wav,anger,emoita_161
...,...,...,...
1681,audio/1613210989194.wav,surprise,emoita_201
1682,audio/1613849744545.wav,surprise,emoita_032
1683,audio/1615239203008.wav,surprise,emoita_008
1684,audio/1615228349855.wav,surprise,emoita_358


In [60]:
# Sanity check: no speaker may appear in both the train and the dev split.
speaker_dev = set(dev_files["speaker"].unique())
speaker_train = set(emoita_train["speaker"].unique())

train_dev_overlap = speaker_train & speaker_dev
if not train_dev_overlap:
    print("No overlapping speakers between train and dev sets.")
else:
    print("Overlapping speakers found.")
    # A bare expression inside a branch is not displayed by the notebook,
    # so the offending speakers are printed explicitly.
    print(sorted(train_dev_overlap, key=str))

No overlapping speakers between train and dev sets.


In [61]:
# Keep the original relative path as "old_filename" and point "file" at the
# local copy under EMOITA/data (same base name).
emoita_root = os.path.join("EMOITA", "data")

# Rename only once: re-running an unconditional rename would leave two
# "old_filename" columns and make the lookup below fail.
if "old_filename" not in dev_files.columns:
    dev_files = dev_files.rename(columns={"file": "old_filename"})

dev_files["file"] = dev_files["old_filename"].apply(
    lambda x: os.path.join(emoita_root, os.path.basename(x))
)
dev_files

Unnamed: 0,old_filename,emotion,speaker,file
0,audio/1614951631538.wav,anger,emoita_032,EMOITA/data/1614951631538.wav
1,audio/1613994293936.wav,anger,emoita_139,EMOITA/data/1613994293936.wav
2,audio/1613562322650.wav,anger,emoita_219,EMOITA/data/1613562322650.wav
3,audio/1614266094803.wav,anger,emoita_169,EMOITA/data/1614266094803.wav
4,audio/1615229375102.wav,anger,emoita_161,EMOITA/data/1615229375102.wav
...,...,...,...,...
1681,audio/1613210989194.wav,surprise,emoita_201,EMOITA/data/1613210989194.wav
1682,audio/1613849744545.wav,surprise,emoita_032,EMOITA/data/1613849744545.wav
1683,audio/1615239203008.wav,surprise,emoita_008,EMOITA/data/1615239203008.wav
1684,audio/1615228349855.wav,surprise,emoita_358,EMOITA/data/1615228349855.wav


In [62]:
# Keep the final columns and tag every row with its source and split.
# `.assign` builds a new frame, so the constant columns are not written onto
# a column selection of the previous frame (which raises SettingWithCopyWarning).
dev_files = dev_files[["file", "speaker", "emotion"]].assign(
    source="emoita",
    split="dev",
)
dev_files

Unnamed: 0,file,speaker,emotion,source,split
0,EMOITA/data/1614951631538.wav,emoita_032,anger,emoita,dev
1,EMOITA/data/1613994293936.wav,emoita_139,anger,emoita,dev
2,EMOITA/data/1613562322650.wav,emoita_219,anger,emoita,dev
3,EMOITA/data/1614266094803.wav,emoita_169,anger,emoita,dev
4,EMOITA/data/1615229375102.wav,emoita_161,anger,emoita,dev
...,...,...,...,...,...
1681,EMOITA/data/1613210989194.wav,emoita_201,surprise,emoita,dev
1682,EMOITA/data/1613849744545.wav,emoita_032,surprise,emoita,dev
1683,EMOITA/data/1615239203008.wav,emoita_008,surprise,emoita,dev
1684,EMOITA/data/1615228349855.wav,emoita_358,surprise,emoita,dev


In [64]:
# Write the dev split once; an existing CSV is never overwritten.
dev_csv_path = "EMOITA/emoita_dev_files.csv"
if not os.path.exists(dev_csv_path):
    dev_files.to_csv(dev_csv_path, index=False)
    print("File saved successfully.")
else:
    print("File already exists. Skipping save to avoid overwriting.")

File already exists. Skipping save to avoid overwriting.


### Test files

In [65]:
# Emotion labels (file, emotion) for the test split.
test_labels_csv = 'emozionalmente/1.0.0/fe182b91/db.emotion.categories.test.desired.csv'
tmp_test = pd.read_csv(test_labels_csv)
tmp_test

Unnamed: 0,file,emotion
0,audio/1617135480050.wav,anger
1,audio/1613586149044.wav,anger
2,audio/1617135448748.wav,anger
3,audio/1616505237674.wav,anger
4,audio/1614359087222.wav,anger
...,...,...
1223,audio/1614266389200.wav,surprise
1224,audio/1613821821107.wav,surprise
1225,audio/1614196499838.wav,surprise
1226,audio/1613061195435.wav,surprise


In [66]:
# Attach the speaker to every test file; the transcription is not needed.
test_files = pd.merge(
    tmp_test,
    emoita_raw_files,
    how="left",
    on="file",
).drop(columns=["transcription"])
test_files

Unnamed: 0,file,emotion,speaker
0,audio/1617135480050.wav,anger,28043d2516f2d956b81ce66cc01fbd427ac54ff1eb3a07...
1,audio/1613586149044.wav,anger,0ec8b058931916615ae00bce3d91a997ae440eb9d51ac3...
2,audio/1617135448748.wav,anger,28043d2516f2d956b81ce66cc01fbd427ac54ff1eb3a07...
3,audio/1616505237674.wav,anger,3f9b6ed1d2adc407ef668b8c89a89fe022f2233d94ce94...
4,audio/1614359087222.wav,anger,9299497bc321d5d9ece8f0e6f4932161650ff277c34466...
...,...,...,...
1223,audio/1614266389200.wav,surprise,8c5cae1948feaaa972f368641dedda6eea6e08e853b649...
1224,audio/1613821821107.wav,surprise,f04bb0e6361c05acba4d5185a2d372177bdb77898c1a21...
1225,audio/1614196499838.wav,surprise,b069c4c181f02bf6e0d1b29de3b7e28a4529e315dbb223...
1226,audio/1613061195435.wav,surprise,f4d0c9178a221556efe45a4ea3d2bb77b470626409af7f...


In [67]:
# Swap the speaker hashes for the short ids defined in `speaker_map`.
test_files = test_files.replace({"speaker": speaker_map})
test_files

Unnamed: 0,file,emotion,speaker
0,audio/1617135480050.wav,anger,emoita_413
1,audio/1613586149044.wav,anger,emoita_307
2,audio/1617135448748.wav,anger,emoita_413
3,audio/1616505237674.wav,anger,emoita_168
4,audio/1614359087222.wav,anger,emoita_091
...,...,...,...
1223,audio/1614266389200.wav,surprise,emoita_051
1224,audio/1613821821107.wav,surprise,emoita_001
1225,audio/1614196499838.wav,surprise,emoita_152
1226,audio/1613061195435.wav,surprise,emoita_187


In [68]:
# Sanity check: no test speaker may appear in the train or the dev split.
speaker_dev = set(dev_files["speaker"].unique())
speaker_train = set(emoita_train["speaker"].unique())
speaker_test = set(test_files["speaker"].unique())

# Test speakers shared with either of the other two splits.
test_overlap = speaker_test & (speaker_dev | speaker_train)
if not test_overlap:
    print("No overlapping speakers between train or dev and test sets.")
else:
    print("Overlapping speakers found.")
    # Report the speakers that actually leak into the test split; a bare
    # expression inside a branch would not be displayed by the notebook.
    print(sorted(test_overlap, key=str))

No overlapping speakers between train or dev and test sets.


In [69]:
# Keep the original relative path as "old_filename" and point "file" at the
# local copy under EMOITA/data (same base name).
emoita_root = os.path.join("EMOITA", "data")

# Rename only once: re-running an unconditional rename would leave two
# "old_filename" columns and make the lookup below fail.
if "old_filename" not in test_files.columns:
    test_files = test_files.rename(columns={"file": "old_filename"})

test_files["file"] = test_files["old_filename"].apply(
    lambda x: os.path.join(emoita_root, os.path.basename(x))
)
test_files

Unnamed: 0,old_filename,emotion,speaker,file
0,audio/1617135480050.wav,anger,emoita_413,EMOITA/data/1617135480050.wav
1,audio/1613586149044.wav,anger,emoita_307,EMOITA/data/1613586149044.wav
2,audio/1617135448748.wav,anger,emoita_413,EMOITA/data/1617135448748.wav
3,audio/1616505237674.wav,anger,emoita_168,EMOITA/data/1616505237674.wav
4,audio/1614359087222.wav,anger,emoita_091,EMOITA/data/1614359087222.wav
...,...,...,...,...
1223,audio/1614266389200.wav,surprise,emoita_051,EMOITA/data/1614266389200.wav
1224,audio/1613821821107.wav,surprise,emoita_001,EMOITA/data/1613821821107.wav
1225,audio/1614196499838.wav,surprise,emoita_152,EMOITA/data/1614196499838.wav
1226,audio/1613061195435.wav,surprise,emoita_187,EMOITA/data/1613061195435.wav


In [70]:
# Keep the final columns and tag every row with its source and split.
# `.assign` builds a new frame, so the constant columns are not written onto
# a column selection of the previous frame (which raises SettingWithCopyWarning).
test_files = test_files[["file", "speaker", "emotion"]].assign(
    source="emoita",
    split="test",
)
test_files

Unnamed: 0,file,speaker,emotion,source,split
0,EMOITA/data/1617135480050.wav,emoita_413,anger,emoita,test
1,EMOITA/data/1613586149044.wav,emoita_307,anger,emoita,test
2,EMOITA/data/1617135448748.wav,emoita_413,anger,emoita,test
3,EMOITA/data/1616505237674.wav,emoita_168,anger,emoita,test
4,EMOITA/data/1614359087222.wav,emoita_091,anger,emoita,test
...,...,...,...,...,...
1223,EMOITA/data/1614266389200.wav,emoita_051,surprise,emoita,test
1224,EMOITA/data/1613821821107.wav,emoita_001,surprise,emoita,test
1225,EMOITA/data/1614196499838.wav,emoita_152,surprise,emoita,test
1226,EMOITA/data/1613061195435.wav,emoita_187,surprise,emoita,test


In [71]:
# Save the test split with the same no-overwrite guard used for every other
# CSV in this notebook; an unconditional `to_csv` silently replaces an
# existing file.
if os.path.exists("EMOITA/emoita_test_files.csv"):
    print("File already exists. Skipping save to avoid overwriting.")
else:
    test_files.to_csv("EMOITA/emoita_test_files.csv", index = False)
    print("File saved successfully.")

In [None]:
# Write the test split once; an existing CSV is never overwritten.
test_csv_path = "EMOITA/emoita_test_files.csv"
if not os.path.exists(test_csv_path):
    test_files.to_csv(test_csv_path, index=False)
    print("File saved successfully.")
else:
    print("File already exists. Skipping save to avoid overwriting.")

In [72]:
# Stack the train, dev and test tables (in that order) under a fresh 0..n-1 index.
emoita_splits = [emoita_train, dev_files, test_files]
emoita_complete_files = pd.concat(emoita_splits, ignore_index=True)
emoita_complete_files

Unnamed: 0,file,speaker,emotion,source,split
0,EMOITA/data/1613671614352.wav,emoita_321,anger,emoita,train
1,EMOITA/data/1613658275427.wav,emoita_303,anger,emoita,train
2,EMOITA/data/1613324357435.wav,emoita_314,anger,emoita,train
3,EMOITA/data/1614274086698.wav,emoita_109,anger,emoita,train
4,EMOITA/data/1612982146424.wav,emoita_179,anger,emoita,train
...,...,...,...,...,...
6897,EMOITA/data/1614266389200.wav,emoita_051,surprise,emoita,test
6898,EMOITA/data/1613821821107.wav,emoita_001,surprise,emoita,test
6899,EMOITA/data/1614196499838.wav,emoita_152,surprise,emoita,test
6900,EMOITA/data/1613061195435.wav,emoita_187,surprise,emoita,test


In [74]:
# Write the combined table once; an existing CSV is never overwritten.
complete_csv_path = "EMOITA/emoita_complete_files.csv"
if not os.path.exists(complete_csv_path):
    emoita_complete_files.to_csv(complete_csv_path, index=False)
    print("File saved successfully.")
else:
    print("File already exists. Skipping save to avoid overwriting.")

File already exists. Skipping save to avoid overwriting.
