In [3]:
import pandas as pd
import torch
import torchaudio
from torch.utils.data import DataLoader

from DataProvider import DataProvider
from AudioFeaturesExtraction.AudioDataset import AudioDataset

import os
# Set to True to execute DataProvider().run() in the cell below; leave False to skip it.
# NOTE(review): presumably the provider only needs to run once to prepare the audio
# files read later in this notebook — confirm against DataProvider.
RUN_DATA_PROVIDER = False

## Generate the data

In [4]:
# Optional step: only executed when the RUN_DATA_PROVIDER flag is True.
# NOTE(review): DataProvider.run() presumably generates the *_audio directories
# that the following cells list — confirm against the DataProvider module.
if RUN_DATA_PROVIDER: 
    dp = DataProvider()
    dp.run()

In [5]:
def _sorted_paths(directory):
    """
    List a directory and return its entries as paths.

    :param directory: directory to list
    :return: entries of ``directory`` in sorted name order, each joined onto ``directory``
    """
    return [os.path.join(directory, name) for name in sorted(os.listdir(directory))]


# One directory of audio files per data split.
train_directory = 'train_audio'
test_directory = 'test_audio'
dev_directory = 'dev_audio'

# Sorting is plain string ordering of the entry names (so 'dia10...' precedes 'dia2...').
train_files = _sorted_paths(train_directory)
test_files = _sorted_paths(test_directory)
dev_files = _sorted_paths(dev_directory)

## Generate Labels

In [6]:
# Sentiment name -> integer class id stored in the 'label' column.
labels_dict = {
    'negative': 0,
    'neutral': 1,
    'positive': 2
}


def _load_split_info(csv_path):
    """
    Read one split's metadata CSV and add the 'file_key' and 'label' columns.

    :param csv_path: path of a CSV with 'Dialogue_ID', 'Utterance_ID' and 'Sentiment' columns
    :return: DataFrame sorted by 'file_key', with 'label' mapped from 'Sentiment'
    """
    info = pd.read_csv(csv_path)
    dialogue = info['Dialogue_ID'].astype(str)
    utterance = info['Utterance_ID'].astype(str)
    # Key has the form 'dia<Dialogue_ID>_utt<Utterance_ID>'.
    info['file_key'] = 'dia' + dialogue + '_' + 'utt' + utterance
    # The key is a string, so this is a lexicographic sort, not a numeric one.
    info = info.sort_values(by='file_key')
    # Sentiments missing from labels_dict become NaN under Series.map.
    info['label'] = info['Sentiment'].map(labels_dict)
    return info


test_info = _load_split_info('MELD.Raw/test_sent_emo.csv')
dev_info = _load_split_info('MELD.Raw/dev_sent_emo.csv')
train_info = _load_split_info('MELD.Raw/train/train_sent_emo.csv')

In [7]:
def _presence_frame(files, info):
    """
    Build a frame flagging the file keys for which an audio file was found.

    :param files: paths of the audio files of one split
    :param info: metadata frame of the same split; only the dtype of its
                 'file_key' column is used, so the merge key dtypes match
    :return: DataFrame with columns 'file_key' (file name up to its first '.')
             and 'is_in' (always 1)
    """
    # The paths were built with os.path.join, so the separator is platform
    # dependent; basename() extracts the file name without assuming '/'.
    keys = [os.path.basename(path).split('.')[0] for path in files]
    frame = pd.DataFrame({'file_key': keys, 'is_in': [1] * len(keys)})
    frame['file_key'] = frame['file_key'].astype(info['file_key'].dtype)
    return frame


is_in_train = _presence_frame(train_files, train_info)
is_in_test = _presence_frame(test_files, test_info)
is_in_dev = _presence_frame(dev_files, dev_info)

In [8]:
def _keep_files_with_info(files, info):
    """
    Keep only the audio files whose key appears in the metadata frame.

    :param files: audio file paths of one split
    :param info: metadata frame of the same split, with a 'file_key' column
    :return: sorted list of the paths whose file name (up to its first '.')
             is one of the values in info['file_key']
    """
    # A set gives constant-time membership tests instead of scanning the
    # whole column once per file.
    known_keys = set(info['file_key'])
    # basename() rather than split('/')[1]: the paths come from os.path.join,
    # whose separator is platform dependent.
    return sorted(path for path in files
                  if os.path.basename(path).split('.')[0] in known_keys)


def _keep_info_with_files(info, is_in):
    """
    Keep only the metadata rows for which an audio file exists.

    :param info: metadata frame with a 'file_key' column
    :param is_in: frame with 'file_key' and 'is_in' columns marking existing files
    :return: rows of ``info`` whose key is flagged in ``is_in``, with the helper
             columns 'is_in' and 'file_key' dropped
    """
    merged = info.merge(is_in, how='left', on='file_key')
    return merged[merged['is_in'] == 1].drop(columns=['is_in', 'file_key'])


def _check_aligned(split_name, files, info):
    """Fail loudly if a split's file list and metadata rows differ in length."""
    # Files and labels are paired purely by position further down, so a length
    # mismatch would attach labels to the wrong audio files.
    assert len(files) == len(info), (
        f'{split_name}: {len(files)} audio files but {len(info)} metadata rows'
    )


train_files = _keep_files_with_info(train_files, train_info)
dev_files = _keep_files_with_info(dev_files, dev_info)
test_files = _keep_files_with_info(test_files, test_info)

train_info = _keep_info_with_files(train_info, is_in_train)
test_info = _keep_info_with_files(test_info, is_in_test)
dev_info = _keep_info_with_files(dev_info, is_in_dev)

_check_aligned('train', train_files, train_info)
_check_aligned('test', test_files, test_info)
_check_aligned('dev', dev_files, dev_info)

In [9]:
# Labels as plain Python lists, in the row order of each split's info frame.
train_labels = train_info['label'].values.tolist()
test_labels = test_info['label'].values.tolist()
dev_labels = dev_info['label'].values.tolist()

# Each dataset receives the split's file paths and its labels as parallel lists.
# NOTE(review): presumably AudioDataset pairs them by position — confirm in AudioDataset.
train_dataset = AudioDataset(train_files, train_labels)
test_dataset = AudioDataset(test_files, test_labels)
devel_dataset = AudioDataset(dev_files, dev_labels)

# One sample per batch.
train_data_loader = DataLoader(dataset=train_dataset, batch_size=1)
test_data_loader = DataLoader(dataset=test_dataset, batch_size=1)
dev_data_loader = DataLoader(dataset=devel_dataset, batch_size=1)

In [10]:
def get_data_loader(data_loader: DataLoader) -> dict:
    """
    Iterate over a data loader and collect every sample in a dictionary.

    Each batch is expected to unpack into (audio path names, waveforms, labels).
    All items of a batch are stored, so no sample is dropped when the loader
    uses a batch size larger than 1; for a batch size of 1 the result is the
    first (and only) item of each batch.

    :param data_loader: DataLoader object yielding (names, waveforms, labels) batches
    :return: dict mapping each audio path name to
             {'waveforms': <waveform of that item>, 'label': <label of that item>};
             if a name occurs more than once, the last occurrence wins
    """
    res = {}
    for audio_path_names, waveforms, labels in data_loader:
        # Walk the batch dimension instead of taking only index 0.
        for name, waveform, label in zip(audio_path_names, waveforms, labels):
            res[name] = {'waveforms': waveform, 'label': label}
    return res

# Materialise each split as a dictionary, in the order dev, train, test.
dev, train, test = (
    get_data_loader(loader)
    for loader in (dev_data_loader, train_data_loader, test_data_loader)
)


In [11]:
# Inspect the dev split: one entry per audio name, each holding 'waveforms' and 'label'.
dev

{'dia0_utt0': {'waveforms': tensor([[-0.0178, -0.0274, -0.0316,  ...,  0.0066,  0.0042,  0.0045],
          [-0.0186, -0.0286, -0.0326,  ...,  0.0186,  0.0155,  0.0110]]),
  'label': tensor(0)},
 'dia0_utt1': {'waveforms': tensor([[0.0028, 0.0054, 0.0061,  ..., 0.0000, 0.0000, 0.0000],
          [0.0042, 0.0068, 0.0073,  ..., 0.0000, 0.0000, 0.0000]]),
  'label': tensor(0)},
 'dia100_utt0': {'waveforms': tensor([[-0.0022, -0.0034, -0.0042,  ...,  0.0174,  0.0203,  0.0237],
          [-0.0019, -0.0030, -0.0037,  ...,  0.0161,  0.0187,  0.0218]]),
  'label': tensor(1)},
 'dia101_utt0': {'waveforms': tensor([[-0.0223, -0.0200, -0.0066,  ...,  0.0947,  0.0910,  0.0743],
          [-0.0197, -0.0090,  0.0106,  ...,  0.0761,  0.0776,  0.0626]]),
  'label': tensor(0)},
 'dia102_utt0': {'waveforms': tensor([[-0.0011, -0.0014, -0.0012,  ...,  0.0000,  0.0000,  0.0000],
          [-0.0043, -0.0078, -0.0111,  ...,  0.0000,  0.0000,  0.0000]]),
  'label': tensor(1)},
 'dia102_utt1': {'waveforms': t

In [12]:
# Number of entries collected for the train split.
len(train)

9988