In [29]:
import pandas as pd
import os
from sklearn.utils import resample

In [30]:
# Root folders of the four speech-emotion corpora, relative to the notebook.
# The trailing slash matters: later cells build file paths by plain string
# concatenation (e.g. ravdess + actor).
crema, ravdess, savee, tess = 'CREMA-D/', 'RAVDESS/', 'SAVEE/', 'TESS/'


In [1]:
# RAVDESS: two-digit emotion code ('01'..'08') -> label. The codes are
# consecutive, so they are generated from the ordered label list.
ravdess_emo = {
    f'{code:02d}': label
    for code, label in enumerate(
        ['neutral', 'calm', 'happy', 'sad', 'angry', 'fearful', 'disgust', 'surprised'],
        start=1,
    )
}

# CREMA-D: three-letter emotion tag -> label.
crema_emo = {
    'SAD': 'sad',
    'ANG': 'angry',
    'DIS': 'disgust',
    'FEA': 'fear',
    'HAP': 'happy',
    'NEU': 'neutral',
}

# SAVEE: one- or two-letter emotion prefix -> label.
# Note the spelling differs from RAVDESS ('fear'/'surprise' here versus
# 'fearful'/'surprised' there); emo_to_sentiment accepts both variants.
savee_emo = {
    'a': 'angry',
    'd': 'disgust',
    'f': 'fear',
    'h': 'happy',
    'n': 'neutral',
    'sa': 'sad',
    'su': 'surprise',
}

In [32]:
def resample_data(data):
    '''Balance the sentiment classes by oversampling with replacement.

    Rows are grouped by the 'sentiment' column into 'neutral', 'positive'
    and 'negative'. Every non-empty group is resampled (with replacement,
    fixed seed 42) up to the size of the largest group, and the groups are
    concatenated in that order.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a 'sentiment' column.

    Returns
    -------
    pandas.DataFrame
        The balanced frame. Rows whose sentiment is not one of the three
        labels above are not included. A class with no rows is skipped
        rather than resampled, because drawing samples from an empty frame
        raises an error. If no row carries any of the three labels, an
        empty frame with the same columns is returned.
    '''
    labels = ('neutral', 'positive', 'negative')
    groups = [data[data['sentiment'] == label] for label in labels]
    # An empty class cannot be oversampled; drop it instead of crashing.
    groups = [group for group in groups if len(group) > 0]
    if not groups:
        return data.iloc[0:0].copy()

    target = max(len(group) for group in groups)
    return pd.concat([
        resample(group, replace=True, n_samples=target, random_state=42)
        for group in groups
    ])

In [53]:
def emo_to_sentiment(emo):
    '''Collapse a fine-grained emotion label into a coarse sentiment.

    Returns 'positive' for happy/calm/surprise(d), 'negative' for
    sad/angry/fear(ful)/disgust, and the input unchanged for anything else
    (so 'neutral' stays 'neutral').
    '''
    positive = ('happy', 'calm', 'surprised', 'surprise')
    negative = ('sad', 'angry', 'fearful', 'disgust', 'fear')

    if emo in positive:
        return 'positive'
    return 'negative' if emo in negative else emo

# RAVDESS

In [34]:
emotion = []
path = []

# os.listdir() returns entries in arbitrary order; sorting keeps the row order,
# and therefore the fixed-seed resampling applied afterwards, reproducible.
for actor in sorted(os.listdir(ravdess)):
    actor_dir = ravdess + actor
    # Skip stray non-directory entries (e.g. .DS_Store) instead of crashing
    # when trying to list them.
    if not os.path.isdir(actor_dir):
        continue
    for item in sorted(os.listdir(actor_dir)):
        if item.startswith('.'):
            continue  # hidden/metadata file, not a recording
        # The third dash-separated field of the file name is the emotion code
        # looked up in ravdess_emo.
        emotion.append(emo_to_sentiment(ravdess_emo[item.split('-')[2]]))
        path.append(actor_dir + '/' + item)
        

In [35]:
# One single-column frame per list. Both lists were filled in lockstep, so
# row i of each frame refers to the same audio file.
file_path = pd.DataFrame({'file_path': path})
sentiment = pd.DataFrame({'sentiment': emotion})

In [36]:
# Place the two single-column frames side by side: file_path, then sentiment.
ravdess_dataset = pd.concat(objs=(file_path, sentiment), axis='columns')

In [37]:
# Balance the sentiment classes. This overwrites ravdess_dataset, so re-running
# the cell resamples the already-balanced frame rather than the raw listing.
ravdess_dataset = resample_data(data=ravdess_dataset)

In [38]:
# Sanity check: number of rows per sentiment class after balancing.
ravdess_dataset.loc[:, 'sentiment'].value_counts()

sentiment
neutral     768
positive    768
negative    768
Name: count, dtype: int64

In [39]:
# to_csv does not create missing parent folders, so make sure the output
# directory exists before writing (no-op when it is already there).
os.makedirs('datasets', exist_ok=True)
ravdess_dataset.to_csv('datasets/ravdess.csv', index=False)

# CREMA-D

In [40]:
emotion = []
path = []

# os.listdir() returns entries in arbitrary order; sorting keeps the row order,
# and therefore the fixed-seed resampling applied afterwards, reproducible.
for items in sorted(os.listdir(crema)):
    if items.startswith('.'):
        continue  # hidden/metadata file (e.g. .DS_Store), not a recording
    # The third underscore-separated field of the file name is the emotion tag
    # looked up in crema_emo.
    emotion.append(emo_to_sentiment(crema_emo[items.split('_')[2]]))
    path.append(crema + items)

In [41]:
# One single-column frame per list. Both lists were filled in lockstep, so
# row i of each frame refers to the same audio file.
file_path = pd.DataFrame({'file_path': path})
sentiment = pd.DataFrame({'sentiment': emotion})

In [42]:
# Place the two single-column frames side by side: file_path, then sentiment.
crema_dataset = pd.concat(objs=(file_path, sentiment), axis='columns')

In [43]:
# Balance the sentiment classes. This overwrites crema_dataset, so re-running
# the cell resamples the already-balanced frame rather than the raw listing.
crema_dataset = resample_data(data=crema_dataset)

In [44]:
# Sanity check: number of rows per sentiment class after balancing.
crema_dataset.loc[:, 'sentiment'].value_counts()

sentiment
neutral     5084
positive    5084
negative    5084
Name: count, dtype: int64

In [45]:
# to_csv does not create missing parent folders, so make sure the output
# directory exists before writing (no-op when it is already there).
os.makedirs('datasets', exist_ok=True)
crema_dataset.to_csv('datasets/crema.csv', index=False)

# TESS

In [54]:
emotion = []
path = []

# os.listdir() returns entries in arbitrary order; sorting keeps the row order,
# and therefore the fixed-seed resampling applied afterwards, reproducible.
for file in sorted(os.listdir(tess)):
    folder = tess + file
    # Skip stray non-directory entries (e.g. .DS_Store) instead of crashing
    # when trying to list them.
    if not os.path.isdir(folder):
        continue
    # The emotion is the last underscore-separated token of the folder name,
    # lower-cased. A token emo_to_sentiment does not recognise is passed
    # through unchanged and later ignored by resample_data.
    emo = file.split('_')[-1].lower()
    for wav in sorted(os.listdir(folder)):
        if wav.startswith('.'):
            continue  # hidden/metadata file, not a recording
        emotion.append(emo_to_sentiment(emo))
        path.append(folder + '/' + wav)

In [56]:
# One single-column frame per list. Both lists were filled in lockstep, so
# row i of each frame refers to the same audio file.
file_path = pd.DataFrame({'file_path': path})
sentiment = pd.DataFrame({'sentiment': emotion})

In [57]:
# Place the two single-column frames side by side: file_path, then sentiment.
tess_dataset = pd.concat(objs=(file_path, sentiment), axis='columns')

In [59]:
# Balance the sentiment classes. This overwrites tess_dataset, so re-running
# the cell resamples the already-balanced frame rather than the raw listing.
tess_dataset = resample_data(data=tess_dataset)

In [60]:
# Sanity check: number of rows per sentiment class after balancing.
tess_dataset.loc[:, 'sentiment'].value_counts()

sentiment
neutral     1600
positive    1600
negative    1600
Name: count, dtype: int64

In [61]:
# to_csv does not create missing parent folders, so make sure the output
# directory exists before writing (no-op when it is already there).
os.makedirs('datasets', exist_ok=True)
tess_dataset.to_csv('datasets/tess.csv', index=False)

# SAVEE

In [69]:
emotion = []
path = []

# os.listdir() returns entries in arbitrary order; sorting keeps the row order,
# and therefore the fixed-seed resampling applied afterwards, reproducible.
for wav in sorted(os.listdir(savee)):
    if wav.startswith('.'):
        continue  # hidden/metadata file (e.g. .DS_Store), not a recording
    # wav[:-6] drops the last six characters of the name (presumably a
    # two-digit take number plus '.wav' -- confirm against the files); the
    # last underscore-separated token of what remains is the emotion prefix
    # looked up in savee_emo.
    emotion.append(emo_to_sentiment(savee_emo[wav[:-6].split('_')[-1]]))
    path.append(savee + wav)

In [70]:
# One single-column frame per list. Both lists were filled in lockstep, so
# row i of each frame refers to the same audio file.
file_path = pd.DataFrame({'file_path': path})
sentiment = pd.DataFrame({'sentiment': emotion})

In [72]:
# Place the two single-column frames side by side: file_path, then sentiment.
savee_dataset = pd.concat(objs=(file_path, sentiment), axis='columns')

In [73]:
# Balance the sentiment classes. This overwrites savee_dataset, so re-running
# the cell resamples the already-balanced frame rather than the raw listing.
savee_dataset = resample_data(data=savee_dataset)

In [74]:
# Sanity check: number of rows per sentiment class after balancing.
savee_dataset.loc[:, 'sentiment'].value_counts()

sentiment
neutral     240
positive    240
negative    240
Name: count, dtype: int64

In [75]:
# to_csv does not create missing parent folders, so make sure the output
# directory exists before writing (no-op when it is already there).
os.makedirs('datasets', exist_ok=True)
savee_dataset.to_csv('datasets/savee.csv', index=False)

In [76]:
# Created by DaBloat