# **Preparing the Combined Audio Dataset for DUA-D2C**

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive; the dataset
# and code folders used in this notebook are addressed through that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Import libraries
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from matplotlib.pyplot import specgram
import pandas as pd
import glob
from sklearn.metrics import confusion_matrix
import IPython.display as ipd  # To play sound in the notebook
import os
import sys
import warnings
# Silence every warning category, but only when no explicit -W option was
# given to the interpreter (sys.warnoptions is non-empty in that case).
if not sys.warnoptions:
    warnings.simplefilter("ignore")
# DeprecationWarning is filtered out unconditionally, independent of the check above.
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Add the project's code folder on Drive to the module search path.
sys.path.append('/content/drive/MyDrive/IAR Research Grant/Codes/Audio Classification Codes')

# Importing the Datasets

In [None]:
# Root folders of the three emotional-speech corpora on the mounted Drive.
# Every value keeps its trailing "/" because file paths are assembled from
# these constants by plain string concatenation.
AUDIO_ROOT = "/content/drive/MyDrive/IAR Research Grant/Data/Audio Datasets/"
TESS = AUDIO_ROOT + "TESS.zip (Unzipped Files)/tess toronto emotional speech set data/TESS Toronto emotional speech set data/"
RAV = AUDIO_ROOT + "RAVDESS Audio Emotion/"
CREMA = AUDIO_ROOT + "CREMA-D/"

# Sanity check: peek at the first five entries of the RAVDESS folder.
dir_list = os.listdir(RAV)
dir_list[:5]

['Actor_19', 'Actor_18', 'Actor_21', 'Actor_20', 'Actor_24']

<a id="ravdess"></a>
## <center>1. RAVDESS dataset</center>

In [None]:
# Build the RAVDESS index: one row per clip holding its "<gender>_<emotion>"
# label, the dataset name and the file path.
#
# File names are dash-separated numeric fields: index 2 is the emotion code
# and index 6 is the actor id (even ids are labelled female, odd ids male).
# Codes 1 and 2 both map to 'neutral' (2 is "calm" in the RAVDESS naming
# convention). An unknown emotion code raises KeyError instead of being
# carried silently into the labels.
rav_emotion_names = {1: 'neutral', 2: 'neutral', 3: 'happy', 4: 'sad',
                     5: 'angry', 6: 'fear', 7: 'disgust', 8: 'surprise'}

dir_list = sorted(os.listdir(RAV))

rav_rows = []
for actor_dir in dir_list:
    actor_path = os.path.join(RAV, actor_dir)
    if not os.path.isdir(actor_path):
        # Stray files next to the actor folders cannot be listed; skip them.
        continue
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order (and therefore the exported CSV) identical on every run.
    for fname in sorted(os.listdir(actor_path)):
        if not fname.lower().endswith('.wav'):
            # Hidden/metadata files would break the numeric parsing below.
            continue
        part = fname.split('.')[0].split('-')
        emotion_name = rav_emotion_names[int(part[2])]
        speaker = "female" if int(part[6]) % 2 == 0 else "male"
        rav_rows.append({
            'labels': speaker + '_' + emotion_name,
            'source': 'RAVDESS',
            'path': os.path.join(actor_path, fname),
        })

# Build the frame once from the records; explicit columns keep the layout
# (labels, source, path) stable even when no clip was found.
RAV_df = pd.DataFrame(rav_rows, columns=['labels', 'source', 'path'])
RAV_df.labels.value_counts()

male_neutral       144
female_neutral     144
male_happy          96
male_sad            96
male_angry          96
male_surprise       96
male_fear           96
male_disgust        96
female_angry        96
female_happy        96
female_fear         96
female_sad          96
female_disgust      96
female_surprise     96
Name: labels, dtype: int64

<a id="tess"></a>
##  <center> 2. TESS dataset </center>

In [None]:
# Sorted listing of the TESS emotion folders, displayed as the cell output.
dir_list = sorted(os.listdir(TESS))
dir_list

['OAF_Fear',
 'OAF_Pleasant_surprise',
 'OAF_Sad',
 'OAF_angry',
 'OAF_disgust',
 'OAF_happy',
 'OAF_neutral',
 'YAF_angry',
 'YAF_disgust',
 'YAF_fear',
 'YAF_happy',
 'YAF_neutral',
 'YAF_pleasant_surprised',
 'YAF_sad']

In [None]:
# Build the TESS index: every clip inside a folder receives the label mapped
# from that folder's name (all TESS labels here are female_*).
#
# Folder names are matched case-insensitively because the folders mix
# capitalisation (e.g. 'OAF_Fear' vs 'YAF_fear'); a folder that is not in
# the table is labelled 'Unknown'.
tess_folder_labels = {
    'oaf_angry': 'female_angry',
    'yaf_angry': 'female_angry',
    'oaf_disgust': 'female_disgust',
    'yaf_disgust': 'female_disgust',
    'oaf_fear': 'female_fear',
    'yaf_fear': 'female_fear',
    'oaf_happy': 'female_happy',
    'yaf_happy': 'female_happy',
    'oaf_neutral': 'female_neutral',
    'yaf_neutral': 'female_neutral',
    'oaf_pleasant_surprise': 'female_surprise',
    'yaf_pleasant_surprised': 'female_surprise',
    'oaf_sad': 'female_sad',
    'yaf_sad': 'female_sad',
}

tess_rows = []
for folder in dir_list:
    folder_path = os.path.join(TESS, folder)
    if not os.path.isdir(folder_path):
        # Stray files next to the emotion folders cannot be listed; skip them.
        continue
    # The label depends only on the folder, so resolve it once per folder.
    label = tess_folder_labels.get(folder.lower(), 'Unknown')
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order (and therefore the exported CSV) identical on every run.
    for fname in sorted(os.listdir(folder_path)):
        if not fname.lower().endswith('.wav'):
            # Keep non-audio files out of the index.
            continue
        tess_rows.append({
            'labels': label,
            'source': 'TESS',
            'path': os.path.join(folder_path, fname),
        })

# Explicit columns keep the layout (labels, source, path) stable even when
# no clip was found.
TESS_df = pd.DataFrame(tess_rows, columns=['labels', 'source', 'path'])
TESS_df.labels.value_counts()

female_fear        400
female_surprise    400
female_sad         400
female_angry       400
female_disgust     400
female_happy       400
female_neutral     400
Name: labels, dtype: int64

<a id="crema"></a>
##  <center> 3. CREMA-D dataset </center>

In [None]:
# Sorted listing of the CREMA-D folder; print the first ten names as a preview.
dir_list = sorted(os.listdir(CREMA))
print(dir_list[:10])

['1001_DFA_ANG_XX.wav', '1001_DFA_DIS_XX.wav', '1001_DFA_FEA_XX.wav', '1001_DFA_HAP_XX.wav', '1001_DFA_NEU_XX.wav', '1001_DFA_SAD_XX.wav', '1001_IEO_ANG_HI.wav', '1001_IEO_ANG_LO.wav', '1001_IEO_ANG_MD.wav', '1001_IEO_DIS_HI.wav']


In [None]:
# Build the CREMA-D index from the file names in dir_list.
#
# Names are underscore-separated: the first field is a numeric id that is
# looked up in `female` to decide the gender, and the third field is the
# emotion code. Codes outside the table below are labelled 'Unknown'.
female = [1002,1003,1004,1006,1007,1008,1009,1010,1012,1013,1018,1020,1021,1024,1025,1028,1029,1030,1037,1043,1046,1047,1049,
          1052,1053,1054,1055,1056,1058,1060,1061,1063,1072,1073,1074,1075,1076,1078,1079,1082,1084,1089,1091]

crema_emotion_names = {
    'SAD': 'sad',
    'ANG': 'angry',
    'DIS': 'disgust',
    'FEA': 'fear',
    'HAP': 'happy',
    'NEU': 'neutral',
}

gender, emotion, path = [], [], []
for clip in dir_list:
    fields = clip.split('_')
    speaker = 'female' if int(fields[0]) in female else 'male'
    gender.append(speaker)

    code = fields[2]
    if code in crema_emotion_names:
        emotion.append(speaker + '_' + crema_emotion_names[code])
    else:
        emotion.append('Unknown')

    path.append(CREMA + clip)

# Assemble labels/source first, then attach the path column side by side.
crema_labels = pd.DataFrame({'labels': emotion}).assign(source='CREMA')
crema_paths = pd.DataFrame({'path': path})
CREMA_df = pd.concat([crema_labels, crema_paths], axis=1)
CREMA_df.labels.value_counts()

male_angry        671
male_disgust      671
male_fear         671
male_happy        671
male_sad          671
female_angry      600
female_disgust    600
female_fear       600
female_happy      600
female_sad        600
male_neutral      575
female_neutral    512
Name: labels, dtype: int64

# **Combining the Datasets**

In [None]:
# Stack the three per-dataset indexes into one frame with a fresh 0..n-1 index.
df = pd.concat([RAV_df, TESS_df, CREMA_df], axis=0, ignore_index=True)
print(df.labels.value_counts())

# Export the combined index as CSV, without the row-index column.
df.to_csv("/content/drive/MyDrive/IAR Research Grant/Codes/Audio Classification Codes/Data_path_short.csv",index=False)

# A bare expression is rendered only when it is the last statement of the
# cell, so the preview has to come after the export to be displayed.
df.head()

female_angry       1096
female_happy       1096
female_fear        1096
female_sad         1096
female_disgust     1096
female_neutral     1056
male_happy          767
male_sad            767
male_angry          767
male_fear           767
male_disgust        767
male_neutral        719
female_surprise     496
male_surprise        96
Name: labels, dtype: int64


In [None]:
# Display the combined index (labels, source, path) as the cell output.
df

Unnamed: 0,labels,source,path
0,male_neutral,RAVDESS,/content/drive/MyDrive/IAR Research Grant/Data...
1,male_neutral,RAVDESS,/content/drive/MyDrive/IAR Research Grant/Data...
2,male_neutral,RAVDESS,/content/drive/MyDrive/IAR Research Grant/Data...
3,male_neutral,RAVDESS,/content/drive/MyDrive/IAR Research Grant/Data...
4,male_neutral,RAVDESS,/content/drive/MyDrive/IAR Research Grant/Data...
...,...,...,...
11677,female_disgust,CREMA,/content/drive/MyDrive/IAR Research Grant/Data...
11678,female_fear,CREMA,/content/drive/MyDrive/IAR Research Grant/Data...
11679,female_happy,CREMA,/content/drive/MyDrive/IAR Research Grant/Data...
11680,female_neutral,CREMA,/content/drive/MyDrive/IAR Research Grant/Data...
