# Model 2 
Trying to use more data by combining several different datasets (TESS, RAVDESS, CREMA and SAVEE).

In [68]:
import pandas as pd
import numpy as np

import os
import sys
import re

#Librosa for sound analysis
import librosa
import librosa.display 
import seaborn as sns
import matplotlib.pyplot as plt

#To play audio
import IPython.display as ipd
from IPython.display import Audio

import tensorflow as tf 

import warnings
# Silence every warning category, unless the interpreter was started with
# explicit -W options (sys.warnoptions is non-empty in that case).
# NOTE(review): this also hides FutureWarnings raised by the libraries used
# below, which could flag upcoming behaviour changes — consider narrowing it.
if not sys.warnoptions:
    warnings.simplefilter("ignore")
# Always ignore DeprecationWarning, even when -W options were supplied.
warnings.filterwarnings("ignore", category=DeprecationWarning) 


## Create Data set

### TESS

In [11]:
#Function from previous attempts
def load_data(location: str):
    """Index every file found under ``location`` together with its label.

    The label is derived from the file name: the text after the last
    underscore, cut at the first dot, lower-cased.

    Parameters
    ----------
    location : str
        Root directory; it is walked recursively.

    Returns
    -------
    pandas.DataFrame
        One row per file, with the columns ``File`` (directory joined with
        the file name) and ``Label``.
    """
    records = [
        (
            os.path.join(folder, name),
            name.rsplit('_', 1)[-1].partition('.')[0].lower(),
        )
        for folder, _, names in os.walk(location)
        for name in names
    ]
    file_paths = [file_path for file_path, _ in records]
    file_labels = [file_label for _, file_label in records]
    return pd.DataFrame().assign(File=file_paths, Label=file_labels)

In [7]:
# Root folder of the TESS recordings (relative to the notebook's working directory).
data_location = './Data/TESS Toronto emotional speech set data/'

In [20]:
# Index every TESS file; load_data takes the label from the last '_'-separated
# part of each file name.
tess_df = load_data(data_location)

In [118]:
# TESS file names use the code 'ps' for this class (presumably "pleasant
# surprise" — TODO confirm); rename it to 'surprised' so the label matches the
# name used for the RAVDESS and SAVEE data below.
tess_df.loc[tess_df.Label=='ps','Label']='surprised'

In [119]:
tess_df.Label.value_counts()

Label
angry        400
disgust      400
fear         400
happy        400
neutral      400
surprised    400
sad          400
Name: count, dtype: int64

### RAVDESS

In [155]:
# Root folder of the RAVDESS speech recordings. The trailing slash matters:
# the following cell builds paths by plain string concatenation.
ravdess = "./Data/Ravdess/audio_speech_actors_01-24/"
# Names of the entries in that folder (the per-actor sub-folders);
# os.listdir returns them in arbitrary order.
ravdes_directory_list = os.listdir(ravdess)

In [156]:
# Collect one (numeric emotion code, path) pair per RAVDESS recording.
# File names look like "03-01-01-01-01-01-01.wav"; the third dash-separated
# field is the emotion code, stored here as an int.
ravdess_labels = []
ravdess_path = []
# Sort both listings so the row order is reproducible: os.listdir() returns
# entries in arbitrary order.
for actor_dir in sorted(ravdes_directory_list):
    actor_path = ravdess + actor_dir
    # Skip stray non-directory entries (e.g. ".DS_Store") instead of crashing
    # in os.listdir().
    if not os.path.isdir(actor_path):
        continue
    for fname in sorted(os.listdir(actor_path)):
        # Only the audio files carry a parsable emotion code.
        if not fname.lower().endswith('.wav'):
            continue
        part = fname.split('.')[0].split('-')
        ravdess_labels.append(int(part[2]))
        ravdess_path.append(ravdess + actor_dir + '/' + fname)

In [157]:
print(ravdess_labels[0])
print(ravdess_path[0])

1
./Data/Ravdess/audio_speech_actors_01-24/Actor_01/03-01-01-01-01-01-01.wav


In [158]:
# Put the collected paths and numeric emotion codes side by side:
# one row per RAVDESS recording.
file_df = pd.DataFrame({'File': ravdess_path})
label_df = pd.DataFrame({'Label': ravdess_labels})
ravdess_df = pd.concat((file_df, label_df), axis='columns')
ravdess_df

Unnamed: 0,File,Label
0,./Data/Ravdess/audio_speech_actors_01-24/Actor...,1
1,./Data/Ravdess/audio_speech_actors_01-24/Actor...,1
2,./Data/Ravdess/audio_speech_actors_01-24/Actor...,1
3,./Data/Ravdess/audio_speech_actors_01-24/Actor...,1
4,./Data/Ravdess/audio_speech_actors_01-24/Actor...,2
...,...,...
1435,./Data/Ravdess/audio_speech_actors_01-24/Actor...,8
1436,./Data/Ravdess/audio_speech_actors_01-24/Actor...,8
1437,./Data/Ravdess/audio_speech_actors_01-24/Actor...,8
1438,./Data/Ravdess/audio_speech_actors_01-24/Actor...,8


In [159]:
# Translate the numeric RAVDESS emotion codes into label names.
# The result is assigned back to the column instead of calling
# `ravdess_df.Label.replace(..., inplace=True)`: that form is a chained
# in-place update on a column selection, which pandas 2.2 warns about and
# which no longer modifies the DataFrame once Copy-on-Write is enabled.
ravdess_df['Label'] = ravdess_df['Label'].replace({
    1: 'neutral', 2: 'calm', 3: 'happy', 4: 'sad',
    5: 'angry', 6: 'fear', 7: 'disgust', 8: 'surprised',
})

In [160]:
ravdess_df.Label.value_counts()

Label
calm         192
happy        192
sad          192
angry        192
fear         192
disgust      192
surprised    192
neutral       96
Name: count, dtype: int64

In [139]:
ravdess_df

Unnamed: 0,File,Label
0,./Data/Ravdess/audio_speech_actors_01-24/Actor...,neutral
1,./Data/Ravdess/audio_speech_actors_01-24/Actor...,neutral
2,./Data/Ravdess/audio_speech_actors_01-24/Actor...,neutral
3,./Data/Ravdess/audio_speech_actors_01-24/Actor...,neutral
4,./Data/Ravdess/audio_speech_actors_01-24/Actor...,calm
...,...,...
1435,./Data/Ravdess/audio_speech_actors_01-24/Actor...,surprised
1436,./Data/Ravdess/audio_speech_actors_01-24/Actor...,surprised
1437,./Data/Ravdess/audio_speech_actors_01-24/Actor...,surprised
1438,./Data/Ravdess/audio_speech_actors_01-24/Actor...,surprised


### CREMA

In [162]:
# Folder holding the CREMA recordings. The trailing slash matters: paths are
# built below by plain string concatenation.
crema = './Data/Crema/'
# Non-recursive listing of that folder (entry names, arbitrary order).
crema_dir = os.listdir(crema)

In [163]:
# Parallel accumulators: one file path and one emotion label per CREMA recording.
crema_file = []
crema_label = []

In [164]:
# CREMA emotion codes (third '_'-separated field of the file name) mapped to
# the label names used throughout this notebook.
CREMA_EMOTIONS = {
    'SAD': 'sad', 'ANG': 'angry', 'DIS': 'disgust',
    'FEA': 'fear', 'HAP': 'happy', 'NEU': 'neutral',
}

# Start from empty lists so that re-running this cell does not append every
# file a second time.
crema_file = []
crema_label = []
# Sorted for a reproducible row order (os.listdir order is arbitrary).
for file in sorted(crema_dir):
    part = file.split('_')
    emotion = CREMA_EMOTIONS.get(part[2]) if len(part) > 2 else None
    if emotion is None:
        # Unknown code or unrelated file: skip it entirely. Recording the path
        # without a label would shift every following label onto the wrong
        # file when the two lists are combined.
        continue
    crema_file.append(crema + file)
    crema_label.append(emotion)

In [165]:
# Build the CREMA index from both lists in one step. Concatenating two
# single-column frames along axis=1 would pad the shorter one with NaN and
# silently pair labels with the wrong files if the lists ever differ in
# length; the dict constructor raises a ValueError instead.
crema_df = pd.DataFrame({'File': crema_file, 'Label': crema_label})

In [166]:
crema_df.Label.value_counts()

Label
angry      1271
disgust    1271
fear       1271
happy      1271
sad        1271
neutral    1087
Name: count, dtype: int64

### Savee

In [167]:
# Folder holding the SAVEE recordings. The trailing slash matters: paths are
# built below by plain string concatenation.
savee = './Data/Savee/'
# Non-recursive listing of that folder (entry names, arbitrary order).
savee_dir = os.listdir(savee)

In [168]:
# Parallel accumulators: one file path and one emotion label per SAVEE recording.
savee_file = []
savee_label = []

In [169]:
# SAVEE emotion prefixes (the lowercase letters after the '_' in the file
# name, e.g. "KL_su11.wav" -> "su") mapped to the notebook's label names.
SAVEE_EMOTIONS = {
    'a': 'angry', 'd': 'disgust', 'f': 'fear', 'h': 'happy',
    'n': 'neutral', 'sa': 'sad', 'su': 'surprised',
}

# Start from empty lists so that re-running this cell does not append every
# file a second time.
savee_file = []
savee_label = []
# Sorted for a reproducible row order (os.listdir order is arbitrary).
for file in sorted(savee_dir):
    pieces = file.split('_')
    if len(pieces) < 2:
        # No '_' in the name: not a "<speaker>_<emotion><nn>.wav" recording.
        continue
    # "su11.wav" -> "su11" -> "su": keep only the lowercase letters.
    s_label = re.sub('[^a-z]', '', pieces[1].split('.')[0])
    emotion = SAVEE_EMOTIONS.get(s_label)
    if emotion is None:
        # Unknown prefix: skip the file so paths and labels stay aligned.
        continue
    savee_file.append(savee + file)
    savee_label.append(emotion)

In [170]:
# Build the SAVEE index from both lists in one step. Concatenating two
# single-column frames along axis=1 would pad the shorter one with NaN and
# silently pair labels with the wrong files if the lists ever differ in
# length; the dict constructor raises a ValueError instead.
savee_df = pd.DataFrame({'File': savee_file, 'Label': savee_label})

In [171]:
savee_df.Label.value_counts()

Label
neutral      120
angry         60
disgust       60
fear          60
happy         60
sad           60
surprised     60
Name: count, dtype: int64

### Concat datasets

In [172]:
# Stack the four per-dataset indexes row-wise. No ignore_index is passed, so
# each frame's original row labels are carried over unchanged.
data_sets = pd.concat([tess_df,ravdess_df,crema_df,savee_df],axis=0)

In [173]:
# Replace the carried-over per-dataset row labels with one continuous index.
data = data_sets.reset_index(drop=True)

In [174]:
data.Label.value_counts()

Label
angry        1923
disgust      1923
fear         1923
happy        1923
sad          1923
neutral      1703
surprised     652
calm          192
Name: count, dtype: int64

In [177]:
data.shape

(12162, 2)

In [178]:
# Persist the combined File/Label index without the row index.
# NOTE(review): the file name is spelled 'datset1'; the load cell below reads
# the same name, so rename both together if it is ever corrected.
data.to_csv('./Data/datset1.csv',index=False)

## Load data

In [179]:
# Reload the combined index written above (columns: File, Label) and display it.
speach_data = pd.read_csv('./Data/datset1.csv')
speach_data

Unnamed: 0,File,Label
0,./Data/TESS Toronto emotional speech set data/...,angry
1,./Data/TESS Toronto emotional speech set data/...,angry
2,./Data/TESS Toronto emotional speech set data/...,angry
3,./Data/TESS Toronto emotional speech set data/...,angry
4,./Data/TESS Toronto emotional speech set data/...,angry
...,...,...
12157,./Data/Savee/KL_su11.wav,surprised
12158,./Data/Savee/KL_su12.wav,surprised
12159,./Data/Savee/KL_su13.wav,surprised
12160,./Data/Savee/KL_su14.wav,surprised
