In [48]:
# IMPORT LIBRARIES

# Processing
import librosa
import librosa.display
import numpy as np
import random
from tqdm import tqdm

# Visualization
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import IPython.display as ipd

# Files
import os
import joblib
import pickle

# Machine Learning
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.metrics import confusion_matrix
import keras
from keras.utils import np_utils
from tensorflow.keras.utils import to_categorical
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from keras.models import Sequential, Model, model_from_json
from keras.layers import Dense, Embedding, LSTM
from keras.layers import Input, Flatten, Dropout, Activation, BatchNormalization
from keras.layers import Conv1D, MaxPooling1D, AveragePooling1D
import tensorflow as tf

# ####### TEST ####### 
# Scipy
from scipy import signal
from scipy.io import wavfile

In [57]:
# AUDIO_DATA_PATH = 'data/'
GPATH = '/content/drive/My Drive/Master/Asignaturas/2 Cuatrimestre/Proyecto/Code/'
FEATURES_PATH = 'SpeechEmotionRecognition/data/processed/features/'
# LPATH_AUGMENTED = GPATH + AUDIO_DATA_PATH + 'RAVDESS/augmented'

SAVEE_PATH = GPATH + 'data/SAVEE/'
TESS_PATH = GPATH + 'data/TESS/'

# Maps
EMOTION_MAP = {1:'neutral', 2:'calm', 3:'happy', 4:'sad', 5:'angry', 6:'fear', 7:'disgust', 8:'surprise'}
INTENSITY_MAP = {1:'normal', 2:'strong'}

Configuracion del entorno para que nos deje acceder a los archivos e importar los módulos de python.

In [50]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [52]:
import sys  
sys.path.remove(GPATH + '/SpeechEmotionRecognition')
sys.path.insert(0, GPATH + '/SpeechEmotionRecognition')

Se leen los datasets

In [53]:
# Leemos SAVEE

emotion=[]
path = []

for dir in os.listdir(SAVEE_PATH):
  path_dir = os.listdir(SAVEE_PATH + dir) # todos los archivos de audios asociados a un directorio
  for filename in path_dir:
    str_path = SAVEE_PATH + dir + '/' + str(filename)
    path.append(str_path)
    if(filename[0]=='a'):
        emotion.append("anger")
    elif(filename[0]=='h'):
        emotion.append("happy")
    elif(filename[0]=='f'):
        emotion.append("fear")
    elif(filename[0]=='d'):
        emotion.append("disgust")
    elif(filename[0]=='n'):
        emotion.append('neutral')
    elif(filename[0]=='s'):
        if(filename[1]=='a'):
            emotion.append("sad")
        elif(filename[1]=='u'):
            emotion.append("surprise")


SAVEE_df = pd.DataFrame(columns=['emotion', 'path'])
SAVEE_df['emotion'] = emotion
SAVEE_df['path'] = path

In [54]:
SAVEE_df.head()

Unnamed: 0,emotion,path
0,anger,/content/drive/My Drive/Master/Asignaturas/2 C...
1,anger,/content/drive/My Drive/Master/Asignaturas/2 C...
2,neutral,/content/drive/My Drive/Master/Asignaturas/2 C...
3,neutral,/content/drive/My Drive/Master/Asignaturas/2 C...
4,neutral,/content/drive/My Drive/Master/Asignaturas/2 C...


In [55]:
# Leemos TESS

emotion=[]
path = []

for dir in os.listdir(TESS_PATH):
  path_dir = os.listdir(TESS_PATH + dir) # todos los archivos de audios asociados a un directorio
  label = dir.split('_')[1]
  for filename in path_dir:
    str_path = TESS_PATH + dir + '/' + str(filename)
    path.append(str_path)
    emotion.append(label)


TESS_df = pd.DataFrame(columns=['emotion', 'path'])
TESS_df['emotion'] = emotion
TESS_df['path'] = path

TESS_df.head()

Unnamed: 0,emotion,path
0,Sad,/content/drive/My Drive/Master/Asignaturas/2 C...
1,Sad,/content/drive/My Drive/Master/Asignaturas/2 C...
2,Sad,/content/drive/My Drive/Master/Asignaturas/2 C...
3,Sad,/content/drive/My Drive/Master/Asignaturas/2 C...
4,Sad,/content/drive/My Drive/Master/Asignaturas/2 C...


Se procesan y se juntan

In [None]:
# dataset = pd.concat([n_dataset,pd.DataFrame(dataset['feature'].values.tolist())],axis=1)

In [60]:
from src.MFCC import MFCC
# df_data, dataset_name, outpath='', n_features=40
featuresMFCC_SAVEE = MFCC(df_data = SAVEE_df)
featuresSAVEE_standard, featuresSAVEE_wn, featuresSAVEE_shiftted, featuresSAVEE_pitch = featuresMFCC_SAVEE.read_features_dataAugmentation()


  0%|          | 0/480 [00:00<?, ?it/s][A
  0%|          | 1/480 [00:00<03:42,  2.15it/s][A
  0%|          | 2/480 [00:00<03:21,  2.37it/s][A
  1%|          | 3/480 [00:01<03:13,  2.46it/s][A
  1%|          | 4/480 [00:01<03:16,  2.43it/s][A
  1%|          | 5/480 [00:01<03:03,  2.60it/s][A
  1%|▏         | 6/480 [00:02<03:07,  2.52it/s][A
  1%|▏         | 7/480 [00:02<03:05,  2.55it/s][A
  2%|▏         | 8/480 [00:03<02:54,  2.70it/s][A
  2%|▏         | 9/480 [00:03<02:45,  2.85it/s][A
  2%|▏         | 10/480 [00:03<02:34,  3.05it/s][A
  2%|▏         | 11/480 [00:03<02:30,  3.12it/s][A
  2%|▎         | 12/480 [00:04<02:41,  2.90it/s][A
  3%|▎         | 13/480 [00:04<02:39,  2.92it/s][A
  3%|▎         | 14/480 [00:05<02:43,  2.85it/s][A
  3%|▎         | 15/480 [00:05<03:00,  2.58it/s][A
  3%|▎         | 16/480 [00:05<02:50,  2.72it/s][A
  4%|▎         | 17/480 [00:06<03:03,  2.53it/s][A
  4%|▍         | 18/480 [00:06<03:02,  2.54it/s][A
  4%|▍         | 19/480 [00:0

Standard features into file


  0%|          | 2/480 [00:00<00:45, 10.60it/s][A
  1%|          | 3/480 [00:00<00:48,  9.83it/s][A
  1%|          | 4/480 [00:00<00:48,  9.75it/s][A
  1%|▏         | 6/480 [00:00<00:46, 10.25it/s][A
  1%|▏         | 7/480 [00:00<00:47,  9.87it/s][A
  2%|▏         | 9/480 [00:00<00:47,  9.82it/s][A
  2%|▏         | 10/480 [00:01<00:49,  9.44it/s][A
  2%|▏         | 11/480 [00:01<00:51,  9.03it/s][A
  2%|▎         | 12/480 [00:01<00:50,  9.19it/s][A
  3%|▎         | 13/480 [00:01<00:53,  8.66it/s][A
  3%|▎         | 14/480 [00:01<00:54,  8.53it/s][A
  3%|▎         | 15/480 [00:01<00:54,  8.60it/s][A
  3%|▎         | 16/480 [00:01<00:54,  8.48it/s][A
  4%|▎         | 17/480 [00:01<00:52,  8.87it/s][A
  4%|▍         | 18/480 [00:01<00:51,  8.89it/s][A
  4%|▍         | 20/480 [00:02<00:46,  9.79it/s][A
  5%|▍         | 22/480 [00:02<00:48,  9.40it/s][A
  5%|▍         | 23/480 [00:02<00:49,  9.21it/s][A
  5%|▌         | 24/480 [00:02<00:53,  8.48it/s][A
  5%|▌         | 2

White Noise features into file



  1%|          | 3/480 [00:00<00:41, 11.50it/s][A
  1%|          | 4/480 [00:00<00:44, 10.82it/s][A
  1%|▏         | 6/480 [00:00<00:41, 11.34it/s][A
  1%|▏         | 7/480 [00:00<00:43, 10.86it/s][A
  2%|▏         | 9/480 [00:00<00:43, 10.79it/s][A
  2%|▏         | 10/480 [00:00<00:44, 10.50it/s][A
  2%|▏         | 11/480 [00:01<00:48,  9.71it/s][A
  3%|▎         | 13/480 [00:01<00:47,  9.83it/s][A
  3%|▎         | 14/480 [00:01<00:50,  9.22it/s][A
  3%|▎         | 16/480 [00:01<00:47,  9.71it/s][A
  4%|▍         | 18/480 [00:01<00:46,  9.90it/s][A
  4%|▍         | 20/480 [00:01<00:42, 10.79it/s][A
  5%|▍         | 22/480 [00:02<00:44, 10.21it/s][A
  5%|▌         | 24/480 [00:02<00:47,  9.69it/s][A
  5%|▌         | 26/480 [00:02<00:47,  9.46it/s][A
  6%|▌         | 27/480 [00:02<00:49,  9.18it/s][A
  6%|▌         | 28/480 [00:02<00:51,  8.81it/s][A
  6%|▌         | 29/480 [00:02<00:50,  8.96it/s][A
  6%|▋         | 30/480 [00:03<00:53,  8.44it/s][A
  7%|▋         |

Shiftted into file



  0%|          | 1/480 [00:01<10:13,  1.28s/it][A
  0%|          | 2/480 [00:01<07:40,  1.04it/s][A
  1%|          | 3/480 [00:01<05:52,  1.35it/s][A
  1%|          | 4/480 [00:02<04:46,  1.66it/s][A
  1%|          | 5/480 [00:02<03:46,  2.09it/s][A
  1%|▏         | 6/480 [00:02<03:50,  2.06it/s][A
  1%|▏         | 7/480 [00:02<03:15,  2.43it/s][A
  2%|▏         | 8/480 [00:03<02:38,  2.97it/s][A
  2%|▏         | 9/480 [00:03<02:39,  2.95it/s][A
  2%|▏         | 10/480 [00:03<02:27,  3.19it/s][A
  2%|▏         | 11/480 [00:03<02:24,  3.24it/s][A
  2%|▎         | 12/480 [00:04<02:12,  3.52it/s][A
  3%|▎         | 13/480 [00:04<02:10,  3.57it/s][A
  3%|▎         | 14/480 [00:04<02:06,  3.67it/s][A
  3%|▎         | 15/480 [00:05<02:04,  3.74it/s][A
  3%|▎         | 16/480 [00:05<02:01,  3.83it/s][A
  4%|▎         | 17/480 [00:05<02:00,  3.84it/s][A
  4%|▍         | 18/480 [00:05<02:03,  3.74it/s][A
  4%|▍         | 19/480 [00:05<01:51,  4.12it/s][A
  4%|▍         | 20/

Pitch Tunning features into file





In [None]:
featuresMFCC_TESS = MFCC(df_data = TESS_df)
featuresTESS_standard, featuresTESS_wn, featuresTESS_shiftted, featuresTESS_pitch = featuresMFCC_TESS.read_features_dataAugmentation()


  0%|          | 0/2800 [00:00<?, ?it/s][A
  0%|          | 3/2800 [00:00<01:56, 24.07it/s][A
  0%|          | 5/2800 [00:00<02:05, 22.34it/s][A
  0%|          | 7/2800 [00:00<02:09, 21.56it/s][A
  0%|          | 9/2800 [00:00<03:54, 11.90it/s][A
  0%|          | 11/2800 [00:01<06:07,  7.59it/s][A
  0%|          | 12/2800 [00:01<08:21,  5.56it/s][A
  0%|          | 13/2800 [00:01<09:29,  4.89it/s][A
  0%|          | 14/2800 [00:02<12:38,  3.67it/s][A
  1%|          | 15/2800 [00:02<12:28,  3.72it/s][A
  1%|          | 16/2800 [00:02<14:14,  3.26it/s][A
  1%|          | 17/2800 [00:03<15:02,  3.08it/s][A
  1%|          | 18/2800 [00:03<14:11,  3.27it/s][A
  1%|          | 19/2800 [00:03<14:02,  3.30it/s][A
  1%|          | 20/2800 [00:04<14:26,  3.21it/s][A
  1%|          | 21/2800 [00:04<15:25,  3.00it/s][A
  1%|          | 22/2800 [00:04<15:02,  3.08it/s][A
  1%|          | 23/2800 [00:05<14:31,  3.19it/s][A
  1%|          | 24/2800 [00:05<14:10,  3.27it/s][A
  1%|

Se entrenan