In [1]:
# Reseteamos todas las variables del entorno
%reset
# IMPORT LIBRARIES

# Processing
import librosa
import librosa.display
import numpy as np
import random
from tqdm import tqdm

# Visualization
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import IPython.display as ipd

# Files
import os
import joblib
import pickle

# Machine Learning
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.metrics import confusion_matrix
import keras
from keras.utils import np_utils
from tensorflow.keras.utils import to_categorical
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from keras.models import Sequential, Model, model_from_json
from keras.layers import Dense, Embedding, LSTM
from keras.layers import Input, Flatten, Dropout, Activation, BatchNormalization
from keras.layers import Conv1D, MaxPooling1D, AveragePooling1D
import tensorflow as tf

# ####### TEST ####### 
# Scipy
from scipy import signal
from scipy.io import wavfile

Once deleted, variables cannot be recovered. Proceed (y/[n])? y


# **CONFIGURACION DEL ENTORNO**

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import sys  
GPATH = '/content/drive/My Drive/Master/Asignaturas/2 Cuatrimestre/Proyecto/Code/'

if GPATH + '/SpeechEmotionRecognition' in sys.path:
  sys.path.remove(GPATH + '/SpeechEmotionRecognition')

sys.path.insert(0, GPATH + '/SpeechEmotionRecognition')

sys.path

['/content/drive/My Drive/Master/Asignaturas/2 Cuatrimestre/Proyecto/Code//SpeechEmotionRecognition',
 '',
 '/content',
 '/env/python',
 '/usr/lib/python37.zip',
 '/usr/lib/python3.7',
 '/usr/lib/python3.7/lib-dynload',
 '/usr/local/lib/python3.7/dist-packages',
 '/usr/lib/python3/dist-packages',
 '/usr/local/lib/python3.7/dist-packages/IPython/extensions',
 '/root/.ipython']

In [15]:
FEATURES_PATH = GPATH + 'SpeechEmotionRecognition/data/processed/features/'
FIGURES_PATH = GPATH + 'SpeechEmotionRecognition/reports/figures/'
# Datasets
SAVEE_PATH = GPATH + 'data/SAVEE/'
TESS_PATH = GPATH + 'data/TESS/'
RAVDESS_PATH = GPATH + 'data/RAVDESS/'
EMODB_PATH = GPATH + 'data/EMODB/'

# Espectogramas
SPECT_IMG = GPATH + 'data/spectrograms/'

# Maps
EMOTION_MAP = {1:'neutral', 2:'calm', 3:'happy', 4:'sad', 5:'angry', 6:'fear', 7:'disgust', 8:'surprise'}
# EMOTION_EMODB_MAP = {'L':'boredom', 'F':'happy', 'T':'sad', 'W':'angry', 'A':'fear', 'E':'disgust', 'N':'neutral'} # original
# EMOTION_EMODB_MAP = {'L':'aburrimiento', 'F':'felicidad', 'T':'tristeza', 'W':'enfado', 'A':'miedo', 'E':'asco', 'N':'neutral'}
# INTENSITY_MAP = {1:'normal', 2:'strong'}
reduced_values = ['angry', 'disgust', 'happy', 'sad', 'neutral']

## **1. DATOS**

In [5]:
# Leemos EMO-DB
EMOTION_EMODB_MAP = {'F':'happy', 'T':'sad', 'W':'angry','E':'disgust', 'N':'neutral'}
emotion=[]
path = []

for filename in os.listdir(EMODB_PATH):
  str_path = EMODB_PATH +  '/' + str(filename)
  emo_capital = filename[5]
  if emo_capital in EMOTION_EMODB_MAP:
    emotion.append(EMOTION_EMODB_MAP[emo_capital])
    path.append(str_path)
  
EMODDB_df = pd.DataFrame(columns=['emotion', 'path'])
EMODDB_df['emotion'] = emotion
EMODDB_df['path'] = path

class_distribution = EMODDB_df['emotion'].value_counts()
print( class_distribution )

angry      127
neutral     79
happy       71
sad         62
disgust     46
Name: emotion, dtype: int64


In [6]:
print( class_distribution )

angry      127
neutral     79
happy       71
sad         62
disgust     46
Name: emotion, dtype: int64


In [7]:
# from src.SpeechDataProcess import SpeechDataProcess
# data = SpeechDataProcess('TESS', TESS_PATH)
# TESS_df = data.read()
# data = SpeechDataProcess('SAVEE', SAVEE_PATH)
# SAVEE_df = data.read()

In [8]:
emotion=[]
path = []
reduced_values = ['a', 'd', 'h', 'sa', 'n']

for dir in os.listdir(SAVEE_PATH):
  path_dir = os.listdir(SAVEE_PATH + dir) # todos los archivos de audios asociados a un directorio
  for filename in path_dir:
    str_path = SAVEE_PATH + dir + '/' + str(filename)
    label = filename[0]
    if filename[1].isalpha() : label = filename[0] + filename[1] 
    # Solo los valores reducidos
    if label in reduced_values:
      path.append(str_path)
      if(label=='a'):
          emotion.append("angry")
      elif(label=='h'):
          emotion.append("happy")
      elif(label=='f'):
          emotion.append("fear")
      elif(label=='d'):
          emotion.append("disgust")
      elif(label=='n'):
          emotion.append('neutral')
      elif(label=='sa'):
          emotion.append("sad")
      elif(label=='su'):
          emotion.append("surprise")


SAVEE_df = pd.DataFrame(columns=['emotion', 'path'])
SAVEE_df['emotion'] = emotion
SAVEE_df['path'] = path

SAVEE_df['emotion'].unique()
print( SAVEE_df['emotion'].value_counts() )

neutral    120
happy       60
disgust     60
angry       60
sad         60
Name: emotion, dtype: int64


In [9]:
# Leemos TESS

emotion=[]
path = []
reduced_values = ['angry', 'disgust', 'happy', 'sad', 'neutral']
for dir in os.listdir(TESS_PATH):
  path_dir = os.listdir(TESS_PATH + dir) # todos los archivos de audios asociados a un directorio
  label = dir.split('_')[1]
  for filename in path_dir:
    str_path = TESS_PATH + dir + '/' + str(filename)

    # Solo los valores reducidos
    if label.lower() in reduced_values:
      path.append(str_path)
      emotion.append(label.lower())


TESS_df = pd.DataFrame(columns=['emotion', 'path'])
TESS_df['emotion'] = emotion
TESS_df['path'] = path

print( "Size of the dataset: {} \n".format(len(TESS_df)) )
print( TESS_df['emotion'].value_counts() )
# TESS_df.sample(5)

Size of the dataset: 2000 

happy      400
sad        400
disgust    400
angry      400
neutral    400
Name: emotion, dtype: int64


In [None]:
if len(TESS_df.emotion.unique()) == len(SAVEE_df.emotion.unique()) and len(TESS_df.emotion.unique()) == len(EMODDB_df.emotion.unique()):
  print("Todas las dimensiones son correctas")

Todas las dimensiones son correctas


### **1.2 EXTRACCION DE ESPECTROGRAMAS**

In [10]:
from src.MFCC import MFCC

In [13]:
features_spectMFCC_SAVEE = MFCC(df_data = SAVEE_df, dataset_name = "SAVEE")
features_spectMFCC_TESS = MFCC(df_data = TESS_df, dataset_name = "TESS")

features_spectMFCC_EMODB = MFCC(df_data = EMODDB_df, dataset_name = "EMODB")
# list_images, labels = featuresMFCC_SAVEE.load_images(IMG_MFCC, (40,40))

In [16]:
# Genero los espectrogramas
# features_spectMFCC_SAVEE.generate_spectrograms(SPECT_IMG)
# features_spectMFCC_TESS.generate_spectrograms(SPECT_IMG)

features_spectMFCC_TESS.generate_spectrograms(SPECT_IMG + 'test_emodb/')
# generate_spectrograms(SAVEE_df, "SAVEE", SPECT_IMG)
# generate_spectrograms(RAVDESS_df, "RAVDESS", SPECT_IMG)
# generate_spectrograms(TESS_df, "TESS", SPECT_IMG)

100%|██████████| 2000/2000 [17:24<00:00,  1.91it/s]


In [None]:
list_images_savee_tess, labels = load_spectograms_dataset(SPECT_IMG + 'mfcc_savee_tess/', (30,40))

Leídas 460 espectogramas pertenecientes a angry
Leídas 460 espectogramas pertenecientes a disgust
Leídas 460 espectogramas pertenecientes a fear
Leídas 460 espectogramas pertenecientes a happy
Leídas 524 espectogramas pertenecientes a neutral
Leídas 460 espectogramas pertenecientes a sad
Leídas 460 espectogramas pertenecientes a surprise


In [None]:
array_images_savee_tess = np.array(list_images_savee_tess)
print("{} imagenes con dimension {}".format(array_images_savee_tess.shape[0], array_images_savee_tess.shape[1:]))

3284 imagenes con dimension (40, 30, 3)


## **2. DEFINICION DEL MODELO**

In [None]:
def get_model_2D_CNN(shape, classes): # Ganador
  model=Sequential()
  model.add(Conv2D(32,(4,10),input_shape=shape, activation='relu',padding='same'))
  model.add(MaxPooling2D(3,3))
  model.add(Dropout(0.2))

  model.add(Conv2D(32,(4,10),activation='relu',padding='same'))
  model.add(MaxPooling2D(3,3))
  model.add(Dropout(0.2))

  model.add(Conv2D(32,(4,10),activation='relu',padding='same'))
  model.add(MaxPooling2D(3,3))
  model.add(Dropout(0.2))

  model.add(Flatten())
  model.add(Dense(classes, activation='softmax'))
 
  return model


def plot_loss_acc(history):
  # Mostramos la grafica loss 
  fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))
  ax1.plot(history.history['loss'])
  ax1.plot(history.history['val_loss'])
  ax1.set_title('Loss')
  ax1.set(xlabel='epoch', ylabel='loss')
  ax1.legend(['train', 'test'], loc='upper right')

  # Mostramos la grafica accuracy
  ax2.plot(history.history['accuracy'])
  ax2.plot(history.history['val_accuracy'])
  ax2.set_title('Validation')
  ax2.set(xlabel='epoch', ylabel='acc')

  ax2.legend(['train', 'test'], loc='lower right')