# Emotion Voice Detection

## Libraries

In [1]:
from glob import glob
import os

import json

import librosa
import librosa.display

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

import tensorflow as tf

import keras
from keras.utils import np_utils
from keras.models import Sequential
from keras.models import model_from_json
from keras.layers import Dense, Embedding
from keras.layers import Input, Flatten, Dropout, Activation
from keras.layers import Conv1D, MaxPooling1D, AveragePooling1D

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Data Preprocessing

### a. Import data

In [8]:
# Data folder path
file_path = '/Users/anatoli_debradke/Desktop/Speech Emotion Analysis/RAVDESS/Data/'

# Get all audio filenames from all actors.
# os.listdir returns entries in arbitrary order and also reports non-audio
# entries (for example macOS '.DS_Store'); keep only the .wav files and sort
# them so that every run walks the data folder in the same order.
file_names = sorted(name for name in os.listdir(file_path) if name.lower().endswith('.wav'))

# Legacy alias of file_names, kept so code that still refers to it keeps working
mylist = file_names

### b. Set labels

In [9]:
# Labels collected for every recognised audio file, in file_names order
label_list = []

# Two-character code read from audio_file[6:-16] -> (label for an even number
# in audio_file[18:-4], label for an odd number in audio_file[18:-4])
numeric_labels = {
    '02': ('female_calm', 'male_calm'),
    '03': ('female_happy', 'male_happy'),
    '04': ('female_sad', 'male_sad'),
    '05': ('female_angry', 'male_angry'),
    '06': ('female_fearful', 'male_fearful'),
}

# Letter code read from audio_file[2:-6] -> label (always a male label)
letter_labels = {
    'a': 'male_angry',
    'f': 'male_fearful',
    'h': 'male_happy',
    'sa': 'male_sad',
}

# Set all audio files labels
for audio_file in file_names:
    numeric_code = audio_file[6:-16]
    letter_code = audio_file[2:-6]

    if numeric_code in numeric_labels:
        # The parity of the number picks the entry: index 0 (even) or 1 (odd)
        parity = int(audio_file[18:-4]) % 2
        label_list.append(numeric_labels[numeric_code][parity])
    elif letter_code in letter_labels:
        label_list.append(letter_labels[letter_code])
    # Files matching neither table receive no label

# Build a one-column dataframe holding every collected label
df_labels = pd.DataFrame(label_list, columns=['label'])

### c. Build Features

In [11]:
# Codes of the undesired labels (NEUTRAL: (01, n), DISGUST: (07, d), SURPRISED: (08, su))
excluded_numeric_codes = ('01', '07', '08')
excluded_letter_codes = ('su', 'n', 'd')

# One feature vector per kept audio file, in file_names order.
# Rows are gathered in a list and turned into a DataFrame once, instead of
# growing a DataFrame one .loc assignment at a time.
feature_rows = []

# Build all audio files features using librosa
for audio_file in file_names:

    # Skip files carrying an undesired label
    if audio_file[6:-16] in excluded_numeric_codes or audio_file[2:-6] in excluded_letter_codes:
        continue

    # Load audio file: 2.5 s starting 0.5 s into the recording, resampled to 44.1 kHz
    X, sample_rate = librosa.load(file_path + audio_file, res_type='kaiser_fast', duration=2.5, sr=22050*2, offset=0.5)

    # Average the 13 MFCC coefficients of each frame (axis=0), giving one value per frame.
    # The sampling rate is passed as the plain number returned by librosa.load.
    feature_rows.append(np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=13), axis=0))

# One column per frame; shorter recordings leave NaN in their trailing columns
df_features = pd.DataFrame(feature_rows)

# Features and labels are paired purely by position, so both must describe the
# same files. Without this check a mismatch would be hidden by fillna(0) below
# (missing labels or features silently replaced by 0).
if len(df_features) != len(df_labels):
    raise ValueError(
        'Got {} feature rows but {} labels: the file name filters used for '
        'labels and for features do not select the same files'.format(len(df_features), len(df_labels))
    )

# Concatenate Features and Label Dataframe
df = pd.concat([df_features, df_labels], axis=1)

# Shuffle dataFrame
df = shuffle(df)

# Replace NA values (zero-pads the features of shorter recordings)
df = df.fillna(0)

## CNN Classification

### a. Build train and test dataset

In [12]:
# Separate the feature columns from the label column
feature_columns = df.drop('label', axis=1)
label_column = df['label']

# Hold out 20% of the rows as test dataset
split_parts = train_test_split(feature_columns, label_column, test_size=0.2)

# Cast every part of the split to a numpy array
X_train, X_test, y_train, y_test = (np.array(part) for part in split_parts)

### b. Encode Label

In [13]:
# Encode Label from categorical to numerical.
# The encoder is fitted ONCE, on the labels of both splits, and then only
# applied with transform(). Fitting it a second time on y_test would build a
# different label -> integer mapping whenever a class is absent from the test
# split, and would leave `lb` unable to decode predictions correctly.
lb = LabelEncoder()
lb.fit(np.concatenate((y_train, y_test)))

# Fix the one-hot width to the number of known classes so that train and test
# targets always have the same number of columns
n_classes = len(lb.classes_)
y_train = np_utils.to_categorical(lb.transform(y_train), num_classes=n_classes)
y_test = np_utils.to_categorical(lb.transform(y_test), num_classes=n_classes)

### c. Changing dimension for CNN model

In [14]:
# Append a trailing axis of size 1 to both feature arrays, as expected by the
# Conv1D input of the model
X_train, X_test = (np.expand_dims(features, axis=2) for features in (X_train, X_test))

### d. Build CNN model

In [15]:
# Linear stack of layers, declared in one go (same layers, same order)
model = Sequential([
    # Two convolutions on the raw (216, 1) input sequence
    Conv1D(256, 5, padding='same', input_shape=(216, 1)),
    Activation('relu'),
    Conv1D(128, 5, padding='same'),
    Activation('relu'),
    # Light regularisation, then shrink the sequence length by a factor of 8
    Dropout(0.1),
    MaxPooling1D(pool_size=8),
    # Two more convolutions on the pooled sequence
    Conv1D(128, 5, padding='same'),
    Activation('relu'),
    Conv1D(128, 5, padding='same'),
    Activation('relu'),
    # Classification head: 10 output units with softmax
    Flatten(),
    Dense(10),
    Activation('softmax'),
])

# Summarize CNN Model
model.summary()

# Compile CNN model
optimizer = keras.optimizers.rmsprop(lr=0.00001, decay=1e-6)
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_1 (Conv1D)            (None, 216, 256)          1536      
_________________________________________________________________
activation_1 (Activation)    (None, 216, 256)          0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 216, 128)          163968    
_________________________________________________________________
activation_2 (Activation)    (None, 216, 128)          0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 216, 128)          0         
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 27, 128)           0         
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 27, 128)           82048     
__________

### e. Train model

In [None]:
# Train the network; the test split is used as validation data, so the
# returned history also holds the per-epoch loss/accuracy on that split
cnn = model.fit(
    x=X_train,
    y=y_train,
    batch_size=16,
    epochs=700,
    validation_data=(X_test, y_test),
)

Train on 960 samples, validate on 240 samples
Epoch 1/700
Epoch 2/700
Epoch 3/700
Epoch 4/700
Epoch 5/700
Epoch 6/700
Epoch 7/700
Epoch 8/700
Epoch 9/700
Epoch 10/700
Epoch 11/700
Epoch 12/700
Epoch 13/700
Epoch 14/700
Epoch 15/700
Epoch 16/700
Epoch 17/700
Epoch 18/700
Epoch 19/700
Epoch 20/700
Epoch 21/700
Epoch 22/700
Epoch 23/700
Epoch 24/700
Epoch 25/700
Epoch 26/700
Epoch 27/700
Epoch 28/700
Epoch 29/700
Epoch 30/700
Epoch 31/700
Epoch 32/700
Epoch 33/700
Epoch 34/700
Epoch 35/700
Epoch 36/700
Epoch 37/700
Epoch 38/700
Epoch 39/700
Epoch 40/700
Epoch 41/700
Epoch 42/700
Epoch 43/700
Epoch 44/700
Epoch 45/700
Epoch 46/700
Epoch 47/700
Epoch 48/700
Epoch 49/700
Epoch 50/700
Epoch 51/700
Epoch 52/700
Epoch 53/700
Epoch 54/700
Epoch 55/700
Epoch 56/700
Epoch 57/700
Epoch 58/700
Epoch 59/700
Epoch 60/700
Epoch 61/700


Epoch 62/700
Epoch 63/700
Epoch 64/700
Epoch 65/700
Epoch 66/700
Epoch 67/700
Epoch 68/700
Epoch 69/700
Epoch 70/700
Epoch 71/700
Epoch 72/700
Epoch 73/700
Epoch 74/700
Epoch 75/700
Epoch 76/700
Epoch 77/700
Epoch 78/700
Epoch 79/700
Epoch 80/700
Epoch 81/700
Epoch 82/700
Epoch 83/700
Epoch 84/700
Epoch 85/700
Epoch 86/700
Epoch 87/700
Epoch 88/700
Epoch 89/700
Epoch 90/700
Epoch 91/700
Epoch 92/700
Epoch 93/700
Epoch 94/700
Epoch 95/700
Epoch 96/700
Epoch 97/700
Epoch 98/700
Epoch 99/700
Epoch 100/700
Epoch 101/700
Epoch 102/700
Epoch 103/700
Epoch 104/700
Epoch 105/700
Epoch 106/700
Epoch 107/700
Epoch 108/700
Epoch 109/700
Epoch 110/700
Epoch 111/700
Epoch 112/700
Epoch 113/700
192/960 [=====>........................] - ETA: 6s - loss: 1.0786 - acc: 0.6354

### f. Model Accuracy and Loss

In [None]:
plt.figure(0, figsize=(20, 20))

# One panel per metric: (subplot position, train key, test key, title, y label)
panels = [
    (221, 'loss', 'val_loss', 'model loss', 'loss'),
    (222, 'acc', 'val_acc', 'model accuracy', 'acc'),
]

# Draw the train and test curves of each metric against the epoch number
for position, train_key, test_key, panel_title, y_label in panels:
    plt.subplot(position)
    plt.plot(cnn.history[train_key], label='train')
    plt.plot(cnn.history[test_key], label='test')
    plt.title(panel_title, fontsize=16)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.legend(loc='upper left')

plt.show()

### g. Predict emotion on test dataset

In [None]:
# Loss and metric values of the model on the test dataset
score = model.evaluate(X_test, y_test, verbose=0)

# Class 'probabilities' predicted for every test sample
probabilities = model.predict(X_test, batch_size=32, verbose=0)

# Index of the most probable class (prediction) and of the one-hot bit (actual)
predicted_index = np.argmax(probabilities, axis=1)
actual_index = np.argmax(y_test, axis=1)

# Map class indexes back to their text labels
pred = lb.inverse_transform(predicted_index)
actual = lb.inverse_transform(actual_index)

# Side-by-side table of actual and predicted labels
df_pred = pd.DataFrame({'Actual': actual, 'Prediction': pred})

# Report the accuracy metric (second entry of score) as a percentage
accuracy_percent = np.round(100 * score[1], 2)
print('Accuracy Score on test dataset: {}%'.format(accuracy_percent))

### h. Test on personal audio files

In [None]:
# Audio File path
filenames = ['/Users/anatoli_debradke/Desktop/Speech Emotion Analysis/AudioData/DC/a03.wav', '/Users/anatoli_debradke/Desktop/Speech Emotion Analysis/AudioData/JK/su07.wav']

# Number of frames the model expects per sample, read from the model itself.
# Padding only up to the longest of the supplied files is not enough: if every
# file is short, the feature matrix is narrower than the model input and
# predict() rejects it.
n_frames = model.input_shape[1]

# Feature matrix, zero-initialised so that short recordings end up zero-padded
features = np.zeros((len(filenames), n_frames))

# Build all audio files features using librosa
for audio_index, audio_file in enumerate(filenames):

    # Load audio file: 2.5 s starting 0.5 s into the recording, resampled to 44.1 kHz
    signal, sample_rate = librosa.load(audio_file, res_type='kaiser_fast', duration=2.5, sr=22050*2, offset=0.5)

    # Average the 13 MFCC coefficients of each frame (axis=0), giving one value per frame.
    # The sampling rate is passed as the plain number returned by librosa.load.
    frame_means = np.mean(librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13), axis=0)

    # Copy at most n_frames values; the remaining columns stay at 0
    n_valid = min(len(frame_means), n_frames)
    features[audio_index, :n_valid] = frame_means[:n_valid]

# Features as a dataFrame (one row per file, one column per frame)
df_features = pd.DataFrame(features)

# Reshape features for CNN model
X = np.expand_dims(features, axis=2)

# Prediction
pred = model.predict(X, batch_size=1, verbose=0)

# Get the index of the max 'probability' for each file
pred = pred.argmax(axis=1)

# Reverse label encoder
pred = lb.inverse_transform(pred)

# Build dataFrame
df_pred = pd.DataFrame({'Prediction': pred}, index=filenames)
df_pred

## Save model

In [None]:
# Base name shared by the two files written below
model_name = 'Emotion_8_Voice_Detection_Model'

# Folder receiving the files (ends with a path separator)
save_path = '/Users/anatoli_debradke/Desktop/Speech Emotion Analysis/Librosa and CNN/Models/'

# Full paths of the weights file and of the architecture file
weights_file = os.path.join(save_path, model_name + '.h5')
architecture_file = os.path.join(save_path, model_name + '.json')

# Write the model's weights in HDF5 format
model.save_weights(weights_file)

# Write the model's architecture as JSON
architecture_json = model.to_json()
with open(architecture_file, 'w') as json_file:
    json_file.write(architecture_json)

## Load model

In [None]:
# Model file name
model_name = 'Emotion_8_Voice_Detection_Model'

# model path
save_path = '/Users/anatoli_debradke/Desktop/Speech Emotion Analysis/Librosa and CNN/Models/'

# Create model from its JSON description.
# The file is opened in a `with` block so it is closed even when reading or
# parsing fails.
with open(save_path + model_name + '.json', 'r') as json_file:
    model = model_from_json(json_file.read())

# Load weights into model
model.load_weights(save_path + model_name + '.h5')

# Compile the loaded model (same loss, optimizer and metric as at training time)
# so that it can be evaluated
model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.rmsprop(lr=0.00001, decay=1e-6), metrics=['accuracy'])