***Import Libraries and Modules***

In [8]:
import librosa
import soundfile
import os, glob
import numpy as np

***Extract feature mfcc from a sound file***

In [9]:
def extract_feature(file_name, mfcc):
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate=sound_file.samplerate
        result=np.array([])
        if mfcc:
            mfccs=np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            result=np.hstack((result, mfccs))
    return result

***Emotions in the RAVDESS dataset and Emotions to observe***

In [10]:
emotions={
  '01':'neutral',
  '02':'calm',
  '03':'happy',
  '04':'sad',
  '05':'angry',
  '06':'fearful',
  '07':'disgust',
  '08':'surprised'
}
observed_emotions=['calm', 'happy', 'fearful', 'disgust']

In [11]:
from sklearn.model_selection import train_test_split

***Load the data and extract features for each sound file***

In [12]:
def load_data(test_size=0.1):
    x,y=[],[]
    for file in glob.glob("D:\Coderscave projects\Speech Emotion Recognition\speech-emotion-recognition-ravdess-data\Actor_*\\*.wav"):
        file_name=os.path.basename(file)
        emotion=emotions[file_name.split("-")[2]]
        if emotion not in observed_emotions:
            continue
        feature=extract_feature(file, mfcc=True)
        x.append(feature)
        y.append(emotion)
    return train_test_split(np.array(x), y, test_size=test_size, random_state=9)

***Split the dataset***

In [13]:
x_train,x_test,y_train,y_test=load_data(test_size=0.1)

***Get the shape of the training and testing datasets***

In [14]:
x_train = np.asarray(x_train)
y_train = np.asarray(y_train)
x_test = np.asarray(x_test)
y_test = np.asarray(y_test)

x_train.shape, y_train.shape, x_test.shape, y_test.shape

((691, 40), (691,), (77, 40), (77,))

In [15]:
from sklearn.neural_network import MLPClassifier

***Initialize the Multi Layer Perceptron Classifier***

In [16]:
model=MLPClassifier(alpha=0.01, batch_size=256, epsilon=1e-08, hidden_layer_sizes=(300,), learning_rate='adaptive', max_iter=400)

***Train the model***

In [17]:
model.fit(x_train,y_train)

***Predict for the test set***

In [18]:
y_pred=model.predict(x_test)

In [19]:
from sklearn.metrics import accuracy_score

***Calculate the accuracy of our model***

In [20]:
accuracy=accuracy_score(y_true=y_test, y_pred=y_pred)

***Print the accuracy***

In [21]:
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 90.91%


In [22]:
from sklearn.metrics import classification_report

In [23]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

        calm       0.91      0.95      0.93        21
     disgust       0.89      0.81      0.85        21
     fearful       0.93      0.93      0.93        15
       happy       0.90      0.95      0.93        20

    accuracy                           0.91        77
   macro avg       0.91      0.91      0.91        77
weighted avg       0.91      0.91      0.91        77



In [24]:
from sklearn.metrics import confusion_matrix

In [25]:
matrix = confusion_matrix(y_test,y_pred)
print (matrix)

[[20  1  0  0]
 [ 1 17  1  2]
 [ 1  0 14  0]
 [ 0  1  0 19]]
