In [85]:
import librosa
import os, glob
import numpy as np
from sklearn.model_selection import train_test_split

In [86]:
def extract_feature(filename, list_of_features):
  features=[]
  audio,sample_rate=librosa.load(filename)
  for feature in list_of_features:
        if feature == 'mfcc':
            mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=45)
            mfccs_mean = np.mean(mfccs.T, axis=0)
            print(np.shape(mfccs_mean))
            features.append(mfccs_mean)

        elif feature == 'chroma':
            chroma = librosa.feature.chroma_stft(y=audio, sr=sample_rate)
            chroma_mean = np.mean(chroma.T, axis=0)
            print(np.shape(chroma_mean))
            features.append(chroma_mean)

        elif feature == 'melspectrogram':
            mel_spec = librosa.feature.melspectrogram(y=audio, sr=sample_rate, n_fft=2048, hop_length=512)
            mel_spec_mean = np.mean(mel_spec.T, axis=0)
            print(np.shape(mel_spec_mean))
            features.append(mel_spec_mean)

  return np.hstack(features)

In [87]:
# Emotions in the RAVDESS dataset
emotions={
  '01':'neutral',
  '02':'calm',
  '03':'happy',
  '04':'sad',
  '05':'angry',
  '06':'fearful',
  '07':'disgust',
  '08':'surprised'
}

In [88]:

def load_data(test_size=0.2):
    x,y=[],[]
    for file in glob.glob('./Audio_*/Actor_*/*.wav'):
        file_name=str(os.path.basename(file))
        print(file)
        feature=extract_feature(file,['mfcc', 'chroma', 'melspectrogram'])
        x.append(feature)
        z = np.zeros((8,))
        z[int(file_name.split("-")[2]) - 1] = 1
        y.append(z)
    return train_test_split(np.array(x), y, test_size=test_size, random_state=9)

In [89]:

x_train,x_test,y_train,y_test=load_data(test_size=0.25)

./Audio_Song_Actors_01-24/Actor_01/03-02-01-01-01-01-01.wav
(45,)
(12,)
(128,)
./Audio_Song_Actors_01-24/Actor_01/03-02-01-01-01-02-01.wav
(45,)
(12,)
(128,)
./Audio_Song_Actors_01-24/Actor_01/03-02-01-01-02-01-01.wav
(45,)
(12,)
(128,)
./Audio_Song_Actors_01-24/Actor_01/03-02-01-01-02-02-01.wav
(45,)
(12,)
(128,)
./Audio_Song_Actors_01-24/Actor_01/03-02-02-01-01-01-01.wav
(45,)
(12,)
(128,)
./Audio_Song_Actors_01-24/Actor_01/03-02-02-01-01-02-01.wav
(45,)
(12,)
(128,)
./Audio_Song_Actors_01-24/Actor_01/03-02-02-01-02-01-01.wav
(45,)
(12,)
(128,)
./Audio_Song_Actors_01-24/Actor_01/03-02-02-01-02-02-01.wav
(45,)
(12,)
(128,)
./Audio_Song_Actors_01-24/Actor_01/03-02-02-02-01-01-01.wav
(45,)
(12,)
(128,)
./Audio_Song_Actors_01-24/Actor_01/03-02-02-02-01-02-01.wav
(45,)
(12,)
(128,)
./Audio_Song_Actors_01-24/Actor_01/03-02-02-02-02-01-01.wav
(45,)
(12,)
(128,)
./Audio_Song_Actors_01-24/Actor_01/03-02-02-02-02-02-01.wav
(45,)
(12,)
(128,)
./Audio_Song_Actors_01-24/Actor_01/03-02-03-01-01-01

In [90]:
#Get the shape of the training and testing datasets
print((x_train.shape[0], x_test.shape[0]))

(1839, 613)


In [91]:
# Get the number of features extracted
print(f'Features extracted: {x_train.shape[1]}')

Features extracted: 185


In [92]:
y_test = np.array(y_test)
y_train = np.array(y_train)
print(x_test.shape, x_train.shape, y_test.shape, y_train.shape)

(613, 185) (1839, 185) (613, 8) (1839, 8)


In [154]:
# Initialize the Multi Layer Perceptron Classifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping


model = Sequential([
    Dense(300, activation='sigmoid', input_shape=(185,)),
    Dense(8,activation='sigmoid')
])

optimizer = Adam(learning_rate=0.001, beta_1=0.95, beta_2=0.999, epsilon=1e-08)

early_stopping = EarlyStopping(monitor='val_accuracy', 
                               patience=100, 
                               verbose=1,
                               mode='max',
                               restore_best_weights=True)

model.compile(optimizer=optimizer, 
              loss='categorical_crossentropy', 
              metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [155]:
model.fit(x_train, y_train, batch_size=256, epochs=1000, validation_split=0.20, callbacks = [early_stopping])

Epoch 1/1000
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - accuracy: 0.1018 - loss: 2.4159 - val_accuracy: 0.2065 - val_loss: 2.0206
Epoch 2/1000
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.1820 - loss: 2.0432 - val_accuracy: 0.1766 - val_loss: 2.0157
Epoch 3/1000
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.1982 - loss: 2.0410 - val_accuracy: 0.1848 - val_loss: 1.9935
Epoch 4/1000
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.2243 - loss: 1.9960 - val_accuracy: 0.2636 - val_loss: 1.9595
Epoch 5/1000
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.2785 - loss: 1.9654 - val_accuracy: 0.2772 - val_loss: 1.9087
Epoch 6/1000
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.2629 - loss: 1.9140 - val_accuracy: 0.2500 - val_loss: 1.8895
Epoch 7/1000
[1m6/6[0m [32m━━━━

<keras.src.callbacks.history.History at 0x7f1ab4554670>

In [156]:
# Predict for the test set
y_pred=model.predict(x_test)

[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


In [157]:
# Calculate the accuracy of our model
test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=0)
print(f'Test Accuracy: {test_accuracy}')

Test Accuracy: 0.7226753830909729


In [158]:
from sklearn.metrics import classification_report
print(classification_report(np.argmax(y_test, axis=1),np.argmax(y_pred, axis=1)))


              precision    recall  f1-score   support

           0       0.76      0.71      0.74        59
           1       0.75      0.78      0.77        93
           2       0.75      0.80      0.78        96
           3       0.69      0.67      0.68        87
           4       0.86      0.79      0.82       107
           5       0.65      0.67      0.66        82
           6       0.53      0.56      0.55        43
           7       0.64      0.65      0.65        46

    accuracy                           0.72       613
   macro avg       0.70      0.70      0.70       613
weighted avg       0.73      0.72      0.72       613



In [159]:
from sklearn.metrics import confusion_matrix
matrix = confusion_matrix(np.argmax(y_test, axis=1),np.argmax(y_pred, axis=1))
print (matrix)

[[42  8  1  4  0  1  2  1]
 [ 6 73  8  4  0  1  1  0]
 [ 2  5 77  2  2  2  1  5]
 [ 1  4  4 58  1 15  1  3]
 [ 0  2  3  1 84  5  8  4]
 [ 0  1  5 13  4 55  2  2]
 [ 3  2  1  0  7  4 24  2]
 [ 1  2  3  2  0  2  6 30]]
