In [134]:
import librosa
import soundfile
import os, glob, pickle
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Dropout, BatchNormalization
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from tabulate import tabulate
from sklearn.metrics import accuracy_score


In [169]:
def extract_feature(file_name, mfcc, chroma, mel):
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate=sound_file.samplerate
        if chroma:
            stft=np.abs(librosa.stft(X))
            x=np.array([])
        if mfcc:
            mfccs=np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            x=np.hstack((x, mfccs))
        if chroma:
            chroma=np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
            x=np.hstack((x, chroma))
        if mel:
            mel=np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T,axis=0)
            x=np.hstack((x, mel))
        return x

In [170]:
emotions={
  '01':'neutral',
  '02':'calm',
  '03':'happy',
  '04':'sad',
  '05':'angry',
  '06':'fearful',
  '07':'disgust',
  '08':'surprised'
}
observed_emotions=['calm', 'happy', 'fearful', 'disgust' , 'neutral']

In [171]:
def load_data(test_size=0.2):
    x,y=[],[]
    for file in glob.glob("C:\\Users\\falco\\Desktop\\SER\\Actor_*\\*.wav"):
        file_name=os.path.basename(file)
        emotion=emotions[file_name.split("-")[2]]
        if emotion not in observed_emotions:
            continue
        feature=extract_feature(file, mfcc=True, chroma=True, mel=True)
        x.append(feature)
        y.append(emotion)
    return train_test_split(np.array(x), y, test_size=test_size, random_state=9)

In [172]:
x_train,x_test,y_train,y_test=load_data(test_size=0.25)

In [173]:
print((x_train.shape[0], x_test.shape[0]))

(648, 216)


In [174]:
print(f'Features extracted: {x_train.shape[1]}')

Features extracted: 180


In [179]:
model=MLPClassifier(alpha=0.01, batch_size=256, epsilon=1e-08, hidden_layer_sizes=(300,), learning_rate='adaptive', max_iter=500)

In [180]:
model.fit(x_train,y_train)

In [181]:
y_pred=model.predict(x_test)

In [182]:
accuracy=accuracy_score(y_true=y_test, y_pred=y_pred)

In [160]:
df = pd.DataFrame(y_test, y_pred)
pd.set_option('display.max_rows', None)
df = df.style.set_caption('Comparison Table')
display(df)
                   

Unnamed: 0,0
happy,fearful
calm,calm
calm,calm
disgust,disgust
fearful,fearful
happy,happy
disgust,disgust
neutral,neutral
calm,neutral
fearful,happy


In [183]:
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 64.35%


In [131]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

        calm       0.73      0.80      0.76        50
     disgust       0.66      0.85      0.74        52
     fearful       0.62      0.79      0.69        43
       happy       0.84      0.32      0.46        50
     neutral       0.45      0.43      0.44        21

    accuracy                           0.66       216
   macro avg       0.66      0.64      0.62       216
weighted avg       0.69      0.66      0.64       216

