In [1]:
import numpy as np
import librosa
import glob
import os
import soundfile
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [2]:
emotions = {
    "01": "neutral", "02": "calm", "03": "sad", "04": "sad", "05": "angry", "06": "fearful",
    "07": "disgust", "08": "surprised"
}

needed_emotions = ["happy", "neutral", "sad", "angry"]

X = []
y = []

In [3]:
def feature_extraction(file):
    with soundfile.SoundFile(file) as sf:
        try:
            X = sf.read(dtype="float32")
            samplerate = sf.samplerate
            result = np.array([])

            mfcc = np.mean(librosa.feature.mfcc(y=X,sr=samplerate,n_mfcc=40).T,axis=0)
            result = np.hstack((result, mfcc))
            stft = np.abs(librosa.stft(X))
            chroma = np.mean(librosa.feature.chroma_stft(S=stft,sr=samplerate).T,axis=0)
            result = np.hstack((result, chroma))
            contrast = np.mean(librosa.feature.spectral_contrast(S=stft,sr=samplerate).T,axis=0)
            result = np.hstack((result, contrast))
            mel = np.mean(librosa.feature.melspectrogram(X, sr=samplerate).T, axis=0)
            result = np.hstack((result, mel))
            tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=samplerate).T, axis=0)
            result = np.hstack((result, tonnetz))
        except:
            print(file)
    return result

In [4]:
for file in glob.glob("archive/Actor_*/*.wav"):
    filename = os.path.basename(file)
    emotion = emotions[filename.split("-")[2]]
    if emotion not in needed_emotions:
        continue
    features = feature_extraction(file)
    X.append(features)
    y.append(emotion)

In [9]:
np.array(X).shape

(671, 193)

In [11]:
X_train,X_test,y_train,y_test = train_test_split(np.array(X),y,test_size = 0.33,random_state = 42)

In [13]:
model = MLPClassifier(alpha=0.01,batch_size=256,epsilon=1e-08,hidden_layer_sizes=(300,),learning_rate='adaptive',max_iter=500)


In [14]:
model.fit(X_train,y_train)

MLPClassifier(alpha=0.01, batch_size=256, hidden_layer_sizes=(300,),
              learning_rate='adaptive', max_iter=500)

In [15]:
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 79.28%
