In [1]:
import librosa
import os
import pandas
import soundfile
import numpy as np
import pandas as pd
from IPython.display import clear_output
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

In [2]:
# pass this function a file and it will return a list of features.
def get_features(file):
    with soundfile.SoundFile(file) as sound_file:
        result = np.array([])
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate
        stft = np.abs(librosa.stft(X))
        
        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
        result = np.hstack((result, mfccs))
        
        chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
        result = np.hstack((result, chroma))
        
        mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
        result = np.hstack((result, mel))
        
    return result

In [3]:
emotions = {
    "01": "neutral",
    "02": "calm",
    "03": "happy",
    "04": "sad",
    "05": "angry",
    "06": "fearful",
    "07": "disgust",
    "08": "surprised"
}
used_emotions = {
    "03": "happy",
    "05": "angry",
    "04": "sad"
}

In [4]:
directory = "data_16khz"
X,y = [],[]
total = 0
for dir,sub_dir,files in os.walk(directory):
    for file in files:
        emotion = file.split("-")[2]
        if emotion not in used_emotions: continue
        total += 1
        print("total files added :",total)
        url = dir+"\\"+file
        y.append(emotions[emotion])
        X.append(get_features(url))
        clear_output(wait=True)

total files added : 576


In [5]:
X_train,X_test,y_train,y_test = train_test_split(X,y)

In [17]:
model = MLPClassifier(hidden_layer_sizes=(175,), learning_rate='adaptive', max_iter=1000,epsilon= 1e-08,alpha = .01)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy of the Recognizer is: {:.1f}%".format(accuracy*100))

Accuracy of the Recognizer is: 71.5%


In [7]:
confusion_matrix(y_pred,y_test)

array([[38,  6,  6],
       [ 7, 38, 11],
       [ 5,  5, 28]], dtype=int64)