In [1]:
import librosa
import soundfile
import os, glob, pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [2]:
#Extract features (mfcc, chroma, mel) from a sound file
def feature_extraction(file_name, mfcc, chroma, mel):
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate=sound_file.samplerate
        if chroma:
            stft=np.abs(librosa.stft(X))
        result=np.array([])
        if mfcc:
            mfccs=np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            result=np.hstack((result, mfccs))
        if chroma:
            chroma=np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
            result=np.hstack((result, chroma))
        if mel:
            mel=np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
            result=np.hstack((result, mel))
    return result

In [3]:
#Emotions in the RAVDESS dataset
emotions={
  '01':'neutral',
  '02':'calm',
  '03':'happy',
  '04':'sad',
  '05':'angry',
  '06':'fearful',
  '07':'disgust',
  '08':'surprised'
}

#Emotions to observe
observed_emotions=['calm', 'happy', 'fearful', 'disgust']

In [4]:
#Load the data and extract features for each sound file
def load_data(test_size=0.2):
    x,y=[],[]
    for file in glob.glob("C:\\Users\\Admin\\Downloads\\Actor_*\\*.wav"):
        file_name=os.path.basename(file)
        emotion=emotions[file_name.split("-")[2]]
        if emotion not in observed_emotions:
            continue
        feature=feature_extraction(file, mfcc=True, chroma=True, mel=True)
        x.append(feature)
        y.append(emotion)
    return train_test_split(np.array(x), y, test_size=test_size, random_state=9)

In [5]:
#Split the dataset
x_train,x_test,y_train,y_test=load_data(test_size=0.05)

In [6]:
#Get the shape of the training and testing datasets
print((x_train.shape[0], x_test.shape[0]))

(729, 39)


In [7]:
#DataFlair - Get the number of features extracted
print(f'Features extracted: {x_train.shape[1]}')

Features extracted: 180


In [8]:
#DataFlair - Initialize the Multi Layer Perceptron Classifier
model=MLPClassifier(alpha=0.001, batch_size=400, beta_1=0.9,beta_2=0.999, early_stopping=False, epsilon=1e-08,hidden_layer_sizes=400, learning_rate='adaptive', learning_rate_init=0.001, max_fun=15000, max_iter=800,momentum=0.9, n_iter_no_change=10,nesterovs_momentum=True, power_t=0.5, random_state=None, shuffle=True, solver='adam', tol=0.001, validation_fraction=0.1, verbose=False, warm_start=False) 

In [9]:
#Train the model
model.fit(x_train,y_train)

MLPClassifier(alpha=0.001, batch_size=400, hidden_layer_sizes=400,
              learning_rate='adaptive', max_iter=800, tol=0.001)

In [10]:
#Predict for the test set
y_pred=model.predict(x_test)

In [11]:
#Calculate the accuracy of our model
accuracy=accuracy_score(y_true=y_test, y_pred=y_pred)
#Print the accuracy
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 74.36%


In [12]:
from sklearn.metrics import accuracy_score, f1_score

In [13]:
f1_score(y_test, y_pred,average=None)

array([0.84210526, 0.76190476, 0.4       , 0.78571429])

In [14]:
import pandas as pd
df=pd.DataFrame({'Actual': y_test, 'Predicted':y_pred})
df.head(20)

Unnamed: 0,Actual,Predicted
0,happy,happy
1,calm,calm
2,happy,happy
3,happy,happy
4,disgust,fearful
5,calm,calm
6,happy,disgust
7,happy,happy
8,disgust,disgust
9,happy,happy


In [15]:
import pickle
# Writing different model files to file
with open( 'modelForPrediction1.sav', 'wb') as f:
    pickle.dump(model,f)

In [16]:
filename = 'modelForPrediction1.sav'
loaded_model = pickle.load(open(filename, 'rb')) # loading the model file from the storage

feature=feature_extraction("C:\\Users\\Admin\\Downloads\\Actor_05\\03-01-08-02-02-01-05.wav", mfcc=True, chroma=True, mel=True)

feature=feature.reshape(1,-1)

prediction=loaded_model.predict(feature)
prediction

array(['happy'], dtype='<U7')

In [17]:
accuracy=accuracy_score(y_true=y_test, y_pred=y_pred)

#DataFlair - Print the accuracy
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 74.36%


In [18]:
feature

array([[-5.69613770e+02,  4.14624329e+01, -3.71787477e+00,
         4.61011982e+00, -7.21511030e+00, -5.93163776e+00,
        -1.02282162e+01, -8.58245087e+00, -8.53327656e+00,
         3.67578125e+00, -5.23400545e+00, -2.78784609e+00,
        -2.21404529e+00, -1.86230850e+00, -4.10027266e+00,
        -4.90808392e+00, -3.82830873e-02, -4.57898974e-01,
        -4.68290567e+00, -9.59803700e-01, -3.17451310e+00,
        -2.07436070e-01,  1.95775580e+00,  3.82537508e+00,
         3.33356214e+00,  2.94928861e+00,  2.82683301e+00,
         3.32106519e+00,  4.99040079e+00,  6.25532246e+00,
         4.05016994e+00,  5.29781199e+00,  4.73517847e+00,
         3.73376727e+00,  2.18284869e+00,  4.02091551e+00,
         2.31707764e+00,  3.83285689e+00,  3.12607169e-01,
         7.00969398e-01,  6.15234733e-01,  6.83239400e-01,
         6.97215021e-01,  7.58680522e-01,  7.59205878e-01,
         7.36530840e-01,  7.53984332e-01,  7.47070909e-01,
         7.50083864e-01,  7.36196280e-01,  6.82358325e-0