
[Dataset](https://drive.google.com/file/d/1wWsrN2Ep7x6lWqOXfr4rpKGYrJhWc8z7/view)

In [2]:
import os
Root = 'D:\\login\\templates\\dataset\\dataset10'
os.chdir(Root)

FileNotFoundError: [WinError 3] The system cannot find the path specified: 'D:\\login\\templates\\dataset\\dataset10'

In [3]:
ls

 Volume in drive C is Windows
 Volume Serial Number is 0695-0DCC

 Directory of c:\Users\HP\OneDrive\Desktop\jhanvi-deeksha

29-04-2023  13:17    <DIR>          .
29-04-2023  01:11    <DIR>          ..
20-04-2023  21:10    <DIR>          Actor_01
29-04-2023  13:43            25,028 Copy of Speech_Emotion_Recognition_with_librosa.ipynb
29-04-2023  13:20    <DIR>          front-end
29-04-2023  13:04         1,337,044 modelForPrediction1.sav
               2 File(s)      1,362,072 bytes
               4 Dir(s)  160,986,042,368 bytes free


In [5]:
import librosa
import soundfile
import os, glob, pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [6]:
#Extract features (mfcc, chroma, mel) from a sound file
def extract_feature(file_name, mfcc, chroma, mel):
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate=sound_file.samplerate
        if chroma:
            stft=np.abs(librosa.stft(X))
        result=np.array([])
        if mfcc:
            mfccs=np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            result=np.hstack((result, mfccs))
        if chroma:
            chroma=np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
            result=np.hstack((result, chroma))
        if mel:
            mel=np.mean(librosa.feature.melspectrogram(y=X,sr=sample_rate).T,axis=0)
            result=np.hstack((result, mel))
    return result

In [7]:
# Emotions in the RAVDESS dataset
emotions={
  '01':'neutral',
  '02':'calm',
  '03':'happy',
  '04':'sad',
  '05':'angry',
  '06':'fearful',
  '07':'disgust',
  '08':'surprised'
}

#Emotions to observe
observed_emotions=['calm', 'happy', 'fearful', 'disgust']

In [8]:
#Load the data and extract features for each sound file
def load_data(test_size=0.25):
    x,y=[],[]
    for file in glob.glob("C:\\Users\\HP\\OneDrive\\Desktop\\jhanvi-deeksha\\Actor_01\\*.wav"):
        file_name=os.path.basename(file)
        emotion=emotions[file_name.split("-")[2]]
        if emotion not in observed_emotions:
            continue
        feature=extract_feature(file, mfcc=True, chroma=True, mel=True)
        x.append(feature)
        y.append(emotion)
    return train_test_split(np.array(x), y, test_size=test_size,random_state=9 )

In [9]:
#Split the dataset
x_train,x_test,y_train,y_test=load_data(test_size=0.25)

In [10]:
x_train

array([[-5.85305725e+02,  5.46925201e+01,  1.27419591e+00, ...,
         1.45659869e-05,  1.03847588e-05,  6.76315585e-06],
       [-4.14999451e+02,  3.99856758e+01, -1.88110352e+01, ...,
         4.83325275e-04,  4.66332975e-04,  2.68979260e-04],
       [-6.10676453e+02,  5.80515594e+01,  8.02511787e+00, ...,
         3.93370210e-05,  2.79050400e-05,  3.01324872e-05],
       ...,
       [-3.96463776e+02,  3.79583740e+01, -2.22718010e+01, ...,
         2.65736540e-04,  1.81186551e-04,  1.35726747e-04],
       [-4.81413239e+02,  4.03593674e+01, -1.13141022e+01, ...,
         1.56979333e-03,  8.16480839e-04,  3.50297749e-04],
       [-5.11908630e+02,  4.25309029e+01,  6.27620649e+00, ...,
         5.96258964e-04,  3.57012264e-04,  2.65360723e-04]])

In [11]:
#Get the shape of the training and testing datasets
print((x_train.shape[0], x_test.shape[0]))

(24, 8)


In [12]:
#Get the number of features extracted
print(f'Features extracted: {x_train.shape[1]}')

Features extracted: 180


In [13]:
#Initialize the Multi Layer Perceptron Classifier
model=MLPClassifier(alpha=0.01, batch_size=256, epsilon=1e-08, hidden_layer_sizes=(300,), learning_rate='adaptive', max_iter=500)

In [14]:
#Train the model
model.fit(x_train,y_train)



In [15]:
#Predict for the test set
y_pred=model.predict(x_test)

In [16]:
y_pred

array(['disgust', 'disgust', 'happy', 'fearful', 'disgust', 'disgust',
       'disgust', 'disgust'], dtype='<U7')

In [17]:
#Calculate the accuracy of our model
accuracy=accuracy_score(y_true=y_test, y_pred=y_pred)

#Print the accuracy
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 12.50%


In [18]:
from sklearn.metrics import accuracy_score, f1_score

In [19]:
f1_score(y_test, y_pred,average=None)

array([0. , 0. , 0. , 0.5])

In [20]:
import pandas as pd
df=pd.DataFrame({'Actual': y_test, 'Predicted':y_pred})
df.head(20)

Unnamed: 0,Actual,Predicted
0,calm,disgust
1,fearful,disgust
2,happy,happy
3,happy,fearful
4,happy,disgust
5,calm,disgust
6,calm,disgust
7,calm,disgust


In [21]:
import pickle
# Writing different model files to file
with open( 'modelForPrediction1.sav', 'wb') as f:
    pickle.dump(model,f)

In [22]:
filename = 'modelForPrediction1.sav'
loaded_model = pickle.load(open(filename, 'rb')) # loading the model file from the storage

feature=extract_feature("C:/Users/HP/OneDrive/Desktop/jhanvi-deeksha/Actor_01/03-01-02-02-01-02-01.wav", mfcc=True, chroma=True, mel=True)

feature=feature.reshape(1,-1)

prediction=loaded_model.predict(feature)
prediction

array(['disgust'], dtype='<U7')

In [23]:
feature

array([[-6.78538635e+02,  5.96855469e+01, -2.25318742e+00,
         1.36124058e+01,  1.60571754e+00, -2.37622237e+00,
        -8.63808250e+00, -8.15035152e+00, -8.38708496e+00,
         2.76587820e+00, -2.51134038e+00, -1.73484731e+00,
        -5.50506020e+00,  9.69749570e-01, -4.34499216e+00,
        -3.39112020e+00, -2.00774050e+00,  3.42371225e-01,
        -6.90993166e+00, -9.32045698e-01, -2.41020441e+00,
        -4.60052967e+00, -1.75189769e+00, -3.77097225e+00,
        -2.76861787e+00, -1.04434836e+00, -2.46554494e+00,
        -2.59407592e+00, -4.46165848e+00, -3.40189171e+00,
        -4.75956917e+00, -4.06750774e+00, -1.68034029e+00,
        -1.96667528e+00, -4.10416031e+00, -4.46409798e+00,
        -3.78234339e+00, -2.47336769e+00, -2.87224770e+00,
        -2.67926574e+00,  6.44428372e-01,  6.60105705e-01,
         6.94096506e-01,  7.18490064e-01,  7.08668411e-01,
         7.13441789e-01,  7.42585599e-01,  7.66007304e-01,
         7.29274690e-01,  6.99278772e-01,  6.93761230e-0