In [118]:
import librosa
import soundfile
import os, glob, pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [119]:
#Extract features (mfcc, chroma, mel) from a sound file
def extract_feature(file_name, mfcc, chroma, mel):
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate=sound_file.samplerate
        if chroma:
            stft=np.abs(librosa.stft(X))
        result=np.array([])
        if mfcc:
            mfccs=np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            result=np.hstack((result, mfccs))
        if chroma:
            chroma=np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
            result=np.hstack((result, chroma))
        if mel:
            mel=np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T,axis=0)
            result=np.hstack((result, mel))
    return result

In [120]:
# Emotions in the RAVDESS dataset
emotions={
  '01':'positive',#neuture
  '02':'positive',#calm
  '03':'positive',#happy
  '04':'negative',#sad
  '05':'negative',#angry
  '06':'negative',#fearful
  '07':'negative',#disgust
  '08':'surprised'
}

#Emotions to observe
observed_emotions=['positive', 'negative']

In [121]:
#Load the data and extract features for each sound file
def load_data(test_size=0.2):
    x,y=[],[]
    for file in glob.glob("Dataset/Actor_*/*.wav"):
        file_name=os.path.basename(file)
        emotion=emotions[file_name.split("-")[2]]
        if emotion not in observed_emotions:
            continue
        feature=extract_feature(file, mfcc=True, chroma=True, mel=True)
        x.append(feature)
        y.append(emotion)
    return train_test_split(np.array(x), y, test_size=test_size, random_state=9)

In [122]:
#Split the dataset
x_train,x_test,y_train,y_test=load_data(test_size=0.25)

In [123]:
x_train

array([[-4.16173218e+02,  4.12733763e-01, -5.11142921e+01, ...,
         4.48828767e-04,  2.31009049e-04,  1.15633935e-04],
       [-3.66309082e+02,  2.90105247e+01, -2.44320488e+01, ...,
         2.45177443e-03,  1.38937135e-03,  7.51937449e-04],
       [-5.72836121e+02,  3.39460983e+01, -8.68107498e-01, ...,
         4.34923917e-04,  5.24944451e-04,  7.99158122e-04],
       ...,
       [-4.27162933e+02,  4.28925247e+01, -2.82943211e+01, ...,
         1.67770206e-03,  1.24595861e-03,  8.13118357e-04],
       [-5.13774963e+02,  1.78634148e+01, -3.07672558e+01, ...,
         8.75806218e-05,  5.52895690e-05,  5.87678333e-05],
       [-6.55869934e+02,  4.76601906e+01,  1.00023632e+01, ...,
         7.86538440e-05,  4.55346744e-05,  1.32077957e-05]])

In [124]:
#Get the shape of the training and testing datasets
print((x_train.shape[0], x_test.shape[0]))

(936, 312)


In [125]:
#Get the number of features extracted
print(f'Features extracted: {x_train.shape[1]}')

Features extracted: 180


In [126]:
#Initialize the Multi Layer Perceptron Classifier
model=MLPClassifier(alpha=0.01, batch_size=256, epsilon=1e-08, hidden_layer_sizes=(300,), learning_rate='adaptive', max_iter=500)

In [127]:
#Train the model
model.fit(x_train,y_train)

In [128]:
#Predict for the test set
y_pred=model.predict(x_test)

In [129]:
y_pred

array(['positive', 'positive', 'negative', 'negative', 'positive',
       'positive', 'negative', 'positive', 'negative', 'negative',
       'negative', 'positive', 'negative', 'negative', 'negative',
       'positive', 'positive', 'positive', 'negative', 'negative',
       'positive', 'positive', 'negative', 'positive', 'negative',
       'negative', 'positive', 'negative', 'negative', 'negative',
       'negative', 'negative', 'negative', 'positive', 'negative',
       'positive', 'positive', 'negative', 'positive', 'positive',
       'positive', 'negative', 'negative', 'negative', 'negative',
       'negative', 'positive', 'negative', 'negative', 'negative',
       'positive', 'negative', 'negative', 'negative', 'negative',
       'negative', 'negative', 'negative', 'positive', 'negative',
       'negative', 'negative', 'positive', 'negative', 'negative',
       'negative', 'positive', 'negative', 'negative', 'negative',
       'positive', 'negative', 'positive', 'negative', 'negati

In [130]:
#Calculate the accuracy of our model
accuracy=accuracy_score(y_true=y_test, y_pred=y_pred)

#Print the accuracy
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 81.73%


In [103]:
from sklearn.metrics import accuracy_score, f1_score

In [104]:
f1_score(y_test, y_pred,average=None)

array([0.87741935, 0.8       ])

In [105]:
import pandas as pd
df=pd.DataFrame({'Actual': y_test, 'Predicted':y_pred})
df.head(20)

Unnamed: 0,Actual,Predicted
0,negative,positive
1,positive,positive
2,negative,negative
3,positive,negative
4,positive,positive
5,negative,positive
6,positive,positive
7,positive,positive
8,positive,negative
9,negative,negative


In [135]:
import pickle
# Writing different model files to file
with open('modelForPrediction(2e).sav', 'rb') as f:
    loaded_model = pickle.load(f)
    
#with open( 'modelForPrediction(2e).sav', 'wb') as f:
    #pickle.dump(model,f)

In [136]:
from IPython.display import Audio

# Extract features from a new sound file
new_file = "Dataset/Actor_01/03-01-05-01-01-01-01.wav"
feature = extract_feature(new_file, mfcc=True, chroma=True, mel=True)
feature = feature.reshape(1, -1)

# Predict the emotion using the loaded model
prediction = loaded_model.predict(feature)
predicted_emotion = prediction[0]

# Play the sound
audio_data, sample_rate = soundfile.read(new_file)
Audio(data=audio_data, rate=sample_rate)

In [137]:
print("Predicted Emotion:", predicted_emotion)

Predicted Emotion: negative


In [134]:
feature

array([[-5.33920715e+02,  3.48088074e+01, -3.42611313e+00,
         1.14189043e+01, -5.58042574e+00, -3.65054440e+00,
        -3.60167432e+00, -1.00888634e+01, -1.00481844e+01,
         8.57897520e-01, -6.84773779e+00,  4.12490219e-01,
        -7.28190660e+00, -6.60221934e-01, -6.04941034e+00,
        -3.64556980e+00, -3.54131985e+00, -3.71936917e+00,
        -5.49054623e+00, -3.38081503e+00, -4.41899586e+00,
        -4.87185621e+00, -2.44289207e+00, -3.26492858e+00,
        -2.55314589e+00,  9.45665777e-01, -3.05975890e+00,
         1.59906733e+00, -1.39902735e+00, -1.00879323e+00,
        -3.28679752e+00, -9.96816099e-01, -1.02184796e+00,
         7.04776356e-03, -2.75215912e+00, -2.31755280e+00,
        -2.59627962e+00, -6.06732190e-01,  2.74986339e+00,
         3.14026904e+00,  5.85309327e-01,  6.29368246e-01,
         6.49452388e-01,  6.80720568e-01,  6.68102622e-01,
         6.83693945e-01,  6.76383257e-01,  6.46300912e-01,
         6.63632929e-01,  7.25350499e-01,  6.93943977e-0