In [1]:
import librosa
import soundfile,time
import os,glob,pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [2]:
## Open the sound file with soundfile.SoundFile using with-as so it’s automatically closed once we’re done.
## Read from it and call it X. Also, get the sample rate. If chroma is True, get the Short-Time Fourier Transform of X.

def extract_feature(file_name, mfcc, chroma, mel):
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate=sound_file.samplerate
        if chroma:
            stft=np.abs(librosa.stft(X))
        result=np.array([])
        if mfcc:
            mfccs=np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            result=np.hstack((result, mfccs))
        if chroma:
            chroma=np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
            result=np.hstack((result, chroma))
        if mel:
            mel=np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
            result=np.hstack((result, mel))
    return result

In [3]:
# Here we define a dictionary to hold numbers and the emotions available in the RAVDESS dataset,
# and a list to hold those we want- calm, happy, fearful, disgust.

emotions={
  '01':'neutral',
  '02':'calm',
  '03':'happy',
  '04':'sad',
  '05':'angry',
  '06':'fearful',
  '07':'disgust',
  '08':'surprised'
}

# Emotions to observe
observed_emotions=['calm','happy','fearful','disgust']

In [4]:
#let’s load the data with a function load_data. x and y are empty lists; 
# we’ll use the glob() function from the glob module to get all the pathnames for the sound files in our dataset.

# Using our emotions dictionary, this number is turned into an emotion, and our function checks whether this
# emotion is in our list of observed_emotions; if not, it continues to the next file.
# It makes a call to extract_feature and stores what is returned in ‘feature’.
# Then, it appends the feature to x and the emotion to y. So, the list x holds the features and y holds the emotions.
# We call the function train_test_split with these, the test size, and a random state value, and return that.

def load_data(test_size=0.2):
    x,y=[],[]
    for file in glob.glob("/home/rebelroar/JupyterCode/Speech-Emotion-Recognition/ravdess data/Actor_*//*.wav"):
        file_name=os.path.basename(file)
        emotion=emotions[file_name.split("-")[2]]
        if emotion not in observed_emotions:
            continue
        feature=extract_feature(file, mfcc=True, chroma=True, mel=True)
        x.append(feature)
        y.append(emotion)
    return train_test_split(np.array(x), y, test_size=test_size, random_state=9)


In [5]:
file = "/home/rebelroar/JupyterCode/Speech-Emotion-Recognition/ravdess data/Actor_19/03-01-05-02-02-02-19.wav"
feature = extract_feature(file, mfcc=True, chroma=True,mel=True)

In [6]:
# Time to split the dataset into training and testing sets! 
# Let’s keep 25% data of everything and use the load_data function for this.
# Here we also print the dataset

x_train, x_test, y_train, y_test = load_data(test_size=0.25)
print(x_train)
print(x_test)
print(y_train)
print(y_test)

[[-6.41742493e+02  3.81749878e+01 -8.41347885e+00 ...  3.26658337e-05
   2.97957540e-05  2.17277611e-05]
 [-4.75023102e+02  5.54611511e+01 -2.26248856e+01 ...  9.70487075e-04
   7.80348724e-04  3.80643847e-04]
 [-5.62096863e+02  5.64227562e+01 -5.83460474e+00 ...  9.64779974e-05
   5.62684618e-05  3.77878459e-05]
 ...
 [-6.40954346e+02  5.70275040e+01 -3.73365498e+00 ...  4.05219835e-05
   2.88846350e-05  1.43326179e-05]
 [-5.92325684e+02  3.48012199e+01 -1.67392044e+01 ...  5.40780129e-05
   3.11042058e-05  2.10854214e-05]
 [-5.63921814e+02  5.21708450e+01  1.30012579e+01 ...  1.91435160e-04
   1.94719309e-04  7.13025729e-05]]
[[-5.56770630e+02  3.49958649e+01 -1.21606884e+01 ...  1.56850656e-04
   9.86818704e-05  6.10335883e-05]
 [-6.01231934e+02  4.61873779e+01 -1.52429905e+01 ...  1.04361658e-04
   6.08918417e-05  8.39796194e-05]
 [-5.18958252e+02  4.76211243e+01 -6.14259052e+00 ...  6.27887639e-05
   3.10710166e-05  2.06187524e-05]
 ...
 [-6.27685303e+02  6.59878387e+01  9.1510276

In [7]:
# let’s initialize an MLPClassifier. This is a Multi-layer Perceptron Classifier;
# it optimizes the log-loss function using LBFGS or stochastic gradient descent.
# Unlike SVM or Naive Bayes, the MLPClassifier has an internal neural network for the purpose of classification.

model = MLPClassifier(alpha=0.01, batch_size=256, epsilon=1e-08, hidden_layer_sizes=(300,),learning_rate='adaptive',max_iter=500)

In [8]:
# Train the model

model.fit(x_train, y_train)

MLPClassifier(alpha=0.01, batch_size=256, hidden_layer_sizes=(300,),
              learning_rate='adaptive', max_iter=500)

In [9]:
# Here we predict the values of test set.This gives us y_pred

y_pred = model.predict(x_test)
y_pre = model.predict([feature])

In [10]:
# Printing the the calculated value

print(y_pre)

['fearful']


In [14]:
# Here we are calculating the accuracy of our predicted values and printing the accuracy of our system

accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 71.88%


In [11]:
time.sleep(2)

In [12]:
# here after getting the predicted value we open the music video in YouTube, Here we add all different emotion 
# type video link of YouTube and wichever type of emotion we get it automatically plays it

import webbrowser

if y_pre[0]=='calm':
    webbrowser.open('https://www.youtube.com/watch?v=1ZYbU82GVz4')
elif y_pre[0]=='happy':
    webbrowser.open('https://www.youtube.com/watch?v=1W9YCSLKf-0')
elif y_pre[0]=='neutral':
    webbrowser.open('https://www.youtube.com/watch?v=_wX1C-uVvgk')
elif y_pre[0]=='sad':
    webbrwset.open('https://www.youtube.com/watch?v=UKyb_3gBmj4')
elif y_pre[0]=='angry':
    webbrwser.open('https://www.youtube.com/watch?v=7PSS1i-mgFI')
elif y_pre[0]=='fearful':
    webbrowser.open('https://www.youtube.com/watch?v=ziQ9GURNrUg')
elif y_pre[0]=='disgust':
    webbrowser.open('https://www.youtube.com/watch?v=k-bbxd9-4mU')
else:
    webbrowser.open('https://www.youtube.com/watch?v=A-sfd1J8yX4')