In [1]:
import librosa
import librosa.display
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model

In [2]:
def extract_features(filename):
    """Summarise one audio file as a single fixed-length feature row.

    The clip is read with ``librosa.load`` (default arguments). Five feature
    matrices are then computed and each one is averaged across its frames
    (second axis), and the resulting vectors are concatenated in this order:
    MFCC (40 coefficients), chroma, mel spectrogram, spectral contrast and
    tonnetz (the latter computed on the harmonic component of the signal).

    Parameters
    ----------
    filename
        Path of the audio file; passed straight to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        float64 array of shape ``(1, 193)``, i.e. one sample row.

    Raises
    ------
    ValueError
        If the concatenated vector does not contain exactly 193 values.
    """
    # Expected row length: 40 MFCC + 12 chroma + 128 mel + 7 contrast
    # + 6 tonnetz, assuming librosa's default settings for those features.
    n_features = 193

    X, sample_rate = librosa.load(filename)
    stft = np.abs(librosa.stft(X))

    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    # The signal must be passed as ``y=``: librosa 0.10+ makes it keyword-only,
    # and every other feature call here already uses keywords.
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
    tonnetz = np.mean(
        librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T,
        axis=0,
    )

    feature_vector = np.hstack([mfccs, chroma, mel, contrast, tonnetz])
    if feature_vector.shape != (n_features,):
        raise ValueError(
            "expected %d features, got shape %r" % (n_features, feature_vector.shape)
        )

    # Return a float64 row matrix so the result can be used as a one-sample batch.
    return feature_vector.astype(np.float64).reshape(1, n_features)

In [3]:
def predict_sound(filepath, sound_file_path):
    """Print the two highest-scoring emotion labels for an audio file.

    The model stored at ``filepath`` is loaded on the CPU device, the audio
    file is converted to a feature row with ``extract_features`` and the
    model's scores for that row are ranked.

    Parameters
    ----------
    filepath
        Path of the saved model; passed to ``load_model``.
    sound_file_path
        Path of the audio file; passed to ``extract_features``.

    Returns
    -------
    None
        Results are printed, not returned. If the model yields no rows,
        "No prediction" is printed and the function returns early.
    """
    # Label order is assumed to match the model's output order -- TODO confirm
    # against the training code.
    emotions = ["Anger","Excitement","Fear","Joy","Relaxing","Sadness",
               "Surprise"]
    with tf.device('/cpu:0'):
        model = load_model(filepath, compile = True)

    sound = extract_features(sound_file_path)

    prediction = model.predict(sound)

    if len(prediction) == 0:
        print ("No prediction")
        # Nothing to rank; continuing would index an empty array.
        return

    scores = prediction[0]
    # Indices ordered from highest to lowest score; the stable sort breaks
    # ties in favour of the label that appears first in `emotions`.
    ranked = np.argsort(-scores, kind="stable")

    for heading, idx in zip(("Top guess: ", "2nd guess: "), ranked[:2]):
        print (heading, emotions[idx], " (",round(scores[idx],3),")")

In [4]:
# Paths handed to predict_sound: the audio clip to classify and the saved
# model file. Note that `model` is a path string here, not a loaded model;
# predict_sound loads it itself. Later cells reuse `model`.
sound = 'sounds/shopping.wav'
model = 'best_MPLEmotions_model.h5'
predict_sound(model, sound)

Top guess:  Anger  ( 0.272 )
2nd guess:  Surprise  ( 0.239 )


In [5]:
# Classify another clip, reusing the `model` path defined in an earlier cell.
sound2 = 'sounds/hospital-corridor.wav'
predict_sound(model, sound2)

Top guess:  Anger  ( 0.349 )
2nd guess:  Fear  ( 0.299 )


In [6]:
# Classify another clip, reusing the `model` path defined in an earlier cell.
sound3 = 'sounds/enviroment-hospital.wav'
predict_sound(model, sound3)

Top guess:  Anger  ( 0.296 )
2nd guess:  Surprise  ( 0.267 )


In [7]:
# Classify another clip, reusing the `model` path defined in an earlier cell.
sound4 = 'sounds/lo-fi-beat.wav'
predict_sound(model, sound4)

Top guess:  Fear  ( 0.384 )
2nd guess:  Surprise  ( 0.356 )


In [8]:
# Classify another clip, reusing the `model` path defined in an earlier cell.
sound5 = 'sounds/forest.wav'
predict_sound(model, sound5)

Top guess:  Anger  ( 0.269 )
2nd guess:  Surprise  ( 0.194 )


In [9]:
# Classify another clip, reusing the `model` path defined in an earlier cell.
sound6 = 'sounds/music_relax.wav'
predict_sound(model, sound6)

Top guess:  Joy  ( 0.461 )
2nd guess:  Relaxing  ( 0.212 )
