In [11]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import librosa

In [12]:
# Class index -> emotion name, as looked up by decode_emotion().
# Position in the tuple IS the class index, so the order must not change.
labeldict = dict(enumerate((
    'Sadness',
    'Excited',
    'Happiness',
    'Anger',
    'Frustration',
    'Other',
)))

In [13]:
def decode_emotion(one_hot):
    """Translate the scores of ONE sample into its emotion label.

    Parameters
    ----------
    one_hot : array-like
        One-hot vector or class scores for a single sample. Anything that
        ``np.asarray`` accepts works; singleton axes are ignored, so
        ``(n_classes,)``, ``(1, n_classes)`` and ``(n_classes, 1)`` are all
        accepted.

    Returns
    -------
    str
        The ``labeldict`` entry of the highest-scoring class.

    Raises
    ------
    ValueError
        If the input holds more than one sample (more than one non-singleton
        axis). ``np.argmax`` would otherwise index into the flattened batch
        and return a position that is not a class index.
    KeyError
        If the winning index has no entry in ``labeldict``.
    """
    scores = np.squeeze(np.asarray(one_hot))
    if scores.ndim > 1:
        raise ValueError(
            "decode_emotion expects the scores of a single sample, "
            f"got an array of shape {np.shape(one_hot)}"
        )
    # Plain int so the dict lookup does not depend on a NumPy scalar type.
    class_index = int(np.argmax(scores))
    return labeldict[class_index]

In [14]:
def get_mel(path, sr=44100, n_mels=256):
    """Load an audio file and compute its mel spectrogram.

    Parameters
    ----------
    path : str or path-like
        Audio file to read with ``librosa.load``.
    sr : int or None, default 44100
        Sampling rate the audio is resampled to. ``None`` keeps the file's
        native rate (librosa convention).
    n_mels : int, default 256
        Number of mel bands to generate.

    Returns
    -------
    np.ndarray
        Output of ``librosa.feature.melspectrogram``; per the librosa docs
        this is a ``(n_mels, frames)`` array.

    Notes
    -----
    With the default arguments this behaves exactly like the original
    hard-coded version (``sr=44100``, ``n_mels=256``).
    """
    # Reuse the rate librosa actually loaded at, so load and spectrogram can
    # never disagree (and so sr=None works).
    data, loaded_sr = librosa.load(path, sr=sr)
    return librosa.feature.melspectrogram(y=data, sr=loaded_sr, n_mels=n_mels)

In [28]:
def preprocess_input(path):
    """Turn one audio file into a single-item batch for the model.

    Parameters
    ----------
    path : str or path-like
        Audio file, forwarded to ``get_mel``.

    Returns
    -------
    tf.Tensor
        ``tf.float16`` tensor laid out as ``(batch=1, timesteps, features)``,
        where ``features`` is the number of mel bands returned by ``get_mel``.
    """
    mel = get_mel(path)  # (features, timesteps)
    # Add the batch axis explicitly instead of reshape(1, 256, -1): a reshape
    # with a hard-coded band count can silently scramble the data if the
    # spectrogram height ever differs from 256.
    batch = np.expand_dims(mel, axis=0)  # (1, features, timesteps)
    # The model consumes time along axis 1, so swap the last two axes.
    batch = np.transpose(batch, (0, 2, 1))  # (1, timesteps, features)
    # float16 was chosen by the original author for faster operation.
    # NOTE(review): float16 tops out at 65504; confirm that un-scaled mel
    # power values stay in range and that this matches how the model was trained.
    return tf.convert_to_tensor(batch, dtype=tf.float16)

In [29]:
# Load the saved Keras model from './MEL_LSTM.h5' (path is relative to the
# current working directory). `model` is the global that predict_emotion()
# calls, so this cell must run before any prediction.
model = keras.models.load_model('./MEL_LSTM.h5')



In [35]:
def predict_emotion(path):
    """Predict, print and return the emotion label for one audio file.

    The file is preprocessed with ``preprocess_input``, passed through the
    global ``model`` in inference mode, and the output is decoded with
    ``decode_emotion``.

    Parameters
    ----------
    path : str or path-like
        Audio file to classify.

    Returns
    -------
    The label produced by ``decode_emotion`` (also printed to stdout).
    """
    features = preprocess_input(path)
    # training=False: run the forward pass in inference mode.
    emotion = decode_emotion(model(features, training=False))
    print(f"The emotion associated with the file is {emotion}")
    return emotion

In [36]:
# Run the whole pipeline on one sample clip; the path suggests it is a
# 'Sadness' recording, so that is the label we hope to see.
predict_emotion('./DATA/Sadness/Ses01F_impro02_F005.wav')

The emotion associated with the file is Sadness


'Sadness'