In [2]:
"""
This file can be used to try a live prediction.
"""
%pip install tensorflow==2.4.1 keras==2.4.3 numpy librosa

import tensorflow as tf
from tensorflow import keras
import numpy as np
import librosa
import time

class livePredictions:
    """
    Load a saved Keras model and predict the emotion label of one audio file.

    Typical use: build the object with the model path and the audio file
    path, call ``load_model()``, then call ``makepredictions()``.
    """

    # Class index -> emotion label used to decode the model's arg-max output.
    _EMOTION_LABELS = {
        0: 'neutral',
        1: 'calm',
        2: 'happy',
        3: 'sad',
        4: 'angry',
        5: 'fearful',
        6: 'disgust',
        7: 'surprised',
    }

    def __init__(self, path, file):
        """
        Store the locations needed for a prediction.

        :param path: path to the saved model, passed to
            ``keras.models.load_model``.
        :param file: path to the audio file, passed to ``librosa.load``.
        """
        self.path = path
        self.file = file
        # Filled in by load_model(); None means the model is not loaded yet.
        self.loaded_model = None

    def load_model(self):
        """
        Load the model stored at ``self.path`` and print how long it took.

        The loaded model is kept in ``self.loaded_model``.

        :return: whatever ``self.loaded_model.summary()`` returns.
        """
        # perf_counter is a monotonic clock intended for measuring intervals.
        start_time = time.perf_counter()
        self.loaded_model = keras.models.load_model(self.path)
        end_time = time.perf_counter()
        print(f"Model loading time: {end_time - start_time:.4f} seconds")
        return self.loaded_model.summary()

    def makepredictions(self):
        """
        Extract MFCC features from ``self.file``, run the model, and print
        the predicted emotion together with per-stage timings.

        If ``load_model()`` has not been called yet, the model is loaded
        first instead of failing on a missing attribute.
        """
        if self.loaded_model is None:
            self.load_model()

        start_time = time.perf_counter()
        data, sampling_rate = librosa.load(self.file)
        data_load_time = time.perf_counter()
        print(f"Audio loading time: {data_load_time - start_time:.4f} seconds")

        # 40 MFCC coefficients per frame, averaged over all frames so the
        # clip is summarised by a single vector of length 40.
        mfccs = np.mean(
            librosa.feature.mfcc(y=data, sr=sampling_rate, n_mfcc=40).T,
            axis=0,
        )
        feature_extraction_time = time.perf_counter()
        print(f"Feature extraction time: {feature_extraction_time - data_load_time:.4f} seconds")

        # Add a trailing channel axis and a leading batch axis.
        x = np.expand_dims(mfccs, axis=1)
        x = np.expand_dims(x, axis=0)

        predictions = self.loaded_model.predict(x)
        prediction_time = time.perf_counter()
        print(f"Prediction time: {prediction_time - feature_extraction_time:.4f} seconds")

        # Index of the highest score for each item in the batch (one item).
        predicted_class = np.argmax(predictions, axis=1)
        print("Prediction is", " ", self.convertclasstoemotion(predicted_class[0]))

    @staticmethod
    def convertclasstoemotion(pred):
        """
        Convert a predicted class index into a human readable emotion label.

        :param pred: class index (plain ``int`` or NumPy integer) from 0 to 7.
        :return: the matching emotion label.
        :raises ValueError: if ``pred`` is not one of the known class indices.
        """
        try:
            return livePredictions._EMOTION_LABELS[pred]
        except (KeyError, TypeError):
            # KeyError: index outside the table; TypeError: unhashable value.
            raise ValueError(
                f"Unknown emotion class index: {pred!r} (expected an integer "
                f"from 0 to {len(livePredictions._EMOTION_LABELS) - 1})"
            ) from None

# Here you can replace path and file with the path of your model and of the file
# from the RAVDESS dataset you want to use for the prediction.
# Below, I have used a neutral file: the prediction made is neutral.

# Build the predictor, load the saved model, then classify the recording.
pred = livePredictions(
    path='SER_model.h5',
    file='Recording (10).wav',
)

pred.load_model()
pred.makepredictions()


Note: you may need to restart the kernel to use updated packages.
Model loading time: 0.2320 seconds
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_1 (Conv1D)            (None, 40, 64)            384       
_________________________________________________________________
activation_1 (Activation)    (None, 40, 64)            0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 40, 64)            0         
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 10, 64)            0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 10, 128)           41088     
_________________________________________________________________
activation_2 (Activation)    (None, 10, 128)           0         
___________________