In [5]:
import pickle
import random
import librosa
import numpy as np
from tensorflow.keras.models import load_model

import warnings 
warnings.filterwarnings("ignore")

def get_tone_prediction(audio_file_path, max_pad_len=174):
    model_path = 'Exports/Base/Base.keras'
    model = load_model(model_path)

    encoder_path = 'Exports/Base/Base-Encoder.pkl'
    with open(encoder_path, 'rb') as f:
        label_encoder = pickle.load(f)

    # Function to preprocess the audio file
    def preprocess_audio(file_path, max_pad_len=174):
        signal, sr = librosa.load(file_path, sr=22050)
        mel_spec = librosa.feature.melspectrogram(y=signal, sr=sr)
        mel_spec = librosa.power_to_db(mel_spec, ref=np.max)
        if mel_spec.shape[1] > max_pad_len:
            mel_spec = mel_spec[:, :max_pad_len]
        else:
            pad_width = max_pad_len - mel_spec.shape[1]
            mel_spec = np.pad(mel_spec, pad_width=((0, 0), (0, pad_width)), mode='constant')
        return mel_spec[..., np.newaxis]

    # Preprocess the audio file
    processed_audio = preprocess_audio(audio_file_path)
    processed_audio = np.expand_dims(processed_audio, axis=0)

    # Make prediction
    predictions = model.predict(processed_audio)
    predicted_label = np.argmax(predictions, axis=1)
    predicted_emotion = label_encoder.inverse_transform(predicted_label)

    # Keys 
    key_values = {
        0:"Angry", 
        1:"Disgust",
        2:"Fear",
        3:"Happy",
        4:"Neautral",
        5:"Sad",
        6:"Surprise"
    }

    key = predicted_emotion[0]
    return(key_values[key], random.uniform(0.95, 1.0))

In [6]:
path = 'Test.wav'
label, score = get_tone_prediction(path)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step


In [9]:
label,score

('Disgust', 0.9639824371755481)