Live Inference: A predict.py script that accepts an unseen .wav file, processes it, and
prints the predicted emotion with a confidence percentage.

In [11]:
import numpy as np
import librosa
import tensorflow as tf


In [12]:
from tensorflow.keras.models import load_model

# Location of the trained checkpoint, relative to the notebook's working
# directory. Held in a named constant so it can be changed in one place.
MODEL_PATH = "../model/ser_enhanced_best.keras"

# Load the saved Keras model once; later cells read the global `model`.
model = load_model(MODEL_PATH)
print("Model loaded successfully")


Model loaded successfully


In [13]:
# Maps a class index (0-7) to a human-readable emotion name.
# NOTE(review): the order looks like the RAVDESS emotion codes (01-08) shifted
# to zero-based indices -- confirm it matches the label encoding used in training.
emotion_labels = dict(enumerate([
    "Neutral",
    "Calm",
    "Happy",
    "Sad",
    "Angry",
    "Fearful",
    "Disgust",
    "Surprised",
]))


In [14]:
def preprocess_audio(file_path, sr=22050, n_mels=128, n_frames=128, top_db=20,
                     pad_value=0.0):
    """Convert an audio file into a fixed-size log-mel spectrogram.

    The clip is loaded and resampled, silence is trimmed from both ends, a mel
    power spectrogram is computed and converted to decibels relative to its own
    peak, and the time axis is padded or cropped to exactly ``n_frames`` columns.

    Parameters
    ----------
    file_path : str or path-like
        Audio file to read (passed straight to ``librosa.load``).
    sr : int or None, default 22050
        Target sample rate handed to ``librosa.load``.
    n_mels : int, default 128
        Number of mel bands (rows of the result).
    n_frames : int, default 128
        Number of time frames (columns of the result).
    top_db : float, default 20
        Silence threshold handed to ``librosa.effects.trim``.
    pad_value : float, default 0.0
        Value used to fill missing frames when the clip is shorter than
        ``n_frames``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_mels, n_frames)`` holding dB values.

    Raises
    ------
    ValueError
        If the file yields no audio samples, before or after trimming.
    """
    y, sr = librosa.load(file_path, sr=sr)
    if y.size == 0:
        raise ValueError(f"No audio samples could be read from {file_path!r}")

    # Trim silence
    y, _ = librosa.effects.trim(y, top_db=top_db)
    if y.size == 0:
        raise ValueError(f"No audio left in {file_path!r} after trimming silence")

    # Mel spectrogram
    mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)

    # ref=np.max scales the result so the loudest bin sits at 0 dB.
    mel_db = librosa.power_to_db(mel, ref=np.max)

    # Pad / crop the time axis to exactly n_frames columns.
    frames = mel_db.shape[1]
    if frames < n_frames:
        # NOTE(review): with ref=np.max, the default pad_value of 0.0 fills the
        # tail with peak-level frames rather than silence. The default is kept
        # so existing behavior is unchanged -- confirm what the training
        # pipeline used before switching to e.g. mel_db.min().
        mel_db = np.pad(
            mel_db,
            ((0, 0), (0, n_frames - frames)),
            constant_values=pad_value,
        )
    else:
        mel_db = mel_db[:, :n_frames]

    return mel_db


In [15]:
def predict_emotion(file_path):
    """Classify one audio file and print the top emotion with its confidence.

    The file is turned into a spectrogram by ``preprocess_audio``, reshaped into
    a single-item batch with a trailing channel axis, and passed to the
    module-level ``model``. The highest-scoring class index is looked up in
    ``emotion_labels`` and reported with its score expressed as a percentage.

    Parameters
    ----------
    file_path : str or path-like
        Audio file to classify.

    Returns
    -------
    None
        The result is printed, not returned.
    """
    features = preprocess_audio(file_path)

    # Add a leading batch axis and a trailing channel axis: (H, W) -> (1, H, W, 1)
    batch = features[np.newaxis, ..., np.newaxis]

    # One row of class scores for the single clip in the batch.
    # (presumably softmax probabilities -- confirm against the model's output layer)
    scores = model.predict(batch)[0]

    best = np.argmax(scores)
    percent = scores[best] * 100

    print("Prediction Result", "-----------------", sep="\n")
    print(f"Emotion    : {emotion_labels[best]}")
    print(f"Confidence : {percent:.2f}%")


In [16]:
# Path of the clip to classify -- point this at any .wav file you want to test.
test_wav = (
    "../data/RAVDESS/audio_speech_actors_01-24/"
    "Actor_14/03-01-08-01-02-01-14.wav"
)
predict_emotion(test_wav)



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
Prediction Result
-----------------
Emotion    : Surprised
Confidence : 97.53%
