# In [6]:
import librosa
import numpy as np
from tensorflow.keras.models import load_model
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import pickle

# Load the saved Keras model; the path is relative to the current working
# directory.
model = load_model("speech_emotion_cnn_bilstm.h5")

# Load the scaler and label encoder saved during training.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file,
# so only load artifacts that come from a trusted source (e.g. your own
# training run).
with open("label_encoder.pkl", "rb") as f:
    encoder = pickle.load(f)  # maps a predicted class index back to its label
with open("scaler.pkl", "rb") as f:
    scaler = pickle.load(f)  # read as a global by preprocess_audio()

def preprocess_audio(file_path, n_mfcc=13, max_len=130, sample_rate=16000):
    """Convert an audio file into a scaled MFCC array for ``model.predict``.

    The audio is loaded with librosa, MFCCs are extracted, the frame axis is
    zero-padded or truncated to exactly ``max_len`` frames, and the result is
    passed through the module-level ``scaler`` as one flattened row.

    Args:
        file_path: Path to the audio file to load.
        n_mfcc: Number of MFCC coefficients to extract per frame.
        max_len: Number of frames to keep; shorter clips are zero-padded at
            the end, longer clips are cut off after ``max_len`` frames.
        sample_rate: Rate the audio is resampled to on load. Passed straight
            to ``librosa.load``; the default reproduces the previously
            hard-coded 16 kHz. It should match the rate used when the scaler
            and model were fitted.

    Returns:
        A NumPy array of shape ``(1, max_len, n_mfcc)``.

    Note:
        Relies on the module-level ``scaler`` and assumes it was fitted on
        rows of ``max_len * n_mfcc`` flattened features -- TODO confirm
        against the training code; ``scaler.transform`` will reject a
        mismatching width.
    """
    signal, sr = librosa.load(file_path, sr=sample_rate)

    # Transpose so that frames are on axis 0 and coefficients on axis 1.
    mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc).T

    n_frames = mfcc.shape[0]
    if n_frames < max_len:
        # Pad only at the end of the frame axis; coefficients are untouched.
        missing = max_len - n_frames
        mfcc = np.pad(mfcc, pad_width=((0, missing), (0, 0)), mode="constant")
    else:
        mfcc = mfcc[:max_len, :]

    # The scaler works on 2D (samples, features) input, so present the whole
    # clip as a single row, then restore the (batch, frames, coeffs) layout.
    flat_features = mfcc.reshape(1, -1)
    scaled_features = scaler.transform(flat_features)
    return scaled_features.reshape(1, max_len, n_mfcc)

# Example: run the full inference pipeline on a single wav file.
file_path = "yt_audio.wav"  # change to your test wav file path
# With the default arguments this yields an array of shape (1, 130, 13).
input_data = preprocess_audio(file_path)

# np.argmax without an axis indexes into the flattened prediction array; that
# is equivalent to the class index here because only one sample is predicted.
prediction = model.predict(input_data)
predicted_index = np.argmax(prediction)
# The index is wrapped in a list and the single decoded label is taken back
# out with [0] (encoder is presumably a scikit-learn LabelEncoder -- verify).
predicted_emotion = encoder.inverse_transform([predicted_index])[0]

print(f"Predicted Emotion: {predicted_emotion}")

# Output:
# Predicted Emotion: happy
