In [15]:
import librosa
import numpy as np
from tensorflow.keras.models import load_model
from sklearn.preprocessing import LabelEncoder
import pandas as pd

# Restore the trained network from its HDF5 checkpoint on disk.
model = load_model("speech_emotion_cnn_bilstm.h5")

# The label encoder is not read from disk: it is refitted here from the
# emotion column of the dataset CSV, after dropping rows labelled 'unknown'.
# NOTE(review): this reproduces the training-time index -> label mapping only
# if training fitted its encoder on this same set of labels — confirm.
df = pd.read_csv("ravdess_all_actors_full_mfcc.csv")
df = df.loc[df['emotion'] != 'unknown'].reset_index(drop=True)
labels = df['emotion'].unique()
encoder = LabelEncoder().fit(labels)

def preprocess_audio(file_path, n_mfcc=13, max_len=130, sample_rate=16000):
    """Turn an audio file into a fixed-length MFCC array with a batch axis.

    The audio is loaded with librosa at ``sample_rate``, converted to MFCCs,
    transposed so that frames run along the first axis, and then zero-padded
    or truncated to exactly ``max_len`` frames.

    Args:
        file_path: Path of the audio file to load.
        n_mfcc: Number of MFCC coefficients computed per frame.
        max_len: Number of frames in the returned array. Shorter clips are
            zero-padded at the end; longer clips keep their first
            ``max_len`` frames.
        sample_rate: Rate the audio is resampled to on load. Defaults to
            16000, the value that used to be hard-coded.

    Returns:
        The MFCC frames with a leading batch axis of size 1, i.e.
        ``(1, max_len, n_mfcc)`` for the mono signal librosa loads by default.
    """
    signal, sr = librosa.load(file_path, sr=sample_rate)
    # Use the rate librosa reports back, so the MFCCs are computed at the rate
    # the signal actually has after loading.
    frames = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc).T

    missing = max_len - frames.shape[0]
    if missing > 0:
        # Pad only at the end of the time axis; coefficients are untouched.
        frames = np.pad(frames, pad_width=((0, missing), (0, 0)), mode='constant')
    else:
        frames = frames[:max_len, :]

    # Add the batch dimension.
    return np.expand_dims(frames, axis=0)

# Example run on a single wav file; point file_path at the clip to classify.
file_path = "yt_audio.wav"  # change to your test wav file path
input_data = preprocess_audio(file_path)

# Score the clip, take the position of the highest value in the model output,
# and map that index back to its emotion name through the encoder.
prediction = model.predict(input_data)
predicted_index = np.argmax(prediction)
(predicted_emotion,) = encoder.inverse_transform([predicted_index])

print(f"Predicted Emotion: {predicted_emotion}")


Predicted Emotion: angry
