In [None]:
import librosa
import numpy as np
import sounddevice as sd
import tensorflow as tf
from keras.models import load_model
import pickle
import scipy.io.wavfile as wavfile

# Locations of the trained Keras model and the pickled label encoder.
# Both are relative paths, so they resolve against the working directory.
MODEL_PATH = "tess_emotion_model.h5"
ENCODER_PATH = "label_encoder.pkl"

# Loaded once at module level; predict_emotion() reads this global.
model = load_model(MODEL_PATH)

# Load the label encoder that predict_emotion() uses to turn a predicted
# class index back into a label.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load an encoder file from a source you trust.
with open(ENCODER_PATH, "rb") as f:
    le = pickle.load(f)

def record_audio(duration=10, fs=44100, filename="live_audio.wav"):
    """Record mono audio from the default input device and save it as a WAV.

    Args:
        duration: Length of the recording in seconds.
        fs: Sampling rate in Hz, used for both capture and the WAV header.
        filename: Path of the WAV file to write.

    Returns:
        ``filename``, so the result can be passed straight to a loader.
    """
    print("🎙️ Speak Now...")

    # Total number of frames to capture for the requested duration.
    frame_count = int(duration * fs)
    captured = sd.rec(frame_count, samplerate=fs, channels=1)

    # sd.rec is started above; wait for it to finish before using the buffer.
    sd.wait()

    wavfile.write(filename, fs, captured)
    print("✅ Recording Done.")
    return filename

def extract_features(file_path):
    """Compute a fixed-width MFCC matrix for an audio file.

    The file is loaded at 16 kHz and 40 MFCC coefficients are computed. The
    second axis of the result is then forced to exactly 174 columns:
    shorter matrices are zero-padded on the right, longer ones are cut off.

    Args:
        file_path: Path to the audio file to analyse.

    Returns:
        The MFCC array with its second axis of length 174.
    """
    target_width = 174

    signal, rate = librosa.load(file_path, sr=16000)
    coeffs = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=40)
    width = coeffs.shape[1]

    # Long enough already: keep only the leading columns.
    if width >= target_width:
        return coeffs[:, :target_width]

    # Too short: append zero columns on the right up to the target width.
    missing = target_width - width
    return np.pad(coeffs, ((0, 0), (0, missing)), mode='constant')


def predict_emotion(audio_path):
    """Predict, print and return the emotion label for an audio file.

    The clip is loaded at 16 kHz and summarised as the time-average of 40
    MFCC coefficients. That vector is fed to the module-level ``model`` as a
    single sample, and the highest-scoring class index is mapped back to its
    label with the module-level encoder ``le``.

    Args:
        audio_path: Path to the audio file to classify.

    Returns:
        The predicted emotion label (the same value that is printed).
    """
    n_mfcc = 40
    y, sr = librosa.load(audio_path, sr=16000)

    # Extract MFCC features and average over the time axis -> shape (40,)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    mfcc_mean = np.mean(mfcc, axis=1)

    # Present the vector as ONE sample of shape (1, 40, 1). The batch axis
    # has to be axis 0: inserting it at axis 1 gives (40, 1, 1), which is
    # read as 40 separate one-value samples instead of one clip.
    # NOTE(review): assumes the model was trained on (40, 1) inputs — confirm.
    features = mfcc_mean.reshape(1, n_mfcc, 1)

    # Predict; the output has one row per sample, so read our single row.
    prediction = model.predict(features)
    predicted_class = int(np.argmax(prediction[0]))
    predicted_emotion = le.inverse_transform([predicted_class])[0]

    print("Predicted Emotion:", predicted_emotion)
    return predicted_emotion


# Run the system: record a clip from the microphone, then classify it
if __name__ == "__main__":
    audio_path = record_audio(duration=5)  # record for 5 seconds
    predict_emotion(audio_path)


🎙️ Speak Now...
✅ Recording Done.
Predicted Emotion: disgust
