In [None]:
from collections import Counter
import joblib
import librosa
import numpy as np
import speech_recognition as sr
from sklearn.preprocessing import StandardScaler
import re

In [None]:
log_model = joblib.load('logistic_text_emotion.pkl')
xgb_model = joblib.load('xgb_speech_emotion.pkl')

vectorizer = joblib.load('tfidf_vectorizer.pkl')
scaler = joblib.load('scaler.pkl')
label_encoder_text = joblib.load('label_encoder_logistic.pkl')
label_encoder_speech = joblib.load('label_encoder.pkl')

In [3]:
def clean_text(text):
    text = text.lower()
    text = re.sub(r'\W', ' ', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text

In [4]:
def extract_features(file_path):
    y, sr = librosa.load(file_path, duration=5)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    return np.mean(mfcc.T, axis=0)  # Compute mean MFCC features


In [5]:
def speech_to_text(audio_path):
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_path) as source:
        audio_data = recognizer.record(source)
    try:
        text = recognizer.recognize_google(audio_data)
        return text
    except sr.UnknownValueError:
        return None  # Return None if speech is not recognized
    except sr.RequestError:
        return None  # Return None if service is unavailable

In [6]:
def get_emotion_probabilities(text_emotion, speech_emotion):
    emotions = [text_emotion, speech_emotion]
    counts = Counter(emotions)
    total = sum(counts.values())

    prob_text = (counts[text_emotion] / total) * 100
    prob_speech = (counts[speech_emotion] / total) * 100

    return f"{text_emotion}: {prob_text:.1f}%, {speech_emotion}: {prob_speech:.1f}%"


In [None]:
def predict_emotion(audio_path):
    #  Speech-to-Text
    text = speech_to_text(audio_path)
    
    #  Predict Emotion from Text (KNN)
    if text:
        text_cleaned = clean_text(text)
        text_vectorized = vectorizer.transform([text_cleaned]).toarray()
        text_vectorized = scaler.transform(text_vectorized)
        text_emotion_index = log_model.predict(text_vectorized)[0]
        text_emotion = label_encoder_text.inverse_transform([text_emotion_index])[0]
    else:
        text_emotion = "Unknown"  # Handle case when speech-to-text fails

    #  Predict Emotion from Speech (XGBoost)
    speech_features = extract_features(audio_path).reshape(1, -1)
    speech_emotion_index = xgb_model.predict(speech_features)[0]
    speech_emotion = label_encoder_speech.inverse_transform([speech_emotion_index])[0]

    #  Combine Results with Probability Calculation
    if text_emotion == speech_emotion:
        final_emotion = text_emotion  # If both models agree
    else:
        final_emotion = get_emotion_probabilities(text_emotion, speech_emotion)

    print(f"\ Predicted Text Emotion: {text_emotion}")
    print(f"🎤 Predicted Speech Emotion: {speech_emotion}")
    print(f"✅ Final Emotion: {final_emotion}")

    return final_emotion

In [None]:
audio_path = r"D:\text_emotion - Copy - Copy\text_emotion\archive (1)\Actor_11\03-01-08-02-01-02-11.wav" # Replace with actual file path
predict_emotion(audio_path)


 Predicted Text Emotion: Excited
Predicted Speech Emotion: calm
 Final Emotion: Mixed Emotion: Excited (50%) & calm (50%)


'Mixed Emotion: Excited (50%) & calm (50%)'

In [23]:
from collections import Counter

# Example emotion probabilities (you can tweak based on confidence scores)
def get_emotion_probabilities(text_emotion, speech_emotion):
    emotions = [text_emotion, speech_emotion]
    counts = Counter(emotions)
    total = sum(counts.values())
    
    prob_text = (counts[text_emotion] / total) * 100
    prob_speech = (counts[speech_emotion] / total) * 100

    return f"{text_emotion}: {prob_text:.1f}%, {speech_emotion}: {prob_speech:.1f}%"

# Modify the `predict_emotion` function to use this
if text_emotion == speech_emotion:
    final_emotion = text_emotion
else:
    final_emotion = get_emotion_probabilities(text_emotion, speech_emotion)


NameError: name 'text_emotion' is not defined