In [25]:
import cv2
import sounddevice as sd
import numpy as np
import tensorflow as tf

# Load your pre-trained models for video and audio
video_model = tf.keras.models.load_model('EmotionDetectionImageModel.keras')
audio_model = tf.keras.models.load_model('AudioDetectionEmotionModel.keras')

# Parameters for audio
SAMPLE_RATE = 16000  # Hz
DURATION = 1  # Seconds (e.g., 1 second of audio)

def get_video_prediction(frame):
    frame = cv2.flip(frame, 1)
    # Preprocess the frame (resize, convert to grayscale, normalize, etc.)
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    resized_frame = cv2.resize(gray, (48, 48))  # Resize to match your model's input size (48x48)
    normalized_frame = resized_frame / 255.0  # Normalize pixel values

    # Reshape the frame to match model input (1, 48, 48, 1) for grayscale images
    input_frame = np.expand_dims(normalized_frame, axis=0)
    input_frame = np.expand_dims(input_frame, axis=-1)

    # Make a prediction
    prediction = model.predict(input_frame)
    emotion_label = np.argmax(prediction)  # Get the label with the highest probability
    
    # Add text to the frame
    emotions = ['Angry', 'Disgust', 'Fear', 'Happy', 'Neutral', 'Sad','Surprise']  # Update with your model's labels
    label_text = emotions[emotion_label]    
    # Display the frame with the predicted emotion
    cv2.imshow('Emotion Detection', frame)
    return prediction

def get_audio_prediction(audio_data):
    # Preprocess audio for the audio model
    audio_data_normalized = audio_data / np.max(np.abs(audio_data))  # Normalize
    audio_input = np.expand_dims(audio_data_normalized, axis=0)

    # Get audio model prediction
    audio_prediction = audio_model.predict(audio_input)
    return audio_prediction

def audio_callback(indata, frames, time, status):
    """Callback function to capture audio."""
    if status:
        print(status)
    audio_data = indata[:, 0]  # Use the first channel (mono)

    # Get prediction for the audio
    audio_prediction = get_audio_prediction(audio_data)
    print(f"Audio Prediction: {audio_prediction}")

# Open video capture (Webcam)
cap = cv2.VideoCapture(0)

# Start audio stream
with sd.InputStream(channels=1, samplerate=SAMPLE_RATE, callback=audio_callback):
    while True:
        # Capture frame-by-frame from webcam
        ret, frame = cap.read()
        if not ret:
            break

        # Show the video frame
        # Get prediction for the current video frame
        video_prediction = get_video_prediction(frame)
        print(f"Video Prediction: {video_prediction}")

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

# Release the webcam and close windows
cap.release()
cv2.destroyAllWindows()


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 182ms/step
Audio Prediction: [[0.14420727 0.1445736  0.1404335  0.14325292 0.14283031 0.1430397
  0.14166263]]
input overflow
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
Audio Prediction: [[0.14306127 0.14333262 0.14227445 0.14274043 0.1427921  0.14278394
  0.14301513]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
Audio Prediction: [[0.14405558 0.14480294 0.13981391 0.14373305 0.14274487 0.14211349
  0.14273614]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
Video Prediction: [[0.11233803 0.00079982 0.27250272 0.22015671 0.14077538 0.1873875
  0.06603975]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
Audio Prediction: [[0.14375801 0.14398934 0.14174128 0.14176406 0.14221683 0.14337641
  0.14315413]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
input overflow
Video Prediction: [[0.1176979  0.0

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step
3
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
3
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
3
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
3
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
2
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
2
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
3
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
3
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
3
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
3
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
3
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
3
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
3
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━

2.17.0
