# In [None]:
import cv2
import sounddevice as sd
import numpy as np
import tensorflow as tf

# Load the pre-trained Keras models used for the video and audio predictions.
# Both paths have no directory component, so the files are looked up relative
# to the current working directory.
video_model = tf.keras.models.load_model('EmotionDetectionImageModel.keras')
audio_model = tf.keras.models.load_model('AudioDetectionEmotionModel.keras')

# Parameters for audio capture
SAMPLE_RATE = 16000  # Hz; used as the input stream's sample rate
DURATION = 1  # Seconds of audio (e.g., 1 second)

def get_video_prediction(frame):
    """Run the video model on one webcam frame and return its prediction.

    The frame is flipped with ``cv2.flip`` (flip code 1), converted from BGR
    to grayscale, resized to 48x48 and divided by 255 before a leading and a
    trailing singleton axis are added for ``video_model.predict``.
    """
    flipped = cv2.flip(frame, 1)
    grayscale = cv2.cvtColor(flipped, cv2.COLOR_BGR2GRAY)

    # 48x48 matches the model's input size; /255.0 normalizes pixel values
    scaled = cv2.resize(grayscale, (48, 48)) / 255.0

    # Add the batch axis (front) and channel axis (back) in a single step,
    # giving (1, 48, 48, 1) for a grayscale image
    batch = scaled[np.newaxis, ..., np.newaxis]
    return video_model.predict(batch)

def get_audio_prediction(audio_data):
    """Peak-normalize an audio buffer and return the audio model's prediction.

    Args:
        audio_data: Array-like of audio samples (a single channel).

    Returns:
        The result of ``audio_model.predict`` on the normalized buffer with a
        leading axis of length 1 added.
    """
    samples = np.asarray(audio_data)

    # Peak amplitude of the buffer. A silent (all-zero) buffer would turn the
    # division into 0/0 and feed NaNs to the model, and an empty buffer makes
    # np.max raise, so those buffers are passed through unscaled instead.
    peak = np.max(np.abs(samples)) if samples.size else 0
    if peak > 0:
        samples = samples / peak

    # Add the leading axis expected by predict()
    audio_input = np.expand_dims(samples, axis=0)
    return audio_model.predict(audio_input)

def audio_callback(indata, frames, time, status):
    """Handle one block of captured audio from the input stream.

    Prints ``status`` when it is truthy, then runs the audio model on the
    first channel of ``indata`` and prints the prediction. ``frames`` and
    ``time`` are accepted but not used.
    """
    if status:
        print(status)

    # Column 0 is the first (mono) channel
    mono = indata[:, 0]
    prediction = get_audio_prediction(mono)
    print(f"Audio Prediction: {prediction}")

# Start the audio stream, then run the webcam loop while it is active.
# blocksize is set so each callback receives DURATION seconds of samples;
# without it the stream uses its default block size, which is not tied to
# DURATION.  NOTE(review): assumes the audio model expects DURATION-second
# buffers -- confirm against the model's input shape.
with sd.InputStream(
    channels=1,
    samplerate=SAMPLE_RATE,
    blocksize=int(SAMPLE_RATE * DURATION),
    callback=audio_callback,
):
    # Open video capture (webcam, device index 0)
    cap = cv2.VideoCapture(0)
    try:
        while True:
            # Capture frame-by-frame from webcam; ret is falsy when no frame
            # could be read, which ends the loop
            ret, frame = cap.read()
            if not ret:
                break

            # Show the video frame
            cv2.imshow('Webcam', frame)

            # Get prediction for the current video frame
            video_prediction = get_video_prediction(frame)
            print(f"Video Prediction: {video_prediction}")

            # Break the loop if 'q' is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the webcam and close windows even if the loop raised
        # (e.g. a model error or Ctrl+C), so the device is not left held.
        cap.release()
        cv2.destroyAllWindows()
