In [15]:
import cv2
import numpy as np
import tensorflow as tf

# Load the emotion detection model (Boredom, Confusion, Engagement, Frustration)
emotion_model = tf.keras.models.load_model('my_model.h5')
emotion_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Load the activity detection model (Sleep, Yawn, Active)
activity_model = tf.keras.models.load_model('vgg_model_3_labels.h5')
activity_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Initialize YOLO for face detection
yolo_net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
layer_names = yolo_net.getLayerNames()
output_layers = [layer_names[i - 1] for i in yolo_net.getUnconnectedOutLayers()]

# Labels for emotion and activity
emotion_labels = ['Boredom', 'Confusion', 'Engagement', 'Frustration']
activity_labels = ['Sleep', 'Yawn', 'Active']

# Start video capture
cap = cv2.VideoCapture("happy.mp4")  # Use the webcam instead of '1.mp'

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    height, width, _ = frame.shape
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    yolo_net.setInput(blob)
    detections = yolo_net.forward(output_layers)

    boxes = []
    confidences = []

    # Loop over all detections and find the best face
    for detection in detections:
        for obj in detection:
            scores = obj[5:]
            confidence = max(scores)
            if confidence > 0.5:  # Confidence threshold for face detection
                center_x, center_y, w, h = (obj[:4] * [width, height, width, height]).astype(int)
                x, y = int(center_x - w / 2), int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))

    # Non-maxima suppression to avoid multiple boxes around the same face
    indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)

    if len(indices) > 0:
        for i in indices.flatten():
            x, y, w, h = boxes[i]

            # Crop the face from the frame
            face = frame[y:y + h, x:x + w]

            if face.shape[0] == 0 or face.shape[1] == 0:
                continue

            # Preprocessing for emotion model (convert to grayscale, resize, normalize)
            face_gray = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
            face_gray = cv2.resize(face_gray, (224, 224))  # Resize to match the input size
            face_gray = np.stack([face_gray] * 3, axis=-1)  # Convert grayscale to RGB
            face_gray = face_gray / 255.0  # Normalize the image
            face_gray = np.expand_dims(face_gray, axis=0)  # Add batch dimension

            # Debugging: Check the input shape
            print(f"Input shape to emotion model: {face_gray.shape}")

            # Get emotion prediction from the emotion detection model
            emotion_preds = emotion_model.predict(face_gray)
            emotion_label = np.argmax(emotion_preds)

            # Display the emotion on the frame
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
            cv2.putText(frame, f"Emotion: {emotion_labels[emotion_label]} ({round(max(emotion_preds[0]), 2)})",
                        (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

    # Display the result on the frame
    cv2.imshow('Emotion Detection', frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()




Input shape to emotion model: (1, 224, 224, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 758ms/step
Input shape to emotion model: (1, 224, 224, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 333ms/step
Input shape to emotion model: (1, 224, 224, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 324ms/step
Input shape to emotion model: (1, 224, 224, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 326ms/step
Input shape to emotion model: (1, 224, 224, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 342ms/step
Input shape to emotion model: (1, 224, 224, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 406ms/step
Input shape to emotion model: (1, 224, 224, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 491ms/step
Input shape to emotion model: (1, 224, 224, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 998ms/step
Input shape to emotion model: (1, 224, 2

KeyboardInterrupt: 