In [9]:
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.models import load_model
import time


In [13]:
def main():
    """
    Run live emotion detection on the default webcam until 'q' is pressed.

    Loads the Keras model from 'lear_net.keras', detects faces in every
    captured frame with OpenCV's frontal-face Haar cascade, classifies each
    detected face, and displays the annotated frame with an FPS counter.

    Returns:
        None. Returns early, after printing an error, when the face cascade
        cannot be loaded or the webcam cannot be opened.
    """
    # Load the trained LearNet model
    model_path = 'lear_net.keras'  # Path to your saved model
    model = load_model(model_path)
    print("Model loaded successfully")

    # Load face cascade classifier for face detection
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    if face_cascade.empty():
        # The constructor does not raise when the XML file cannot be loaded,
        # so check explicitly instead of failing later during detection.
        print("Error: Could not load face cascade classifier")
        return

    # Define emotion labels - IMPORTANT: Update these to match your actual training labels
    # These should match the exact labels from your training data
    emotion_labels = ['tense', 'happiness', 'repression', 'disgust', 'surprise', 'contempt', 'fear', 'sadness']
    print(f"Using emotion labels: {emotion_labels}")

    # Initialize webcam
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Error: Could not open webcam")
        return

    # Frame buffer for dynamic image computation. It is only collected here
    # (e.g. for process_with_dynamic_image); the per-frame prediction below
    # does not read it.
    frame_buffer = []
    max_buffer_size = 10  # Adjust based on your model's requirements

    # For FPS calculation
    prev_time = time.time()
    frame_count = 0
    fps = 0

    print("Starting live emotion detection. Press 'q' to quit.")

    try:
        while True:
            # Capture frame-by-frame
            ret, frame = cap.read()
            if not ret:
                print("Error: Failed to capture frame")
                break

            # Calculate FPS: count frames and publish the count once per second
            frame_count += 1
            current_time = time.time()
            if current_time - prev_time >= 1.0:
                fps = frame_count
                frame_count = 0
                prev_time = current_time

            # Convert to grayscale for face detection
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Detect faces
            faces = face_cascade.detectMultiScale(gray, 1.1, 4)

            # Process each detected face
            for (x, y, w, h) in faces:
                # Copy the face pixels BEFORE drawing on the frame: cv2.rectangle
                # draws in place, so slicing afterwards would feed the green
                # border into the model.
                face_roi = frame[y:y+h, x:x+w].copy()

                # Draw rectangle around face
                cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)

                # Keep the label on screen when the face touches the top edge
                text_origin = (x, max(y - 10, 20))

                try:
                    # Resize to match model input size
                    face_roi = cv2.resize(face_roi, (112, 112))

                    # Add to frame buffer for dynamic image processing
                    if len(frame_buffer) >= max_buffer_size:
                        frame_buffer.pop(0)  # Remove oldest frame
                    frame_buffer.append(face_roi)

                    # Process frame for prediction - using same preprocessing as training
                    # NOTE(review): OpenCV frames are BGR; if the model was trained
                    # on RGB images a colour conversion is needed here - confirm.
                    processed_face = face_roi.astype('float32') / 255.0  # Normalize
                    processed_face = np.expand_dims(processed_face, axis=0)  # Add batch dimension

                    # Make prediction
                    prediction = model.predict(processed_face, verbose=0)
                    emotion_idx = np.argmax(prediction[0])

                    # Ensure index is within bounds
                    if emotion_idx < len(emotion_labels):
                        emotion = emotion_labels[emotion_idx]
                        confidence = float(prediction[0][emotion_idx])

                        # Display emotion and confidence
                        label = f"{emotion}: {confidence:.2f}"
                        cv2.putText(frame, label, text_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)
                    else:
                        print(f"Warning: Predicted index {emotion_idx} out of range for emotion_labels")
                        cv2.putText(frame, "Unknown emotion", text_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)

                except Exception as e:
                    # Best effort: a failure on one face must not stop the stream
                    print(f"Error processing face: {str(e)}")
                    continue

            # Display FPS
            cv2.putText(frame, f"FPS: {fps}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

            # Display the frame
            cv2.imshow('Micro-Expression Detection', frame)

            # Break the loop if 'q' is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release resources even when the loop exits through an exception or
        # a kernel interrupt, so the camera is not left locked.
        cap.release()
        cv2.destroyAllWindows()

# Add an option to use dynamic image processing if your model is trained for it
def process_with_dynamic_image(frame_buffer, model, emotion_labels):
    """
    Predict an emotion from the dynamic image built out of the buffered frames.

    Args:
        frame_buffer: sequence of frames passed on to get_dynamic_image.
        model: object whose predict(batch, verbose=0) returns per-class scores.
        emotion_labels: labels indexed by the predicted class index.

    Returns:
        ("Waiting...", 0.0) while fewer than 3 frames are buffered, otherwise
        (label, confidence) for the highest-scoring class.
    """
    min_frames = 3  # Need at least a few frames
    if len(frame_buffer) >= min_frames:
        # Scale the dynamic image to [0, 1] and add the batch dimension
        scaled = get_dynamic_image(frame_buffer).astype('float32') / 255.0
        batch = np.expand_dims(scaled, axis=0)

        # Score the single-item batch and pick the best class
        scores = model.predict(batch, verbose=0)[0]
        best = np.argmax(scores)
        return emotion_labels[best], float(scores[best])

    return "Waiting...", 0.0

def get_dynamic_image(frames, normalized=True):
    """
    Build a dynamic image from a list of multi-channel frames.

    Each colour channel is processed separately with _compute_dynamic_image
    and the per-channel results are merged back into one image. With
    normalized=True the result is min-max scaled to 0..255 and returned as
    uint8; otherwise the raw merged image is returned.
    """
    stacked = np.array(frames)

    # The channel count is read from the third axis of the first frame
    channel_count = stacked[0].shape[2]
    per_channel = _get_channel_frames(stacked, channel_count)

    def _dynamic_or_last(channel):
        # Fall back to the channel's last frame when the computation fails
        try:
            return _compute_dynamic_image(channel)
        except Exception as e:
            print(f"Error in dynamic image computation: {str(e)}")
            return channel[-1]

    # Recombine the per-channel results into a single image
    merged = cv2.merge(tuple(_dynamic_or_last(channel) for channel in per_channel))

    if not normalized:
        return merged

    scaled = cv2.normalize(merged, None, 0, 255, norm_type=cv2.NORM_MINMAX)
    return scaled.astype('uint8')

def _get_channel_frames(frames, num_channels):
    """ 
    Takes a list of frames and returns a list of frame lists split by channel.
    """
    channel_frames = [[] for _ in range(num_channels)]
    
    for frame in frames:
        channels = cv2.split(frame)
        for i, channel in enumerate(channels):
            channel_frames[i].append(channel.reshape((*channel.shape[0:2], 1)))
    
    # Convert to numpy arrays
    for i in range(len(channel_frames)):
        channel_frames[i] = np.array(channel_frames[i])
    
    return channel_frames

def _compute_dynamic_image(frames):
    """
    Compute the dynamic image for a single channel.
    """
    num_frames, h, w, depth = frames.shape
    
    # Modified implementation to avoid potential shape issues
    fw = np.zeros(num_frames)
    for n in range(num_frames):
        cumulative_indices = np.array(range(n, num_frames))
        fw[n] = np.sum(((2*(cumulative_indices+1)) - num_frames) / (cumulative_indices+1))
    
    # Reshape weights for broadcasting
    fw = fw.reshape((-1, 1, 1, 1))
    
    # Multiply frames by weights and sum
    weighted_sum = np.sum(frames * fw, axis=0)
    
    return weighted_sum

# Entry point: start the live detection loop when this code is executed
# directly (script run or notebook cell) rather than imported as a module.
if __name__ == "__main__":
    main()

Model loaded successfully
Using emotion labels: ['tense', 'happiness', 'repression', 'disgust', 'surprise', 'contempt', 'fear', 'sadness']
Starting live emotion detection. Press 'q' to quit.
