In [1]:
import cv2
import mediapipe as mp 
import numpy as np
import os
import json
import glob
from os import listdir

In [2]:

class ImageProcessor():
    """Image and facial-landmark preprocessing helpers (OpenCV + NumPy + MediaPipe).

    ``target_size`` is a ``(width, height)`` tuple, the order ``cv2.resize``
    takes for its ``dsize`` argument; every resize method reads it that way.
    """

    # MediaPipe solution modules, shared by every instance.
    mp_face_mesh = mp.solutions.face_mesh
    mp_face_detection = mp.solutions.face_detection
    mp_drawing = mp.solutions.drawing_utils
    mp_drawing_styles = mp.solutions.drawing_styles

    def __init__(self, image_size=(64, 64)):
        """Store the target ``(width, height)`` used by the resize methods."""
        self.target_size = image_size

    def load_image(self, image_path):
        """Read an image with ``cv2.imread``.

        Returns the image as a NumPy array, or ``None`` (after printing an
        error) when OpenCV cannot read the file.
        """
        image = cv2.imread(image_path)
        if image is None:
            print(f"ERROR: Unable to read image at {image_path}")
        return image

    def check_if_valid(self, image):
        """Return True only for a non-empty NumPy array."""
        return isinstance(image, np.ndarray) and image.size > 0

    def convert_to_grayscale(self, image):
        """Convert a BGR or BGRA image to grayscale; other inputs are returned unchanged."""
        if len(image.shape) == 3:
            channels = image.shape[2]
            if channels == 3:
                return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            if channels == 4:
                return cv2.cvtColor(image, cv2.COLOR_BGRA2GRAY)
        return image

    def normalize_image(self, image):
        """Scale pixel values by 1/255 and return them as float32."""
        return image.astype(np.float32) / 255.0

    def sharpen_image(self, image):
        """Sharpen the image with a 3x3 kernel (centre 9, neighbours -1)."""
        sharpening_kernel = np.array([[-1, -1, -1],
                                      [-1, 9, -1],
                                      [-1, -1, -1]])
        return cv2.filter2D(image, -1, sharpening_kernel)

    def downscale_image(self, image):
        """Resize to ``target_size`` with INTER_AREA (recommended when shrinking)."""
        return cv2.resize(image, self.target_size, interpolation=cv2.INTER_AREA)

    def upscale_image(self, image):
        """Resize to ``target_size`` with INTER_CUBIC (recommended when enlarging)."""
        return cv2.resize(image, self.target_size, interpolation=cv2.INTER_CUBIC)

    def resize_with_aspect_ratio(self, image, pad_color=0):
        """Fit the image inside ``target_size`` without distortion and pad the rest.

        The image is scaled so it fits entirely inside the target, centred, and
        the remaining area is filled with ``pad_color``. Works for grayscale and
        multi-channel images; for multi-channel input ``pad_color`` may be a
        scalar or a per-channel sequence.

        Returns the input unchanged when it already has the target size,
        otherwise a new array of shape ``(target_h, target_w[, channels])``.
        """
        src_h, src_w = image.shape[:2]
        target_w, target_h = self.target_size  # (width, height), as cv2.resize expects

        if src_h == target_h and src_w == target_w:
            return image

        # Integer cross-multiplication avoids float rounding when comparing
        # aspect ratios and when deriving the scaled side.
        if src_w * target_h > target_w * src_h:
            # Source is relatively wider than the target: width is the limit.
            new_w = target_w
            new_h = max(1, (target_w * src_h) // src_w)
        else:
            # Source is relatively taller (or same ratio): height is the limit.
            new_h = target_h
            new_w = max(1, (target_h * src_w) // src_h)

        # INTER_AREA when shrinking, INTER_CUBIC when enlarging.
        shrinking = new_w <= src_w and new_h <= src_h
        interpolation = cv2.INTER_AREA if shrinking else cv2.INTER_CUBIC
        resized = cv2.resize(image, (new_w, new_h), interpolation=interpolation)

        # Canvas takes its channel layout and dtype from the resized image so
        # colour and float images are padded correctly.
        canvas = np.full((target_h, target_w) + resized.shape[2:], pad_color, dtype=resized.dtype)
        y_offset = (target_h - new_h) // 2
        x_offset = (target_w - new_w) // 2
        canvas[y_offset:y_offset + new_h, x_offset:x_offset + new_w] = resized
        return canvas

    def clean_landmarks(self, landmarks_array):
        """Fill missing landmark values and drop rows containing outliers.

        Args:
            landmarks_array: 2-D array-like, one landmark per row. ``None`` and
                NaN entries are treated as missing.

        Returns:
            A float array containing only the rows whose every value lies within
            1.5 * IQR of the quartiles. An empty array is returned for ``None``
            or empty input; if every value is missing the data is returned
            unfiltered (as NaN).

        NOTE(review): the mean and the IQR bounds are computed over all values
        pooled together, not per coordinate, as in the original implementation.
        """
        if landmarks_array is None or len(landmarks_array) == 0:
            return np.array([])

        # np.array makes a copy and converts None to NaN for a float dtype.
        values = np.array(landmarks_array, dtype=float)
        missing = np.isnan(values)

        if missing.all():
            print("Warning: No valid data for mean calculation, skipping mean replacement")
            print("Warning: No valid data for outlier detection, returning unfiltered landmarks")
            return values

        # Replace missing entries with the mean of the valid ones.
        values[missing] = values[~missing].mean()

        q1, q3 = np.quantile(values, [0.25, 0.75])
        iqr = q3 - q1
        lower_bound = q1 - 1.5 * iqr
        upper_bound = q3 + 1.5 * iqr

        # Keep a landmark only if all of its values are inside the bounds.
        inlier_rows = ((values >= lower_bound) & (values <= upper_bound)).all(axis=1)
        return values[inlier_rows]

    def process_image(self, image_path):
        """Load one image and run grayscale -> aspect-preserving resize -> normalisation.

        Returns:
            ``(image, image_path, None)``; ``image`` is ``None`` when the file
            could not be read. The third slot is reserved for landmarks.
        """
        image = self.load_image(image_path)
        if image is None:
            return (None, image_path, None)

        image = self.convert_to_grayscale(image)
        image = self.resize_with_aspect_ratio(image, 0)
        image = self.normalize_image(image)
        return (image, image_path, None)

    def _clean_landmarks_dict(self, landmarks_dict):
        """Clean every entry of a ``{image filename: landmarks}`` dictionary.

        NOTE(review): the original code called
        ``self.process_landmark_dictionary_combined``, which is not defined on
        this class. It is used if something provides it; otherwise each entry is
        cleaned with ``clean_landmarks`` - confirm this matches the intent.
        """
        combined = getattr(self, "process_landmark_dictionary_combined", None)
        if callable(combined):
            return combined(landmarks_dict)

        cleaned = {}
        for image_name, landmarks in landmarks_dict.items():
            cleaned[image_name] = None if landmarks is None else self.clean_landmarks(landmarks)
        return cleaned

    def process_multiple_image(self, folder_path, landmarks_dict=None):
        """Process every readable image in a folder.

        Args:
            folder_path: directory whose entries are passed to ``process_image``.
            landmarks_dict: optional ``{image filename: landmark data}`` mapping.

        Returns:
            A list of ``(processed_image, image_path, cleaned_landmarks)`` tuples
            for the files that could be read. ``cleaned_landmarks`` is ``None``
            unless ``landmarks_dict`` has an entry for the file's base name.
        """
        results = []
        # Sorted so the result order does not depend on the filesystem.
        for filename in sorted(os.listdir(folder_path)):
            processed_result = self.process_image(os.path.join(folder_path, filename))
            if processed_result[0] is not None:
                results.append(processed_result)

        if landmarks_dict is not None:
            cleaned_landmarks_dict = self._clean_landmarks_dict(landmarks_dict)
            for i, (image, image_path, _) in enumerate(results):
                cleaned = cleaned_landmarks_dict.get(os.path.basename(image_path))
                if cleaned is not None:
                    results[i] = (image, image_path, cleaned)
        return results

    def process_images_with_landmarks_in_folder(self, folder_path):
        """Detect faces and extract landmarks for every .jpg/.png in a folder.

        Relies on the module-level helpers ``detect_faces`` and
        ``extract_facial_landmarks``, which are not defined in the visible part
        of this notebook and must exist before this method is called.

        Returns:
            ``{image_path: {face_idx: processed_landmarks}}``; the value is
            ``None`` for images that could not be loaded or where no face or
            landmarks were found.
        """
        all_landmarks = {}

        image_paths = (glob.glob(os.path.join(folder_path, "*.jpg"))
                       + glob.glob(os.path.join(folder_path, "*.png")))
        if not image_paths:
            return all_landmarks

        # NOTE(review): `process_landmarks` is not defined on this class; fall
        # back to clean_landmarks unless something provides it - confirm intent.
        process_landmarks = getattr(self, "process_landmarks", self.clean_landmarks)

        # Build the MediaPipe models once and close them when done, instead of
        # creating (and leaking) a new pair for every image.
        with self.mp_face_mesh.FaceMesh(static_image_mode=True, max_num_faces=1,
                                        refine_landmarks=True) as face_mesh:
            with self.mp_face_detection.FaceDetection(min_detection_confidence=0.5) as face_detection:
                for image_path in image_paths:
                    image = self.load_image(image_path)
                    if image is None:
                        print(f"Error: Could not load image {image_path}")
                        all_landmarks[image_path] = None
                        continue

                    # Detect the face so we can get the landmarks
                    faces = detect_faces(image, face_detection)
                    if not faces:
                        print(f"No faces detected in image: {image_path}")
                        all_landmarks[image_path] = None
                        continue

                    landmarks_dict = extract_facial_landmarks(image, face_mesh)
                    if not landmarks_dict:
                        print(f"No facial landmarks detected in image: {image_path}")
                        all_landmarks[image_path] = None
                        continue

                    all_landmarks[image_path] = {
                        face_idx: process_landmarks(landmarks)
                        for face_idx, landmarks in landmarks_dict.items()
                    }

        return all_landmarks




In [3]:
import cv2
import mediapipe as mp 
import numpy as np
import os
import json
import glob
from os import listdir
import time

class VideoProcessor:
    def __init__(self, video_path=None, image_size=(128, 128)):
        """Set up per-run tracking state, MediaPipe handles and detection thresholds.

        Args:
            video_path: path of the video to analyse; ``None`` for webcam use.
            image_size: size passed to ``ImageProcessor`` when that class has
                been defined in the notebook's global namespace.
        """
        # ImageProcessor lives in another cell; tolerate it not being defined.
        if 'ImageProcessor' in globals():
            self.image_processor = ImageProcessor(image_size)
        else:
            self.image_processor = None

        self.video_path = video_path

        # Frame-by-frame results and expression bookkeeping.
        self.frames_data = {}
        self.frame_count = 0
        self.expression_transitions = []
        self.expression_durations = {}  # expression -> list of durations
        self.current_expression = None
        self.expression_start_frame = 0

        # MediaPipe components
        self.mp_face_mesh = mp.solutions.face_mesh
        self.mp_drawing = mp.solutions.drawing_utils
        self.mp_drawing_styles = mp.solutions.drawing_styles
        self.my_drawing_specs = self.mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=1)

        # Expression detection thresholds - can be calibrated
        self.expression_thresholds = dict(
            mouth_ratio_surprise=0.7,       # mouth height/width ratio for surprise
            mouth_ratio_smile=0.45,         # mouth height/width ratio for smile
            mouth_width_smile=0.065,        # minimum mouth width for smile
            mouth_width_increase=0.005,     # minimum increase in width to detect smile
            smile_measure_threshold=0.001,  # minimum smile measure value
            eye_closed_threshold=0.015,     # eye openness below this counts as closed
            brow_raised_threshold=0.04,     # eyebrow height for raised brows
            brow_furrowed_threshold=0.02,   # eyebrow lowering for furrow
        )

        # Baseline measurements for relative changes; None until the first face is seen.
        self.baseline_measurements = dict.fromkeys(
            ('mouth_width', 'mouth_height', 'smile_measure'))

        # Number of face frames used to establish the baseline.
        self.baseline_frames = 30
        self.frames_processed = 0
    
    def capture_webcam_improved(self):
        """Capture frames from a webcam, track facial expressions and show a live preview.

        Tries camera index 1 first and falls back to index 0. Each frame is run
        through MediaPipe FaceMesh; landmarks, metrics and the detected
        expression are stored in ``self.frames_data`` keyed by frame index, and
        frames without a face are recorded with expression ``'no_face'``. The
        loop ends when a frame cannot be read or the user presses ``q``. The
        camera and the preview window are released even if processing raises.

        Returns:
            ``self.frames_data`` (including the ``'expression_statistics'``
            entry), or ``{}`` when no webcam could be opened.
        """
        cap = cv2.VideoCapture(1)  # Try 0 if 1 doesn't work
        if not cap.isOpened():
            cap.release()  # do not leak the failed handle
            cap = cv2.VideoCapture(0)  # Fall back to default camera
            if not cap.isOpened():
                cap.release()
                print("Error: Could not open webcam.")
                return {}

        print("Starting webcam capture. Press 'q' to stop.")
        start_time = time.time()

        try:
            with self.mp_face_mesh.FaceMesh(
                max_num_faces=1,
                refine_landmarks=True,
                min_detection_confidence=0.5,
                min_tracking_confidence=0.5) as face_mesh:

                while cap.isOpened():
                    success, image = cap.read()
                    if not success:
                        print("Failed to read from webcam.")
                        break

                    # Wall-clock time since capture started
                    current_time = time.time() - start_time

                    # MediaPipe expects RGB, OpenCV delivers BGR
                    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                    results = face_mesh.process(image_rgb)

                    if results.multi_face_landmarks:
                        landmarks = self.store_frame_data(
                            results.multi_face_landmarks, self.frame_count, current_time)

                        if landmarks:
                            expression = self.detect_expression(landmarks)
                            self.frames_data[self.frame_count]['expression'] = expression
                            self.check_expression_transition(self.frame_count)

                        self.draw_landmarks(image, results.multi_face_landmarks)
                    else:
                        # No face detected in this frame
                        self.frames_data[self.frame_count] = {
                            'timestamp': current_time,
                            'landmarks': None,
                            'expression': 'no_face'
                        }

                        if self.current_expression and self.current_expression != 'no_face':
                            # Expression ended because the face is no longer detected
                            self.add_expression_transition(
                                self.current_expression, 'no_face', self.frame_count)
                            self.current_expression = 'no_face'
                            self.expression_start_frame = self.frame_count

                    # Flip first, then draw text, so the text is not mirrored
                    display_image = cv2.flip(image, 1)

                    frame_record = self.frames_data.get(self.frame_count)
                    if frame_record is not None and 'expression' in frame_record:
                        expr = frame_record['expression']
                        cv2.putText(display_image, f"Expression: {expr}", (10, 30),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

                        # Overlay the key metrics, one per line
                        if 'metrics' in frame_record:
                            y_pos = 60
                            for key, value in frame_record['metrics'].items():
                                cv2.putText(display_image, f"{key}: {value:.2f}", (10, y_pos),
                                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
                                y_pos += 20

                    cv2.imshow("Facial Expression Tracking", display_image)
                    self.frame_count += 1

                    # Use longer wait time (30ms) for more reliable key detection
                    key = cv2.waitKey(30) & 0xFF
                    if key == ord('q'):
                        print("Q key pressed - stopping capture")
                        break
        finally:
            # Always free the camera and close the preview window.
            cap.release()
            cv2.destroyAllWindows()

        print(f"Capture complete. Processed {self.frame_count} frames.")

        # Calculate expression duration statistics
        self.calculate_expression_statistics()

        return self.frames_data
    
    def process_video_file(self):
        """Process video from file instead of webcam"""
        if not self.video_path:
            print("Error: No video path specified")
            return None
            
        cap = cv2.VideoCapture(self.video_path)
        if not cap.isOpened():
            print(f"Error: Could not open video file {self.video_path}")
            return None
            
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        
        print(f"Processing video with {total_frames} frames at {fps} FPS")
        
        with self.mp_face_mesh.FaceMesh(
            max_num_faces=1,
            refine_landmarks=True,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5) as face_mesh:
            
            for frame_idx in range(total_frames):
                ret, frame = cap.read()
                if not ret:
                    break
                
                # Calculate timestamp
                timestamp = frame_idx / fps
                
                # Process frame
                image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = face_mesh.process(image_rgb)
                
                if results.multi_face_landmarks:
                    # Store landmark data for this frame
                    landmarks = self.store_frame_data(results.multi_face_landmarks, frame_idx, timestamp)
                    
                    # Detect expression for current frame
                    if landmarks:
                        expression = self.detect_expression(landmarks)
                        self.frames_data[frame_idx]['expression'] = expression
                        
                        # Check for expression transition
                        self.check_expression_transition(frame_idx)
                
                # Optional: Display progress
                if frame_idx % 10 == 0 or frame_idx == total_frames - 1:
                    progress = (frame_idx + 1) / total_frames * 100
                    print(f"Processing frame {frame_idx+1}/{total_frames} ({progress:.1f}%)")
            
            cap.release()
        
        # Calculate expression duration statistics
        self.calculate_expression_statistics()
        
        return self.frames_data
    
    def store_frame_data(self, multi_face_landmarks, frame_idx, timestamp):
        """Extract and store landmark data for a frame"""
        landmarks = []
        for face_landmarks in multi_face_landmarks:
            face_points = []
            for landmark in face_landmarks.landmark:
                face_points.append([landmark.x, landmark.y, landmark.z])
            landmarks = face_points  # We only process the first face
        
        # Calculate key facial metrics for expression detection
        if landmarks:
            metrics = self.calculate_facial_metrics(landmarks)
            
            # Update baseline measurements if needed
            self.frames_processed += 1
            if self.frames_processed <= self.baseline_frames:
                # During initial frames, collect baseline data
                if self.baseline_measurements['mouth_width'] is None:
                    self.baseline_measurements['mouth_width'] = metrics['mouth_width']
                    self.baseline_measurements['mouth_height'] = metrics['mouth_height']
                    self.baseline_measurements['smile_measure'] = metrics['smile_measure']
                else:
                    # Update rolling average
                    alpha = 0.3  # Weight for new measurement
                    self.baseline_measurements['mouth_width'] = (1-alpha) * self.baseline_measurements['mouth_width'] + alpha * metrics['mouth_width']
                    self.baseline_measurements['mouth_height'] = (1-alpha) * self.baseline_measurements['mouth_height'] + alpha * metrics['mouth_height']
                    self.baseline_measurements['smile_measure'] = (1-alpha) * self.baseline_measurements['smile_measure'] + alpha * metrics['smile_measure']
                
                # Add relative metrics
                metrics['relative_mouth_width'] = 1.0  # No change during baseline collection
                metrics['relative_smile_measure'] = 0.0
            else:
                # Calculate relative changes from baseline
                if self.baseline_measurements['mouth_width'] is not None:
                    metrics['relative_mouth_width'] = metrics['mouth_width'] / self.baseline_measurements['mouth_width']
                    metrics['relative_smile_measure'] = metrics['smile_measure'] - self.baseline_measurements['smile_measure']
        
        # Store frame data
        self.frames_data[frame_idx] = {
            'timestamp': timestamp,
            'landmarks': landmarks
        }
        
        if landmarks:
            self.frames_data[frame_idx]['metrics'] = metrics
        
        # If we have previous frames, calculate movement
        if frame_idx > 0 and (frame_idx-1) in self.frames_data and self.frames_data[frame_idx-1]['landmarks']:
            self.calculate_velocity(frame_idx, timestamp - self.frames_data[frame_idx-1]['timestamp'])
            
            # Calculate acceleration if we have at least 3 frames
            if frame_idx > 1 and (frame_idx-2) in self.frames_data and self.frames_data[frame_idx-2]['landmarks']:
                self.calculate_acceleration(frame_idx)
        
        return landmarks
    
    def calculate_facial_metrics(self, landmarks):
        """Derive mouth, eye, brow and cheek measurements from one face's landmarks.

        Args:
            landmarks: sequence of ``[x, y, z]`` points indexed with the
                landmark indices used below (up to index 386).

        Returns:
            Dict with ``mouth_width``, ``mouth_height``, ``mouth_ratio``,
            ``left_eye_openness``, ``right_eye_openness``, ``avg_eye_openness``,
            ``left_brow_height``, ``right_brow_height``, ``smile_measure`` and
            ``cheek_height``.
        """
        def point(index):
            # One landmark as a NumPy vector
            return np.array(landmarks[index])

        def distance(a, b):
            # Euclidean distance between two landmark vectors
            return np.linalg.norm(a - b)

        # Mouth: corners (61 / 291) and lip midpoints (13 / 14)
        corner_l, corner_r = point(61), point(291)
        lip_upper, lip_lower = point(13), point(14)

        # Eyes: upper / lower lid of each eye
        l_eye_upper, l_eye_lower = point(159), point(145)
        r_eye_upper, r_eye_lower = point(386), point(374)

        # Eyebrows and points near the cheeks
        l_brow, r_brow = point(107), point(336)
        l_cheek, r_cheek = point(117), point(346)

        mouth_width = distance(corner_r, corner_l)
        mouth_height = distance(lip_upper, lip_lower)
        l_eye_open = distance(l_eye_upper, l_eye_lower)
        r_eye_open = distance(r_eye_upper, r_eye_lower)

        # Height/width ratio of the mouth; 0 when the width is degenerate
        if mouth_width > 0:
            mouth_ratio = mouth_height / mouth_width
        else:
            mouth_ratio = 0

        # Positive when the mouth corners sit above the lip centre (smaller y)
        corners_y = (corner_l[1] + corner_r[1]) / 2
        centre_y = (lip_upper[1] + lip_lower[1]) / 2
        smile_measure = centre_y - corners_y

        # Cheek position relative to the mouth corners, averaged over both sides
        cheek_height = ((l_cheek[1] - corner_l[1]) + (r_cheek[1] - corner_r[1])) / 2

        metrics = {}
        metrics['mouth_width'] = mouth_width
        metrics['mouth_height'] = mouth_height
        metrics['mouth_ratio'] = mouth_ratio
        metrics['left_eye_openness'] = l_eye_open
        metrics['right_eye_openness'] = r_eye_open
        metrics['avg_eye_openness'] = (l_eye_open + r_eye_open) / 2
        # Brow y minus upper-lid y
        metrics['left_brow_height'] = l_brow[1] - l_eye_upper[1]
        metrics['right_brow_height'] = r_brow[1] - r_eye_upper[1]
        metrics['smile_measure'] = smile_measure
        metrics['cheek_height'] = cheek_height
        return metrics
    
    def calculate_velocity(self, frame_idx, time_delta):
        """Calculate velocity vectors for all landmarks between current and previous frame"""
        if time_delta <= 0:
            # Avoid division by zero
            time_delta = 0.033  # Default to ~30fps
            
        prev_landmarks = self.frames_data[frame_idx-1]['landmarks']
        curr_landmarks = self.frames_data[frame_idx]['landmarks']
        
        velocities = []
        for i in range(len(curr_landmarks)):
            displacement = np.array(curr_landmarks[i]) - np.array(prev_landmarks[i])
            velocity = displacement / time_delta
            velocities.append(velocity.tolist())
        
        self.frames_data[frame_idx]['velocities'] = velocities
        
        # Calculate overall movement magnitude
        all_velocities = np.array(velocities)
        magnitude = np.linalg.norm(all_velocities, axis=1)
        self.frames_data[frame_idx]['movement_magnitude'] = np.mean(magnitude)
    
    def calculate_acceleration(self, frame_idx):
        """Calculate acceleration vectors for all landmarks"""
        if 'velocities' not in self.frames_data[frame_idx] or 'velocities' not in self.frames_data[frame_idx-1]:
            return
            
        time_delta = self.frames_data[frame_idx]['timestamp'] - self.frames_data[frame_idx-1]['timestamp']
        if time_delta <= 0:
            time_delta = 0.033  # Default to ~30fps
            
        prev_velocities = self.frames_data[frame_idx-1]['velocities']
        curr_velocities = self.frames_data[frame_idx]['velocities']
        
        accelerations = []
        for i in range(len(curr_velocities)):
            accel = (np.array(curr_velocities[i]) - np.array(prev_velocities[i])) / time_delta
            accelerations.append(accel.tolist())
        
        self.frames_data[frame_idx]['accelerations'] = accelerations
        
        # Calculate overall acceleration magnitude
        all_accelerations = np.array(accelerations)
        magnitude = np.linalg.norm(all_accelerations, axis=1)
        self.frames_data[frame_idx]['acceleration_magnitude'] = np.mean(magnitude)
    
    def detect_expression(self, landmarks):
        """Detect facial expression based on landmark configurations and calculated metrics"""
        # Get the metrics calculated for this frame
        if self.frame_count in self.frames_data and 'metrics' in self.frames_data[self.frame_count]:
            metrics = self.frames_data[self.frame_count]['metrics']
        else:
            metrics = self.calculate_facial_metrics(landmarks)
        
        # Enhanced expression detection logic with more expressions
        mouth_ratio = metrics['mouth_ratio']
        mouth_width = metrics['mouth_width']
        mouth_height = metrics['mouth_height']
        left_eye_openness = metrics['left_eye_openness']
        right_eye_openness = metrics['right_eye_openness']
        avg_eye_openness = metrics['avg_eye_openness']
        left_brow_height = metrics['left_brow_height']
        right_brow_height = metrics['right_brow_height']
        smile_measure = metrics['smile_measure']
        
        # Check if we have relative measurements
        has_relative = 'relative_mouth_width' in metrics and self.frames_processed > self.baseline_frames
        
        # Eyes closed detection
        if left_eye_openness < self.expression_thresholds['eye_closed_threshold'] and right_eye_openness < self.expression_thresholds['eye_closed_threshold']:
            return "eyes_closed"
        
        # Surprise detection - raised eyebrows and open mouth
        if mouth_ratio > self.expression_thresholds['mouth_ratio_surprise'] and (left_brow_height < -0.02 or right_brow_height < -0.02):
            return "surprise"
        
        # Smile detection with multiple methods
        is_smile = False
        
        # Method 1: Using absolute thresholds
        if mouth_width > self.expression_thresholds['mouth_width_smile'] and smile_measure > self.expression_thresholds['smile_measure_threshold']:
            is_smile = True
        
        # Method 2: Using relative changes from baseline (more personalized)
        if has_relative:
            relative_width = metrics['relative_mouth_width']
            relative_smile = metrics['relative_smile_measure']
            
            # Detect smile based on changes from baseline
            if relative_width > 1.05 or relative_smile > 0.005:  # 5% increase in width or positive smile measure change
                is_smile = True
            
            # Log relative values for debugging
            if self.frame_count % 10 == 0:
                print(f"Relative width: {relative_width:.3f}, Relative smile: {relative_smile:.3f}, Is smile: {is_smile}")
        
        if is_smile:
            return "smile"
            
        # Speaking detection - mouth open vertically but not extremely wide
        if mouth_ratio > 0.4 and mouth_ratio < self.expression_thresholds['mouth_ratio_surprise']:
            return "speaking"
            
        # Frown detection - downturned mouth
        if smile_measure < -0.02:  # More negative threshold
            return "frown"
        
        # Default to neutral expression
        return "neutral"
    
    def check_expression_transition(self, frame_idx):
        """Check if there's an expression transition in the current frame and track expression durations"""
        curr_expr = self.frames_data[frame_idx].get('expression')
        
        # Initialize current expression if this is the first frame
        if self.current_expression is None and curr_expr:
            self.current_expression = curr_expr
            self.expression_start_frame = frame_idx
            return
            
        # Check for expression change
        if curr_expr and curr_expr != self.current_expression:
            # We have a transition
            self.add_expression_transition(self.current_expression, curr_expr, frame_idx)
            
            # Update current expression
            self.current_expression = curr_expr
            self.expression_start_frame = frame_idx
    
    def add_expression_transition(self, from_expr, to_expr, frame_idx):
        """Record that ``from_expr`` ended and ``to_expr`` began at ``frame_idx``.

        The record spans from ``self.expression_start_frame`` to ``frame_idx``;
        its duration is the difference between those two frames' ``'timestamp'``
        values. The record is appended to ``self.expression_transitions``, a
        message is printed, and the duration is also filed under ``from_expr``
        in ``self.expression_durations``.

        Args:
            from_expr: Label of the expression that just ended.
            to_expr: Label of the expression that just started.
            frame_idx: Frame key at which the change was observed.
        """
        first_frame = self.expression_start_frame
        began_at = self.frames_data[first_frame]['timestamp']
        changed_at = self.frames_data[frame_idx]['timestamp']
        held_for = changed_at - began_at  # how long from_expr was held

        self.expression_transitions.append({
            'from': from_expr,
            'to': to_expr,
            'start_frame': first_frame,
            'end_frame': frame_idx,
            'start_time': began_at,
            'end_time': changed_at,
            'duration': held_for,
        })
        print(f"Expression transition: {from_expr} → {to_expr} at frame {frame_idx}")

        # Per-expression history of how long each occurrence lasted.
        self.expression_durations.setdefault(from_expr, []).append(held_for)
    
    def calculate_expression_statistics(self):
        """Summarise how long each expression was held.

        For every expression in ``self.expression_durations`` that has at least
        one recorded duration, computes count, total, average, minimum and
        maximum. The resulting mapping is stored in ``self.frames_data`` under
        the string key ``'expression_statistics'`` (alongside the per-frame
        entries); nothing is returned.
        """
        def _summarize(samples):
            # One summary dict for a non-empty list of durations.
            total = sum(samples)
            return {
                'count': len(samples),
                'total_duration': total,
                'avg_duration': total / len(samples),
                'min_duration': min(samples),
                'max_duration': max(samples),
            }

        # Expressions with an empty duration list are left out entirely.
        self.frames_data['expression_statistics'] = {
            label: _summarize(samples)
            for label, samples in self.expression_durations.items()
            if samples
        }
    
    def draw_landmarks(self, image, multi_face_landmarks):
        """Draw facial landmarks on the image for visualization.

        For each face, two overlays are drawn through
        ``self.mp_drawing.draw_landmarks``: first the ``FACEMESH_TESSELATION``
        connections with the default tesselation style, then the
        ``FACEMESH_CONTOURS`` connections with ``self.my_drawing_specs``.
        Individual landmark points are not drawn (``landmark_drawing_spec=None``).

        Args:
            image: Drawing target, handed unchanged to ``draw_landmarks``.
            multi_face_landmarks: Iterable holding one landmark list per face.
                ``None`` or an empty iterable means "no face" and is a no-op.

        Returns:
            None.
        """
        # A frame without a detected face arrives here as None (that is how the
        # MediaPipe Face Mesh result reports "no detection"); iterating it would
        # raise TypeError, so bail out early instead.
        if not multi_face_landmarks:
            return

        # The style lookup does not depend on the face, so fetch it once.
        tesselation_style = self.mp_drawing_styles.get_default_face_mesh_tesselation_style()

        for face_landmarks in multi_face_landmarks:
            self.mp_drawing.draw_landmarks(
                image=image,
                landmark_list=face_landmarks,
                connections=self.mp_face_mesh.FACEMESH_TESSELATION,
                landmark_drawing_spec=None,
                connection_drawing_spec=tesselation_style
            )
            self.mp_drawing.draw_landmarks(
                image=image,
                landmark_list=face_landmarks,
                connections=self.mp_face_mesh.FACEMESH_CONTOURS,
                landmark_drawing_spec=None,
                connection_drawing_spec=self.my_drawing_specs
            )
    
    # ----- Analysis and Reporting Functions -----
    
    def get_all_expression_transitions(self):
        """Expose the recorded transitions.

        Returns:
            ``self.expression_transitions`` itself (the live list, not a copy).
        """
        recorded = self.expression_transitions
        return recorded
    
    def get_transition_statistics(self):
        """Aggregate the recorded transitions by their ``from→to`` label.

        Returns:
            A dict with two mappings keyed by ``"<from>→<to>"``:
            ``'counts'`` (how many times that transition occurred) and
            ``'average_durations'`` (mean of the ``'duration'`` field of those
            transitions). Keys appear in order of first occurrence.
        """
        # One pass: collect every duration under its transition label.
        durations_by_label = {}
        for record in self.expression_transitions:
            label = f"{record['from']}→{record['to']}"
            durations_by_label.setdefault(label, []).append(record['duration'])

        # Each transition contributes exactly one duration, so the list length
        # is the occurrence count.
        occurrence_counts = {
            label: len(samples) for label, samples in durations_by_label.items()
        }
        mean_durations = {
            label: sum(samples) / len(samples)
            for label, samples in durations_by_label.items()
        }

        return {
            'counts': occurrence_counts,
            'average_durations': mean_durations
        }
    
    def analyze_facial_movement(self, landmark_indices=None):
        """Analyze movement patterns for specific landmarks or all landmarks.

        Walks the integer-keyed frames of ``self.frames_data`` in ascending
        order, gathers each requested landmark's entry from the frame's
        ``'velocities'`` and ``'accelerations'`` sequences (when present and long
        enough), and reduces the per-frame vector magnitudes to mean and maximum.

        Args:
            landmark_indices: Landmark indices to analyse. ``None`` or an empty
                sequence selects a default set of lip, mouth-corner, eye and
                eyebrow points.

        Returns:
            Dict mapping each landmark index to ``avg_velocity``,
            ``max_velocity``, ``avg_acceleration`` and ``max_acceleration``.
            Values are builtin ``float`` (``0.0`` when no samples exist) so the
            result can be passed to ``json.dump`` directly.
        """
        if not landmark_indices:
            # Default to analyzing key facial points if none specified
            landmark_indices = [
                13,   # Upper lip
                14,   # Lower lip
                61,   # Left mouth corner
                291,  # Right mouth corner
                159,  # Left eye top
                145,  # Left eye bottom
                386,  # Right eye top
                374,  # Right eye bottom
                107,  # Left eyebrow
                336   # Right eyebrow
            ]

        # Per-landmark sample lists, filled frame by frame.
        landmark_velocities = {idx: [] for idx in landmark_indices}
        landmark_accelerations = {idx: [] for idx in landmark_indices}

        # Filter BEFORE sorting: frames_data can also hold string keys such as
        # 'expression_statistics', and sorting a mix of int and str keys raises
        # TypeError.
        frame_indices = sorted(key for key in self.frames_data if isinstance(key, int))

        for frame_idx in frame_indices:
            frame = self.frames_data[frame_idx]
            for field, collected in (('velocities', landmark_velocities),
                                     ('accelerations', landmark_accelerations)):
                if field not in frame:
                    continue
                per_landmark = frame[field]
                for idx in landmark_indices:
                    # Skip landmarks this frame has no entry for.
                    if idx < len(per_landmark):
                        collected[idx].append(per_landmark[idx])

        def _magnitudes(samples):
            # assumes every sample is a fixed-length vector (e.g. x, y, z) so
            # the samples stack into a 2-D array - TODO confirm against the
            # code that fills 'velocities' / 'accelerations'
            stacked = np.array(samples) if samples else np.array([])
            return np.linalg.norm(stacked, axis=1) if stacked.size > 0 else np.array([])

        def _mean_and_max(magnitudes):
            # Builtin floats keep the metrics JSON-serialisable whatever dtype
            # the stored vectors use.
            if magnitudes.size == 0:
                return 0.0, 0.0
            return float(np.mean(magnitudes)), float(np.max(magnitudes))

        movement_metrics = {}
        for idx in landmark_indices:
            avg_vel, max_vel = _mean_and_max(_magnitudes(landmark_velocities[idx]))
            avg_acc, max_acc = _mean_and_max(_magnitudes(landmark_accelerations[idx]))
            movement_metrics[idx] = {
                'avg_velocity': avg_vel,
                'max_velocity': max_vel,
                'avg_acceleration': avg_acc,
                'max_acceleration': max_acc
            }

        return movement_metrics
    
    def analyze_movement_by_expression(self):
        """Analyze movement patterns segmented by expression type.

        Groups the integer-keyed frames of ``self.frames_data`` by their
        ``'expression'`` label in a single pass and summarises the
        ``'movement_magnitude'`` and ``'acceleration_magnitude'`` values found
        on those frames.

        Returns:
            Dict mapping expression label to ``frame_count``, ``avg_movement``,
            ``avg_acceleration``, ``max_movement`` and ``max_acceleration``.
            Labels appear in the order they are first seen. An expression is
            omitted unless at least one of its frames carries a movement value
            and at least one carries an acceleration value.
        """
        grouped = {}

        for frame_idx, frame in self.frames_data.items():
            # Non-integer keys (e.g. 'expression_statistics') are not frames.
            if not isinstance(frame_idx, int):
                continue

            expr = frame.get('expression')
            if not expr:
                continue  # unlabeled frame

            bucket = grouped.setdefault(
                expr, {'frame_count': 0, 'movement': [], 'acceleration': []}
            )
            # frame_count covers every frame with this label, including frames
            # that lack one or both magnitude fields.
            bucket['frame_count'] += 1
            if 'movement_magnitude' in frame:
                bucket['movement'].append(frame['movement_magnitude'])
            if 'acceleration_magnitude' in frame:
                bucket['acceleration'].append(frame['acceleration_magnitude'])

        expression_movements = {}
        for expr, bucket in grouped.items():
            movement_values = bucket['movement']
            accel_values = bucket['acceleration']

            # Both series must be non-empty, otherwise the averages are undefined.
            if movement_values and accel_values:
                expression_movements[expr] = {
                    'frame_count': bucket['frame_count'],
                    'avg_movement': sum(movement_values) / len(movement_values),
                    'avg_acceleration': sum(accel_values) / len(accel_values),
                    'max_movement': max(movement_values),
                    'max_acceleration': max(accel_values)
                }

        return expression_movements
    
    def analyze_transitions_over_time(self, time_window=5):
        """Analyze expression transition patterns over time for neurological assessment.

        Each recorded transition is assigned to the fixed-width time window
        that contains its ``'end_time'`` (the moment the expression actually
        changed). A transition whose ``'start_time'`` lies in an earlier window
        is therefore still counted, instead of matching no window at all.

        Args:
            time_window: Window width, in the same unit as the transition
                timestamps. Defaults to 5.

        Returns:
            ``{"message": ...}`` when no transitions were recorded. Otherwise a
            dict with ``'windows'`` (``"<start>-<end>s"`` -> count, transitions
            and average duration, in ascending time order, non-empty windows
            only) plus either a trend (``'transition_frequency_change'`` and
            ``'transition_frequency_pattern'``) or a ``'message'`` when fewer
            than two windows contain transitions.

        Raises:
            ValueError: If ``time_window`` is not positive.
        """
        if not self.expression_transitions:
            return {"message": "No transitions detected"}

        if time_window <= 0:
            raise ValueError("time_window must be positive")

        # Bucket every transition by the index of the window holding its end
        # time: one pass, independent of how large the timestamps are.
        transitions_by_window = {}
        for transition in self.expression_transitions:
            window_index = int(transition['end_time'] // time_window)
            if window_index < 0:
                continue  # windows start at time 0
            transitions_by_window.setdefault(window_index, []).append(transition)

        windows = {}
        for window_index in sorted(transitions_by_window):
            window_start = window_index * time_window
            window_end = window_start + time_window
            transitions_in_window = transitions_by_window[window_index]

            windows[f"{window_start}-{window_end}s"] = {
                'count': len(transitions_in_window),
                'transitions': transitions_in_window,
                'avg_duration': sum([t['duration'] for t in transitions_in_window]) / len(transitions_in_window)
            }

        # Calculate if transition frequency changes over time.
        # Only windows that contain transitions are stored, so this compares
        # consecutive non-empty windows; quiet stretches do not count as zero.
        window_counts = [w['count'] for w in windows.values()]
        if len(window_counts) > 1:
            count_changes = [window_counts[i] - window_counts[i-1] for i in range(1, len(window_counts))]
            avg_change = sum(count_changes) / len(count_changes)

            if avg_change > 0:
                pattern = "increasing"
            elif avg_change == 0:
                pattern = "stable"
            else:
                pattern = "decreasing"

            return {
                'windows': windows,
                'transition_frequency_change': avg_change,
                'transition_frequency_pattern': pattern
            }

        return {'windows': windows, 'message': "Not enough time windows for trend analysis"}
    
    def save_analysis_results(self, output_path):
        """Save all analysis results to files.

        Writes one indented JSON file per analysis into ``output_path`` (the
        directory is created if needed):

        * ``expression_transitions.json`` - ``self.expression_transitions``
        * ``transition_statistics.json`` - ``get_transition_statistics()``
        * ``movement_analysis.json`` - ``analyze_facial_movement()``
        * ``expression_movement_analysis.json`` - ``analyze_movement_by_expression()``
        * ``temporal_analysis.json`` - ``analyze_transitions_over_time()``
        * ``expression_statistics.json`` - only when ``self.frames_data``
          contains an ``'expression_statistics'`` entry

        Args:
            output_path: Directory that receives the JSON files.

        Raises:
            TypeError: If a result contains a value that is neither natively
                JSON-serialisable nor a NumPy scalar/array.
        """
        import json
        import os

        def _json_default(value):
            # json cannot encode NumPy scalars/arrays (e.g. np.float32) on its
            # own; convert them to the equivalent builtin objects.
            if isinstance(value, np.generic):
                return value.item()
            if isinstance(value, np.ndarray):
                return value.tolist()
            raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")

        def _write(filename, payload):
            with open(os.path.join(output_path, filename), 'w') as f:
                json.dump(payload, f, indent=2, default=_json_default)

        # Create directory if it doesn't exist
        os.makedirs(output_path, exist_ok=True)

        # (file name, payload factory) in the order the files are written.
        reports = [
            ('expression_transitions.json', lambda: self.expression_transitions),
            ('transition_statistics.json', self.get_transition_statistics),
            ('movement_analysis.json', self.analyze_facial_movement),
            ('expression_movement_analysis.json', self.analyze_movement_by_expression),
            ('temporal_analysis.json', self.analyze_transitions_over_time),
        ]

        for filename, build_payload in reports:
            # Build the payload before opening the file so a failing analysis
            # does not leave an empty file behind.
            payload = build_payload()
            _write(filename, payload)

        # Save expression statistics
        if 'expression_statistics' in self.frames_data:
            _write('expression_statistics.json', self.frames_data['expression_statistics'])

        print(f"Analysis results saved to {output_path}")


# ImageProcessor class definition for compatibility
class ImageProcessor:
    """Small image pre-processing helper: load, validate, grayscale, normalize, letterbox.

    NOTE(review): this re-declares the name ``ImageProcessor``. Once this cell
    runs, a class of the same name defined earlier in the notebook is replaced
    by this one, which only offers the methods below.
    """

    def __init__(self, image_size=(64, 64)):
        """Store the output size; ``resize_with_aspect_ratio`` unpacks it as (height, width)."""
        self.target_size = image_size

    def load_image(self, image_path):
        """Read an image with ``cv2.imread``.

        Returns:
            The decoded array, or ``None`` (after printing an error) when
            OpenCV could not read ``image_path``.
        """
        image = cv2.imread(image_path)
        if image is None:
            print(f"ERROR: Unable to read image at {image_path}")
        return image

    def check_if_valid(self, image):
        """Return ``True`` only when ``image`` is a NumPy array."""
        return isinstance(image, np.ndarray)

    def convert_to_grayscale(self, image):
        """Convert a 3-channel (BGR) array to grayscale; any other shape is returned unchanged."""
        if len(image.shape) == 3 and image.shape[2] == 3:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        return image

    def normalize_image(self, image):
        """Return ``image`` as ``float32`` divided by 255."""
        normalized_image = image.astype(np.float32) / 255.0
        return normalized_image

    def resize_with_aspect_ratio(self, image, pad_color):
        """Scale ``image`` to fit ``self.target_size`` and pad the rest with ``pad_color``.

        The image keeps its aspect ratio, is centred along the padded axis, and
        the canvas takes the dtype and channel layout of the resized image, so
        float (e.g. normalized) and multi-channel images are handled as well as
        8-bit grayscale.

        Args:
            image: Array of shape ``(h, w)`` or ``(h, w, channels)``.
            pad_color: Fill value for the border (scalar, or one value per
                channel for multi-channel images).

        Returns:
            ``image`` itself when it already has the target height and width,
            otherwise a new array of the target height and width.
        """
        h, w = image.shape[:2]
        target_h, target_w = self.target_size

        if h == target_h and w == target_w:
            return image

        aspect_original = w / h
        aspect_target = target_w / target_h

        if aspect_original > aspect_target:
            # Relatively wider than the target: fill the width, pad top/bottom.
            new_w = target_w
            # Clamp so extreme aspect ratios never truncate to a 0-pixel side.
            new_h = min(target_h, max(1, int(new_w / aspect_original)))
            shrinking = new_h <= h
        else:
            # Relatively taller (or equal): fill the height, pad left/right.
            new_h = target_h
            new_w = min(target_w, max(1, int(new_h * aspect_original)))
            shrinking = new_w <= w

        # INTER_AREA when downscaling, INTER_CUBIC when upscaling.
        interpolation = cv2.INTER_AREA if shrinking else cv2.INTER_CUBIC
        resized = cv2.resize(image, (new_w, new_h), interpolation=interpolation)

        # Build the canvas from the resized image so dtype and channel count
        # match; a fixed 2-D uint8 canvas truncates float data and cannot take
        # colour images.
        canvas_shape = (target_h, target_w) + tuple(resized.shape[2:])
        padded_image = np.full(canvas_shape, pad_color, dtype=resized.dtype)

        y_offset = (target_h - new_h) // 2
        x_offset = (target_w - new_w) // 2
        padded_image[y_offset:y_offset + new_h, x_offset:x_offset + new_w] = resized

        return padded_image

In [4]:
import cv2
import mediapipe as mp 
import numpy as np
import os
import json
import glob
from os import listdir

# Driver cell: capture from the webcam, then print and save the analysis.
# VideoProcessor is defined outside this cell; the cell must run after it.

# Module-level MediaPipe drawing handles.
# NOTE(review): these three names are not referenced again in this cell -
# confirm whether anything else in the notebook still needs them.
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
my_drawing_specs = mp_drawing.DrawingSpec(color = (0, 255, 0), thickness=1)

# Create an instance of the VideoProcessor
video_processor = VideoProcessor()

# Run the webcam capture; frames_data is whatever capture_webcam_improved() returns
print("Starting facial expression tracking. Press 'q' to stop capturing.")
frames_data = video_processor.capture_webcam_improved()

# After stopping the webcam, analyze the results
# NOTE(review): len() counts every entry of frames_data; if the returned
# mapping also carries the 'expression_statistics' entry, the printed frame
# count is one too high - confirm.
print(f"\nCaptured data for {len(frames_data)} frames")

# Get expression transitions
transitions = video_processor.get_all_expression_transitions()
print(f"Detected {len(transitions)} expression transitions")

# Show transition statistics if transitions were detected.
# Everything below (movement analysis and saving results) is skipped when no
# transition was recorded.
if transitions:
    stats = video_processor.get_transition_statistics()
    print("\nTransition Counts:")
    for t_type, count in stats['counts'].items():
        print(f"  {t_type}: {count}")

    print("\nAverage Transition Durations (seconds):")
    for t_type, duration in stats['average_durations'].items():
        print(f"  {t_type}: {duration:.3f}")

    # Analyze facial movement for key landmarks (default landmark set)
    print("\nAnalyzing facial movement...")
    movement_metrics = video_processor.analyze_facial_movement()

    # Display movement metrics for a few key landmarks (mouth region only);
    # the keys are landmark indices, the values are labels used for printing
    key_points = {
        13: "Upper lip",
        14: "Lower lip",
        61: "Left mouth corner",
        291: "Right mouth corner"
    }

    print("\nMovement Metrics for Key Facial Points:")
    for idx, name in key_points.items():
        if idx in movement_metrics:
            print(f"  {name} (landmark {idx}):")
            for metric, value in movement_metrics[idx].items():
                print(f"    {metric}: {value:.5f}")

    # Analyze expression-specific movement patterns
    print("\nAnalyzing movement patterns by expression type...")
    expression_movement = video_processor.analyze_movement_by_expression()

    print("\nMovement Patterns by Expression:")
    for expr, metrics in expression_movement.items():
        print(f"  {expr}:")
        for metric, value in metrics.items():
            # Floats get fixed precision; other values (e.g. the integer
            # frame_count) are printed as-is
            if isinstance(value, float):
                print(f"    {metric}: {value:.5f}")
            else:
                print(f"    {metric}: {value}")

    # Create directory for saving results (path is relative to the working directory)
    output_dir = "facial_analysis_results"
    print(f"\nSaving analysis results to {output_dir}...")
    video_processor.save_analysis_results(output_dir)
    print("Analysis complete!")
else:
    print("No expression transitions detected. Try making more expressions like smiling, surprise, etc.")

Starting facial expression tracking. Press 'q' to stop capturing.




Starting webcam capture. Press 'q' to stop.


I0000 00:00:1742415892.408207  133491 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M2
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1742415892.424799  143879 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742415892.434014  143877 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742415892.437013  143881 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


Expression transition: neutral → smile at frame 6
Expression transition: smile → neutral at frame 7
Expression transition: neutral → smile at frame 8
Expression transition: smile → neutral at frame 9
Expression transition: neutral → smile at frame 10
Expression transition: smile → neutral at frame 11
Expression transition: neutral → smile at frame 12
Expression transition: smile → neutral at frame 14
Expression transition: neutral → smile at frame 15
Expression transition: smile → eyes_closed at frame 23
Expression transition: eyes_closed → smile at frame 24
Expression transition: smile → neutral at frame 25
Expression transition: neutral → smile at frame 27
Relative width: 0.984, Relative smile: 0.003, Is smile: True
Expression transition: smile → neutral at frame 35
Relative width: 0.977, Relative smile: -0.003, Is smile: False
Expression transition: neutral → smile at frame 41
Relative width: 1.095, Relative smile: 0.002, Is smile: True
Expression transition: smile → eyes_closed at 

KeyboardInterrupt: 