In [1]:
import cv2
import dlib
import numpy as np
import random

Create the dlib face detector and define the bounding-box stability helpers

In [65]:
# Seed Python's built-in RNG up front so any use of `random` is repeatable
random.seed(0)

# Shared dlib frontal face detector; later cells call it on grayscale frames
detector = dlib.get_frontal_face_detector()

def percentage_change(old, new):
    """Return the relative change from ``old`` to ``new`` as a non-negative fraction.

    Args:
        old: Baseline value.
        new: Value to compare against the baseline.

    Returns:
        ``abs(new - old) / abs(old)``, e.g. ``0.1`` for a 10% change.
        Returns ``0`` when ``old`` is zero, because there is no baseline
        to scale by.
    """
    if old == 0:
        # No meaningful baseline to divide by; reported as "no change".
        return 0
    # Divide by the magnitude of the baseline: with a negative ``old`` a signed
    # division would yield a negative "change" that passes any `< tolerance` test.
    return abs(new - old) / abs(old)

def is_bounding_box_stable(previous_face, current_face, tolerance=0.10, check_size=False):
    """Decide whether a face bounding box stayed put between two frames.

    Args:
        previous_face: ``(x1, y1, x2, y2)`` box from the earlier frame.
        current_face: ``(x1, y1, x2, y2)`` box from the later frame.
        tolerance: Maximum allowed relative change (exclusive) for every
            checked quantity.
        check_size: When True, the relative change in box width and height
            must also stay below ``tolerance``. Defaults to False, which
            matches the previous behaviour: the size changes used to be
            computed and then overwritten with zero, so only the top-left
            corner ever influenced the result.

    Returns:
        True if every checked change is strictly below ``tolerance``.
    """
    x1_prev, y1_prev, x2_prev, y2_prev = previous_face
    x1_curr, y1_curr, x2_curr, y2_curr = current_face

    # Position change is measured relative to the previous top-left
    # coordinate itself (not relative to the size of the face).
    changes = [
        percentage_change(x1_prev, x1_curr),
        percentage_change(y1_prev, y1_curr),
    ]

    if check_size:
        changes.append(percentage_change(x2_prev - x1_prev, x2_curr - x1_curr))
        changes.append(percentage_change(y2_prev - y1_prev, y2_curr - y1_curr))

    return all(change < tolerance for change in changes)


Function to process a single frame of video

In [66]:
def process_frame(frame, faces, action="crop"):
    """Crop to, or black out, the first detected face in a frame.

    Args:
        frame: Image array; indexed as ``frame[rows, cols]`` and sized via
            ``frame.shape[0]`` (height) and ``frame.shape[1]`` (width).
        faces: Iterable of detections exposing ``left()``, ``top()``,
            ``right()`` and ``bottom()``.
        action: ``"crop"`` or ``"mask"``. Any other value leaves the frame
            untouched.

    Returns:
        * ``"crop"``: the face box shrunk by 20% of its size on every side,
          resized to the frame's dimensions, or ``None`` if that region is
          empty.
        * ``"mask"``: the frame with a filled black ellipse drawn over the face.
        * Otherwise (no faces, or an unrecognised action): ``frame`` itself.

    Only the first face is ever used, because both actions return from
    inside the loop.
    """
    for face in faces:
        left, top, right, bottom = face.left(), face.top(), face.right(), face.bottom()
        box_w = right - left
        box_h = bottom - top

        if action == "crop":
            # Inset applied on each side of the box (20% of the box size)
            inset_x = int(0.2 * box_w)
            inset_y = int(0.2 * box_h)

            # Shrink the box, then clamp it to the frame bounds
            crop_left = max(0, left + inset_x)
            crop_top = max(0, top + inset_y)
            crop_right = min(frame.shape[1], right - inset_x)
            crop_bottom = min(frame.shape[0], bottom - inset_y)

            face_region = frame[crop_top:crop_bottom, crop_left:crop_right]
            if face_region.size == 0:
                return None  # Nothing left to crop

            # Scale the face region back up to the full frame size
            return cv2.resize(face_region, (frame.shape[1], frame.shape[0]))

        if action == "mask":
            # All-white mask with a filled black ellipse where the face is;
            # AND-ing it with the frame zeroes only the ellipse.
            mask = np.ones_like(frame) * 255
            centre = ((left + right) // 2, (top + bottom) // 2)
            half_axes = (box_w // 3 + box_w // 10, box_h // 2 + box_h // 6)
            cv2.ellipse(mask, centre, half_axes, 0, 0, 360, (0, 0, 0), -1)
            return cv2.bitwise_and(frame, mask)

    # No face (or unrecognised action): hand the input back unchanged
    return frame

Function to process an entire video: find a stable 10-second window and apply `process_frame` to each of its frames

In [70]:
def _segment_is_stable(cap, frames_to_process):
    """Return True if the next ``frames_to_process`` frames all contain a stable face."""
    previous_face = None
    for _ in range(frames_to_process):
        ret, frame = cap.read()
        if not ret:
            return False  # Ran out of video before the window was complete

        # Detect faces on the grayscale frame
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = detector(gray)
        if len(faces) == 0:
            return False

        # Only the first detection is tracked
        current_face = (faces[0].left(), faces[0].top(), faces[0].right(), faces[0].bottom())
        if previous_face is not None and not is_bounding_box_stable(previous_face, current_face):
            return False
        previous_face = current_face
    return True


def _write_processed_segment(cap, out, frames_to_process, action):
    """Run ``process_frame`` on the next ``frames_to_process`` frames and write them to ``out``."""
    for _ in range(frames_to_process):
        ret, frame = cap.read()
        if not ret:
            break  # End of video

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = detector(gray)

        processed_frame = process_frame(frame, faces, action)
        # Fall back to the untouched frame when cropping failed
        out.write(processed_frame if processed_frame is not None else frame)


def process_video(input_video, output_video, action="crop"):
    """Find 10-second windows with a stable face and process or list them.

    Candidate windows start every 5 seconds. A window is stable when every
    frame in it has at least one detected face and ``is_bounding_box_stable``
    holds between each pair of consecutive frames.

    Args:
        input_video: Path of the video to read.
        output_video: Path of the video to write (XVID codec). Not created
            when ``action`` is ``'extract_time_segment'``.
        action: ``"crop"`` or ``"mask"`` to write the first stable window
            through ``process_frame``; ``'extract_time_segment'`` to only
            collect the start times of all stable windows.

    Returns:
        For ``'extract_time_segment'``: a list of window start times in
        seconds. Otherwise ``None``. ``None`` is also returned if the video
        cannot be opened or reports a non-positive frame rate.
    """
    cap = cv2.VideoCapture(input_video)

    if not cap.isOpened():
        print(f"Error: Could not open video {input_video}")
        cap.release()
        return

    extract_only = action == 'extract_time_segment'
    out = None
    found_times = []
    stable_segment_found = False

    # try/finally so the capture and writer are released on every exit path,
    # including the early return used for 'extract_time_segment'.
    try:
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Keep the frame rate as a float: truncating it to int miscounts frames
        # for fractional rates and turns rates below 1 into a division by zero.
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        if not fps > 0:
            print(f"Error: Could not determine frame rate of video {input_video}")
            return

        video_duration = total_frames / fps
        frames_to_process = int(10 * fps)

        if not extract_only:
            # The writer is only needed when frames are actually written;
            # creating it unconditionally left a stray output file behind.
            fourcc = cv2.VideoWriter_fourcc(*'XVID')  # XVID codec for AVI
            out = cv2.VideoWriter(output_video, fourcc, fps, (frame_width, frame_height))

        for i in range(int(video_duration // 10) * 2):
            # Candidate windows start every 5 seconds
            start_time = i * 5

            print(f"Trying with start time of {start_time} seconds")
            cap.set(cv2.CAP_PROP_POS_MSEC, start_time * 1000)  # Seek to the start time

            if not _segment_is_stable(cap, frames_to_process):
                continue

            print(f"Stable segment found at {start_time:.2f} - {start_time + 10:.2f} seconds.")
            if extract_only:
                found_times.append(start_time)
                continue

            stable_segment_found = True
            cap.set(cv2.CAP_PROP_POS_MSEC, start_time * 1000)  # Seek back to start of segment
            _write_processed_segment(cap, out, frames_to_process, action)
            break  # Stop after processing the first stable segment
    finally:
        cap.release()
        if out is not None:
            out.release()

    if extract_only:
        return found_times

    cv2.destroyAllWindows()

    if not stable_segment_found:
        print("Could not find a stable 10-second segment after retries.")
        return

    print(f"Processing complete. Output saved as {output_video}")

In [71]:
# Input video for the single-video runs and the output paths for each action
input_video = "../Experiment_videos/s_sj1_EO.avi"
output_video_crop = "output_cropped.mp4"
output_video_ellipsoid = "output_ellipsoid.mp4"



# Crop the face in the video (currently disabled)
#process_video(input_video, output_video_crop, action="crop")

# Apply an ellipsoid mask to the face in the video (currently disabled)
#process_video(input_video, output_video_ellipsoid, action="mask")

# Accumulator used by the disabled batch loop below
times=[]
# NOTE(review): the triple-quoted string below is disabled batch code kept as a
# bare expression, not a comment. It is never executed, but as the last
# expression of the cell its repr is echoed as the cell output.
# It also calls process_video with `start_time=` and `found_times=` keyword
# arguments, which the process_video(input_video, output_video, action)
# signature defined in this notebook does not accept, so it would raise a
# TypeError if re-enabled as written.
""" FOR ALL OF THEM
for i in range(1, 14):
    input_video = f"../Experiment_videos/s_sj{i}_EO.avi"
    for _ in range(13):
        start_time = process_video(input_video, output_video_crop, start_time=0, found_times=times, action='extract_time_segment')
        if start_time is None:
            break
        times.append(start_time)
    
    print(f"Times for person {i}")
    
    print(times)
    times = []

"""

' FOR ALL OF THEM\nfor i in range(1, 14):\n    input_video = f"../Experiment_videos/s_sj{i}_EO.avi"\n    for _ in range(13):\n        start_time = process_video(input_video, output_video_crop, start_time=0, found_times=times, action=\'extract_time_segment\')\n        if start_time is None:\n            break\n        times.append(start_time)\n    \n    print(f"Times for person {i}")\n    \n    print(times)\n    times = []\n\n'

In [79]:
# FOR SINGLE PERSON
# List the start times (in seconds) of every stable 10-second window found
# in one participant's video.
person = 13
input_video = f"../Experiment_videos/s_sj{person}_EO.avi"
# With action='extract_time_segment', process_video returns the list of start
# times (or None if the video could not be opened).
times = process_video(input_video, output_video_crop, action='extract_time_segment')
print(f"Times for person {person}")

print(times)
# Reset the shared name after printing; the result is not kept past this cell
times = []

Trying with start time of 0 seconds
Stable segment found at 0.00 - 10.00 seconds.
Trying with start time of 5 seconds
Stable segment found at 5.00 - 15.00 seconds.
Trying with start time of 10 seconds
Stable segment found at 10.00 - 20.00 seconds.
Trying with start time of 15 seconds
Stable segment found at 15.00 - 25.00 seconds.
Trying with start time of 20 seconds
Stable segment found at 20.00 - 30.00 seconds.
Trying with start time of 25 seconds
Stable segment found at 25.00 - 35.00 seconds.
Trying with start time of 30 seconds
Stable segment found at 30.00 - 40.00 seconds.
Trying with start time of 35 seconds
Stable segment found at 35.00 - 45.00 seconds.
Trying with start time of 40 seconds
Stable segment found at 40.00 - 50.00 seconds.
Trying with start time of 45 seconds
Stable segment found at 45.00 - 55.00 seconds.
Trying with start time of 50 seconds
Stable segment found at 50.00 - 60.00 seconds.
Trying with start time of 55 seconds
Stable segment found at 55.00 - 65.00 secon