<a href="https://colab.research.google.com/github/Prudiusal/crop_eyes/blob/main/Face_Extract_Mediapipe.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
# Report whether PyTorch can see a CUDA device in this runtime.
print(f"GPU Available: {torch.cuda.is_available()}")


GPU Available: False


In [None]:
# Mount Google Drive at /content/drive so the input video stored there can be
# read (and the processed result written back) from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
!pip install opencv-python mediapipe moviepy


Collecting mediapipe
  Downloading mediapipe-0.10.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
Collecting protobuf<5,>=4.25.3 (from mediapipe)
  Downloading protobuf-4.25.5-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.1-py3-none-any.whl.metadata (1.4 kB)
Downloading mediapipe-0.10.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (35.9 MB)
   35.9/35.9 MB 24.6 MB/s eta 0:00:00
Downloading protobuf-4.25.5-cp37-abi3-manylinux2014_x86_64.whl (294 kB)
   294.6/294.6 kB 14.1 MB/s eta 0:00:00
Downloading sounddevice-0.5.1-py3-none-any.whl (32 kB)
Installing collected packages: protobuf, sounddevice, mediapipe
  Attempting uninstall: protobuf
    Found existing installation: protobuf 3.20.

In [None]:
from pathlib import Path
import shutil
from datetime import datetime
import math

from scipy.signal import savgol_filter

import cv2
import mediapipe as mp
import numpy as np

# Frame size (in pixels) of the video written by get_out_video_writer; every
# cropped frame is resized to this size in process_video.
OUTPUT_VIDEO_WIDTH = 1920
OUTPUT_VIDEO_HEIGHT = 1080
# NOTE(review): the two shift values are not referenced by any code visible in
# this notebook - confirm whether they are still needed.
LEFT_EYE_SHIFT = 480
RIGHT_EYE_SHIFT = 480
# Scratch file the video writer targets; process_video copies it to the final
# timestamped location once processing finishes.
OUTPUT_PATH_TMP = 'output_video_tmp.mp4'
# get_rotation_matrix treats the eyes as closed when both eye ratios are below
# this value.
EYE_OPEN_THRESHOLD = 0.2
# After a successful face detection, process_video skips detection for this
# many frames and reuses the last crop parameters.
RECALC_AREA_FRAMES = 10
# NOTE(review): TARGET_WIDTH / TARGET_HEIGHT are not referenced by any code
# visible in this notebook - confirm whether they are still needed.
TARGET_WIDTH = 1280
TARGET_HEIGHT = 720


In [None]:
def get_pupils(landmarks):
  """Estimate both pupil positions as midpoints of landmark pairs.

  The left pupil is the midpoint of landmarks 33 and 36, the right pupil the
  midpoint of landmarks 133 and 136.

  NOTE(review): confirm these indices against the landmark map of the face
  model in use; they are taken as-is from the existing implementation.

  Args:
    landmarks: object whose ``landmark`` sequence holds items with ``x`` and
      ``y`` attributes.

  Returns:
    ``(left_pupil, right_pupil)``, each an ``(x, y)`` tuple in the same
    coordinate space as the landmark attributes.
  """
  points = landmarks.landmark

  def pair_midpoint(first, second):
    a, b = points[first], points[second]
    return ((a.x + b.x) / 2, (a.y + b.y) / 2)

  return pair_midpoint(33, 36), pair_midpoint(133, 136)


In [None]:
def get_eye_aspect_ratio(landmarks):
    """Compute an eye aspect ratio (EAR) for each eye.

    For each eye the ratio is ``height / (2 * width)``, where ``height`` is the
    sum of two vertical lid-to-lid extents and ``width`` is one horizontal
    extent. Extents are absolute values, so the result is never negative and
    does not depend on landmark ordering or on the direction of the axes.

    Args:
        landmarks: object whose ``landmark`` sequence holds items with ``x``
            and ``y`` attributes.

    Returns:
        ``(left_eye_ratio, right_eye_ratio)`` as floats. An eye whose width is
        zero (degenerate landmarks) yields ``0.0`` instead of raising
        ``ZeroDivisionError``.
    """
    points = landmarks.landmark

    def _ratio(lid_pairs, corner_pair):
        # Sum of vertical extents between the paired lid landmarks.
        height = sum(abs(points[a].y - points[b].y) for a, b in lid_pairs)
        # Horizontal extent between the two width landmarks.
        width = abs(points[corner_pair[0]].x - points[corner_pair[1]].x)
        if width == 0:
            # Degenerate geometry: report the smallest ratio rather than crash.
            return 0.0
        return height / (2.0 * width)

    # NOTE(review): the width landmark pairs (36/39 and 123/130) are kept from
    # the previous implementation - confirm them against the landmark map.
    left_eye_ratio = _ratio(((159, 145), (158, 144)), (36, 39))
    right_eye_ratio = _ratio(((386, 374), (387, 373)), (123, 130))

    return left_eye_ratio, right_eye_ratio

In [None]:
def get_eye_center(landmarks):
  """Return the centroid of each eye's landmark group.

  The left centroid averages landmarks 33..38 and the right centroid averages
  landmarks 133..138 (both ranges inclusive).

  Args:
    landmarks: object whose ``landmark`` sequence holds items with ``x`` and
      ``y`` attributes.

  Returns:
    ``(left_eye_center, right_eye_center)``, each an ``(x, y)`` tuple.
  """
  def centroid(indices):
    xs = [landmarks.landmark[i].x for i in indices]
    ys = [landmarks.landmark[i].y for i in indices]
    return (sum(xs) / len(xs), sum(ys) / len(ys))

  return centroid(range(33, 39)), centroid(range(133, 139))

In [None]:
def exponential_smoothing(angles, angle, alpha=0.8):
    """Blend a new angle with the most recent entry of ``angles``.

    Args:
        angles: history of previous angles; only the last entry is read.
        angle: the newly measured angle.
        alpha: weight given to the previous angle (``1 - alpha`` goes to the
            new one).

    Returns:
        ``alpha * angles[-1] + (1 - alpha) * angle``, or ``angle`` unchanged
        when the history is empty.
    """
    if not angles:
        # Nothing to blend with yet.
        return angle
    previous = angles[-1]
    return alpha * previous + (1 - alpha) * angle


In [None]:
def exponential_smoothing_midpoint(midpoints, new_midpoint, alpha=0.8):
    """Blend a new 2-D midpoint with the most recent entry of ``midpoints``.

    Args:
        midpoints: history of previous midpoints; only the last entry is read.
        new_midpoint: the newly measured point, indexable at 0 and 1.
        alpha: weight given to the previous midpoint (``1 - alpha`` goes to
            the new one).

    Returns:
        An ``(x, y)`` tuple blended per coordinate, or ``new_midpoint`` itself
        when the history is empty.
    """
    if not midpoints:
        # Nothing to blend with yet.
        return new_midpoint
    last = midpoints[-1]
    blended_x = alpha * last[0] + (1 - alpha) * new_midpoint[0]
    blended_y = alpha * last[1] + (1 - alpha) * new_midpoint[1]
    return (blended_x, blended_y)


In [None]:
def get_rotation_matrix(landmarks, angles, midpoints):
  """Build the rotation matrix that levels the line between the pupils.

  When both eye ratios are below ``EYE_OPEN_THRESHOLD`` and a history exists,
  the last stored angle and midpoint are reused. Otherwise the angle and
  midpoint are measured from the current pupils, smoothed against the history,
  and appended to it. This includes the case where the eyes are closed but no
  history exists yet, so the function always has values to build a matrix from.

  Args:
    landmarks: face landmarks passed on to ``get_eye_aspect_ratio`` and
      ``get_pupils``.
    angles: list of previously smoothed angles (degrees); appended to in place
      when a new measurement is taken.
    midpoints: list of previously smoothed ``(x, y)`` midpoints; appended to in
      place when a new measurement is taken.

  Returns:
    ``(rotation_matrix, angles, midpoints)``.
  """
  left_eye_ratio, right_eye_ratio = get_eye_aspect_ratio(landmarks)
  eyes_closed = (left_eye_ratio < EYE_OPEN_THRESHOLD
                 and right_eye_ratio < EYE_OPEN_THRESHOLD)
  if eyes_closed:
    print("Eyes are closed.")

  if eyes_closed and angles and midpoints:
    # Landmarks are unreliable while the eyes are closed: hold the last pose.
    print('Previous angle was used')
    smoothed_angle = angles[-1]
    smoothed_midpoint = midpoints[-1]
  else:
    left_pupil, right_pupil = get_pupils(landmarks)
    dx = right_pupil[0] - left_pupil[0]
    dy = right_pupil[1] - left_pupil[1]
    midpoint = (
      (left_pupil[0] + right_pupil[0]) / 2,
      (left_pupil[1] + right_pupil[1]) / 2
    )

    angle = math.degrees(math.atan2(dy, dx))
    smoothed_angle = exponential_smoothing(angles, angle)
    angles.append(smoothed_angle)
    smoothed_midpoint = exponential_smoothing_midpoint(midpoints, midpoint)
    midpoints.append(smoothed_midpoint)

  # NOTE(review): the centre is passed in landmark coordinates; if those are
  # normalized rather than pixel units it must be scaled by the frame size
  # before use - confirm against the caller.
  rotation_matrix = cv2.getRotationMatrix2D(smoothed_midpoint, smoothed_angle, 1.0)
  return rotation_matrix, angles, midpoints



In [None]:
def process_frame(frame, landmarks, angles, midpoints):
  """Rotate ``frame`` using the matrix derived from the face landmarks.

  Args:
    frame: image array; its first two ``shape`` entries are read as
      ``(height, width)``.
    landmarks: face landmarks forwarded to ``get_rotation_matrix``.
    angles: angle history forwarded to ``get_rotation_matrix``.
    midpoints: midpoint history forwarded to ``get_rotation_matrix``.

  Returns:
    ``(rotated_frame, angles, midpoints)``; the rotated frame keeps the input
    frame's width and height.
  """
  frame_h, frame_w = frame.shape[:2]
  matrix, angles, midpoints = get_rotation_matrix(landmarks, angles, midpoints)
  # TODO (from the original notes): crop the frame and fill to 1920x1080.
  return cv2.warpAffine(frame, matrix, (frame_w, frame_h)), angles, midpoints

In [None]:
def update_crop_params(landmarks, params):
    """Append one new set of crop parameters to ``params``.

    A 16:9 crop rectangle is derived from the pupil positions, but the derived
    values are NOT stored: the literal value 200 is appended to each of the
    four crop lists instead.

    NOTE(review): the constant 200 looks like a debugging placeholder;
    presumably the derived ``crop_w``/``crop_h``/``crop_x``/``crop_y`` are
    meant to be stored once they are expressed in pixel units - confirm.

    Args:
        landmarks: face landmarks forwarded to ``get_pupils``.
        params: dict holding ``'frame_height'`` plus the lists ``'crop_wide'``,
            ``'crop_height'``, ``'crop_x'`` and ``'crop_y'``; the lists are
            appended to in place.
    """
    frame_height = params['frame_height']

    left_pupil, right_pupil = get_pupils(landmarks)
    left_x, left_y = left_pupil[0], left_pupil[1]
    right_x, right_y = right_pupil[0], right_pupil[1]

    pupil_dx = right_x - left_x
    pupil_dy = right_y - left_y
    center = ((left_x + right_x) / 2, (left_y + right_y) / 2)

    # Derived crop rectangle (currently unused, see the note above).
    crop_w = int(2 * pupil_dx)
    crop_h = int(9 / 16 * crop_w)
    crop_x = int(center[0] - pupil_dx)
    crop_y = int(min(frame_height - crop_h, center[1] - 0.5 * crop_h))

    for key in ('crop_wide', 'crop_height', 'crop_x', 'crop_y'):
        params[key].append(200)

In [None]:
def crop_frame(frame, params):
  """Crop ``frame`` to the most recently recorded crop rectangle.

  Args:
    frame: image array supporting 2-D slice indexing (``frame[rows, cols]``).
    params: dict with the lists ``'crop_x'``, ``'crop_y'``, ``'crop_wide'`` and
      ``'crop_height'``; the last entry of each list is used.

  Returns:
    The cropped view ``frame[y:y + h, x:x + w]``, or ``frame`` unchanged when
    any of the crop lists is still empty (no crop has been recorded yet).
  """
  crop_keys = ('crop_wide', 'crop_height', 'crop_x', 'crop_y')
  if not all(params[key] for key in crop_keys):
    # No crop recorded so far: pass the frame through instead of raising
    # IndexError on the empty history.
    return frame

  x = params['crop_x'][-1]
  y = params['crop_y'][-1]
  w = params['crop_wide'][-1]
  h = params['crop_height'][-1]
  return frame[y:y + h, x:x + w]

In [None]:
def process_video(video_path: str, face_mesh):
    """Crop every frame of a video around the detected face and save the result.

    Face detection runs on a frame, then is skipped for the next
    ``RECALC_AREA_FRAMES`` frames, during which the last crop parameters are
    reused. Each cropped frame is resized to the output size and written to
    ``OUTPUT_PATH_TMP``; when the input is exhausted that file is copied next
    to the input as ``<stem>_<timestamp>_processed.mp4``.

    Args:
        video_path: path of the input video file.
        face_mesh: detector exposing ``process(rgb_frame)`` whose result has a
            ``multi_face_landmarks`` attribute.

    Returns:
        ``True`` once the processed video has been saved.

    Raises:
        AssertionError: if ``video_path`` does not exist.
    """
    crop_keys = ('crop_wide', 'crop_height', 'crop_x', 'crop_y')
    params = {key: [] for key in crop_keys}

    assert Path(video_path).exists(), f"Video file not found: {video_path}"
    cap = cv2.VideoCapture(video_path)
    out_video = None
    try:
        params['frame_height'] = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        fps = cap.get(cv2.CAP_PROP_FPS)
        print(f"FPS: {fps}")
        out_video = get_out_video_writer(fps)
        frames_counter = 0

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if not frames_counter:
                # The RGB copy is only needed on frames where detection runs.
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                face_mask_results = face_mesh.process(rgb_frame)
                if face_mask_results.multi_face_landmarks:
                    frames_counter = RECALC_AREA_FRAMES
                    landmarks = face_mask_results.multi_face_landmarks[0]
                    update_crop_params(landmarks, params)
                    # TODO: apply angle transformation and recalc landmarks
                    # (see process_frame) before cropping.
                else:
                    print('Face not found')
            else:
                frames_counter -= 1

            if all(params[key] for key in crop_keys):
                cropped_frame = crop_frame(frame, params)
            else:
                # No face has been found yet, so there is nothing to crop to.
                cropped_frame = frame
            if cropped_frame.size == 0:
                # The crop fell outside the frame; resizing an empty image
                # would fail, so fall back to the full frame.
                cropped_frame = frame

            resized_frame = cv2.resize(cropped_frame, (OUTPUT_VIDEO_WIDTH, OUTPUT_VIDEO_HEIGHT))
            out_video.write(resized_frame)
    finally:
        # Release the capture and the writer even if processing fails midway.
        cap.release()
        if out_video is not None:
            out_video.release()

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_path = Path(video_path).parent / f"{Path(video_path).stem}_{timestamp}_processed.mp4"

    shutil.copy(OUTPUT_PATH_TMP, output_path)
    print(f'Saved as {output_path}')

    return True


In [None]:
def get_out_video_writer(fps):
    """Create the video writer for the temporary output file.

    Args:
        fps: frame rate passed to the writer.

    Returns:
        A ``cv2.VideoWriter`` targeting ``OUTPUT_PATH_TMP`` with the ``mp4v``
        fourcc and a frame size of
        ``(OUTPUT_VIDEO_WIDTH, OUTPUT_VIDEO_HEIGHT)``.
    """
    frame_size = (OUTPUT_VIDEO_WIDTH, OUTPUT_VIDEO_HEIGHT)
    codec = cv2.VideoWriter_fourcc(*'mp4v')
    return cv2.VideoWriter(OUTPUT_PATH_TMP, codec, fps, frame_size)

In [None]:
# Create the FaceMesh detector (static_image_mode=False, at most one face) that
# process_video calls on the frames where it runs detection.
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1)
# Input video on the mounted Google Drive; process_video writes the result next
# to it as <stem>_<timestamp>_processed.mp4.
video_path = '/content/drive/MyDrive/Personal/BUDKA/cropped_short.mp4'
process_video(video_path, face_mesh)

FPS: 25.0
Saved as /content/drive/MyDrive/Personal/BUDKA/cropped_short_20241029_152110_processed.mp4


True