In [None]:
!pip -q install mediapipe opencv-python-headless

In [None]:
import numpy as np
import cv2
import mediapipe as mp
import time
from google.colab import files, drive
from IPython.display import display, Image
import os

In [None]:
# Destination for the annotated frames (lives on Google Drive)
output_folder = '/content/drive/MyDrive/HeadPoseFrames'

# Attach Google Drive to the Colab filesystem, then make sure the
# destination directory is present (no error if it already exists).
drive.mount('/content/drive')
os.makedirs(output_folder, exist_ok=True)

In [None]:
# Step 1: Locate the input video
# The path below is hard-coded; nothing in this cell uploads the file.
# To upload it interactively in Colab, run instead: uploaded = files.upload()
video_path = "/content/b.mp4"  # Path of the video that the later cells open


In [None]:
# Drawing helpers: a single DrawingSpec, reused wherever the mesh overlay is drawn
mp_drawing = mp.solutions.drawing_utils
drawing_spec = mp_drawing.DrawingSpec(
    color=(128, 0, 128),
    thickness=2,
    circle_radius=1,
)

# FaceMesh detector with 0.5 confidence thresholds for detection and tracking
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)


In [None]:
# Step 2: Open the video file
cap = cv2.VideoCapture(video_path)

# VideoCapture does not raise when the file is missing or unreadable; it simply
# stays closed. Fail loudly here, otherwise the processing loop below would
# exit immediately and save no frames without any error.
if not cap.isOpened():
    raise IOError(f"Could not open video file: {video_path}")


In [None]:
# Step 3: Estimate the head pose on every frame, annotate it and save it to Drive

# Landmark indices fed to solvePnP, in ascending order (the order in which the
# landmark list is traversed). Index 1 is the nose landmark.
# NOTE(review): the other indices presumably are eye corners, mouth corners and
# chin -- confirm against the FaceMesh landmark map.
POSE_LANDMARK_IDS = (1, 33, 61, 199, 263, 291)
NOSE_LANDMARK_ID = 1

# Gain applied to the angles returned by cv2.RQDecomp3x3.
# NOTE(review): OpenCV documents those angles as already being in degrees, so
# this is not a unit conversion. It is kept as-is because the direction
# thresholds below are compared against the scaled values; recalibrate both
# together if this is ever changed.
ANGLE_SCALE = 360

# Thresholds (on the scaled angles) for the direction label.
Y_ANGLE_LIMIT = 8    # left / right
X_ANGLE_LIMIT = 10   # up / down

# Set to an integer (e.g. 100) to stop after that many frames while testing;
# None processes the whole video.
MAX_FRAMES = None


def _head_direction(x_angle, y_angle):
    """Map the scaled x/y angles to the on-screen direction label.

    The left/right test takes precedence over the up/down test.
    """
    if y_angle < -Y_ANGLE_LIMIT:
        return "Looking Left"
    if y_angle > Y_ANGLE_LIMIT:
        return "Looking Right"
    if x_angle < -X_ANGLE_LIMIT:
        return "Looking Down"
    if x_angle > X_ANGLE_LIMIT:
        return "Looking Up"
    return "Forward"


def _estimate_head_pose(face_landmarks, img_w, img_h):
    """Fit a pose to one face and return its scaled angles and nose position.

    Args:
        face_landmarks: one entry of ``results.multi_face_landmarks``.
        img_w, img_h: frame size in pixels, used to de-normalise landmarks.

    Returns:
        ``(x_angle, y_angle, z_angle, nose_2d)`` where the angles are the
        RQDecomp3x3 angles multiplied by ANGLE_SCALE and ``nose_2d`` is the
        nose landmark in (float) pixel coordinates, or ``None`` when
        cv2.solvePnP reports failure.
    """
    # Built fresh for every face so that several faces in one frame do not
    # share (or corrupt) each other's point lists.
    points_2d = []
    points_3d = []
    for idx in POSE_LANDMARK_IDS:
        lm = face_landmarks.landmark[idx]
        px, py = int(lm.x * img_w), int(lm.y * img_h)
        points_2d.append([px, py])
        points_3d.append([px, py, lm.z])
    points_2d = np.array(points_2d, dtype=np.float64)
    points_3d = np.array(points_3d, dtype=np.float64)

    nose = face_landmarks.landmark[NOSE_LANDMARK_ID]
    nose_2d = (nose.x * img_w, nose.y * img_h)

    # Simple pinhole camera: focal length equal to the image width, principal
    # point at the image centre, no lens distortion.
    focal_length = 1 * img_w
    cam_matrix = np.array([[focal_length, 0, img_w / 2],
                           [0, focal_length, img_h / 2],
                           [0, 0, 1]], dtype=np.float64)
    distortion_matrix = np.zeros((4, 1), dtype=np.float64)

    pnp_ok, rotation_vec, _ = cv2.solvePnP(points_3d, points_2d, cam_matrix, distortion_matrix)
    if not pnp_ok:
        return None

    rmat, _ = cv2.Rodrigues(rotation_vec)
    angles = cv2.RQDecomp3x3(rmat)[0]
    x_angle, y_angle, z_angle = (angle * ANGLE_SCALE for angle in angles)
    return x_angle, y_angle, z_angle, nose_2d


frame_number = 0
try:
    while cap.isOpened():
        if MAX_FRAMES is not None and frame_number >= MAX_FRAMES:
            break

        success, frame = cap.read()
        if not success:
            break  # Exit if the video has ended

        # Mirror the frame and convert BGR -> RGB for FaceMesh. The array is
        # marked read-only while it is processed, then a fresh BGR copy is
        # made for drawing and saving.
        rgb = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB)
        rgb.flags.writeable = False
        results = face_mesh.process(rgb)
        image = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
        img_h, img_w = image.shape[:2]

        # multi_face_landmarks is falsy when no face was found.
        for face_landmarks in results.multi_face_landmarks or []:
            pose = _estimate_head_pose(face_landmarks, img_w, img_h)
            if pose is not None:
                x_angle, y_angle, z_angle, nose_2d = pose
                text = _head_direction(x_angle, y_angle)

                # Line from the nose, offset by the scaled angles.
                p1 = (int(nose_2d[0]), int(nose_2d[1]))
                p2 = (int(nose_2d[0] + y_angle * 10), int(nose_2d[1] - x_angle * 10))
                cv2.line(image, p1, p2, (255, 0, 0), 3)

                cv2.putText(image, text, (20, 50), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 2)
                cv2.putText(image, f"x: {np.round(x_angle, 2)}", (500, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                cv2.putText(image, f"y: {np.round(y_angle, 2)}", (500, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                cv2.putText(image, f"z: {np.round(z_angle, 2)}", (500, 150), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

            mp_drawing.draw_landmarks(image=image,
                                      landmark_list=face_landmarks,
                                      connections=mp_face_mesh.FACEMESH_CONTOURS,
                                      landmark_drawing_spec=drawing_spec,
                                      connection_drawing_spec=drawing_spec)

        # Save every frame (annotated or not) as an image file on Google Drive.
        # cv2.imwrite signals failure through its return value, not an exception.
        frame_output_path = os.path.join(output_folder, f"frame_{frame_number}.jpg")
        if not cv2.imwrite(frame_output_path, image):
            raise IOError(f"Could not write frame to {frame_output_path}")
        frame_number += 1
finally:
    # Always free the capture, even if a frame fails mid-way.
    # No cv2.destroyAllWindows() here: this notebook never opens a HighGUI
    # window, and the headless OpenCV build it installs has no GUI support.
    cap.release()
