In [ ]:
import cv2
import mediapipe as mp
import os
import csv
import numpy as np

In [ ]:
# MediaPipe extraction
# Description: This file contains simple code that extracts landmarks from a video and then
# saves the landmark points as a CSV file at a specified location.

# Shortcuts to the MediaPipe solution modules used below
mp_drawing = mp.solutions.drawing_utils  # drawing helpers
mp_holistic = mp.solutions.holistic  # holistic solution (provides the Holistic model class)

def mediapipe_detection(image, model):
    """Run ``model`` on one BGR frame.

    Args:
        image: Frame in OpenCV's BGR channel order.
        model: Object exposing ``process(rgb_image)``; the script in this file
            passes a MediaPipe ``Holistic`` instance.

    Returns:
        A ``(image, results)`` tuple: the frame converted back to BGR, and
        whatever ``model.process`` returned for it.
    """
    # The model is fed RGB data, so convert from OpenCV's BGR ordering first.
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Lock the buffer while the model reads it, then unlock it again.
    # NOTE(review): presumably a pass-by-reference optimisation recommended by
    # MediaPipe - confirm against the MediaPipe docs.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    # Hand the caller a frame in the same BGR ordering it supplied.
    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, results

# Face-mesh landmark indices kept in the feature vector, grouped by facial region.
_MOUTH_INDICES = (61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291)
_LEFT_EYE_INDICES = (263, 249, 390, 373, 374, 380, 381, 382, 362)
_RIGHT_EYE_INDICES = (33, 7, 163, 144, 145, 153, 154, 155, 133)
_NOSE_INDICES = (1, 2, 98, 327, 168)
_FACE_INDICES = _MOUTH_INDICES + _LEFT_EYE_INDICES + _RIGHT_EYE_INDICES + _NOSE_INDICES


def extract_keypoints(results):
    """Flatten the landmarks in ``results`` into one fixed-length feature vector.

    The vector is the concatenation, in this order, of:

    * pose:       33 landmarks x (x, y, z, visibility)
    * face:       the selected mouth/eye/nose landmarks x (x, y, z)
    * left hand:  21 landmarks x (x, y, z)
    * right hand: 21 landmarks x (x, y, z)

    Any part that was not detected is filled with zeros of the same length as
    the detected case, so every frame yields a vector of identical size.

    Args:
        results: Object with ``pose_landmarks``, ``face_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks`` attributes.
            Each is either falsy (not detected) or has a ``landmark`` sequence
            whose items expose ``x``, ``y``, ``z`` (and ``visibility`` for pose).

    Returns:
        A 1-D ``numpy.ndarray`` of floats.
    """
    # Pose landmarks (with visibility) if available
    if results.pose_landmarks:
        pose = np.array(
            [[lm.x, lm.y, lm.z, lm.visibility] for lm in results.pose_landmarks.landmark]
        ).flatten()
    else:
        pose = np.zeros(33 * 4)

    # Hand landmarks if available
    if results.left_hand_landmarks:
        lh = np.array([[lm.x, lm.y, lm.z] for lm in results.left_hand_landmarks.landmark]).flatten()
    else:
        lh = np.zeros(21 * 3)

    if results.right_hand_landmarks:
        rh = np.array([[lm.x, lm.y, lm.z] for lm in results.right_hand_landmarks.landmark]).flatten()
    else:
        rh = np.zeros(21 * 3)

    # Selected face landmarks if available
    if results.face_landmarks:
        face_landmarks = results.face_landmarks.landmark
        face = np.array(
            [[face_landmarks[idx].x, face_landmarks[idx].y, face_landmarks[idx].z] for idx in _FACE_INDICES]
        ).flatten()
    else:
        # The placeholder must match the detected-face length; derive it from
        # the index list so the two branches cannot drift apart.
        face = np.zeros(len(_FACE_INDICES) * 3)

    return np.concatenate([pose, face, lh, rh])

# Collect the video and extract one keypoint vector per frame, then save them as CSV.
video_path = 'FAIL.mp4'  # give the video file name
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Fail loudly: otherwise the loop below never runs and an empty CSV is written.
    raise IOError(f"Error: Could not open video file: {video_path}")

all_landmarks = []
try:
    # Initialize the holistic model; the context manager closes it when done
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No more frames (or the frame could not be decoded)
                break

            # Only the detection results are needed here, not the returned frame
            _frame_bgr, results = mediapipe_detection(frame, holistic)
            all_landmarks.append(extract_keypoints(results))
finally:
    # Release the capture even if processing a frame raised
    cap.release()

# Number of frames that were processed
frame_count = len(all_landmarks)

# Convert the list of per-frame vectors to a 2-D numpy array (one row per frame)
all_landmarks = np.array(all_landmarks)

# Save the landmarks to a CSV file
np.savetxt('video_landmarks.csv', all_landmarks, delimiter=',')


In [2]:
# MediaPipe visualization
# This file contains simple code that detects face, hand and pose landmarks in a video, draws
# them on every frame, and saves the annotated video at a specified location.

# setting the mediapipe solutions
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_face_mesh = mp.solutions.face_mesh
mp_hands = mp.solutions.hands
mp_pose = mp.solutions.pose

# Capturing video
cap = cv2.VideoCapture('test_for_extraction/FAIL.mp4')  # preferably use the complete path of the file for comprehension.
if not cap.isOpened():
    # Raise instead of calling exit(): exit() would end the whole Python session.
    raise IOError("Error: Could not open video file.")

output_path = r'output_test_for_extraction/output_with_landmarks.mp4'
out = None
try:
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    print(f"Video FPS: {fps}, Width: {frame_width}, Height: {frame_height}")

    # Make sure the output folder exists before opening the writer.
    # NOTE(review): cv2.VideoWriter is not expected to create missing folders - confirm.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # Define the codec and create VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # You can use other codecs like 'XVID', 'DIVX', etc.
    out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))
    if not out.isOpened():
        # Keep going so the preview window still works, but tell the user nothing is saved.
        print(f"Warning: Could not open output video file: {output_path}")

    # Initialize solutions; the context managers close them when the loop ends
    with mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1,
                               min_detection_confidence=0.5, min_tracking_confidence=0.5) as face_mesh, \
            mp_hands.Hands(static_image_mode=False, max_num_hands=2,
                           min_detection_confidence=0.5, min_tracking_confidence=0.5) as hands, \
            mp_pose.Pose(static_image_mode=False,
                         min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        while cap.isOpened():
            success, image = cap.read()
            if not success:
                # cap.read() also reports failure at the normal end of the video
                print("End of video reached (or frame could not be read).")
                break

            # Check if the frame was correctly captured
            if image is None:
                print("Error: Frame is None.")
                break

            # OpenCV delivers frames in BGR order; the solutions are given RGB
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # Processing all
            face_results = face_mesh.process(image_rgb)
            hands_results = hands.process(image_rgb)
            pose_results = pose.process(image_rgb)

            # Drawing landmarks of face (tesselation connections only, no landmark dots)
            if face_results.multi_face_landmarks:
                for face_landmarks in face_results.multi_face_landmarks:
                    mp_drawing.draw_landmarks(
                        image=image,
                        landmark_list=face_landmarks,
                        connections=mp_face_mesh.FACEMESH_TESSELATION,
                        landmark_drawing_spec=None,
                        connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_tesselation_style()
                    )

            # Draw the hand landmarks
            if hands_results.multi_hand_landmarks:
                for hand_landmarks in hands_results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(
                        image=image,
                        landmark_list=hand_landmarks,
                        connections=mp_hands.HAND_CONNECTIONS,
                        landmark_drawing_spec=mp_drawing_styles.get_default_hand_landmarks_style(),
                        connection_drawing_spec=mp_drawing_styles.get_default_hand_connections_style()
                    )

            # Draw the pose landmarks
            if pose_results.pose_landmarks:
                mp_drawing.draw_landmarks(
                    image=image,
                    landmark_list=pose_results.pose_landmarks,
                    connections=mp_pose.POSE_CONNECTIONS,
                    landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style()
                )

            # Save the annotated (BGR) frame and show it in a preview window
            out.write(image)

            cv2.imshow('MediaPipe Face, Hand, and Pose', image)

            # Break the loop on 'q' key press (waits up to 100 ms per frame)
            if cv2.waitKey(100) & 0xFF == ord('q'):
                break
finally:
    # close all if finished, including when an error interrupted the loop
    cap.release()
    if out is not None:
        out.release()
    cv2.destroyAllWindows()


Video FPS: 25.0, Width: 480, Height: 360


AttributeError: 'FaceMesh' object has no attribute 'shape'