In [None]:
pip install mediapipe opencv-python


Collecting mediapipe
  Downloading mediapipe-0.10.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
Collecting protobuf<5,>=4.25.3 (from mediapipe)
  Downloading protobuf-4.25.5-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.1-py3-none-any.whl.metadata (1.4 kB)
Downloading mediapipe-0.10.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (35.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.9/35.9 MB[0m [31m26.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading protobuf-4.25.5-cp37-abi3-manylinux2014_x86_64.whl (294 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m294.6/294.6 kB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sounddevice-0.5.1-py3-none-any.whl (32 kB)
Installing collected packages: protobuf, sounddevice, mediapipe
  Attempting uninstall: protobuf
    Found existing installation: protobuf 3.20.

In [None]:
import cv2
import mediapipe as mp
# Colab-specific replacement for cv2.imshow; imported once here instead of on
# every processed frame inside the loop.
from google.colab.patches import cv2_imshow

# Initialize MediaPipe Holistic model
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Load the video file
video_path = 'angry.mp4'
cap = cv2.VideoCapture(video_path)

frame_count = 0  # Frame counter
frame_skip = 5   # Process every 5th frame


def _draw_detected_landmarks(image, results):
    """Overlay every landmark group present in ``results`` on ``image`` in place.

    ``image`` is the BGR frame that will be displayed; ``results`` is the object
    returned by ``holistic.process``. Groups that were not detected are skipped.
    """
    groups = (
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS),
        (results.face_landmarks, mp_holistic.FACEMESH_CONTOURS),
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS),
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS),
    )
    for landmark_list, connections in groups:
        if landmark_list:
            mp_drawing.draw_landmarks(image, landmark_list, connections)


def _print_landmarks(title, landmark_list, with_visibility=False):
    """Print one line per landmark in ``landmark_list`` under the heading ``title``.

    Nothing is printed when the group was not detected. ``with_visibility``
    appends the landmark's ``visibility`` field (used for the pose group only).
    """
    if not landmark_list:
        return
    print(title)
    for index, lm in enumerate(landmark_list.landmark):
        line = f"ID: {index}, X: {lm.x}, Y: {lm.y}, Z: {lm.z}"
        if with_visibility:
            line += f", Visibility: {lm.visibility}"
        print(line)


try:
    # Fail loudly instead of silently doing nothing when the file is missing
    # or cannot be decoded.
    if not cap.isOpened():
        raise IOError(f"Could not open video file: {video_path}")

    # Initialize holistic model
    with mp_holistic.Holistic(static_image_mode=False, model_complexity=2) as holistic:
        while True:
            ret, frame = cap.read()
            if not ret:
                break  # end of video (or a read error)

            # Only process every `frame_skip`-th frame
            if frame_count % frame_skip == 0:
                # MediaPipe expects RGB input; OpenCV decodes frames as BGR
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = holistic.process(rgb_frame)

                # Draw directly on the original BGR frame; converting the RGB
                # copy back to BGR would yield the same pixels.
                _draw_detected_landmarks(frame, results)
                cv2_imshow(frame)

                # Extract landmarks (data points)
                _print_landmarks("Pose landmarks:", results.pose_landmarks, with_visibility=True)
                _print_landmarks("Face landmarks:", results.face_landmarks)
                _print_landmarks("Left hand landmarks:", results.left_hand_landmarks)
                _print_landmarks("Right hand landmarks:", results.right_hand_landmarks)

            # Increment the frame counter
            frame_count += 1

            # NOTE: no cv2.waitKey()/cv2.destroyAllWindows() here. cv2_imshow
            # renders inline in the notebook and never creates a HighGUI window,
            # so a key press can not be observed and there is nothing to destroy.
            # Interrupt the kernel to stop early.
finally:
    # Release the video capture even if processing raised
    cap.release()


Downloading model to /usr/local/lib/python3.10/dist-packages/mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite


In [None]:
import cv2
import mediapipe as mp
import time
import numpy as np

# Initialize MediaPipe Holistic model
mp_holistic = mp.solutions.holistic

# Capture webcam input
cap = cv2.VideoCapture(0)
# Named property ids instead of the magic numbers 3 and 4
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

fps = 10
duration = 5  # seconds
# Upper bound on captured frames. The capture rate itself is not throttled:
# recording stops at whichever comes first, `frame_count` frames or `duration`
# seconds.
frame_count = fps * duration

frames = []
landmarks_data = []


def _landmarks_to_array(landmark_list):
    """Return the x/y/z coordinates of every landmark as an (N, 3) array."""
    return np.array([[lm.x, lm.y, lm.z] for lm in landmark_list.landmark])


def _collect_frame_landmarks(results):
    """Build the per-frame dict of coordinate arrays from holistic ``results``.

    Only groups that were detected get a key; possible keys are
    'pose', 'face', 'left_hand' and 'right_hand'.
    """
    groups = {
        'pose': results.pose_landmarks,
        'face': results.face_landmarks,
        'left_hand': results.left_hand_landmarks,
        'right_hand': results.right_hand_landmarks,
    }
    return {
        name: _landmarks_to_array(landmark_list)
        for name, landmark_list in groups.items()
        if landmark_list
    }


def _record_landmarks(capture, holistic, max_frames, max_seconds):
    """Capture frames and their landmarks until a frame or time limit is hit.

    Returns ``(frames, landmarks_data)``: the raw BGR frames and, for each of
    them, the dict produced by ``_collect_frame_landmarks``. Stops early (after
    printing a message) when a frame can not be read.
    """
    captured_frames = []
    captured_landmarks = []
    # Monotonic clock: elapsed time is unaffected by system clock adjustments
    start_time = time.monotonic()
    while len(captured_frames) < max_frames:
        ret, frame = capture.read()
        if not ret:
            print("Failed to capture frame.")
            break

        captured_frames.append(frame)

        # MediaPipe expects RGB input; OpenCV delivers BGR
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = holistic.process(rgb_frame)
        captured_landmarks.append(_collect_frame_landmarks(results))

        if time.monotonic() - start_time >= max_seconds:
            break
    return captured_frames, captured_landmarks


# Start recording for 5 seconds
print("Recording started...")
try:
    if cap.isOpened():
        # The context manager closes the model when recording ends, matching
        # how the video-file cell uses Holistic. Re-create the model if it is
        # needed again later.
        with mp_holistic.Holistic(static_image_mode=False) as holistic:
            frames, landmarks_data = _record_landmarks(cap, holistic, frame_count, duration)
    else:
        # Checked before the model is created so no model is built when there
        # is no camera to read from.
        print("Failed to open webcam (device 0); nothing was recorded.")
finally:
    # Release the camera even if processing raised
    cap.release()
print("Recording finished.")

# Function to return the data points as vectors
def get_landmarks_as_vectors(landmarks_data):
    """Return a per-frame list of dicts holding only the known landmark groups.

    For every frame dict in ``landmarks_data`` a new dict is built containing
    the entries for 'pose', 'face', 'left_hand' and 'right_hand' (in that
    order) that are present in the frame. Values are passed through as-is;
    any other keys are dropped.
    """
    known_groups = ('pose', 'face', 'left_hand', 'right_hand')
    return [
        {group: frame[group] for group in known_groups if group in frame}
        for frame in landmarks_data
    ]

# Convert the recorded per-frame landmarks into vector dicts
landmarks_vectors = get_landmarks_as_vectors(landmarks_data)

# Print every landmark group of every frame, numbering frames from 1
for frame_number, frame_vectors in enumerate(landmarks_vectors, start=1):
    print(f"Frame {frame_number}:")
    for group_name in frame_vectors:
        print(f"  {group_name}: {frame_vectors[group_name]}")

Recording started...
Failed to capture frame.
Recording finished.
