In [1]:
import mediapipe as mp
import cv2
import numpy as np
import uuid
import os
import json
import csv

In [2]:
# Short aliases for the two MediaPipe solution modules used by the later cells:
# the landmark-drawing helpers and the hand-tracking solution.
mp_drawing, mp_hands = mp.solutions.drawing_utils, mp.solutions.hands

In [3]:
# Shared state for recording the processed video and the landmark coordinates.

# Landmarks are only collected while this flag is truthy. Nothing in the
# notebook toggles it at runtime, so it has to start enabled; with False the
# exported CSV would only ever contain the header row.
recording = True

# Placeholder for the cv2.VideoWriter; the cell that opens the input video
# replaces it with the real writer.
out = None

# Accumulated landmark rows; each entry is a list of (x, y, z) tuples.
all_landmarks = []

In [23]:
# Open the video file.
# NOTE(review): this is a machine-specific absolute path; adjust it (or move it
# into a config cell) before running the notebook on another machine.
video_path = r"C:\Users\ravik\practice\projects\Hand Pose detection\mediapipe videos\OBS recordings\participant_10.mkv"
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Fail here instead of letting the processing cell "finish" with no frames.
    raise OSError(f"Could not open input video: {video_path}")

# Video writer to save the processed video (random name avoids overwriting earlier runs).
filename = f"{uuid.uuid4()}.mp4"
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Keep the fractional frame rate (e.g. 29.97): truncating it to an int changes
# the playback speed of the output. Fall back to 30 when the container reports 0.
fps = cap.get(cv2.CAP_PROP_FPS) or 30
fourcc = cv2.VideoWriter_fourcc(*'mp4v')

# VideoWriter does not create missing directories; make sure the target exists.
output_dir = "mediapipe videos"
os.makedirs(output_dir, exist_ok=True)
out = cv2.VideoWriter(os.path.join(output_dir, filename), fourcc, fps, (frame_width, frame_height))
if not out.isOpened():
    # An unopened writer silently drops every frame, so surface the problem now.
    cap.release()
    raise OSError(f"Could not open output video for writing: {os.path.join(output_dir, filename)}")

In [24]:
# Run MediaPipe Hands over every frame of `cap`, write the annotated frames to
# `out`, and export the collected landmark coordinates to a CSV file.

# Start from an empty buffer so re-running this cell never mixes in rows
# collected by a previous run.
all_landmarks = []

# The drawing styles do not change between frames, so build them once.
joint_style = mp_drawing.DrawingSpec(color=(255, 0, 21), thickness=2, circle_radius=2)  # joints and dots
connection_style = mp_drawing.DrawingSpec(color=(16, 255, 0), thickness=2, circle_radius=2)  # connections and lines

try:
    with mp_hands.Hands(min_detection_confidence=0.8, min_tracking_confidence=0.5, max_num_hands=2) as hands:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("End of video")
                break

            frame_height, frame_width = frame.shape[:2]

            # cv2 reads BGR, MediaPipe expects RGB, so convert a copy for detection.
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image.flags.writeable = False  # read-only input for hands.process
            results = hands.process(image)

            if results.multi_hand_landmarks:  # only set when at least one hand is detected
                for hand in results.multi_hand_landmarks:
                    # Draw directly on the original BGR frame so the saved video shows
                    # the real footage; draw on np.zeros_like(frame) instead to get a
                    # skeleton-only video.
                    mp_drawing.draw_landmarks(frame, hand, mp_hands.HAND_CONNECTIONS,
                                              joint_style, connection_style)

                    # One fresh list per hand: x/y are pixel coordinates, z is
                    # MediaPipe's relative depth (unscaled). Building it per hand keeps
                    # every CSV row at one value per header column, and avoids appending
                    # the same list object twice when two hands are visible.
                    hand_landmarks_3d = [
                        (int(lm.x * frame_width), int(lm.y * frame_height), lm.z)
                        for lm in hand.landmark
                    ]

                    if recording and hand_landmarks_3d:
                        all_landmarks.append(hand_landmarks_3d)

            # Save the processed frame to the video
            out.write(frame)
finally:
    # Always release the reader and finalise the output file, even if a frame fails.
    cap.release()
    out.release()

# Write landmarks to CSV file: one row per detected hand, columns joint_<i>_<axis>.
datafile = filename.replace(".mp4", ".csv")
os.makedirs("coordinates data", exist_ok=True)
with open(os.path.join("coordinates data", datafile), mode='w', newline='') as f:
    csv_writer = csv.writer(f)
    header = [f'joint_{i}_{axis}' for i in range(21) for axis in ['x', 'y', 'z']]
    csv_writer.writerow(header)
    for hand_row in all_landmarks:
        csv_writer.writerow([value for joint in hand_row for value in joint])

print("Processing and saving completed.")

End of video
Processing and saving completed.
