In [2]:

File Name: video_csv_specific_with_landmarks.ipynb
Description: This file contains a simple code for extracting a landmarks from a video and then, it saves the landmark points as csv file at a specified location.

Email:potnurupavan51@gmail.com
Date: 2024-06-14

Project Name: Sign language translation

Dependencies:
- openCV or cv2  (perferably cv2)(used for capturing the image\video data and process)
- mediapipe(used to extract the landmarks)
- csv(to handle the csv type files)
-numpy (used for the manipulation of the numerical values)

Revision History:
- 2024-06-10: Initiation.
- 2024-06-14: Added new features and fixed bugs.

Usage Instructions:
- Run this file using Python 3.x.
- Ensure all dependencies are installed.

In [None]:
import cv2
import numpy as np
import mediapipe as mp
import csv

# Initialize Mediapipe
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # COLOR CONVERSION BGR 2 RGB(Refer the guidelines for clarity)
    image.flags.writeable = False  # Image is no longer writeable
    results = model.process(image)  # Processing of video
    image.flags.writeable = True  # Image is now writeable 
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)  # COLOR COVERSION RGB 2 BGR(Refer the guidelines for clarity)
    return image, results

def extract_keypoints(results):
    # Extract pose landmarks if available
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    
    # Extract hand landmarks if available
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    
    # Extract face landmarks if available
    if results.face_landmarks:
        face_landmarks = results.face_landmarks.landmark
        # Indices for mouth, eyes, and nose
        mouth_indices = [61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291]
        left_eye_indices = [263, 249, 390, 373, 374, 380, 381, 382, 362]
        right_eye_indices = [33, 7, 163, 144, 145, 153, 154, 155, 133]
        nose_indices = [1, 2, 98, 327, 168]
        relevant_indices = mouth_indices + left_eye_indices + right_eye_indices + nose_indices
        face = np.array([[face_landmarks[idx].x, face_landmarks[idx].y, face_landmarks[idx].z] for idx in relevant_indices]).flatten()
    else:
        face = np.zeros(52*3)
    
    return np.concatenate([pose, face, lh, rh])

# collecting the video
cap = cv2.VideoCapture('FAIL.mp4')  #give the video file name

# Initialize the holistic model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    all_landmarks = []
    frame_count = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        image, results = mediapipe_detection(frame, holistic)
        
        keypoints = extract_keypoints(results)
        all_landmarks.append(keypoints)

        frame_count += 1

    cap.release()

# Convert the list of landmarks to a numpy array
all_landmarks = np.array(all_landmarks)

# Save the landmarks to a CSV file 
np.savetxt('video_landmarks.csv', all_landmarks, delimiter=',')


In [4]:
import pandas as pd # Pandas is used for handling the data manipulation

In [5]:
# Read the CSV file
df = pd.read_csv('video_landmarks.csv')

# Print the DataFrame shape (rows, columns)
print("DataFrame shape:", df.shape)

DataFrame shape: (75, 360)
