In [1]:
import json
import cv2
import numpy as np
import mediapipe as mp
import os
from tqdm.auto import tqdm
import pandas as pd

In [2]:
# Short aliases for the MediaPipe Holistic solution module and the drawing
# helper module.
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

In [3]:
def mediapipe_detections(img, model):
    """Run a MediaPipe model on one frame and return the frame plus the results.

    Parameters
    ----------
    img : numpy.ndarray
        Frame to analyse. It is treated as BGR (the first conversion below is
        ``COLOR_BGR2RGB``).
    model : object
        Any object exposing ``process(rgb_image)``; in this notebook it is a
        MediaPipe ``Holistic`` instance.

    Returns
    -------
    tuple
        ``(img, results)`` where ``img`` is a new array holding the frame
        converted back to BGR channel order and ``results`` is whatever
        ``model.process`` returned.
    """
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # The read-only flag is toggled around the call exactly as before;
    # presumably this lets MediaPipe avoid copying the frame.
    rgb.flags.writeable = False
    results = model.process(rgb)
    rgb.flags.writeable = True
    # Convert back with the RGB->BGR code. The previous code reused
    # COLOR_BGR2RGB here, which only worked because the channel swap is
    # symmetric; naming the real direction keeps the intent readable.
    img = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
    return img, results

In [4]:
def _build_face_indices():
    """Return the sorted face-mesh landmark indices kept by ``extract_keypoints``."""
    nose = [168, 6, 197, 195, 5, 4, 1, 48, 278, 75, 305, 42, 62, 2]  # 14
    left_brow = [70, 63, 105, 66, 107]
    right_brow = [336, 296, 334, 293, 300]
    left_eye = [33, 246, 161, 160, 159, 158, 157, 173, 133, 155, 154, 153, 145, 144, 163, 7]
    right_eye = [362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386, 385, 384, 398]
    # eyes + brows: 42 indices

    lips_external = [61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291, 409, 270, 269, 267, 0, 37, 39, 40, 185]
    # NOTE: 78 is listed twice below, so that landmark is emitted twice.
    # It is kept on purpose: removing it would shrink the face block from 133
    # to 132 points and change the per-frame feature width (657).
    lips_inner = [78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 306, 415, 310, 311, 312, 13, 82, 81, 80, 191, 78]
    # lips: 41 indices

    external_contour = [21, 54, 103, 67, 109, 10, 338, 297, 332, 284, 251, 389, 356,
                        454, 323, 361, 288, 397, 365, 379, 378, 400, 377, 152,
                        148, 176, 149, 150, 136, 172, 58, 132, 93, 234, 127, 162]  # 36

    return sorted(nose + left_eye + right_eye + left_brow + right_brow
                  + lips_external + lips_inner + external_contour)  # 133


# Built once at definition time instead of on every frame.
_FACE_INDICES = _build_face_indices()


def _hand_array(hand_landmarks):
    """Return a (21, 3) x/y/z array for one hand, or zeros when it is missing."""
    if hand_landmarks:
        return np.array([[lm.x, lm.y, lm.z] for lm in hand_landmarks.landmark])
    return np.zeros((21, 3))


def extract_keypoints(results):
    """Flatten one frame of holistic results into four 1-D feature vectors.

    Parameters
    ----------
    results : object
        Object with ``pose_landmarks``, ``face_landmarks``,
        ``left_hand_landmarks`` and ``right_hand_landmarks`` attributes. Each
        is either falsy (nothing detected) or exposes a ``landmark`` sequence
        whose items have ``x``, ``y``, ``z`` (and ``visibility`` for pose).

    Returns
    -------
    tuple of numpy.ndarray
        ``(pose, face, left_hand, right_hand)``, each flattened:

        * pose  - x, y, z, visibility per landmark (zeros of length 132 when
          missing),
        * face  - x, y, z for the 133 selected mesh indices (length 399),
        * hands - x, y, z per landmark (zeros of length 63 when missing).
    """
    if results.pose_landmarks:
        pose = np.array([[lm.x, lm.y, lm.z, lm.visibility]
                         for lm in results.pose_landmarks.landmark])
    else:
        pose = np.zeros((33, 4))

    if results.face_landmarks:
        mesh = results.face_landmarks.landmark
        face = np.array([(mesh[idx].x, mesh[idx].y, mesh[idx].z)
                         for idx in _FACE_INDICES])
    else:
        # Size the fallback from the index list so the two cannot drift apart.
        face = np.zeros((len(_FACE_INDICES), 3))

    left_hand = _hand_array(results.left_hand_landmarks)
    right_hand = _hand_array(results.right_hand_landmarks)

    return pose.flatten(), face.flatten(), left_hand.flatten(), right_hand.flatten()

In [5]:
# Load the video index table. The displayed columns are "Video Name",
# "Sentences" and "Path"; the "Path" values are later joined onto the videos
# directory by the extraction loop.
df = pd.read_csv('ALL DATA.csv')
df

Unnamed: 0,Video Name,Sentences,Path
0,1.mp4,.انا اسمى حسام حسن,COMBINED_WITHOUT_FLIP\1.mp4
1,2.mp4,.انا عندى 17 سنة,COMBINED_WITHOUT_FLIP\2.mp4
2,3.mp4,انت عامل ايه ؟,COMBINED_WITHOUT_FLIP\3.mp4
3,4.mp4,.هنا مدرسة الأمل للصم,COMBINED_WITHOUT_FLIP\4.mp4
4,5.mp4,.الحمدلله,COMBINED_WITHOUT_FLIP\5.mp4
...,...,...,...
5386,605.mp4,ربنا معاك,ZOOM\605.mp4
5387,606.mp4,انا احب رياضه كره القدم,ZOOM\606.mp4
5388,607.mp4,انا جامعه عين شمس,ZOOM\607.mp4
5389,608.mp4,اللون غامق اللون فاتح,ZOOM\608.mp4


In [6]:
# Extract per-frame holistic keypoints for the first 1000 videos listed in df.
videos_dir = 'Videos'
MAX_FRAMES = 406  # every video is zero-padded (or cut) to this many frames
# pose (33*4) + selected face points (133*3) + two hands (21*3 each) = 657
FEATURES_PER_FRAME = 33 * 4 + 133 * 3 + 21 * 3 + 21 * 3

all_video_keypoints = []
for path in tqdm(df['Path'][:1000]):
    # The CSV stores Windows-style separators (e.g. "ZOOM\605.mp4"); split on
    # them so the path also resolves on systems whose separator is "/".
    video_path = os.path.join(videos_dir, *path.split('\\'))

    # One fixed-size float64 block per video; rows that are never filled stay
    # zero, which is the padding the original code appended by hand.
    video_keypoints = np.zeros((MAX_FRAMES, FEATURES_PER_FRAME))
    num_frames = 0

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            # Keep the row so the output stays aligned with df, but say so
            # instead of silently emitting an all-zero sample.
            print(f"{path} could not be opened; storing an all-zero sequence.")
        else:
            # A fresh Holistic instance per video (presumably so tracking
            # state does not carry over between unrelated clips).
            with mp.solutions.holistic.Holistic(min_detection_confidence=0.50,
                                                min_tracking_confidence=0.50,
                                                refine_face_landmarks=True,
                                                model_complexity=0) as holistic:
                while True:
                    # Capture frame-by-frame
                    ret, frame = cap.read()
                    if not ret:
                        print(f"{path} Finished reading the video.")
                        break
                    if num_frames >= MAX_FRAMES:
                        # Longer videos used to produce ragged sequences that
                        # cannot be stacked into (N, MAX_FRAMES, FEATURES).
                        print(f"{path} has more than {MAX_FRAMES} frames; extra frames are dropped.")
                        break

                    # Process frame
                    _, results = mediapipe_detections(frame, holistic)
                    pose, face, left_hand, right_hand = extract_keypoints(results)

                    # Store keypoints for the current frame
                    video_keypoints[num_frames] = np.concatenate(
                        [pose, face, left_hand, right_hand])
                    num_frames += 1
    finally:
        # Release the capture even if processing a frame raised.
        cap.release()

    # Store keypoints for the current video
    all_video_keypoints.append(video_keypoints)

  0%|          | 0/1000 [00:00<?, ?it/s]

COMBINED_WITHOUT_FLIP\1.mp4 Finished reading the video.
COMBINED_WITHOUT_FLIP\2.mp4 Finished reading the video.
COMBINED_WITHOUT_FLIP\3.mp4 Finished reading the video.
COMBINED_WITHOUT_FLIP\4.mp4 Finished reading the video.
COMBINED_WITHOUT_FLIP\5.mp4 Finished reading the video.
COMBINED_WITHOUT_FLIP\7.mp4 Finished reading the video.
COMBINED_WITHOUT_FLIP\8.mp4 Finished reading the video.
COMBINED_WITHOUT_FLIP\9.mp4 Finished reading the video.
COMBINED_WITHOUT_FLIP\10.mp4 Finished reading the video.
COMBINED_WITHOUT_FLIP\11.mp4 Finished reading the video.
COMBINED_WITHOUT_FLIP\12.mp4 Finished reading the video.
COMBINED_WITHOUT_FLIP\13.mp4 Finished reading the video.
COMBINED_WITHOUT_FLIP\15.mp4 Finished reading the video.
COMBINED_WITHOUT_FLIP\19.mp4 Finished reading the video.
COMBINED_WITHOUT_FLIP\21.mp4 Finished reading the video.
COMBINED_WITHOUT_FLIP\22.mp4 Finished reading the video.
COMBINED_WITHOUT_FLIP\23.mp4 Finished reading the video.
COMBINED_WITHOUT_FLIP\24.mp4 Finished r

In [7]:
# Sanity check: stacking the per-video sequences should give
# (videos, frames, features).
np.array(all_video_keypoints).shape

(1000, 406, 657)

In [8]:
# Persist the extracted landmarks; np.save converts the list to an array
# before writing it to disk.
np.save('landmarks1000.npy',all_video_keypoints)