# Installing all the dependencies 

In [1]:
import numpy as np
import cv2 
import mediapipe as mp
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands
from pathlib import Path
import glob
import os 

# Adding Mediapipe Holistic 

In [2]:
mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic


# Getting real time videocam

In [3]:


def modifyframe(frame):
    # Example: Convert to grayscale
    return frame


In [4]:
def start():
    cap = cv2.VideoCapture(0, cv2.CAP_V4L2)

    # Set camera properties
    cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG'))
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
    cap.set(cv2.CAP_PROP_FPS, 30)

    if not cap.isOpened():
        print("Camera couldn't be opened.")
        return

    print("Press 'q' to quit.")

    with mp_holistic.Holistic(
        min_detection_confidence=0.5, 
        min_tracking_confidence=0.5,
                 ) as holistic:
        # Defined parameters to the holistic model 
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Failed to grab frame.")
                break

            # Apply any preprocessing (e.g., grayscale, filters)
            modified = modifyframe(frame)
            frame = cv2.flip(modified, 1)

            # To improve performance, mark the image as not writeable
            frame.flags.writeable = False
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = holistic.process(rgb_frame)

            # Draw landmark annotation on the image
            frame.flags.writeable = True
            frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)

            # Draw face landmarks
            mp_drawing.draw_landmarks(
                frame,
                results.face_landmarks,
                mp_holistic.FACEMESH_CONTOURS
            )
            
            # Draw pose landmarks
            mp_drawing.draw_landmarks(
                frame,
                results.pose_landmarks,
                mp_holistic.POSE_CONNECTIONS,
                landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style()
            )
            
            # Draw left-hand landmarks
            mp_drawing.draw_landmarks(
                frame,
                results.left_hand_landmarks,
                mp_holistic.HAND_CONNECTIONS
            )

            # Draw right-hand landmarks
            mp_drawing.draw_landmarks(
                frame,
                results.right_hand_landmarks,
                mp_holistic.HAND_CONNECTIONS
            )


            # Show the frame
            cv2.imshow("Landmarks", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    cap.release()
    cv2.destroyAllWindows()


In [5]:
start()

Press 'q' to quit.


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
qt.qpa.plugin: Could not find the Qt platform plugin "wayland" in ""


In [6]:

  

print(mp.__version__)

0.10.11


# Data Exploration 

In [101]:

video_dir = Path("Data/INCLUDE/Home/Videos/Table")
landmark_dir = Path("Data/INCLUDE/Home/Landmarks/Table")


def show():
    #load the first video 
    avi_files = list(video_dir.glob("*.MP4"))
    
    # Check if at least one .avi file was found
    if avi_files:
        first_video_path = avi_files[1]  # This is a Path object
        cap = cv2.VideoCapture(str(first_video_path))  # Convert to string for OpenCV
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 580)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
    
        if not cap.isOpened():
            print("Error: Could not open video.")
        else:
            print(f"Playing: {first_video_path.name}")
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
    
                cv2.imshow('Video Playback', frame)
    
                # Press 'q' to quit
                if cv2.waitKey(25) & 0xFF == ord('q'):
                    break
        cap.release()
        cv2.destroyAllWindows()
            
    else:
        print("No .avi files found.")




In [102]:
#show()

## Converting to Landmarks and Saving as .npy files 


In [103]:
#Initialize mediapipe holistic\
#https://github.com/google-ai-edge/mediapipe/blob/master/docs/solutions/holistic.md

holistic = mp_holistic.Holistic(
    static_image_mode = False,
    model_complexity =1,
    smooth_landmarks = True,
    refine_face_landmarks = True,
    min_detection_confidence = 0.5,
    min_tracking_confidence = 0.5
)



# Globals to store expected face landmark count
EXPECTED_FACE_LANDMARKS = None

# Helper to extract and concatenate landmarks
def extract_frame_landmarks(results):
    global EXPECTED_FACE_LANDMARKS
    lm = []
    
    # Right hand (21 points × x,y)
    if results.right_hand_landmarks:
        for p in results.right_hand_landmarks.landmark:
            lm.extend([p.x, p.y])
    else:
        lm.extend([0] * 42)
        
    # Left hand3
    if results.left_hand_landmarks:
        for p in results.left_hand_landmarks.landmark:
            lm.extend([p.x, p.y])
    else:
        lm.extend([0] * 42)
        
    # Face (468 points × x,y = 936 values)
    face_lms =  results.face_landmarks.landmark if results.face_landmarks else []
    count = len(face_lms)

    if EXPECTED_FACE_LANDMARKS is None:
        EXPECTED_FACE_LANDMARKS = count or 468

    for i in range(min(count,EXPECTED_FACE_LANDMARKS)):
        p = face_lms[i]
        lm.extend([p.x,p.y])

    missing = EXPECTED_FACE_LANDMARKS - min(count, EXPECTED_FACE_LANDMARKS)
    if missing > 0:
        lm.extend([0.0, 0.0] * missing)

    
    # ---- Final consistency check ----
    total_features = 42 * 2 + EXPECTED_FACE_LANDMARKS * 2
    if len(lm) != total_features:
        raise ValueError(f"Inconsistent landmark length {len(lm)} vs expected {total_features}")

    return np.array(lm, dtype=np.float32)



def process_video(video_dir, landmark_dir):
    for file in os.listdir(video_dir):
        if not file.lower().endswith((".avi", ".mp4",".mov")):
            continue
            
        video_path = os.path.join(video_dir, file)
        
        cap = cv2.VideoCapture(video_path)
        frames_lm = []
    
        print(f"Processing {file}...")
        frame_count = 0
    
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # Convert BGR to RGB
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = holistic.process(rgb_frame)
            
            # Extract and store landmarks
            frame_landmarks = extract_frame_landmarks(results)  # Store in variable
            frames_lm.append(frame_landmarks)                   # Append to list
            print(f"Frame {frame_count}: shape = {frame_landmarks.shape}")  # Now defined!
            
            frame_count += 1
    
        cap.release()

        # Check if all frames have same shape before stacking
        if frames_lm:
            shapes = [arr.shape for arr in frames_lm]
            print(f"All frame shapes: {set(shapes)}")  # Should show only one unique shape
            
            # Stack into [num_frames × num_features]
            sequence = np.stack(frames_lm, axis=0)
            # Save to .npy
            out_file = os.path.join(landmark_dir, file.rsplit(".", 1)[0] + ".npy")
            np.save(out_file, sequence)
            print(f"Saved landmarks to {out_file} with shape {sequence.shape}")
        
    holistic.close()


W0000 00:00:1758099402.637559   41388 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1758099402.649682   41396 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1758099402.651130   41390 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1758099402.651702   41389 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1758099402.652001   41392 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1758099402.661800   41389 inference_feedback_manager.cc:114] Feedback manager 

In [104]:
process_video(video_dir,landmark_dir)

Processing MVI_4344.MOV...
Frame 0: shape = (1040,)
Frame 1: shape = (1040,)
Frame 2: shape = (1040,)
Frame 3: shape = (1040,)
Frame 4: shape = (1040,)
Frame 5: shape = (1040,)
Frame 6: shape = (1040,)
Frame 7: shape = (1040,)
Frame 8: shape = (1040,)
Frame 9: shape = (1040,)
Frame 10: shape = (1040,)
Frame 11: shape = (1040,)
Frame 12: shape = (1040,)
Frame 13: shape = (1040,)
Frame 14: shape = (1040,)
Frame 15: shape = (1040,)
Frame 16: shape = (1040,)
Frame 17: shape = (1040,)
Frame 18: shape = (1040,)
Frame 19: shape = (1040,)
Frame 20: shape = (1040,)
Frame 21: shape = (1040,)
Frame 22: shape = (1040,)
Frame 23: shape = (1040,)
Frame 24: shape = (1040,)
Frame 25: shape = (1040,)
Frame 26: shape = (1040,)
Frame 27: shape = (1040,)
Frame 28: shape = (1040,)
Frame 29: shape = (1040,)
Frame 30: shape = (1040,)
Frame 31: shape = (1040,)
Frame 32: shape = (1040,)
Frame 33: shape = (1040,)
Frame 34: shape = (1040,)
Frame 35: shape = (1040,)
Frame 36: shape = (1040,)
Frame 37: shape = (10

In [28]:
#find average frames

video_dir = video_dir
landmark_dir = landmark_dir


file_list = [f for f in os.listdir(landmark_dir) if f.endswith(".npy")]
file_list.sort()

# Extract the number of frames (shape[0]) from each file
frame_counts = [np.load(os.path.join(landmark_dir, f)).shape[0] for f in file_list]

# Compute the average
average_frame_count = np.mean(frame_counts)

print(f"Average number of frames = {average_frame_count}")   

Average number of frames = 62.6
