In [1]:
pip install opencv-python mediapipe pandas

Note: you may need to restart the kernel to use updated packages.


In [3]:

import cv2
import os
import pandas as pd
import numpy as np
import mediapipe as mp
import time
from datetime import datetime

# Configuration: the CSV listing the gesture labels (must contain a "Label"
# column), the root folder for recorded samples, and the sample length.
GESTURE_CSV = "construction_gestures_template.csv"
DATA_DIR = "gesture_data"
SEQUENCE_LENGTH = 60  # Number of frames per sample

# Load gesture labels
gesture_df = pd.read_csv(GESTURE_CSV)
gesture_labels = gesture_df["Label"].tolist()
if not gesture_labels:
    # Without labels there is nothing to choose from; fail with a clear message
    # instead of prompting for an index that can never be valid.
    raise ValueError(f"No gesture labels found in {GESTURE_CSV}")

# Prompt user to choose the gesture to record samples for
print("Available Gestures:")
for i, g in enumerate(gesture_labels):
    print(f"[{i}] {g}")

# Re-prompt until a valid index is entered. A plain int(input()) would crash on
# non-numeric text, and a negative number would silently select a label from
# the end of the list.
while True:
    raw_choice = input("Select gesture index to record: ").strip()
    try:
        gesture_index = int(raw_choice)
    except ValueError:
        print(f"[WARN] '{raw_choice}' is not a whole number, try again.")
        continue
    if 0 <= gesture_index < len(gesture_labels):
        break
    print(f"[WARN] Index must be between 0 and {len(gesture_labels) - 1}, try again.")

gesture_name = gesture_labels[gesture_index]

# Each gesture gets its own sub-folder under DATA_DIR.
save_path = os.path.join(DATA_DIR, gesture_name)
os.makedirs(save_path, exist_ok=True)
print(f"Recording gesture: {gesture_name}")

# Set up MediaPipe Holistic and its drawing helpers.
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Keyword options handed to the Holistic constructor: non-static image mode,
# model complexity 1, landmark smoothing on, segmentation and face refinement
# off, and 0.5 for both confidence thresholds.
_holistic_options = {
    "static_image_mode": False,
    "model_complexity": 1,
    "smooth_landmarks": True,
    "enable_segmentation": False,
    "refine_face_landmarks": False,
    "min_detection_confidence": 0.5,
    "min_tracking_confidence": 0.5,
}
holistic = mp_holistic.Holistic(**_holistic_options)

# Flatten pose and both-hand landmarks into one feature vector
def extract_landmarks(results):
    """Build a flat list of x, y, z values from a Holistic result.

    The list is laid out as pose, then left hand, then right hand. A group
    that is missing (falsy) is replaced by zeros: 33 * 3 values for the pose
    and 21 * 3 values for each hand.

    Args:
        results: object exposing ``pose_landmarks``, ``left_hand_landmarks``
            and ``right_hand_landmarks``; each is either falsy or has a
            ``.landmark`` iterable of points with ``x``, ``y``, ``z``.

    Returns:
        A Python list of floats (225 values when every present group has its
        expected number of points).
    """

    def flat_xyz_or_zeros(group, point_count):
        # Absent group -> zero padding so the vector layout stays fixed.
        if not group:
            return np.zeros(point_count * 3)
        return np.array([[pt.x, pt.y, pt.z] for pt in group.landmark]).flatten()

    groups = (
        (results.pose_landmarks, 33),
        (results.left_hand_landmarks, 21),
        (results.right_hand_landmarks, 21),
    )

    features = []
    for group, point_count in groups:
        features.extend(flat_xyz_or_zeros(group, point_count))
    return features

# Start Webcam
WINDOW_NAME = "Gesture Recorder"
COUNTDOWN_SECONDS = 5
MAX_READ_FAILURES = 100  # consecutive failed reads tolerated before giving up


def _read_frame(cap):
    """Return the next BGR frame, or None after MAX_READ_FAILURES failed reads in a row."""
    for _ in range(MAX_READ_FAILURES):
        ret, frame = cap.read()
        if ret:
            return frame
    return None


def _annotate(frame):
    """Run Holistic on a BGR frame, draw pose/hand landmarks onto it, return the results."""
    results = holistic.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    for landmark_list, connections in (
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS),
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS),
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS),
    ):
        mp_drawing.draw_landmarks(frame, landmark_list, connections)
    return results


def _countdown(cap, seconds):
    """Show a live countdown overlay for `seconds`; return False if the camera stops delivering frames."""
    deadline = time.monotonic() + seconds
    while True:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return True
        # Keep reading during the countdown so the preview stays live and no
        # stale frames are left queued for the recording that follows.
        frame = _read_frame(cap)
        if frame is None:
            return False
        cv2.putText(frame, f"Recording in {int(np.ceil(remaining))}", (10, 60),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 255), 3)
        cv2.imshow(WINDOW_NAME, frame)
        cv2.waitKey(1)  # lets the window repaint


def _record_sequence(cap, length):
    """Collect `length` frames of landmark features; return None if the camera fails mid-recording."""
    sequence = []
    while len(sequence) < length:
        frame = _read_frame(cap)
        if frame is None:
            return None
        results = _annotate(frame)
        sequence.append(extract_landmarks(results))
        cv2.imshow(WINDOW_NAME, frame)
        cv2.waitKey(10)
    return sequence


cap = cv2.VideoCapture(0)
sample_counter = len(os.listdir(save_path)) + 1

print(f"\n[INFO] Press 's' to start {COUNTDOWN_SECONDS}-second countdown for recording, 'q' to quit.")

if not cap.isOpened():
    print("[ERROR] Could not open webcam (device 0).")

try:
    while cap.isOpened():
        frame = _read_frame(cap)
        if frame is None:
            print("[ERROR] Webcam stopped delivering frames; exiting.")
            break

        # Live preview with landmarks and the current gesture name
        _annotate(frame)
        cv2.putText(frame, f'Gesture: {gesture_name}', (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
        cv2.imshow(WINDOW_NAME, frame)

        # Mask to the low byte: some platforms return extra high bits from waitKey.
        key = cv2.waitKey(10) & 0xFF
        if key == ord('s'):
            if not _countdown(cap, COUNTDOWN_SECONDS):
                print("[ERROR] Webcam stopped delivering frames; exiting.")
                break

            # Record SEQUENCE_LENGTH frames
            print("[INFO] Recording gesture now...")
            sequence = _record_sequence(cap, SEQUENCE_LENGTH)
            if sequence is None:
                print("[ERROR] Webcam stopped delivering frames; sample discarded.")
                break

            # Save sequence as a csv file. Skip indices that already exist on
            # disk so a gap in the numbering never causes an overwrite.
            filename = f"sample_{sample_counter:03d}.csv"
            file_path = os.path.join(save_path, filename)
            while os.path.exists(file_path):
                sample_counter += 1
                filename = f"sample_{sample_counter:03d}.csv"
                file_path = os.path.join(save_path, filename)
            pd.DataFrame(sequence).to_csv(file_path, index=False, header=False)
            print(f"[SAVED] {filename}")
            sample_counter += 1

        elif key == ord('q'):
            break
finally:
    # Always free the camera and close the preview window, even when the cell
    # is interrupted or an exception is raised.
    cap.release()
    cv2.destroyAllWindows()


Available Gestures:
[0] stop
[1] emergency_stop
[2] hoist
[3] lower_load


Select gesture index to record:  0


Recording gesture: stop

[INFO] Press 's' to start 5-second countdown for recording, 'q' to quit.


