In [1]:
import cv2
import numpy as np
import mediapipe as mp
import torch

In [2]:
# Loaded models keyed by (checkpoint path, device string) so the checkpoint is
# read from disk once rather than on every webcam frame.
_MODEL_CACHE = {}


def _load_model(model_path, device):
    """Return the model stored at ``model_path`` placed on ``device``, loading it at most once."""
    key = (model_path, str(device))
    if key not in _MODEL_CACHE:
        # NOTE(security): weights_only=False unpickles arbitrary Python objects.
        # Only load checkpoints that come from a trusted source.
        model = torch.load(model_path, map_location=device, weights_only=False)
        model.eval()
        _MODEL_CACHE[key] = model
    return _MODEL_CACHE[key]


def getLabel(results, model_path="models/model_0.pth"):
    """Classify a single MediaPipe pose result with the saved PyTorch model.

    Every landmark contributes four consecutive features
    (``x``, ``y``, ``z``, ``visibility``), giving one input row of
    ``4 * number_of_landmarks`` values.
    (Presumably 33 landmarks -> 132 features for MediaPipe Pose; confirm
    against the input size the checkpoint was trained with.)

    Parameters
    ----------
    results
        Object exposing ``pose_landmarks`` (as returned by ``pose.process``).
        ``pose_landmarks`` may be ``None`` when no pose was detected.
    model_path : str, optional
        Path of the pickled model to load. Defaults to the checkpoint the
        notebook originally hard-coded.

    Returns
    -------
    torch.Tensor or None
        A 0-dim integer tensor holding the index of the highest-probability
        class, or ``None`` when ``results`` contains no pose landmarks.
    """
    # Without a detected pose there is nothing to classify; previously this
    # raised AttributeError on ``None.landmark``.
    if results.pose_landmarks is None:
        return None

    features = [
        value
        for landmark in results.pose_landmarks.landmark
        for value in (landmark.x, landmark.y, landmark.z, landmark.visibility)
    ]

    # Use the GPU when one is available, otherwise fall back to the CPU instead
    # of failing on machines without CUDA.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = _load_model(model_path, device)

    # Shape (1, n_features): a batch containing the single current frame.
    X_test = torch.tensor([features], dtype=torch.float, device=device)
    with torch.inference_mode():
        y_logits = model(X_test)

    y_preds_probs = torch.softmax(y_logits, dim=1)
    return torch.argmax(y_preds_probs)

In [None]:
# Set up MediaPipe: drawing helpers, the pose solution module, and a Pose
# estimator configured with the detection / tracking confidence thresholds.
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(
    min_tracking_confidence=0.5,
    min_detection_confidence=0.7,
)

In [None]:
# Open the default webcam (device index 0)
cap = cv2.VideoCapture(0)

# NOTE(review): never filled in this cell; kept in case another cell reads it.
landmarks = []

# Landmark styles per predicted class, built once instead of on every frame.
# The specs are drawn onto the OpenCV (BGR) frame, so (255, 0, 0) shows as blue
# and (0, 255, 0) as green.
class_zero_spec = mp_drawing.DrawingSpec(color=(255, 0, 0))
other_class_spec = mp_drawing.DrawingSpec(color=(0, 255, 0))

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Flip the frame horizontally for a mirrored view
        frame = cv2.flip(frame, 1)

        # MediaPipe expects RGB input, OpenCV delivers BGR
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Run pose estimation on the frame
        results = pose.process(rgb_frame)

        # Classify and draw only when a pose was detected; calling getLabel on
        # a frame without landmarks used to crash the loop.
        if results.pose_landmarks:
            label = getLabel(results)
            color = class_zero_spec if label == 0 else other_class_spec
            mp_drawing.draw_landmarks(
                frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
                landmark_drawing_spec=color)

        # Display the (possibly annotated) frame
        cv2.imshow('MediaPipe Hand Tracking', frame)

        # Exit if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the pose graph, camera and window even if the loop raised, so the
    # webcam is not left locked by the kernel.
    pose.close()
    cap.release()
    cv2.destroyAllWindows()