In [10]:
# # Install the required library
# !pip install ultralytics opencv-python-headless matplotlib numpy

# Import necessary modules (standard library first, then third-party)
import json
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
from ultralytics import YOLO


In [11]:
# Load the YOLO model for pose detection.
# `model` is a notebook-level global: detect_pose() calls it directly, so this
# cell must be run before any detection cell.
model = YOLO('yolov8n-pose.pt')  # Using YOLOv8 with pre-trained pose weights


In [12]:
def load_image(image_path):
    """
    Read the image stored at ``image_path`` and return it in RGB channel order.

    Args:
        image_path: Path of the image file handed to ``cv2.imread``.

    Returns:
        The decoded image converted from OpenCV's BGR order to RGB.

    Raises:
        FileNotFoundError: If ``cv2.imread`` returns ``None`` for the path.
    """
    bgr_pixels = cv2.imread(image_path)
    if bgr_pixels is not None:
        # OpenCV decodes to BGR; swap to RGB so the image displays with true colours.
        return cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2RGB)
    raise FileNotFoundError(f"Image not found at {image_path}")


In [13]:
def annotate_image(image, keypoints, color=(0, 255, 0), radius=5, conf_threshold=0.5):
    """
    Draw a filled circle on the image for every sufficiently confident keypoint.

    Args:
        image: Image to draw on. It is modified in place by ``cv2.circle``;
            pass a copy if the original must stay untouched.
        keypoints: Iterable of ``(x, y, conf)`` triples.
        color: Colour tuple forwarded to ``cv2.circle``.
        radius: Circle radius in pixels.
        conf_threshold: Keypoints whose ``conf`` is not strictly greater than
            this value are skipped. Defaults to 0.5, the previously hard-coded
            cutoff, so existing callers behave the same.

    Returns:
        The same ``image`` object that was passed in, after drawing.
    """
    for x, y, conf in keypoints:
        if conf > conf_threshold:
            # thickness=-1 asks OpenCV for a filled circle
            cv2.circle(image, (int(x), int(y)), radius, color, -1)
    return image


In [14]:
def extract_hand_keypoints(results):
    """
    Extract the two hand (wrist) keypoints of every detected person.

    Works with both layouts that ultralytics has used for
    ``results[0].keypoints``:

    * a ``Keypoints`` wrapper object whose ``.data`` holds an
      ``(N, K, 2|3)`` tensor (current releases), and
    * a raw ``(N, K, 2|3)`` tensor / array (older releases).

    Iterating the wrapper directly yields per-person wrappers of shape
    ``(1, K, 3)``, so a ``shape[0] > 10`` check on them never succeeds;
    the raw data is therefore unwrapped first.

    Args:
        results: Sequence returned by the YOLO pose model; only the first
            element is inspected.

    Returns:
        A list with one dict per detected person, each of the form
        ``{"hand1": [x, y], "hand2": [x, y]}`` with Python floats.
        ``hand1`` is keypoint index 9 and ``hand2`` is index 10 (left and
        right wrist in the COCO keypoint order). If a pose has fewer than
        11 keypoints both entries are ``[None, None]``. An empty list is
        returned when there are no results or no detections.
    """
    LEFT_HAND_IDX, RIGHT_HAND_IDX = 9, 10
    hand_keypoints = []

    if len(results) == 0:
        return hand_keypoints

    keypoints = results[0].keypoints
    if keypoints is None:  # e.g. the result carries no pose information
        return hand_keypoints

    # Unwrap the ultralytics Keypoints object (recognised by its `.xy` view);
    # raw tensors / arrays are used as they are.
    raw = keypoints.data if hasattr(keypoints, "xy") else keypoints
    if hasattr(raw, "cpu"):
        raw = raw.cpu()
    if hasattr(raw, "numpy"):
        raw = raw.numpy()
    poses = np.asarray(raw)

    if poses.size == 0:  # nothing detected
        return hand_keypoints
    if poses.ndim == 2:  # a single (K, C) pose without the leading person axis
        poses = poses[np.newaxis, ...]

    for pose in poses:  # pose has shape (K, C)
        if pose.shape[0] > RIGHT_HAND_IDX:
            left, right = pose[LEFT_HAND_IDX], pose[RIGHT_HAND_IDX]
            hand1 = [float(left[0]), float(left[1])]
            hand2 = [float(right[0]), float(right[1])]
        else:
            # Not enough keypoints to contain the wrists
            hand1 = [None, None]
            hand2 = [None, None]

        hand_keypoints.append({"hand1": hand1, "hand2": hand2})

    return hand_keypoints


In [15]:
def detect_pose(image_path):
    """
    Run pose detection on one image and mark the detected hand keypoints.

    Args:
        image_path: Path of the image to analyse (passed to ``load_image``).

    Returns:
        A tuple ``(annotated_image, hand_keypoints)`` where
        ``annotated_image`` is an RGB copy of the input with a filled circle
        per available hand keypoint (green for ``hand1``, red for ``hand2``)
        and ``hand_keypoints`` is the list produced by
        ``extract_hand_keypoints``.

    Raises:
        FileNotFoundError: Propagated from ``load_image`` when the image
            cannot be read.
    """
    # load_image returns the picture in RGB order (ready for matplotlib)
    image = load_image(image_path)

    # Ultralytics interprets numpy inputs as OpenCV-style BGR, so convert back
    # for inference; feeding the RGB array would swap the red/blue channels.
    results = model(cv2.cvtColor(image, cv2.COLOR_RGB2BGR))

    # Extract hand keypoints for every detected person
    hand_keypoints = extract_hand_keypoints(results)

    # Draw on a copy so the loaded image stays untouched.
    # Colours are RGB tuples because the annotated image is RGB.
    annotated_image = image.copy()
    hand_colors = (("hand1", (0, 255, 0)), ("hand2", (255, 0, 0)))
    for keypoints in hand_keypoints:
        for hand_key, color in hand_colors:
            x, y = keypoints[hand_key][:2]
            if x is not None and y is not None:
                cv2.circle(annotated_image, (int(x), int(y)),
                           radius=5, color=color, thickness=-1)

    return annotated_image, hand_keypoints


In [16]:
def save_to_json(image_name, hand_keypoints, output_path="output.json"):
    """
    Write the hand keypoints of one image to a JSON file.

    Args:
        image_name: Name stored under ``"imagename"`` in every record.
        hand_keypoints: Iterable of dicts providing ``"hand1"`` and ``"hand2"``.
        output_path: Destination file; it is overwritten if it exists.

    The file contains ``{"hands": [...]}`` with one record per entry of
    ``hand_keypoints``, indented by four spaces.
    """
    records = []
    for entry in hand_keypoints:
        records.append({
            "imagename": image_name,
            "hand1": entry["hand1"],
            "hand2": entry["hand2"],
        })

    payload = {"hands": records}
    with open(output_path, "w") as handle:
        json.dump(payload, handle, indent=4)


In [None]:
# Input image path
image_path = "test_image_1.jpg"  # Replace with your image path
# Keep only the file name; Path handles the platform's path separators,
# unlike splitting on "/" by hand.
image_name = Path(image_path).name

# Perform pose detection
annotated_image, hand_keypoints = detect_pose(image_path)

# Save JSON results
save_to_json(image_name, hand_keypoints, "hand_keypoints.json")

# Display annotated image (explicit figure/axes instead of pyplot global state)
fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(annotated_image)
ax.axis("off")
ax.set_title("Annotated Image with Hand Keypoints")
plt.show()



0: 448x640 1 person, 157.6ms
Speed: 11.8ms preprocess, 157.6ms inference, 13.4ms postprocess per image at shape (1, 3, 448, 640)
