In [1]:
!pip install python-dotenv opencv-python pillow datasets tqdm 
!pip install mediapipe 



In [2]:
!curl -o hand_landmarker.task https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 7635k  100 7635k    0     0  14.4M      0 --:--:-- --:--:-- --:--:-- 14.5M


In [1]:
from datasets import load_dataset
import cv2
import numpy as np
from PIL import Image
from io import BytesIO
from pathlib import Path
import math
import traceback

import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision



  from .autonotebook import tqdm as notebook_tqdm


In [10]:
# Open the training split in streaming mode.
dataset = load_dataset(
    "cj-mills/hagrid-sample-500k-384p",
    split="train",
    streaming=True,
)

# Directory that receives the cropped hand images; created if it does not exist.
output_dir = Path("hand_crops_mediapipe")
output_dir.mkdir(parents=True, exist_ok=True)

In [13]:

# File name the model bundle was saved under by the curl download cell.
model_path = 'hand_landmarker.task'

# Short aliases for the MediaPipe Tasks classes used in this notebook.
BaseOptions = mp.tasks.BaseOptions
VisionRunningMode = mp.tasks.vision.RunningMode
HandLandmarker = mp.tasks.vision.HandLandmarker
HandLandmarkerOptions = mp.tasks.vision.HandLandmarkerOptions

# Landmarker configuration: single-image mode, up to two hands per image,
# and a 0.5 threshold for each of the three confidence settings.
options = HandLandmarkerOptions(
    base_options=BaseOptions(model_asset_path=model_path),
    running_mode=VisionRunningMode.IMAGE,
    num_hands=2,
    min_hand_detection_confidence=0.5,
    min_hand_presence_confidence=0.5,
    min_tracking_confidence=0.5,
)

def calculate_bounding_box(image_width, image_height, landmarks, padding=5):
    """Return a padded pixel bounding box enclosing the given landmarks.

    Each landmark's ``x`` is scaled by ``image_width`` and its ``y`` by
    ``image_height``, so coordinates are expected as fractions of the image
    size.

    Args:
        image_width: Image width in pixels.
        image_height: Image height in pixels.
        landmarks: Iterable of objects exposing ``x`` and ``y`` attributes.
            One-shot iterables such as generators are supported.
        padding: Margin in pixels added on every side before clamping.

    Returns:
        ``(x1, y1, x2, y2)`` as integers clamped to the image bounds, or
        ``None`` when there are no landmarks or the clamped box has no area.
    """
    if not landmarks:
        return None

    # Materialize once: the points are traversed twice (x and y), which would
    # exhaust a generator after the first pass and silently yield no box.
    points = list(landmarks)
    if not points:
        return None

    x_coords = [lm.x * image_width for lm in points]
    y_coords = [lm.y * image_height for lm in points]

    # Expand by the padding, then clamp to the image rectangle.
    x1 = max(0, int(min(x_coords) - padding))
    y1 = max(0, int(min(y_coords) - padding))
    x2 = min(image_width, int(max(x_coords) + padding))
    y2 = min(image_height, int(max(y_coords) + padding))

    # A box lying entirely outside the image collapses after clamping.
    if x1 >= x2 or y1 >= y2:
        return None
    return x1, y1, x2, y2

# Stop once this many images with at least one detected hand have been handled.
MAX_IMAGES = 50
# (width, height) every saved crop is resized to.
CROP_SIZE = (224, 224)

print("Starting processing with MediaPipe...")
try:
    # Used as a context manager so the landmarker is closed when the loop ends.
    with HandLandmarker.create_from_options(options) as landmarker:
        print("MediaPipe Hand Landmarker initialized successfully.")
        # Counts only images in which at least one hand was detected.
        processed_count = 0
        for idx, sample in enumerate(dataset):
            if processed_count >= MAX_IMAGES:
                print(f"Processed {MAX_IMAGES} images. Stopping.")
                break
            try:
                # Convert the sample to an RGB array and wrap it for MediaPipe.
                img_np = np.array(sample["image"].convert("RGB"))
                h_img, w_img = img_np.shape[:2]
                mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=img_np)

                detection_result = landmarker.detect(mp_image)

                # Images without any detected hand are skipped and not counted.
                if not (detection_result and detection_result.hand_landmarks):
                    continue

                for hand_idx, hand_landmarks in enumerate(detection_result.hand_landmarks):
                    bbox = calculate_bounding_box(w_img, h_img, hand_landmarks)
                    if bbox is None:
                        continue
                    x1, y1, x2, y2 = bbox
                    hand_crop = img_np[y1:y2, x1:x2]
                    if hand_crop.size == 0:
                        continue
                    hand_crop_resized = cv2.resize(hand_crop, CROP_SIZE)

                    # Fall back to "unknown" when no handedness entry exists for this hand.
                    handedness = "unknown"
                    if detection_result.handedness and hand_idx < len(detection_result.handedness):
                        handedness = detection_result.handedness[hand_idx][0].category_name.lower()

                    out_path = output_dir / f"image{idx}_hand{hand_idx}_{handedness}.jpg"
                    # The array is RGB; convert to OpenCV's BGR channel order before saving.
                    # cv2.imwrite reports failure through its return value rather than
                    # by raising, so check it to avoid silently losing crops.
                    written = cv2.imwrite(
                        str(out_path),
                        cv2.cvtColor(hand_crop_resized, cv2.COLOR_RGB2BGR),
                    )
                    if not written:
                        print(f"❌ Failed to write crop to {out_path}")

                processed_count += 1

            except Exception as e_inner:
                # Best effort: report the failing image and continue with the next one.
                print(f"❌ Error processing image {idx}: {e_inner}")
                traceback.print_exc()

except Exception as e_outer:
    print(f"Error initializing MediaPipe or during loop setup: {e_outer}")
    traceback.print_exc()

print("Processing finished.")

Starting processing with MediaPipe...
MediaPipe Hand Landmarker initialized successfully.


I0000 00:00:1744849548.741822 3386130 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.4), renderer: Apple M2 Pro
W0000 00:00:1744849548.753776 3423851 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1744849548.760574 3423851 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Processed 50 images. Stopping.
Processing finished.
