In [5]:
from asyncio import Protocol
from contextlib import AbstractContextManager
# typing.Protocol is imported after asyncio.Protocol on purpose: it rebinds the
# name so that SafeHandsResult is a structural (typing) protocol.
from typing import Optional, Protocol, Sequence, Tuple, cast

from mediapipe.python.solutions.hands import Hands
import numpy as np


class SafeHandsResult(Protocol):
    """Structural type describing the object returned by ``Hands.process``.

    It is only used as the target of ``typing.cast`` in ``SafeHands.process``
    so that the three result attributes are visible to type checkers.
    ``Protocol`` here has to be ``typing.Protocol``; ``asyncio.Protocol`` is an
    unrelated base class for network callbacks.
    """

    # Each attribute may be None; SafeHands.process treats a falsy value as
    # "nothing detected" for that field.
    multi_hand_landmarks: Optional[Sequence]
    multi_hand_world_landmarks: Optional[Sequence]
    multi_handedness: Optional[Sequence]

class SafeHands(AbstractContextManager):
    """Context manager around ``Hands`` that returns fixed-size NumPy arrays.

    The wrapped ``Hands`` object is created in ``__init__`` and closed when the
    ``with`` block exits. ``process`` converts one detection result into three
    zero-padded ``float32`` arrays sized by ``max_num_hands``.
    """

    def __init__(
        self,
        static_image_mode: bool = False,
        max_num_hands: int = 2,
        model_complexity: int = 1,
        min_detection_confidence: float = 0.5,
        min_tracking_confidence: float = 0.5,
    ):
        """Store ``max_num_hands`` and forward every option to ``Hands``."""
        self.max_num_hands = max_num_hands
        self.hands = Hands(
            static_image_mode=static_image_mode,
            max_num_hands=max_num_hands,
            model_complexity=model_complexity,
            min_detection_confidence=min_detection_confidence,
            min_tracking_confidence=min_tracking_confidence,
        )

    def __enter__(self) -> "SafeHands":
        """Return this wrapper so it can be bound with ``as``."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the wrapped ``Hands`` object; exceptions are not suppressed."""
        self.hands.close()

    def process(self, frame_rgb: np.ndarray) -> Optional[Tuple[np.ndarray, np.ndarray, np.ndarray]]:
        """Run detection on one frame and pack the result into arrays.

        Args:
            frame_rgb: Image array handed unchanged to ``Hands.process``.

        Returns:
            ``None`` when all three result fields are empty or ``None``.
            Otherwise ``(landmarks, world_landmarks, handedness)`` where the
            first two have shape ``(max_num_hands, 21, 3)`` holding x, y, z and
            the last has shape ``(max_num_hands, 2)`` holding
            ``[0 if label == "Left" else 1, score]``. Rows for hands that were
            not reported stay zero; hands beyond ``max_num_hands`` are dropped.
        """
        mp_result = cast(SafeHandsResult, self.hands.process(frame_rgb))
        image_hands = mp_result.multi_hand_landmarks
        world_hands = mp_result.multi_hand_world_landmarks
        hand_labels = mp_result.multi_handedness
        if not (image_hands or world_hands or hand_labels):
            return None

        capacity = self.max_num_hands
        landmarks = np.zeros((capacity, 21, 3), dtype=np.float32)
        world_landmarks = np.zeros((capacity, 21, 3), dtype=np.float32)
        handedness = np.zeros((capacity, 2), dtype=np.float32)

        # Image-space and world-space landmarks share the same layout, so one
        # loop fills both destination arrays.
        for target, detected in ((landmarks, image_hands), (world_landmarks, world_hands)):
            if not detected:
                continue
            for hand_idx, hand in enumerate(detected[:capacity]):
                for point_idx, point in enumerate(hand.landmark):
                    target[hand_idx, point_idx] = (point.x, point.y, point.z)

        if hand_labels:
            for hand_idx, entry in enumerate(hand_labels[:capacity]):
                top = entry.classification[0]
                handedness[hand_idx, 0] = 0 if top.label == "Left" else 1
                handedness[hand_idx, 1] = top.score

        return landmarks, world_landmarks, handedness


In [None]:
import pandas as pd
from datasets import load_dataset
import numpy as np
import time



# ----------------------------
# Dataset configuration
# ----------------------------

dataset_name = "Vincent-luo/hagrid-mediapipe-hands"
split_name = "train"
N = 1000  # number of images to process

# Load the dataset in streaming mode
dataset = load_dataset(dataset_name, split=split_name, streaming=True)

# ----------------------------
# Process the images one by one in this process and time the run
# ----------------------------

# The clock starts when the first item arrives, so the time spent opening the
# stream is not counted. None means "no item seen yet".
start_time = None
with SafeHands(static_image_mode=True, max_num_hands=2) as safe_hands:
    for i, item in enumerate(dataset):
        if start_time is None:
            # perf_counter is monotonic, unlike the wall clock.
            start_time = time.perf_counter()
        if i >= N:
            break

        img = item['image']
        frame_rgb = np.array(img.convert('RGB'))
        result = safe_hands.process(frame_rgb)
        print(f"Result: {result}")
        del img, frame_rgb

end_time = time.perf_counter()
# An empty stream never starts the clock; report zero instead of a bogus value.
elapsed = end_time - start_time if start_time is not None else 0.0
f"Processing finished in {elapsed} seconds."



2025-12-09 02:25:58,287 [MainThread] INFO - HTTP Request: HEAD https://huggingface.co/datasets/Vincent-luo/hagrid-mediapipe-hands/resolve/main/README.md "HTTP/1.1 307 Temporary Redirect"
2025-12-09 02:25:58,295 [MainThread] INFO - HTTP Request: HEAD https://huggingface.co/api/resolve-cache/datasets/Vincent-luo/hagrid-mediapipe-hands/d4a37fd0729013021dcd1d5bef6a172c0f18b914/README.md "HTTP/1.1 200 OK"
2025-12-09 02:25:58,429 [MainThread] INFO - HTTP Request: HEAD https://huggingface.co/datasets/Vincent-luo/hagrid-mediapipe-hands/resolve/d4a37fd0729013021dcd1d5bef6a172c0f18b914/hagrid-mediapipe-hands.py "HTTP/1.1 404 Not Found"
2025-12-09 02:25:58,799 [MainThread] INFO - HTTP Request: HEAD https://s3.amazonaws.com/datasets.huggingface.co/datasets/datasets/Vincent-luo/hagrid-mediapipe-hands/Vincent-luo/hagrid-mediapipe-hands.py "HTTP/1.1 404 Not Found"
2025-12-09 02:25:59,149 [MainThread] INFO - HTTP Request: HEAD https://huggingface.co/datasets/Vincent-luo/hagrid-mediapipe-hands/resolve/

In [None]:

from abc import ABC, abstractmethod
from typing import Any, Callable, Dict, Generic, TypeVar
from datasets import load_dataset
import logging
import time
import numpy as np

# Configure logging at INFO level; every record shows a timestamp, the thread
# name and the level before the message.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(threadName)s] %(levelname)s - %(message)s",
)
# Logger shared by the worker, the result daemon and the driver code below.
logger = logging.getLogger(__name__)

import multiprocessing


# Type variables for the generic worker/processor classes below:
# I is the type of an input item, R the type of the result produced for it.
I = TypeVar("I")
R = TypeVar("R")

class AbstractWorker(ABC, Generic[R, I]):
    """Base class for the object that turns one input item into one result.

    Subclasses implement ``work``. Both queues are stored on the instance so a
    subclass can reach them.
    """

    def __init__(self, in_queue: multiprocessing.Queue, out_queue: multiprocessing.Queue):
        """Keep references to the input and output queues."""
        self.in_queue, self.out_queue = in_queue, out_queue

    @abstractmethod
    def work(self, item:I) -> R:
        """Process ``item`` and return its result."""
	

class BatchProcessor(Generic[R,I]):
    """Fan items out to ``n_workers`` processes and collect their results.

    Items passed to ``enQueue`` travel through ``in_queue`` to the worker
    processes. Each worker process builds its own ``AbstractWorker`` with
    ``worker_generator`` and puts ``worker.work(item)`` on ``out_queue``.
    ``None`` is the shutdown sentinel on ``in_queue``; ``stop()`` also puts a
    single ``None`` on ``out_queue`` once every worker has exited.

    NOTE(review): a worker result of ``None`` cannot be told apart from that
    end marker on ``out_queue``; consumers have to count results instead.
    """

    def __init__(self, n_workers:int, worker_generator:Callable[[multiprocessing.Queue, multiprocessing.Queue], AbstractWorker[R,I]]):
        """Create the queues; no process is started until ``start()``.

        Args:
            n_workers: Number of worker processes to spawn.
            worker_generator: Called once inside each worker process with
                ``(in_queue, out_queue)`` to build that process's worker.
        """
        self.n_workers = n_workers
        self.worker_generator = worker_generator
        self.in_queue = multiprocessing.Queue()
        self.out_queue = multiprocessing.Queue()
        self.workers = []
        self.on:bool = False

    def _worker(self) -> None:
        """Worker-process loop: handle items until the ``None`` sentinel arrives."""
        worker = self.worker_generator(self.in_queue, self.out_queue)
        while True:
            item = self.in_queue.get()
            # Identity check on purpose: items may be NumPy arrays, for which
            # ``== None`` is an element-wise comparison.
            if item is None:
                break
            self.out_queue.put(worker.work(item))

    def start(self):
        """Spawn the worker processes; a call while already running is a no-op."""
        if self.on:
            # stop() sends exactly n_workers sentinels, so a second batch of
            # workers would never terminate and join() would hang.
            return
        for _ in range(self.n_workers):
            p = multiprocessing.Process(target=self._worker)
            p.start()
            self.workers.append(p)
        self.on = True

    def stop(self) -> None:
        """Shut the workers down and mark the end of the result stream.

        Sends one sentinel per worker, joins them, then puts ``None`` on
        ``out_queue``. Calling it while not running does nothing, so stray
        sentinels are never left in the queues.

        NOTE(review): results must be drained from ``out_queue`` by another
        process/thread while this runs; joining a process that still has
        undelivered queue data can block.
        """
        if not self.on:
            return
        for _ in range(self.n_workers):
            self.in_queue.put(None)
        for p in self.workers:
            p.join()
        self.workers = []
        self.out_queue.put(None)
        self.on = False

    def enQueue(self, item:I) -> None:
        """Submit ``item`` for processing.

        Raises:
            RuntimeError: If the processor has not been started.
        """
        if not self.on:
            raise RuntimeError("Processor not started. Call start() before enqueuing items.")
        self.in_queue.put(item)

    def deQueue(self) -> Optional[R] :
        """Block until the next result (or the ``None`` end marker) is available.

        Raises:
            RuntimeError: If the processor has not been started.
        """
        if not self.on:
            raise RuntimeError("Processor not started. Call start() before dequeuing items.")
        return self.out_queue.get()

    def __enter__(self):
        """Start the workers and return the processor."""
        self.start()
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Stop the workers; exceptions are not suppressed."""
        self.stop()



def worker_generator(in_queue, out_queue):
    """Build the hand-landmark worker for one worker process.

    The worker class is defined inside this function and one instance, bound to
    the given queues, is returned.
    """

    class MPWorker(AbstractWorker[Optional[Dict], np.ndarray]):
        """Worker that runs a single-hand ``SafeHands`` detector on each frame."""

        def __init__(self, in_queue, out_queue):
            super().__init__(in_queue, out_queue)
            # One detector per worker instance, reused for every frame.
            self.safe_hands = SafeHands(static_image_mode=True, max_num_hands=1)

        def work(self, item:np.ndarray) -> Dict | None:
            """Return the detection arrays for ``item`` as a dict, or ``None``."""
            logger.info("Processing frame...")
            detection = self.safe_hands.process(item)
            if detection is None:
                return None
            image_points, world_points, sides = detection
            del item
            return {
                'landmarks': image_points,
                'world_landmarks': world_points,
                'handedness': sides,
            }

    return MPWorker(in_queue, out_queue)



def result_deamon(processor, n):
    """Consume results from ``processor`` and log the first ``n`` of them.

    ``processor.deQueue()`` is called ``n + 1`` times in total: the first ``n``
    values are logged, the last one is fetched and dropped before the function
    returns.
    """
    logger.info("Daemon started.")
    logged = 0
    result = processor.deQueue()
    while logged != n:
        logger.info(f"Result: {result}")
        logged += 1
        # Fetch the next value before re-checking the count, so one extra item
        # is always consumed after the n-th result.
        result = processor.deQueue()
    logger.info("Daemon finished.")


if __name__ == "__main__":
    # Stream N images through the worker pool and time the run.
    dataset_name = "Vincent-luo/hagrid-mediapipe-hands"
    split_name = "train"
    N = 1000
    dataset = load_dataset(dataset_name, split=split_name, streaming=True)
    logger.info("Stream started.")

    processor = BatchProcessor(n_workers=5, worker_generator=worker_generator)
    processor.start()

    # Start the daemon process that drains and logs the results.
    # It logs N results and then consumes one more item, which is the None
    # marker that processor.stop() puts on the output queue.
    # NOTE(review): if the stream yields fewer than N images the daemon never
    # reaches its count and p.join() below blocks.
    p = multiprocessing.Process(target=result_deamon, args=(processor, N,))
    p.start()

    # The clock starts when the first item arrives, so opening the stream is
    # not part of the measurement.
    start_timer = True
    start_time = 0
    for i, item in enumerate(dataset):
        if start_timer:
            start_time = time.time()
            start_timer = False
        # Enqueue exactly N frames (indices 0..N-1). With ``i > N`` an extra
        # frame was processed whose result the daemon silently discarded.
        if i >= N:
            break
        img = item['image']
        frame_rgb = np.array(img.convert('RGB'))
        processor.enQueue(frame_rgb)
        del img

    # Stop workers and signal daemon
    processor.stop()

    # Wait for daemon to finish
    p.join()
    end_time = time.time()
    logger.info(f"Processing finished in {end_time - start_time} seconds.")
		



2025-12-09 02:17:54,470 [MainThread] INFO - HTTP Request: HEAD https://huggingface.co/datasets/Vincent-luo/hagrid-mediapipe-hands/resolve/main/README.md "HTTP/1.1 307 Temporary Redirect"
2025-12-09 02:17:54,486 [MainThread] INFO - HTTP Request: HEAD https://huggingface.co/api/resolve-cache/datasets/Vincent-luo/hagrid-mediapipe-hands/d4a37fd0729013021dcd1d5bef6a172c0f18b914/README.md "HTTP/1.1 200 OK"
2025-12-09 02:17:54,615 [MainThread] INFO - HTTP Request: HEAD https://huggingface.co/datasets/Vincent-luo/hagrid-mediapipe-hands/resolve/d4a37fd0729013021dcd1d5bef6a172c0f18b914/hagrid-mediapipe-hands.py "HTTP/1.1 404 Not Found"
2025-12-09 02:17:54,927 [MainThread] INFO - HTTP Request: HEAD https://s3.amazonaws.com/datasets.huggingface.co/datasets/datasets/Vincent-luo/hagrid-mediapipe-hands/Vincent-luo/hagrid-mediapipe-hands.py "HTTP/1.1 404 Not Found"
2025-12-09 02:17:55,065 [MainThread] INFO - HTTP Request: HEAD https://huggingface.co/datasets/Vincent-luo/hagrid-mediapipe-hands/resolve/

In [4]:

from datasets import load_dataset
import logging
import time
import numpy as np

# Configure logging at INFO level; every record shows a timestamp, the thread
# name and the level before the message.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(threadName)s] %(levelname)s - %(message)s",
)
# Logger shared by the processing function, the result daemon and the driver code below.
logger = logging.getLogger(__name__)

import multiprocessing


class BatchProcessor:
    """Run ``process_fn`` on queued items in ``n_workers`` processes.

    Items passed to ``enQueue`` travel through ``in_queue``; every worker puts
    ``process_fn(item)`` on ``out_queue``. ``None`` is the shutdown sentinel on
    ``in_queue`` and ``stop()`` puts one ``None`` on ``out_queue`` after all
    workers have exited.

    NOTE(review): a ``process_fn`` result of ``None`` cannot be told apart from
    that end marker on ``out_queue``.
    """

    def __init__(self, n_workers, process_fn):
        """Create the queues; no process is started until ``start()``.

        Args:
            n_workers: Number of worker processes to spawn.
            process_fn: Callable applied to every item inside a worker.
        """
        self.n_workers = n_workers
        self.process_fn = process_fn
        self.in_queue = multiprocessing.Queue()
        self.out_queue = multiprocessing.Queue()
        self.workers = []

    def _worker(self):
        """Worker loop: process items until the ``None`` sentinel arrives."""
        while True:
            item = self.in_queue.get()
            # Identity check on purpose: items may be NumPy arrays, for which
            # ``== None`` is an element-wise comparison.
            if item is None:
                break
            result = self.process_fn(item)
            self.out_queue.put(result)

    def start(self):
        """Spawn ``n_workers`` processes running ``_worker``."""
        for _ in range(self.n_workers):
            p = multiprocessing.Process(target=self._worker)
            p.start()
            self.workers.append(p)

    def stop(self):
        """Send one sentinel per worker, join them, then mark the end of results."""
        for _ in range(self.n_workers):
            self.in_queue.put(None)
        for p in self.workers:
            p.join()
        self.workers = []
        self.out_queue.put(None)

    def enQueue(self, item):
        """Submit ``item`` for processing."""
        self.in_queue.put(item)

    def deQueue(self):
        """Block until the next result is available and return it.

        Returns:
            The next value from ``out_queue``, or ``None`` if reading failed.
            A failure is logged with its traceback instead of being swallowed
            silently, because ``None`` is also a legitimate queue value.
        """
        try:
            return self.out_queue.get()
        except Exception:
            logging.getLogger(__name__).exception(
                "Failed to read a result from the output queue."
            )
            return None

    def __enter__(self):
        """Start the workers and return the processor."""
        self.start()
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Stop the workers; exceptions are not suppressed."""
        self.stop()

  

def process(frame_rgb):
    """Detect up to two hands in ``frame_rgb``.

    Returns a dict with the keys ``'landmarks'``, ``'world_landmarks'`` and
    ``'handedness'``, or ``None`` when ``SafeHands.process`` returns ``None``.
    """
    # NOTE(review): a fresh SafeHands is created and closed on every call, so
    # the detector is rebuilt for each frame.
    with SafeHands(static_image_mode=True, max_num_hands=2) as detector:
        logger.info("Processing frame...")
        detection = detector.process(frame_rgb)
        if detection is not None:
            image_points, world_points, sides = detection
            del frame_rgb
            return {
                'landmarks': image_points,
                'world_landmarks': world_points,
                'handedness': sides,
            }
        return None

	


def result_deamon(processor, n):
    """Consume results from ``processor`` and log the first ``n`` of them.

    ``processor.deQueue()`` is called ``n + 1`` times in total: the first ``n``
    values are logged, the last one is fetched and dropped before the function
    returns.
    """
    logger.info("Daemon started.")
    logged = 0
    result = processor.deQueue()
    while logged != n:
        logger.info(f"Result: {result}")
        logged += 1
        # Fetch the next value before re-checking the count, so one extra item
        # is always consumed after the n-th result.
        result = processor.deQueue()
    logger.info("Daemon finished.")


if __name__ == "__main__":
    # Stream N images through the worker pool.
    dataset_name = "Vincent-luo/hagrid-mediapipe-hands"
    split_name = "train"
    N = 100
    dataset = load_dataset(dataset_name, split=split_name, streaming=True)
    logger.info("Stream started.")

    processor = BatchProcessor(n_workers=5, process_fn=process)
    processor.start()

    # Start the daemon process that drains and logs the results.
    # It logs N results and then consumes one more item, which is the None
    # marker that processor.stop() puts on the output queue.
    # NOTE(review): if the stream yields fewer than N images the daemon never
    # reaches its count and p.join() below blocks.
    p = multiprocessing.Process(target=result_deamon, args=(processor, N,))
    p.start()

    for i, item in enumerate(dataset):
        # Enqueue exactly N frames (indices 0..N-1). With ``i > N`` an extra
        # frame was processed whose result the daemon silently discarded.
        if i >= N:
            break
        img = item['image']
        frame_rgb = np.array(img.convert('RGB'))
        processor.enQueue(frame_rgb)
        del img

    # Stop workers and signal daemon
    processor.stop()

    # Wait for daemon to finish
    p.join()
		



2025-12-09 01:10:39,170 [MainThread] INFO - HTTP Request: HEAD https://huggingface.co/datasets/Vincent-luo/hagrid-mediapipe-hands/resolve/main/README.md "HTTP/1.1 307 Temporary Redirect"
2025-12-09 01:10:39,203 [MainThread] INFO - HTTP Request: HEAD https://huggingface.co/api/resolve-cache/datasets/Vincent-luo/hagrid-mediapipe-hands/d4a37fd0729013021dcd1d5bef6a172c0f18b914/README.md "HTTP/1.1 200 OK"
2025-12-09 01:10:39,332 [MainThread] INFO - HTTP Request: HEAD https://huggingface.co/datasets/Vincent-luo/hagrid-mediapipe-hands/resolve/d4a37fd0729013021dcd1d5bef6a172c0f18b914/hagrid-mediapipe-hands.py "HTTP/1.1 404 Not Found"
2025-12-09 01:10:39,643 [MainThread] INFO - HTTP Request: HEAD https://s3.amazonaws.com/datasets.huggingface.co/datasets/datasets/Vincent-luo/hagrid-mediapipe-hands/Vincent-luo/hagrid-mediapipe-hands.py "HTTP/1.1 404 Not Found"
2025-12-09 01:10:39,789 [MainThread] INFO - HTTP Request: HEAD https://huggingface.co/datasets/Vincent-luo/hagrid-mediapipe-hands/resolve/