In [33]:

import os
from collections import Counter

import cv2
import mediapipe as mp
import pytesseract

# Initialize MediaPipe Hands for hand detection.
# `hands` is a single module-level detector instance that
# detect_hands_and_desks() reuses for every video frame.
# NOTE(review): static_image_mode=False presumably enables MediaPipe's
# video/tracking mode and max_num_hands=2 caps detections per frame —
# confirm against the MediaPipe Hands documentation.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.6)
mp_drawing = mp.solutions.drawing_utils

# Path to the Tesseract OCR executable (modify if needed).
# This is a hard-coded Windows install location; it is set as a module-level
# side effect, so it applies to every pytesseract call in this file.
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

def detect_text_positions(image_path):
    """
    Detect text positions in an image using pytesseract.

    The image is converted to grayscale and passed to
    ``pytesseract.image_to_data``; every non-blank OCR entry is recorded.

    Args:
        image_path: Path of the image to run OCR on.

    Returns:
        A dictionary mapping each detected text (stripped and upper-cased)
        to its bounding box ``(x1, y1, x2, y2)`` in pixel coordinates of the
        input image. If the same text is detected more than once, the last
        occurrence wins.

    Raises:
        FileNotFoundError: If the image is missing or cannot be decoded.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with a clear message instead of inside cv2.cvtColor.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # OCR to detect text
    data = pytesseract.image_to_data(gray, output_type=pytesseract.Output.DICT)
    text_positions = {}

    # The OCR result is a dict of parallel lists; walk them in lockstep.
    entries = zip(data['text'], data['left'], data['top'], data['width'], data['height'])
    for raw_text, x, y, w, h in entries:
        text = raw_text.strip()
        if text:  # Ignore empty text
            text_positions[text.upper()] = (x, y, x + w, y + h)

    return text_positions

def map_hand_to_desk(wrist_x, wrist_y, text_positions):
    """
    Find the desk label whose bounding box contains the wrist point.

    Boxes are ``(x1, y1, x2, y2)`` and their edges count as inside. Labels
    are checked in the dictionary's iteration order and the first hit is
    returned; ``None`` is returned when no box contains the point.
    """
    matching_labels = (
        label
        for label, (left, top, right, bottom) in text_positions.items()
        if left <= wrist_x <= right and top <= wrist_y <= bottom
    )
    return next(matching_labels, None)

def detect_hands_and_desks(video_path, desk_image_path, output_frames_dir):
    """
    Detect hands in a video and map them to OCR-detected desk labels.

    For every frame, the module-level MediaPipe ``hands`` detector is run,
    each detected wrist is looked up against the desk label boxes, and the
    annotated frame is written to ``output_frames_dir`` as
    ``frame_NNNN.jpg`` and shown in a preview window (press ``q`` to stop).
    Afterwards a per-desk count is printed and the saved frames are stitched
    via ``stitch_and_save_images``.

    Args:
        video_path: Path of the video to analyse.
        desk_image_path: Path of the image containing the desk labels.
        output_frames_dir: Directory for the annotated frames (created if
            it does not exist).

    Raises:
        FileNotFoundError: Propagated from ``detect_text_positions`` when the
            desk image cannot be read.
    """
    # Detect desk label positions
    # NOTE(review): these boxes are in desk-image pixel coordinates, while
    # wrist positions below are in video-frame pixel coordinates. The lookup
    # is only meaningful if both share the same resolution and framing.
    desk_positions = detect_text_positions(desk_image_path)

    os.makedirs(output_frames_dir, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # Without this the function would quietly report an empty summary.
        print(f"Warning: could not open video: {video_path}")

    frame_count = 0
    # Number of per-frame detections for each desk, in first-seen order.
    detection_summary = Counter()

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Frame size is constant for all hands in this frame.
            frame_height, frame_width = frame.shape[:2]
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = hands.process(rgb_frame)

            # multi_hand_landmarks is falsy when no hand was found.
            for hand_landmarks in results.multi_hand_landmarks or ():
                # Landmarks are normalised; convert the wrist to pixels.
                wrist = hand_landmarks.landmark[mp_hands.HandLandmark.WRIST]
                wrist_x = int(frame_width * wrist.x)
                wrist_y = int(frame_height * wrist.y)

                # Map wrist to desk using OCR-detected text positions
                desk_name = map_hand_to_desk(wrist_x, wrist_y, desk_positions)
                if desk_name:
                    print(f"Hand detected at desk: {desk_name}")
                    cv2.putText(frame, f"Hand at {desk_name}", (wrist_x, wrist_y - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
                    detection_summary[desk_name] += 1

                # Draw landmarks
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
                cv2.circle(frame, (wrist_x, wrist_y), 10, (0, 0, 255), -1)

            # Save every processed frame (with or without a detection)
            output_frame_path = os.path.join(output_frames_dir, f"frame_{frame_count:04d}.jpg")
            cv2.imwrite(output_frame_path, frame)

            # Display frame
            cv2.imshow("Hand Detection", frame)
            frame_count += 1

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture and close the window even if processing fails.
        cap.release()
        cv2.destroyAllWindows()

    # Print summary
    print("\nHand Detection Summary:")
    for desk_name, count in detection_summary.items():
        print(f"{desk_name}: {count} hand(s) raised")

    # Save stitched output
    stitch_and_save_images(output_frames_dir, detection_summary)

def stitch_and_save_images(output_frames_dir, detection_summary):
    """
    Stitch all saved frames into a single image and save it to the Desktop.

    All ``.jpg`` files in ``output_frames_dir`` are loaded in sorted filename
    order and stacked vertically into ``~/Desktop/stitched_output.jpg``.
    Files that cannot be decoded, or whose width differs from the first
    usable frame, are skipped with a message because vertical concatenation
    needs equally wide images.

    Args:
        output_frames_dir: Directory containing the saved frame images.
        detection_summary: Per-desk detection counts. Currently unused; kept
            for interface compatibility with existing callers.

    Returns:
        None. Progress and failures are reported via ``print``.
    """
    frame_files = sorted(
        os.path.join(output_frames_dir, name)
        for name in os.listdir(output_frames_dir)
        if name.endswith('.jpg')
    )
    if not frame_files:
        print("No frames to stitch.")
        return

    # Load frames, dropping any that would make cv2.vconcat fail.
    frames = []
    for frame_path in frame_files:
        image = cv2.imread(frame_path)
        if image is None:
            # cv2.imread returns None for unreadable/corrupt files.
            print(f"Skipping unreadable frame: {frame_path}")
            continue
        if frames and image.shape[1] != frames[0].shape[1]:
            print(f"Skipping frame with mismatched width: {frame_path}")
            continue
        frames.append(image)

    if not frames:
        print("No frames to stitch.")
        return

    stitched_image = cv2.vconcat(frames)

    # Save the stitched image to Desktop
    desktop_path = os.path.join(os.path.expanduser("~"), "Desktop", "stitched_output.jpg")
    # cv2.imwrite signals failure through its return value, so only report
    # success when the file was actually written.
    if not cv2.imwrite(desktop_path, stitched_image):
        print(f"\nFailed to save stitched image to: {desktop_path}")
        return
    print(f"\nStitched image saved to: {desktop_path}")

# Script entry point: runs the full pipeline on hard-coded, machine-specific
# Windows paths. Adjust the paths below before running on another machine.
if __name__ == "__main__":
    video_path = r"C:\Users\ASHIK\Downloads\Video\desk_video.mp4"       # Path to the provided video
    desk_image_path = r"C:\Users\ASHIK\Downloads\IMG desk.png"    # Path to the desk mapping image
    output_frames_dir = "output_frames"  # Relative path; resolved against the current working directory

    # Run hand detection
    detect_hands_and_desks(video_path, desk_image_path, output_frames_dir)



Hand Detection Summary:

Stitched image saved to: C:\Users\ASHIK\Desktop\stitched_output.jpg
