### Hand Tracking with OpenCV and Mediapipe!

**Steps to Get Started:**
1. Ensure you have Python 3.11.9 installed on your computer (we will go through how to set it up)
2. Create a virtual environment using this Python version
3. Run `pip install -r requirements.txt` inside the virtual environment (pinning the package versions avoids issues with mismatched versions)
4. Once you've installed the necessary packages, you're ready to begin!

In [1]:
import pyautogui
import cv2
import mediapipe as mp
import numpy as np
import time

print("Everything imported successfully!")

Everything imported successfully!


In [2]:
# Shortcuts into MediaPipe's solutions API: the drawing helpers and the
# hands module (which also provides HAND_CONNECTIONS for drawing).
mpDraw = mp.solutions.drawing_utils
mpHands = mp.solutions.hands

# Hand detector shared by the cells below: track at most one hand and require
# a detection confidence of at least 0.7.
hands = mpHands.Hands(
    min_detection_confidence=0.7,
    max_num_hands=1,
)

In [3]:
# Screen size as reported by pyautogui. The cursor cells below index it as
# s[0] when scaling a landmark's x coordinate and s[1] when scaling its y
# coordinate.
s = pyautogui.size()

In [4]:
# Initialize webcam
cap = cv2.VideoCapture(0)

# The try/finally guarantees the camera and the window are released even if
# the loop is interrupted (e.g. KeyboardInterrupt or an OpenCV error).
try:
    while True:
        # Capture a frame from the webcam; stop when no frame is delivered
        ret, frame = cap.read()
        if not ret:
            break

        # Flip the frame horizontally for a mirror effect
        frame = cv2.flip(frame, 1)

        # Display the frame in a window titled 'Output'
        cv2.imshow('Output', frame)

        # waitKey(1) waits up to 1 millisecond for a key press and lets the
        # window refresh; exit the loop if 'q' is pressed in the OpenCV window
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the webcam and close any OpenCV windows
    cap.release()
    cv2.destroyAllWindows()
    # One more 1 ms waitKey so the GUI event loop can process the close
    # request (on some platforms the window otherwise stays open).
    cv2.waitKey(1)

-1

In [6]:
#Hand tracking
# Initialize webcam
cap = cv2.VideoCapture(0)

# The try/finally guarantees the camera and the window are released even if
# the loop is interrupted or raises.
try:
    while True:
        # Capture a webcam frame; stop when the camera delivers nothing
        # (cv2.flip would otherwise be called on None).
        ret, frame = cap.read()
        if not ret:
            break
        # Flip the frame horizontally for a mirror effect
        frame = cv2.flip(frame, 1)
        # -------- ADD THIS CODE NEXT ---------------
        # frame.shape is (rows, columns, channels) = (height, width, channels)
        h, w, c = frame.shape
        # OpenCV frames are BGR while MediaPipe expects RGB, so convert a copy
        # for prediction and keep drawing on the original BGR frame.
        result = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        # Iterate through the predicted landmarks, scale them to the window,
        # and draw them on the OpenCV frame
        if result.multi_hand_landmarks:
            landmarks = []
            for handslms in result.multi_hand_landmarks:
                # Landmark coordinates are normalized: x scales with the
                # frame width, y with the frame height.
                landmarks.extend(
                    [int(lm.x * w), int(lm.y * h)] for lm in handslms.landmark
                )
                mpDraw.draw_landmarks(frame, handslms, mpHands.HAND_CONNECTIONS)
        # ------- FINISHED ADDING NEW CODE ----------
        cv2.imshow('Output', frame)
        # if q is pressed, program exits
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()
    # Give the GUI event loop 1 ms to process the window close request
    cv2.waitKey(1)

-1

<img src="hand_tracking_landmarks.webp" alt="Hand Tracker Visualization with Mediapipe" width="400">

In [8]:
#Moving Cursor
# Initialize webcam
cap = cv2.VideoCapture(0)

# The try/finally guarantees the camera and the window are released even if
# the loop raises (for example if pyautogui aborts via its fail-safe when the
# cursor reaches a screen corner).
try:
    while True:
        # Capture a webcam frame; stop when the camera delivers nothing
        ret, frame = cap.read()
        if not ret:
            break
        # Flip the frame horizontally for a mirror effect
        frame = cv2.flip(frame, 1)
        # frame.shape is (rows, columns, channels) = (height, width, channels)
        h, w, c = frame.shape
        # OpenCV frames are BGR while MediaPipe expects RGB
        result = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        # Iterate through the predicted landmarks, scale them to the window,
        # and draw them on the OpenCV frame
        if result.multi_hand_landmarks:
            landmarks = []
            for handslms in result.multi_hand_landmarks:
                # take the 8th landmark (index finger tip) and move the cursor
                # to that landmark's position, scaled from normalized
                # coordinates to the screen size stored in s
                # ADD THE LINES BELOW NEXT
                index_x = int(handslms.landmark[8].x * s[0])
                index_y = int(handslms.landmark[8].y * s[1])
                pyautogui.moveTo(index_x, index_y, _pause=False)
                # Window-space landmark positions: x scales with the frame
                # width, y with the frame height.
                landmarks.extend(
                    [int(lm.x * w), int(lm.y * h)] for lm in handslms.landmark
                )
                mpDraw.draw_landmarks(frame, handslms, mpHands.HAND_CONNECTIONS)

        cv2.imshow('Output', frame)
        # if q is pressed, program exits
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()
    # Give the GUI event loop 1 ms to process the window close request
    cv2.waitKey(1)

-1

In [10]:
#Clicking
# Initialize webcam
cap = cv2.VideoCapture(0)
# -------- ADD THIS VARIABLE ---------------
# time.time() value of the most recent simulated click; used to debounce so a
# held pinch does not fire a click on every frame
last_click = 0

# The try/finally guarantees the camera and the window are released even if
# the loop raises.
try:
    while True:
        # Capture a webcam frame; stop when the camera delivers nothing
        ret, frame = cap.read()
        if not ret:
            break
        # Flip the frame horizontally for a mirror effect
        frame = cv2.flip(frame, 1)
        # OpenCV frames are BGR while MediaPipe expects RGB
        result = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        if result.multi_hand_landmarks:
            for handslms in result.multi_hand_landmarks:
                # take the 8th landmark (index finger tip), scale it to the
                # screen size in s and move the cursor there. The position is
                # computed here, in this cell, so the click test below uses the
                # current frame's values rather than leftovers from an
                # earlier cell.
                index_x = int(handslms.landmark[8].x * s[0])
                index_y = int(handslms.landmark[8].y * s[1])
                pyautogui.moveTo(index_x, index_y, _pause=False)

                # -------- ADD THIS CODE NEXT ---------------
                # Detect click gesture by calculating the screen-space distance
                # between thumb tip (landmark 4) and index tip (landmark 8)
                thumb_x = int(handslms.landmark[4].x * s[0])
                thumb_y = int(handslms.landmark[4].y * s[1])
                distance = ((index_x - thumb_x) ** 2 + (index_y - thumb_y) ** 2) ** 0.5

                # If the fingers are closer than 40 pixels and at least half a
                # second has passed since the last click, simulate a click
                if distance < 40 and time.time() - last_click > 0.5:
                    pyautogui.click()
                    last_click = time.time()
                # ------- FINISHED ADDING NEW CODE ----------

                mpDraw.draw_landmarks(frame, handslms, mpHands.HAND_CONNECTIONS)
        cv2.imshow('Output', frame)
        # if q is pressed, program exits
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()
    # Give the GUI event loop 1 ms to process the window close request
    cv2.waitKey(1)

-1

In [6]:
def sum_dict(dict):
    """Return the sum of all values stored in a mapping.

    Args:
        dict: Mapping whose values are numbers (for example per-finger 0/1
            flags). The name shadows the builtin ``dict`` but is kept so
            existing callers, including keyword callers, keep working.

    Returns:
        The total of the mapping's values; 0 when the mapping is empty.
    """
    return sum(dict.values())

In [None]:
# Adding Finger Counter

# (fingertip, PIP joint) landmark indices for index, middle, ring and pinky.
# A finger counts as raised when its tip is above its PIP joint in the image.
finger_joints = ((8, 6), (12, 10), (16, 14), (20, 18))

# initialize web cam
cap = cv2.VideoCapture(0)

# The try/finally guarantees the camera and the window are released even if
# the loop is interrupted or raises.
try:
    while True:
        # capture webcam frame; stop when the camera delivers nothing
        ret, frame = cap.read()
        if not ret:
            break

        # flip frame for mirror effect
        frame = cv2.flip(frame, 1)

        # frame.shape is (height, width, channels)
        h, w, c = frame.shape

        # convert to rgb for mediapipe
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # have mediapipe hands predict hand landmarks
        result = hands.process(rgb_frame)

        # raised flag per finger, keyed by the fingertip landmark index;
        # rebuilt every frame so the count is 0 when no hand is visible
        fingers = {"4": 0, "8": 0, "12": 0, "16": 0, "20": 0}

        if result.multi_hand_landmarks:
            for handslms in result.multi_hand_landmarks:
                lms = handslms.landmark

                # Thumb: compare tip (4) and joint (2) horizontally, scaled
                # to frame pixels.
                # NOTE(review): "tip to the right of the joint" only means
                # "thumb out" for one hand/palm orientation in the mirrored
                # frame - confirm which hand this tutorial targets.
                if int(lms[4].x * w) > int(lms[2].x * w):
                    fingers['4'] = 1

                # Other fingers: image y grows downwards, so a raised tip has
                # a smaller pixel y than its PIP joint.
                for tip, pip in finger_joints:
                    if int(lms[tip].y * h) < int(lms[pip].y * h):
                        fingers[str(tip)] = 1

                mpDraw.draw_landmarks(frame, handslms, mpHands.HAND_CONNECTIONS)

        # count raised fingers
        number_fingers = sum_dict(fingers)

        # draw a filled box on the left of the frame and print the count in it
        cv2.rectangle(frame, (25, 150), (100, 400), (0, 128, 0), cv2.FILLED)
        cv2.putText(frame, str(number_fingers), (35, 300), cv2.FONT_HERSHEY_PLAIN,
                    3, (0, 71, 71), 2)

        cv2.imshow('Output', frame)
        # if q is pressed, program exits
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()
    # Give the GUI event loop 1 ms to process the window close request
    cv2.waitKey(1)
    

-1

If you have trouble understanding the finger counter, this article walks through it: https://medium.com/@Mert.A/how-to-create-a-finger-counter-with-python-and-mediapipe-cc6c3911ad09