### Hand tracking
This algorithm detects and tracks the index finger of the hand to create a drawing when the index finger is extended. If the index finger is not extended, no drawing is made. When no hand is detected, the generated drawing is saved.  

Assumptions for the algorithm:

- Only one hand will be present on camera.
- The hand should be positioned in the middle of the video frame's height.
- The drawing must be continuous (a single stroke with no gaps).

The sources below use the MediaPipe library for hand detection.  
https://omes-va.com/mediapipe-hands-python/  
https://omes-va.com/contando-dedos-mediapipe-opencv-python/  
https://www.toolify.ai/es/ai-news-es/domina-la-deteccin-de-manos-y-la-estimacin-de-posturas-con-mediapipe-443934  


In [1]:
import cv2
import mediapipe as mp
import numpy as np

In [2]:
def initialize_video_writer(cap, output_path='output_hand_video.mp4',
                            codec='mp4v', default_fps=30.0):
    """
    Initialize VideoWriter to save the video
    with attributes from the video loaded.

    Args:
        cap (cv2.VideoCapture): Loaded video.
        output_path (str): Destination file of the written video.
        codec (str): Four-character codec code used for encoding.
        default_fps (float): Frame rate used when the capture does not
            report a positive one.

    Returns:
        VideoWriter: Interface for writing video files.
    """
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Keep the frame rate as a float: truncating e.g. 29.97 to 29 would make
    # the written video play slower than the source.
    fps = cap.get(cv2.CAP_PROP_FPS)
    # `not fps > 0` also catches NaN, which some backends report when the
    # frame rate is unknown.
    if not fps > 0:
        fps = default_fps

    fourcc = cv2.VideoWriter_fourcc(*codec)
    return cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))
    
def meanshift_tip_points(index_tip, frame_hsv, window_size=5):
    """
    Set up the ROI for tracking, and the termination criteria.
    Apply meanshift to obtain the new location.

    Args:
        index_tip(landmark): Instance of the Landmark class, used
            to obtain the normalized coordinates of the tip.
        frame_hsv (np.ndarray): Current frame from the video in HSV.
        window_size (int): Side length, in pixels, of the square
            tracking window.

    Returns:
        int: Coordinate x of the top-left corner of the tracking window.
        int: Coordinate y of the top-left corner of the tracking window.
    """
    frame_h, frame_w, _ = frame_hsv.shape

    # Convert normalized coordinates into pixels and clamp them so that the
    # whole window lies inside the frame; otherwise the ROI slice below can
    # be empty (or wrap around on negative indices) and OpenCV raises.
    max_x = max(frame_w - window_size, 0)
    max_y = max(frame_h - window_size, 0)
    tip_x = min(max(int(index_tip.x * frame_w), 0), max_x)
    tip_y = min(max(int(index_tip.y * frame_h), 0), max_y)

    # Stop after 10 iterations or when the window moves by less than 1 px.
    term_crit = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1)

    # Setup initial location of window
    track_window = (tip_x, tip_y, window_size, window_size)

    # Set up the ROI for tracking. The frame is already in HSV, so the ROI
    # must NOT be converted again (a second BGR2HSV would scramble the
    # channels and invalidate the skin-colour range below).
    hsv_roi = frame_hsv[tip_y:tip_y + window_size, tip_x:tip_x + window_size]

    # Establish ranges in HSV for the skin color
    mask = cv2.inRange(hsv_roi, np.array((9., 102., 173.)), np.array((11., 145., 230.)))
    roi_hist = cv2.calcHist([hsv_roi], [0], mask, [180], [0, 180])
    cv2.normalize(roi_hist, roi_hist, 0, 255, cv2.NORM_MINMAX)
    dst = cv2.calcBackProject([frame_hsv], [0], roi_hist, [0, 180], 1)

    # Apply meanshift to get the new location
    _, track_window = cv2.meanShift(dst, track_window, term_crit)
    x, y, _, _ = track_window
    return x, y

def update_draw_points(paint_points, x,y):
    """
    Return the drawing points extended with one more point.

    The input array is not modified; a new array is built.

    Args:
        paint_points(np.ndarray): Points of the drawing so far, one [x, y] per row.
        x(int): Coordinate x of the new point.
        y(int): Coordinate y of the new point.
    Returns:
        np.ndarray: Array of coordinates [x,y] with the new point as last row.
    """
    new_row = np.array([x, y], dtype=np.int32).reshape(1, 2)
    return np.concatenate((paint_points, new_row), axis=0)
    
def is_index_finger_pointing(hand_landmarks):
    """
    Decide whether the index finger is pointing (extended).

    Only the x and y coordinates of landmarks 0 (wrist), 5 (index finger
    base) and 8 (index finger tip) are used.

    Args:
        hand_landmarks(NormalizedLandmarkList): List of landmarks.
    Returns:
        boolean: True if the distance between the tip and the base is
            greater than the distance between the base and the wrist.
    """
    points = hand_landmarks.landmark
    wrist_xy, base_xy, tip_xy = (
        np.array([points[idx].x, points[idx].y]) for idx in (0, 5, 8)
    )

    tip_to_base = np.linalg.norm(tip_xy - base_xy)
    base_to_wrist = np.linalg.norm(base_xy - wrist_xy)
    return tip_to_base > base_to_wrist

In [3]:

mp_hands = mp.solutions.hands
# static_image_mode=False keeps the computational cost down: the detector is
# not applied to every frame.
# max_num_hands=1 because we know in advance that the video contains a single
# hand.
# min_detection_confidence is left at its default value.
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5)

video_path = "hand_video.mp4"
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Fail loudly instead of silently producing an empty output video.
    cap.release()
    raise IOError(f"Could not open video: {video_path}")
out = initialize_video_writer(cap)

# List of points used for the draw
paint_points = np.empty((0, 2), dtype=np.int32)
# Flag to control the creation of the images from the draw: True once the
# current drawing has been saved, reset when a hand is detected again.
frame_saved = False

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # MediaPipe expects RGB input, OpenCV delivers BGR.
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(frame_rgb)

        if results.multi_hand_landmarks:
            frame_saved = False
            for hand_landmarks in results.multi_hand_landmarks:
                if is_index_finger_pointing(hand_landmarks):
                    # The HSV frame is only needed by the tracker.
                    frame_hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
                    # Represents the index finger tip
                    index_tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
                    # Apply the tracking to the index finger tip
                    x, y = meanshift_tip_points(index_tip, frame_hsv)
                    paint_points = update_draw_points(paint_points, x, y)
                    cv2.putText(frame, "Drawing", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    # Draw blue line to follow the index tip
                    cv2.polylines(frame, [paint_points], isClosed=False, color=(255, 0, 0), thickness=2)
                else:
                    cv2.putText(frame, "Hand detected not drawing", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        else:
            if not frame_saved and paint_points.size != 0:
                cv2.polylines(frame, [paint_points], isClosed=False, color=(255, 0, 0), thickness=2)
                current_frame = int(cap.get(cv2.CAP_PROP_POS_FRAMES))
                cv2.imwrite(f'frame_{current_frame}.jpg', frame)
                frame_saved = True
                # Written after imwrite on purpose: the notice shows up in the
                # preview/output video but not in the saved image.
                cv2.putText(frame, "Saving frame with draw...", (10, 65), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
            cv2.putText(frame, "No hand detected...", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            # The hand left the frame: start the next drawing from scratch.
            paint_points = np.empty((0, 2), dtype=np.int32)

        cv2.imshow("Index finger Detection", frame)
        out.write(frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture, the writer and the windows even if a frame fails.
    cap.release()
    out.release()
    cv2.destroyAllWindows()