In [1]:
import cv2
import numpy as np

In [1]:
import cv2
import numpy as np
from IPython.display import clear_output, Image, display
import ipywidgets as widgets
from PIL import Image as PILImage
import io

In [2]:
def initialize_video(video_path):
    """Open ``video_path`` with OpenCV and hand back the capture object.

    Args:
        video_path: Path (or other source accepted by ``cv2.VideoCapture``).

    Returns:
        The ``cv2.VideoCapture`` object, already opened.

    Raises:
        IOError: If the capture reports that it is not opened.
    """
    capture = cv2.VideoCapture(video_path)
    if capture.isOpened():
        return capture
    raise IOError("Cannot open video file")

In [None]:
video_path = "hand_tracking.mp4"

cap = initialize_video(video_path)

# Video properties as reported by the capture backend.
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))

# Blank BGR image with the same size as a video frame (not drawn on in this cell).
canvas = np.zeros((frame_height, frame_width, 3), dtype=np.uint8)

# Play the video in a loop until 'q' is pressed in the window or the kernel
# is interrupted. The try/finally guarantees the capture and the window are
# released on every exit path, including an interrupt.
try:
    frames_since_rewind = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            if frames_since_rewind == 0:
                # Not a single frame could be decoded since the last rewind:
                # rewinding again would spin forever, so fail loudly instead.
                raise IOError("Cannot read any frame from video file")
            # End of video: loop back to the first frame.
            cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
            frames_since_rewind = 0
            continue
        frames_since_rewind += 1

        cv2.imshow('Original', frame)

        # waitKey also services the GUI event loop; 'q' stops playback.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
except KeyboardInterrupt:
    print("Interrupted by user")
finally:
    # Clean up
    cap.release()
    cv2.destroyAllWindows()

## Displaying frames inline with IPython

In [None]:
def display_frame(frame, processed=None):
    """Show one BGR frame, or two side by side, as a PNG in the notebook output.

    Args:
        frame: BGR image to show.
        processed: Optional second BGR image; when given it is placed to the
            right of ``frame`` in a single combined picture.

    After displaying, the cell output is scheduled to be cleared as soon as
    the next output arrives (``clear_output(wait=True)``), so successive
    calls replace each other instead of stacking up.
    """
    panels = [frame] if processed is None else [frame, processed]

    # OpenCV images are BGR; PIL expects RGB.
    rgb_panels = [cv2.cvtColor(panel, cv2.COLOR_BGR2RGB) for panel in panels]

    merged = rgb_panels[0] if len(rgb_panels) == 1 else np.hstack(rgb_panels)
    picture = PILImage.fromarray(merged)

    # Encode to PNG in memory so IPython can render the raw bytes.
    with io.BytesIO() as png_buffer:
        picture.save(png_buffer, format='PNG')
        png_bytes = png_buffer.getvalue()

    display(Image(data=png_bytes))
    clear_output(wait=True)

In [None]:
def preprocess_frame(frame, lower_skin=(0, 20, 70), upper_skin=(20, 255, 255),
                     kernel_size=5, iterations=2):
    """Segment skin-coloured pixels of a BGR frame for hand detection.

    The frame is converted to HSV, blurred, thresholded against an HSV range
    and the resulting mask is cleaned with an erosion followed by a dilation.

    Args:
        frame: BGR image.
        lower_skin: Inclusive lower (H, S, V) bound passed to ``cv2.inRange``.
        upper_skin: Inclusive upper (H, S, V) bound passed to ``cv2.inRange``.
            The defaults are a rough skin range; it is not perfect, since
            capturing the full range of skin colours is challenging, so
            callers can tune both bounds for their footage.
        kernel_size: Side length of the square structuring element used for
            the erosion and dilation.
        iterations: Number of erosion passes and of dilation passes.

    Returns:
        (skin, skin_mask): ``frame`` with everything outside the mask blacked
        out, and the single-channel mask itself.
    """
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

    # Apply Gaussian blur to reduce noise before thresholding.
    blurred = cv2.GaussianBlur(hsv, (5, 5), 0)

    lower_bound = np.array(lower_skin, dtype=np.uint8)
    upper_bound = np.array(upper_skin, dtype=np.uint8)
    skin_mask = cv2.inRange(blurred, lower_bound, upper_bound)

    # Erode then dilate: removes small specks while roughly restoring the
    # size of the regions that survive.
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    skin_mask = cv2.erode(skin_mask, kernel, iterations=iterations)
    skin_mask = cv2.dilate(skin_mask, kernel, iterations=iterations)

    skin = cv2.bitwise_and(frame, frame, mask=skin_mask)

    return skin, skin_mask

In [None]:
def detect_hand(frame, skin_mask):
    """Detect the hand contour and candidate fingertips from a skin mask.

    The largest external contour of ``skin_mask`` is taken to be the hand.
    For each convexity defect of that contour, the angle at the defect's
    farthest point (between the directions to the defect's start and end
    points) is computed; when it is at most 90 degrees the defect's end point
    is recorded as a fingertip and marked on the output image.

    Args:
        frame: BGR image to annotate. It is not modified; drawing happens on
            a copy.
        skin_mask: Single-channel mask handed to ``cv2.findContours``.

    Returns:
        (output, hand_contour, fingertips):
        * no contour found: ``(frame, None, None)`` (``frame`` itself);
        * otherwise: the annotated copy, the chosen contour, and a list of
          ``(x, y)`` tuples of Python ints (possibly empty).
    """
    # [-2] picks the contour list whether findContours returns two values
    # (OpenCV 4.x) or three (OpenCV 3.x).
    contours = cv2.findContours(skin_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # If no contours found, return original frame
    if not contours:
        return frame, None, None

    # The largest contour is assumed to be the hand. This assumption does not
    # hold in general: other regions in the video can be larger than the
    # hands, so the result does not generalize well.
    hand_contour = max(contours, key=cv2.contourArea)

    # convexityDefects needs hull *indices*, hence returnPoints=False.
    hull = cv2.convexHull(hand_contour, returnPoints=False)
    defects = cv2.convexityDefects(hand_contour, hull)

    output = frame.copy()

    cv2.drawContours(output, [hand_contour], -1, (0, 255, 0), 2)

    fingertips = []
    if defects is not None:
        for s, e, f, _depth in defects[:, 0]:
            # Plain Python ints: safe for arithmetic and for cv2 drawing calls.
            start = tuple(int(v) for v in hand_contour[s][0])
            end = tuple(int(v) for v in hand_contour[e][0])
            far = tuple(int(v) for v in hand_contour[f][0])

            # Side lengths of the triangle start-end-far.
            a = np.hypot(end[0] - start[0], end[1] - start[1])
            b = np.hypot(far[0] - start[0], far[1] - start[1])
            c = np.hypot(end[0] - far[0], end[1] - far[1])

            # Degenerate triangle (far coincides with start or end): the
            # angle is undefined, and dividing would produce NaN.
            if b == 0 or c == 0:
                continue

            # Law of cosines for the angle at `far`; clip guards against
            # rounding pushing the cosine just outside [-1, 1].
            cos_angle = np.clip((b**2 + c**2 - a**2) / (2 * b * c), -1.0, 1.0)
            angle = np.degrees(np.arccos(cos_angle))

            if angle <= 90:
                fingertips.append(end)
                cv2.circle(output, end, 5, (0, 0, 255), -1)

    return output, hand_contour, fingertips

In [None]:
video_path = "hand_tracking.mp4"

# Same open-and-check logic as the earlier cell, via the shared helper.
cap = initialize_video(video_path)

# Video properties as reported by the capture backend.
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))

# Per-frame delay for waitKey. A reported FPS of 0 would make 1000/fps raise
# ZeroDivisionError, so fall back to roughly 30 FPS; the lower bound of 1 ms
# avoids waitKey(0), which blocks until a key is pressed.
frame_delay_ms = max(1, int(1000 / fps)) if fps > 0 else 33

# Blank BGR image with the same size as a video frame (not drawn on in this cell).
canvas = np.zeros((frame_height, frame_width, 3), dtype=np.uint8)

# Process the video in a loop until 'q' is pressed in an OpenCV window or the
# kernel is interrupted.
try:
    frames_since_rewind = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            if frames_since_rewind == 0:
                # Not a single frame could be decoded since the last rewind:
                # rewinding again would spin forever, so fail loudly instead.
                raise IOError("Cannot read any frame from video file")
            # End of video: loop back to the first frame.
            cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
            frames_since_rewind = 0
            continue
        frames_since_rewind += 1

        skin, skin_mask = preprocess_frame(frame)

        # Show the intermediate segmentation results in OpenCV windows.
        cv2.imshow('Skin', skin)
        cv2.imshow('Skin Mask', skin_mask)

        processed_frame, hand_contour, fingertips = detect_hand(frame, skin_mask)

        # Original and annotated frame side by side in the notebook output.
        display_frame(frame, processed_frame)

        # Paces playback and services the GUI event loop; 'q' stops the loop.
        if cv2.waitKey(frame_delay_ms) & 0xFF == ord('q'):
            break

except KeyboardInterrupt:
    print("Interrupted by user")
finally:
    cap.release()
    # Close the 'Skin' / 'Skin Mask' windows opened by imshow above.
    cv2.destroyAllWindows()

Interrupted by user
