In [1]:
'''
CS585 Image and Video Computing
HW 2 Part 2
--------------
Daniel Kehr U1712152
--------------

HAND SHAPE RECOGNITION
----------------------
A program able to distinguish five (5) distinct hand shapes from the user in a live video feed.
1. Fist
2. Hand Splayed
3. Thumbs Up
4. Peace Sign or Two Fingers Held Up
5. L-Shape

The program will create an outline of the user's hand, a bounding convex hull, and a bounding rectangle.
The program calculates the circularity and direction of the smallest bounding ellipse of the detected hand,
as well as the solidity (or how much negative space there is between the hand and its convex hull).
The program also creates a centerpoint of the detected shape. The outline and centerpoint are mostly
for user calibration so that they can tell when the light is sufficient and the outline is correct.
'''

import cv2
import sys
import numpy as np

# Global variables
# Both are read by detect_hand and passed to cv2.threshold:
# `thresh` as the threshold argument, `max_thresh` as the maximum-value argument.
thresh = 100
max_thresh = 255


In [2]:

# Acceptance windows (exclusive bounds) for every programmed gesture.
# The ranges were found and honed through trial and error.
# Order matters: when two gestures reach the same score, the one listed first wins.
_GESTURE_RANGES = {
    # gesture:     (circularity,  solidity,     angle)
    "Peace Sign": ((0.34, 0.36), (0.69, 0.76), (178, 180)),
    "L Shape":    ((0.48, 0.51), (0.65, 0.67), (148, 168)),
    "Thumbs Up":  ((0.68, 0.71), (0.80, 0.85), (5, 10)),
    "Fist":       ((0.54, 0.56), (0.89, 0.98), (160, 163)),
    "Five":       ((0.73, 0.83), (0.58, 0.64), (3, 14)),
}


# function that takes the circularity, solidity, and orientation angle
# of an object and outputs the hand gesture as a string
def detect_gesture(circ, sld, angle):
    '''
    Classify a hand shape from three measurements.

    Every gesture is scored independently: +1 if `circ` lies inside its
    circularity window, +1 if `sld` lies inside its solidity window and +2 if
    `angle` lies inside its angle window (angle is weighted higher than the
    other two). The gesture with the highest score is returned.

    Parameters
    ----------
    circ  : circularity value to test against the circularity windows
    sld   : solidity value to test against the solidity windows
    angle : orientation angle to test against the angle windows

    Returns
    -------
    str : one of "Peace Sign", "L Shape", "Thumbs Up", "Fist", "Five",
          or "None" when no gesture scores at least one point.
    '''

    def _inside(value, bounds):
        # bounds are exclusive on both ends
        low, high = bounds
        return low < value < high

    # "None" acts as a 0.5-point baseline that any real match (>= 1 point) beats
    best_gesture = "None"
    best_score = 0.5

    for name, (circ_rng, sld_rng, angle_rng) in _GESTURE_RANGES.items():
        # Each gesture is tested on its own so that overlapping windows
        # (e.g. the Fist angle window inside the L Shape one) can both score;
        # circularity and solidity then break the tie.
        score = 0
        if _inside(circ, circ_rng):
            score += 1
        if _inside(sld, sld_rng):
            score += 1
        if _inside(angle, angle_rng):
            score += 2

        # strict '>' keeps the earlier gesture on a tie and tracks the real maximum
        if score > best_score:
            best_gesture = name
            best_score = score

    return best_gesture
    
# function that takes in the current frame and outputs the largest contour onscreen given by skin color
def detect_hand(src):
    '''
    Find the largest skin-coloured region of a frame and return its contour.

    Color mask values taken from:
    Vezhnevets, Vladimir, Vassili Sazonov, and Alla Andreeva. "A survey on pixel-based skin color detection techniques." Proc. Graphicon. Vol. 3. 2003.

    Parameters
    ----------
    src : 3-channel image array; channels 0, 1, 2 are treated as
          blue, green, red (presumably a BGR frame from cv2.VideoCapture).

    Returns
    -------
    The contour with the largest cv2.contourArea, or the empty sequence
    produced by cv2.findContours when no skin-coloured pixels were found.
    Callers must check the length before using the result as a contour.
    '''

    # blur image
    src = cv2.blur(src, (3, 3))

    blue, green, red = src[:, :, 0], src[:, :, 1], src[:, :, 2]

    # difference between the brightest and darkest channel of each pixel
    spread = src.max(axis=-1) - src.min(axis=-1)
    # |red - green| written as max - min: a plain subtraction on unsigned
    # pixel data wraps around when green > red, and abs() cannot undo that
    red_green_gap = np.maximum(red, green) - np.minimum(red, green)

    # create mask for skin color
    mask = np.logical_and.reduce((blue > 20, green > 40, red > 95,
                                  spread > 15,
                                  red_green_gap > 15,
                                  red > green, red > blue))

    dst = np.zeros(np.shape(src)[:-1], dtype=np.uint8)
    dst[mask] = 255

    # undergo thresholding
    _, thres_output = cv2.threshold(dst, thresh, max_thresh, 0)

    # obtain contours; indexing with [-2] picks the contour list whether
    # findContours returns (contours, hierarchy) or (image, contours, hierarchy)
    contours = cv2.findContours(thres_output, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

    if len(contours) == 0:
        # nothing detected: hand back the empty sequence unchanged
        return contours

    # Find largest contour
    return max(contours, key=cv2.contourArea)


# function that draws all graphical add-ons (bounding box, convex hull, centerpoint, hand outline, gesture name)
def draw_hand(src):
    '''
    Detect the hand in `src`, classify its shape and draw the annotations.

    Draws the hand outline, its convex hull, the bounding rectangle, a label
    with the detected gesture name and the centroid directly onto `src`.

    Parameters
    ----------
    src : frame passed to detect_hand; it is modified in place.

    Returns
    -------
    The same `src` array, annotated. When no usable hand contour is found the
    frame is returned without any annotation.
    '''

    # retrieve outline of hand
    hand = detect_hand(src)

    # cv2.fitEllipse needs at least five points, and detect_hand returns an
    # empty sequence when nothing is found: skip drawing instead of crashing
    if hand is None or len(hand) < 5:
        return src

    # retrieve the coordinates of the centroid of the hand
    M = cv2.moments(hand)

    cX = 0
    cY = 0
    if M["m00"] != 0:
        cX = int(M["m10"] / M["m00"])
        cY = int(M["m01"] / M["m00"])

    # retrieve the two axis lengths and the rotation angle of the fitted ellipse
    _, (MA, ma), angle = cv2.fitEllipse(hand)

    # retrieve area of hand
    area = cv2.contourArea(hand)
    # create convex hull around hand
    hull = cv2.convexHull(hand)
    # retrieve area of convex hull
    hull_area = cv2.contourArea(hull)

    # divide the area of the hand by the area of its convex hull to gain the solidity
    # (how much the hand fills the hull)
    solidity = 0
    if hull_area != 0:
        solidity = float(area) / hull_area

    # ratio of the two ellipse axes, used as the circularity measure
    circularity = 0
    if ma != 0:
        circularity = MA / ma

    # obtain a bounding rectangle from the hand as (x, y, width, height)
    boundrec = cv2.boundingRect(hand)
    right_edge = boundrec[0] + boundrec[2]
    top_edge = boundrec[1]

    # draw hand outline
    cv2.drawContours(src, [hand], -1, (0, 0, 255), 2, 8)
    # draw convex hull
    cv2.drawContours(src, [hull], -1, (0, 255, 255), 2, 8)
    # draw bounding rectangle
    cv2.rectangle(src, boundrec, (0, 255, 0), 1, 8, 0)
    # draw filled-in rectangle starting at the top-right corner of the bounding rectangle to put text over
    cv2.rectangle(src, (right_edge, top_edge), (right_edge + 150, top_edge + 30), (0, 255, 0), -1)
    # generate text of the given gesture
    cv2.putText(src, detect_gesture(circularity, solidity, angle), (right_edge + 5, top_edge + 22),
                cv2.FONT_HERSHEY_SIMPLEX, .8, (0, 0, 0), 2)
    # draw centroid
    cv2.circle(src, (cX, cY), 7, (255, 255, 255), -1)

    return src

In [6]:

# begin video capture
cap = cv2.VideoCapture(0)

# if not successful, exit program
if not cap.isOpened():
    print("Cannot open the video cam")
    sys.exit()

# try/finally guarantees the camera and the window are released even when
# frame processing raises; otherwise the device stays locked until the
# kernel is restarted
try:
    # read one frame before opening the window; a failure here is only
    # reported, the loop below stops on the next failed read
    ret, frame0 = cap.read()
    if not ret:
        print("Cannot read a frame from video stream")

    cv2.namedWindow("Hand Shape Detection", cv2.WINDOW_AUTOSIZE)

    while True:
        # read a new frame from video
        ret, frame = cap.read()
        # if not successful, break loop
        if not ret:
            print("Cannot read a frame from video stream")
            break

        # mirror image for clarity
        frame = cv2.flip(frame, 1)

        # draw and classify the hand
        frame = draw_hand(frame)

        cv2.imshow("Hand Shape Detection", frame)

        # wait for 'esc' key press for 30ms. If 'esc' key is pressed, break loop
        # (mask to the low byte: some platforms set higher bits in the key code)
        if (cv2.waitKey(30) & 0xFF) == 27:
            print("esc key is pressed by user")
            break
finally:
    # When everything done, release the capture
    cap.release()
    cv2.destroyAllWindows()

esc key is pressed by user
