In [1]:
!pip install imutils



In [2]:
import os
from pathlib import Path

import cv2
import imutils
import numpy as np
from sklearn.metrics import pairwise


In [3]:
# For Game Emulation
from selenium import webdriver
from selenium.webdriver.common.by import By

from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys

import time

In [4]:
# global variables

# bg: running-average background model. It is created and updated by
#     run_avg() and read by segment(); it stays None until the first
#     calibration frame has been processed.
bg = None

# prev_pos: last gesture state acted on by the game loop
#     (0 = keep running, 1 = jump, 2 = duck).
prev_pos = 0

In [5]:
## Initialize Selenium Browser

# https://sites.google.com/a/chromium.org/chromedriver/home
# Download ChromeDriver and extract it. Then either set the CHROMEDRIVER_PATH
# environment variable to its full path, or edit the default below.
PATH_TO_CHROME = os.environ.get(
    "CHROMEDRIVER_PATH",
    r"/Users/saadbazaz/Documents/FAST Studies/Digital Image Processing Lab/Project/Dino-Game-Hand-Gestures/t-rex-runner-gh-pages/chromedriver",
)

# The path to your Dino Game's index.html. Override it with the DINO_GAME_LINK
# environment variable, or edit the default below.
DINO_GAME_LINK = os.environ.get(
    "DINO_GAME_LINK",
    r"/Users/saadbazaz/Documents/FAST Studies/Digital Image Processing Lab/Project/Dino-Game-Hand-Gestures/t-rex-runner-gh-pages/index.html",
)

# NOTE(review): Selenium 4 moved the driver path to a `Service` object; confirm
# that the positional form below matches the installed Selenium version.
browser = webdriver.Chrome(PATH_TO_CHROME)

# Build the file:// URL with pathlib so that spaces and other special
# characters in the path are percent-encoded, and so that the same code works
# for paths on other platforms. `browser.get` returns nothing; `res` is kept
# only so the name still exists for any later cell that refers to it.
res = browser.get(Path(DINO_GAME_LINK).expanduser().resolve().as_uri())

In [6]:
## Initialize Selenium Action Chains

# Action chain bound to the game browser, used for sending key presses.
# Note: the game loop further down rebinds `actions` to a fresh
# ActionChains(browser) for every key event it sends.
actions = ActionChains(browser)

## Segmentation
Separate the foreground (hand) from the background (everything else).

### Separate main background

In [7]:
#--------------------------------------------------
# Maintain the running average of the background
#--------------------------------------------------
def run_avg(image, aWeight):
    """Fold one frame into the global background model `bg`.

    The first call (while `bg` is None) seeds the model with a float copy of
    `image`. Every later call blends `image` into `bg` in place through
    cv2.accumulateWeighted, using `aWeight` as the weight of the new frame.

    Returns None in both cases.
    """
    global bg

    # model already exists: accumulate the new frame into it and stop here
    if bg is not None:
        cv2.accumulateWeighted(image, bg, aWeight)
        return

    # very first frame: it becomes the initial background
    bg = image.copy().astype("float")

### Separate hand from segmented background

In [8]:
#---------------------------------------------
# To segment the region of hand in the image
#---------------------------------------------
def segment(image, threshold=25):
    """Separate the foreground (hand) from the calibrated background.

    Parameters
    ----------
    image : frame to segment; it is differenced against the global background
        model `bg`, so it must be compatible with `bg` for cv2.absdiff.
    threshold : difference value above which a pixel counts as foreground.

    Returns
    -------
    None when no contour is found, otherwise the tuple
    (thresholded, segmented): the binary foreground mask and the contour with
    the largest area, which is taken to be the hand.

    Raises
    ------
    RuntimeError
        If the background model has not been built yet (run_avg() was never
        called), instead of failing with an opaque AttributeError on None.
    """
    if bg is None:
        raise RuntimeError("background model is not calibrated; call run_avg() first")

    # find the absolute difference between background and current frame
    diff = cv2.absdiff(bg.astype("uint8"), image)

    # threshold the diff image so that we get the foreground
    thresholded = cv2.threshold(diff, threshold, 255, cv2.THRESH_BINARY)[1]

    # get the contours in the thresholded image; grab_contours copes with both
    # the 2-value (OpenCV 2.4 / 4.x) and 3-value (OpenCV 3.x) return formats
    cnts = imutils.grab_contours(
        cv2.findContours(thresholded.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    )

    # nothing detected in the foreground
    if len(cnts) == 0:
        return None

    # based on contour area, get the maximum contour which is the hand
    segmented = max(cnts, key=cv2.contourArea)
    return (thresholded, segmented)

### Test Segmentation

In [9]:
# #-----------------
# # MAIN FUNCTION
# #-----------------
# if __name__ == "__main__":
#     # initialize weight for running average
#     aWeight = 0.5

#     # get the reference to the webcam
#     camera = cv2.VideoCapture(0)

#     # region of interest (ROI) coordinates
#     top, right, bottom, left = 10, 350, 225, 590

#     # initialize num of frames
#     num_frames = 0

#     # keep looping, until interrupted
#     while(True):
#         # get the current frame
#         (grabbed, frame) = camera.read()

#         # resize the frame
#         frame = imutils.resize(frame, width=700)

#         # flip the frame so that it is not the mirror view
#         frame = cv2.flip(frame, 1)

#         # clone the frame
#         clone = frame.copy()

#         # get the height and width of the frame
#         (height, width) = frame.shape[:2]

#         # get the ROI
#         roi = frame[top:bottom, right:left]

#         # convert the roi to grayscale and blur it
#         gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
#         gray = cv2.GaussianBlur(gray, (7, 7), 0)

#         # to get the background, keep looking till a threshold is reached
#         # so that our running average model gets calibrated
#         if num_frames < 30:
#             run_avg(gray, aWeight)
#         else:
#             # segment the hand region
#             hand = segment(gray)

#             # check whether hand region is segmented
#             if hand is not None:
#                 # if yes, unpack the thresholded image and
#                 # segmented region
#                 (thresholded, segmented) = hand

#                 # draw the segmented region and display the frame
#                 cv2.drawContours(clone, [segmented + (right, top)], -1, (0, 0, 255))
#                 cv2.imshow("Thesholded", thresholded)

#         # draw the segmented hand
#         cv2.rectangle(clone, (left, top), (right, bottom), (0,255,0), 2)

#         # increment the number of frames
#         num_frames += 1

#         # display the frame with segmented hand
#         cv2.imshow("Video Feed", clone)

#         # observe the keypress by the user
#         keypress = cv2.waitKey(1) & 0xFF

#         # if the user pressed "q", then stop looping
#         if keypress == ord("q"):
#             break

# # free up memory
# camera.release()
# cv2.destroyAllWindows()

### Recognize Hands using fingers

In [10]:
#--------------------------------------------------------------
# Create a feature vector from the segmented hand region
#--------------------------------------------------------------
def create_feature_vector(thresholded, segmented):
    """Describe a segmented hand as a fixed-length numeric feature vector.

    Parameters
    ----------
    thresholded : binary foreground mask (as returned by segment()).
    segmented : contour of the hand inside that mask (as returned by segment()).

    Returns
    -------
    numpy.ndarray of dtype float32 with 13 entries, in this order:
        0  mean of the four extreme convex-hull points (x and y pooled)
        1  standard deviation of those points
        2  largest distance from the palm centre to an extreme point
        3  smallest distance from the palm centre to an extreme point
        4  mean of those four distances
        5  standard deviation of those four distances
        6  radius of the circular ROI (80% of the largest distance)
        7  circumference of the circular ROI
        8  mean pixel value of the masked circular ROI
        9  standard deviation of the masked circular ROI
        10 mean of the per-contour coordinate means (0.0 if no contours)
        11 mean of the per-contour coordinate standard deviations (0.0 if none)
        12 number of contours counted as fingers
    """
    # find the convex hull of the segmented hand region
    chull = cv2.convexHull(segmented)

    # find the most extreme points in the convex hull
    extreme_top    = tuple(chull[chull[:, :, 1].argmin()][0])
    extreme_bottom = tuple(chull[chull[:, :, 1].argmax()][0])
    extreme_left   = tuple(chull[chull[:, :, 0].argmin()][0])
    extreme_right  = tuple(chull[chull[:, :, 0].argmax()][0])

    extreme_points = np.array([
            extreme_top,
            extreme_bottom,
            extreme_left,
            extreme_right
        ])

    chull_mean = np.mean(extreme_points)
    chull_std = np.std(extreme_points)

    # find the center of the palm
    cX = int((extreme_left[0] + extreme_right[0]) / 2)
    cY = int((extreme_top[1] + extreme_bottom[1]) / 2)

    # find the euclidean distances between the center of the palm
    # and the most extreme points of the convex hull
    distance = pairwise.euclidean_distances([(cX, cY)], Y=[extreme_left, extreme_right, extreme_top, extreme_bottom])[0]

    maximum_distance = distance[distance.argmax()]
    minimum_distance = distance[distance.argmin()]
    distances_mean = np.mean(distance)
    distances_std = np.std(distance)

    # calculate the radius of the circle with 80% of the max euclidean distance obtained
    radius = int(0.8 * maximum_distance)

    # find the circumference of the circle
    circumference = (2 * np.pi * radius)

    # take out the circular region of interest which has
    # the palm and the fingers
    circular_roi = np.zeros(thresholded.shape[:2], dtype="uint8")

    # draw the circular ROI (outline only, 1 pixel thick)
    cv2.circle(circular_roi, (cX, cY), radius, 255, 1)

    # take bit-wise AND between thresholded hand using the circular ROI as the mask
    # which gives the cuts obtained using mask on the thresholded hand image
    circular_roi = cv2.bitwise_and(thresholded, thresholded, mask=circular_roi)

    circular_roi_mean = np.mean(circular_roi)
    circular_roi_std = np.std(circular_roi)

    # compute the contours in the circular ROI; grab_contours copes with both
    # the 2-value (OpenCV 2.4 / 4.x) and 3-value (OpenCV 3.x) return formats
    cnts = imutils.grab_contours(
        cv2.findContours(circular_roi.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    )

    # summarise the contours; when the circle misses the hand entirely there
    # are none, and np.mean([]) would put NaN into the feature vector
    if len(cnts) > 0:
        cnts_mean = np.mean([np.mean(c) for c in cnts])
        cnts_std = np.mean([np.std(c) for c in cnts])
    else:
        cnts_mean = 0.0
        cnts_std = 0.0

    # initalize the finger count
    count = 0

    # loop through the contours found
    for c in cnts:
        # compute the bounding box of the contour
        (x, y, w, h) = cv2.boundingRect(c)

        # increment the count of fingers only if -
        # 1. The contour region is not the wrist (bottom area)
        # 2. The number of points along the contour does not exceed
        #     25% of the circumference of the circular ROI
        if ((cY + (cY * 0.25)) > (y + h)) and ((circumference * 0.25) > c.shape[0]):
            count += 1

    feature_vector = np.array([
        chull_mean,
        chull_std,
        maximum_distance,
        minimum_distance,
        distances_mean,
        distances_std,
        radius,
        circumference,
        circular_roi_mean,
        circular_roi_std,
        cnts_mean,
        cnts_std,
        count
    ], dtype=np.float32)
    return feature_vector

### Segment hand region

In [11]:
#---------------------------------------------
# To segment the region of hand in the image
#---------------------------------------------
# NOTE: this cell defines `segment` a second time; an identically named
# function already exists in the "Separate hand from segmented background"
# section, and whichever definition is executed last is the one in effect.
def segment(image, threshold=25):
    """Separate the foreground (hand) from the calibrated background.

    Parameters
    ----------
    image : frame to segment; it is differenced against the global background
        model `bg`, so it must be compatible with `bg` for cv2.absdiff.
    threshold : difference value above which a pixel counts as foreground.

    Returns
    -------
    None when no contour is found, otherwise the tuple
    (thresholded, segmented): the binary foreground mask and the contour with
    the largest area, which is taken to be the hand.

    Raises
    ------
    RuntimeError
        If the background model has not been built yet (run_avg() was never
        called), instead of failing with an opaque AttributeError on None.
    """
    if bg is None:
        raise RuntimeError("background model is not calibrated; call run_avg() first")

    # find the absolute difference between background and current frame
    diff = cv2.absdiff(bg.astype("uint8"), image)

    # threshold the diff image so that we get the foreground
    thresholded = cv2.threshold(diff, threshold, 255, cv2.THRESH_BINARY)[1]

    # get the contours in the thresholded image; grab_contours copes with both
    # the 2-value (OpenCV 2.4 / 4.x) and 3-value (OpenCV 3.x) return formats
    cnts = imutils.grab_contours(
        cv2.findContours(thresholded.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    )

    # nothing detected in the foreground
    if len(cnts) == 0:
        return None

    # based on contour area, get the maximum contour which is the hand
    segmented = max(cnts, key=cv2.contourArea)
    return (thresholded, segmented)

In [12]:
# print("Type of Contour: " + str(type(segmented)))
# print("Contour shape: " + str(segmented.shape))
# print("First 5 points in contour: " + str(segmented[:5]))


In [13]:
# # find the convex hull of the segmented hand region
# chull = cv2.convexHull(segmented)

# print("Type of Convex hull: " + str(type(chull)))
# print("Length of Convex hull: " + str(len(chull)))
# print("Shape of Convex hull: " + str(chull.shape))

# cv2.drawContours(image, [chull], -1, (0, 255, 0), 2)
# cv2.imshow("Convex Hull", image)

In [14]:
## Train for Hand Open

In [15]:
# Collect training feature vectors from the webcam.
#   - the first 30 frames calibrate the background model (keep the ROI empty)
#   - press "r" to toggle recording of feature vectors on and off
#   - press "q" to quit
recording = False
training_data_feature_vectors = []

if __name__ == "__main__":
    # initialize accumulated weight
    accumWeight = 0.5

    # get the reference to the webcam
    camera = cv2.VideoCapture(0)

    # region of interest (ROI) coordinates
    top, right, bottom, left = 10, 350, 225, 590

    # initialize num of frames
    num_frames = 0

    # calibration indicator
    calibrated = False

    try:
        # keep looping, until interrupted
        while True:
            # get the current frame
            (grabbed, frame) = camera.read()

            # stop cleanly if the webcam did not deliver a frame
            if not grabbed or frame is None:
                print("[ERROR] could not read a frame from the webcam")
                break

            # resize the frame
            frame = imutils.resize(frame, width=700)

            # flip the frame so that it is not the mirror view
            frame = cv2.flip(frame, 1)

            # clone the frame
            clone = frame.copy()

            # get the height and width of the frame
            (height, width) = frame.shape[:2]

            # get the ROI
            roi = frame[top:bottom, right:left]

            # convert the roi to grayscale and blur it
            gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
            gray = cv2.GaussianBlur(gray, (7, 7), 0)

            # to get the background, keep looking till a threshold is reached
            # so that our weighted average model gets calibrated
            if num_frames < 30:
                run_avg(gray, accumWeight)
                if num_frames == 1:
                    print("[STATUS] please wait! calibrating...")
                elif num_frames == 29:
                    print("[STATUS] calibration successfull...")
            elif recording:
                # segment the hand region
                hand = segment(gray)

                # check whether hand region is segmented
                if hand is not None:
                    # if yes, unpack the thresholded image and
                    # segmented region
                    (thresholded, segmented) = hand

                    # draw the segmented region on the displayed frame
                    cv2.drawContours(clone, [segmented + (right, top)], -1, (0, 0, 255))

                    # describe the hand and store the sample
                    feature_vector = create_feature_vector(thresholded, segmented)
                    training_data_feature_vectors.append(feature_vector)

                    # show the thresholded image; only done when a hand was
                    # found, because `thresholded` does not exist otherwise
                    cv2.imshow("Thresholded", thresholded)

            # draw the ROI rectangle
            cv2.rectangle(clone, (left, top), (right, bottom), (0,255,0), 2)

            # increment the number of frames
            num_frames += 1

            # display the frame with segmented hand
            cv2.imshow("Video Feed", clone)

            # observe the keypress by the user
            keypress = cv2.waitKey(1) & 0xFF

            # if the user pressed "q", then stop looping
            if keypress == ord("q"):
                break

            # if the user pressed "r", then toggle recording
            elif keypress == ord("r"):
                recording = not recording
    finally:
        # always release the webcam and close the windows, even if the loop
        # above raised
        camera.release()
        cv2.destroyAllWindows()

[STATUS] please wait! calibrating...
[STATUS] calibration successfull...
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.n

In [None]:
# Play the Dino game with hand gestures.
#   - the first 30 frames calibrate the background model (keep the ROI empty)
#   - press "r" to start gesture control
#   - press "q" to quit
#   - 1 finger = jump, 2 fingers = duck, anything else = keep running

# NOTE(review): this re-initialises the list filled by the training cell
# above; confirm that discarding the recorded samples here is intended.
training_data_feature_vectors = []

# gesture control stays off until "r" is pressed, independent of whatever
# state an earlier cell left behind
recording = False

if __name__ == "__main__":
    # initialize accumulated weight
    accumWeight = 0.5

    # get the reference to the webcam
    camera = cv2.VideoCapture(0)

    # region of interest (ROI) coordinates
    top, right, bottom, left = 10, 350, 225, 590

    # initialize num of frames
    num_frames = 0

    # calibration indicator
    calibrated = False

    try:
        # keep looping, until interrupted
        while True:
            # get the current frame
            (grabbed, frame) = camera.read()

            # stop cleanly if the webcam did not deliver a frame
            if not grabbed or frame is None:
                print("[ERROR] could not read a frame from the webcam")
                break

            # resize the frame
            frame = imutils.resize(frame, width=700)

            # flip the frame so that it is not the mirror view
            frame = cv2.flip(frame, 1)

            # clone the frame
            clone = frame.copy()

            # get the height and width of the frame
            (height, width) = frame.shape[:2]

            # get the ROI
            roi = frame[top:bottom, right:left]

            # convert the roi to grayscale and blur it
            gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
            gray = cv2.GaussianBlur(gray, (7, 7), 0)

            # to get the background, keep looking till a threshold is reached
            # so that our weighted average model gets calibrated
            if num_frames < 30:
                run_avg(gray, accumWeight)
                if num_frames == 1:
                    print("[STATUS] please wait! calibrating...")
                elif num_frames == 29:
                    print("[STATUS] calibration successfull...")
            elif recording:
                # segment the hand region
                hand = segment(gray)

                # no hand in the ROI counts as zero fingers, so that a held
                # DOWN key is released when the hand leaves the frame
                fingers = 0

                # check whether hand region is segmented
                if hand is not None:
                    # if yes, unpack the thresholded image and
                    # segmented region
                    (thresholded, segmented) = hand

                    # draw the segmented region on the displayed frame
                    cv2.drawContours(clone, [segmented + (right, top)], -1, (0, 0, 255))

                    # describe the hand; the last entry of the feature vector
                    # is the finger count
                    feature_vector = create_feature_vector(thresholded, segmented)
                    training_data_feature_vectors.append(feature_vector)
                    fingers = int(feature_vector[-1])

                    # show the thresholded image; only done when a hand was
                    # found, because `thresholded` does not exist otherwise
                    cv2.imshow("Thesholded", thresholded)

                if fingers == 1:
                    # Jump: send SPACE once when entering this state,
                    # releasing DOWN first if we were ducking
                    if prev_pos != 1:
                        actions = ActionChains(browser)
                        if prev_pos == 2:
                            actions.key_up(Keys.DOWN)
                        actions.send_keys(Keys.SPACE)
                        actions.perform()

                    prev_pos = 1

                elif fingers == 2:
                    # Duck: press and hold DOWN when entering this state
                    if prev_pos != 2:
                        actions = ActionChains(browser)
                        actions.key_down(Keys.DOWN)
                        actions.perform()

                    prev_pos = 2

                else:
                    # Keep running: release DOWN if we were ducking
                    if prev_pos == 2:
                        actions = ActionChains(browser)
                        actions.key_up(Keys.DOWN)
                        actions.perform()

                    prev_pos = 0

                # overlay the detected finger count on the video feed
                cv2.putText(clone, str(fingers), (70, 45), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,0,255), 2)

            # draw the ROI rectangle
            cv2.rectangle(clone, (left, top), (right, bottom), (0,255,0), 2)

            # increment the number of frames
            num_frames += 1

            # display the frame with segmented hand
            cv2.imshow("Video Feed", clone)

            # observe the keypress by the user
            keypress = cv2.waitKey(1) & 0xFF

            # if the user pressed "q", then stop looping
            if keypress == ord("q"):
                break

            # if the user pressed "r", then start gesture control
            elif keypress == ord("r"):
                recording = True
    finally:
        # always release the webcam and close the windows, even if the loop
        # above raised
        camera.release()
        cv2.destroyAllWindows()
            