# Dino Game using OpenCV SVM for gesture recognition

    Saad Bazaz  - i180621
    Abraar Raza - i180544

Our own attempt at a self-made gesture recognition algorithm, inspired by the following guides:
    1. https://gogul.dev/software/hand-gesture-recognition-p1
    2. https://gogul.dev/software/hand-gesture-recognition-p2

## Installations

In [1]:
# !pip install imutils

## Imports

In [4]:
import cv2
import imutils
import numpy as np
from sklearn.metrics import pairwise

import random

In [5]:
# For Game Emulation
from selenium import webdriver
from selenium.webdriver.common.by import By

from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys

import time

## Initialize

In [6]:
# Global state shared across the notebook cells.
# Running-average background model; stays None until run_avg() receives its first frame.
bg = None
# Last gesture the game loop acted on: 0 = none, 1 = jump, 2 = duck.
prev_pos = 0

## Segmentation
Separate the foreground (hand) from the background (everything else).

### Separate main background

In [7]:
#--------------------------------------------------
# To find the running average over the background
#--------------------------------------------------
def run_avg(image, aWeight):
    """Fold ``image`` into the module-level running-average background ``bg``.

    The first frame received simply becomes the background (stored as a
    float copy). Every later frame is blended into ``bg`` in place with
    ``cv2.accumulateWeighted`` using ``aWeight`` as the update weight.

    Args:
        image: Frame to add to the background model.
        aWeight: Weight passed to ``cv2.accumulateWeighted`` for the new frame.

    Returns:
        None. The result is kept in the global ``bg``.
    """
    global bg

    # background already exists: blend the new frame into it and stop
    if bg is not None:
        cv2.accumulateWeighted(image, bg, aWeight)
        return

    # very first frame: seed the background model with a float copy
    seed = image.copy()
    bg = seed.astype("float")

### Separate hand from segmented background

In [8]:
#---------------------------------------------
# To segment the region of hand in the image
#---------------------------------------------
def segment(image, threshold=25):
    """Separate the foreground (hand) from the learned background.

    The frame is differenced against the module-level background model
    ``bg`` (cast to uint8), the difference is binarised, and the external
    contour with the largest area is taken to be the hand.

    Args:
        image: Frame to segment; it is passed to ``cv2.absdiff`` together
            with ``bg`` cast to uint8. Callers in this notebook pass the
            blurred grayscale ROI.
        threshold: Difference value a pixel must exceed to be marked as
            foreground (set to 255) in the binary image.

    Returns:
        ``None`` if the binary image contains no contours, otherwise the
        tuple ``(thresholded, segmented)`` holding the binary image and the
        largest contour.
    """
    # find the absolute difference between background and current frame
    diff = cv2.absdiff(bg.astype("uint8"), image)

    # threshold the diff image so that we get the foreground
    thresholded = cv2.threshold(diff, threshold, 255, cv2.THRESH_BINARY)[1]

    # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x but
    # (image, contours, hierarchy) on 3.x; the contour list is always the
    # second-to-last element, so index it instead of unpacking two values.
    cnts = cv2.findContours(thresholded.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # nothing differs from the background: no hand to report
    if len(cnts) == 0:
        return None

    # based on contour area, get the maximum contour which is the hand
    segmented = max(cnts, key=cv2.contourArea)
    return (thresholded, segmented)

### Recognize Hands using fingers

In [9]:
#--------------------------------------------------------------
# Create a feature vector from the segmented hand region
#--------------------------------------------------------------
def create_feature_vector(thresholded, segmented):
    """Describe a segmented hand as a 13-element float32 feature vector.

    A circle centred on the middle of the hand's extreme points, with a
    radius of 80% of the largest centre-to-extreme distance, is intersected
    with the binary hand image. Statistics of the hull extremes, of the
    distances, of the masked image and of the resulting arc contours are
    combined with a finger count.

    Args:
        thresholded: Binary image of the hand as returned by ``segment``.
        segmented: Contour of the hand as returned by ``segment``.

    Returns:
        ``numpy.ndarray`` of dtype float32 with, in order:
        mean and std of the coordinates of the four extreme hull points,
        max / min / mean / std of the centre-to-extreme distances,
        circle radius, circle circumference,
        mean and std of the circle-masked binary image,
        mean of the per-contour coordinate means, mean of the per-contour
        coordinate stds, and the finger count.
        The two per-contour statistics are 0.0 when the circle does not
        intersect the hand at all (they were NaN before).
    """
    # find the convex hull of the segmented hand region
    chull = cv2.convexHull(segmented)

    # find the most extreme points in the convex hull
    hull_x = chull[:, :, 0]
    hull_y = chull[:, :, 1]
    extreme_top    = tuple(chull[hull_y.argmin()][0])
    extreme_bottom = tuple(chull[hull_y.argmax()][0])
    extreme_left   = tuple(chull[hull_x.argmin()][0])
    extreme_right  = tuple(chull[hull_x.argmax()][0])

    extreme_points = np.array([
        extreme_top,
        extreme_bottom,
        extreme_left,
        extreme_right,
    ])

    # statistics over all x and y coordinates of the four extremes together
    chull_mean = np.mean(extreme_points)
    chull_std = np.std(extreme_points)

    # find the center of the palm (midpoint of the extreme points)
    cX = int((extreme_left[0] + extreme_right[0]) / 2)
    cY = int((extreme_top[1] + extreme_bottom[1]) / 2)

    # euclidean distances between the center of the palm and the most
    # extreme points of the convex hull
    distance = pairwise.euclidean_distances(
        [(cX, cY)],
        Y=[extreme_left, extreme_right, extreme_top, extreme_bottom],
    )[0]

    maximum_distance = distance.max()
    minimum_distance = distance.min()
    distances_mean = np.mean(distance)
    distances_std = np.std(distance)

    # calculate the radius of the circle with 80% of the max euclidean distance obtained
    radius = int(0.8 * maximum_distance)

    # find the circumference of the circle
    circumference = (2 * np.pi * radius)

    # draw a one-pixel-wide circle outline to use as a mask
    circular_roi = np.zeros(thresholded.shape[:2], dtype="uint8")
    cv2.circle(circular_roi, (cX, cY), radius, 255, 1)

    # take bit-wise AND between thresholded hand using the circular ROI as the mask
    # which gives the cuts obtained using mask on the thresholded hand image
    circular_roi = cv2.bitwise_and(thresholded, thresholded, mask=circular_roi)

    circular_roi_mean = np.mean(circular_roi)
    circular_roi_std = np.std(circular_roi)

    # compute the contours in the circular ROI; [-2] picks the contour list
    # on every OpenCV version (3.x returns an extra leading image element)
    cnts = cv2.findContours(circular_roi.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]

    if len(cnts) > 0:
        cnts_mean = np.mean([np.mean(c) for c in cnts])
        cnts_std = np.mean([np.std(c) for c in cnts])
    else:
        # np.mean([]) would be NaN (plus a RuntimeWarning) and a NaN feature
        # would poison both SVM training and prediction, so fall back to 0
        cnts_mean = 0.0
        cnts_std = 0.0

    # count a contour as a finger only if -
    # 1. The contour region is not the wrist (bottom area)
    # 2. The number of points along the contour does not exceed
    #     25% of the circumference of the circular ROI
    wrist_limit = cY + (cY * 0.25)
    max_points = circumference * 0.25
    count = 0
    for c in cnts:
        # compute the bounding box of the contour
        (x, y, w, h) = cv2.boundingRect(c)
        if (wrist_limit > (y + h)) and (max_points > c.shape[0]):
            count += 1

    feature_vector = np.array([
        chull_mean,
        chull_std,
        maximum_distance,
        minimum_distance,
        distances_mean,
        distances_std,
        radius,
        circumference,
        circular_roi_mean,
        circular_roi_std,
        cnts_mean,
        cnts_std,
        count,
    ], dtype=np.float32)
    return feature_vector

### Segment hand region

In [10]:
#---------------------------------------------
# To segment the region of hand in the image
#---------------------------------------------
def segment(image, threshold=25):
    """Separate the foreground (hand) from the learned background.

    This cell redefines ``segment``; being the later definition, it is the
    one the training and game cells call. The frame is differenced against
    the module-level background model ``bg`` (cast to uint8), the
    difference is binarised, and the external contour with the largest
    area is taken to be the hand.

    Args:
        image: Frame to segment; it is passed to ``cv2.absdiff`` together
            with ``bg`` cast to uint8. Callers in this notebook pass the
            blurred grayscale ROI.
        threshold: Difference value a pixel must exceed to be marked as
            foreground (set to 255) in the binary image.

    Returns:
        ``None`` if the binary image contains no contours, otherwise the
        tuple ``(thresholded, segmented)`` holding the binary image and the
        largest contour.
    """
    # find the absolute difference between background and current frame
    diff = cv2.absdiff(bg.astype("uint8"), image)

    # threshold the diff image so that we get the foreground
    thresholded = cv2.threshold(diff, threshold, 255, cv2.THRESH_BINARY)[1]

    # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x but
    # (image, contours, hierarchy) on 3.x; the contour list is always the
    # second-to-last element, so index it instead of unpacking two values.
    cnts = cv2.findContours(thresholded.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # nothing differs from the background: no hand to report
    if len(cnts) == 0:
        return None

    # based on contour area, get the maximum contour which is the hand
    segmented = max(cnts, key=cv2.contourArea)
    return (thresholded, segmented)

## Train for Hand Open

In [11]:
# Collect "hand open" training samples from the webcam.
# Keys in the video window: "r" toggles recording, "q" quits.
recording = False
training_data_feature_vectors__hand_open = []

if __name__ == "__main__":
    # weight handed to run_avg() for each calibration frame
    accumWeight = 0.5

    # get the reference to the webcam
    camera = cv2.VideoCapture(0)

    # region of interest (ROI) coordinates
    top, right, bottom, left = 10, 350, 225, 590

    # initialize num of frames
    num_frames = 0

    # calibration indicator (assigned here but not read anywhere in this cell)
    calibrated = False

    # try/finally so the webcam and the windows are released even when a
    # frame raises half-way through the loop
    try:
        # keep looping, until interrupted
        while True:
            # get the current frame
            (grabbed, frame) = camera.read()

            if not grabbed:
                break

            # resize the frame
            frame = imutils.resize(frame, width=700)

            # flip the frame horizontally
            frame = cv2.flip(frame, 1)

            # clone the frame (all drawing happens on the clone)
            clone = frame.copy()

            # get the height and width of the frame
            (height, width) = frame.shape[:2]

            # get the ROI
            roi = frame[top:bottom, right:left]

            # convert the roi to grayscale and blur it
            gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
            gray = cv2.GaussianBlur(gray, (7, 7), 0)

            # the first 30 frames only feed the background model
            if num_frames < 30:
                run_avg(gray, accumWeight)
                if num_frames == 1:
                    print("[STATUS] Please wait, calibrating...")
                elif num_frames == 29:
                    print("[STATUS] Calibration successful! You can now put your hand in the box.")
            elif recording:
                # segment the hand region
                hand = segment(gray)

                # check whether hand region is segmented
                if hand is not None:
                    # unpack the thresholded image and segmented region
                    (thresholded, segmented) = hand

                    # draw the segmented region, shifted from ROI to frame coordinates
                    cv2.drawContours(clone, [segmented + (right, top)], -1, (0, 0, 255))

                    # describe the hand and store it as a training sample
                    feature_vector = create_feature_vector(thresholded, segmented)
                    training_data_feature_vectors__hand_open.append(feature_vector)

                    # show the thresholded image; kept inside this branch
                    # because `thresholded` only exists when a hand was found
                    cv2.imshow("Thresholded", thresholded)

            # draw the ROI box
            cv2.rectangle(clone, (left, top), (right, bottom), (0,255,0), 2)

            # increment the number of frames
            num_frames += 1

            # display the frame with segmented hand
            cv2.imshow("Video Feed", clone)

            # observe the keypress by the user
            keypress = cv2.waitKey(1) & 0xFF

            # if the user pressed "q", then stop looping
            if keypress == ord("q"):
                break

            # if the user pressed "r", toggle recording on/off
            elif keypress == ord("r"):
                recording = not recording
    finally:
        # After the loop release the cap object
        camera.release()
        # Destroy all the windows
        cv2.destroyAllWindows()
        # a few extra waitKey calls; presumably lets the GUI process the
        # close events - TODO confirm this is still needed
        for i in range (1,5):
            cv2.waitKey(1)

[STATUS] Please wait, calibrating...
[STATUS] Calibration successful! You can now put your hand in the box.


In [12]:
print(training_data_feature_vectors__hand_open)
print([a.size for a in training_data_feature_vectors__hand_open])

[array([102.625     ,  52.595005  ,  83.74366   ,  63.134777  ,
        75.3123    ,   8.264813  ,  66.        , 414.69022   ,
         0.46453488,  10.87385   , 100.80161   ,  26.781635  ,
         4.        ], dtype=float32), array([102.25      ,  52.670555  ,  84.17244   ,  61.77378   ,
        75.57524   ,   8.768315  ,  67.        , 420.97342   ,
         0.46947673,  10.93143   , 100.31853   ,  27.210442  ,
         4.        ], dtype=float32), array([102.25      ,  52.615944  ,  83.95237   ,  61.77378   ,
        75.35298   ,   8.742329  ,  67.        , 420.97342   ,
         0.45959303,  10.81596   , 100.18039   ,  27.137466  ,
         4.        ], dtype=float32), array([102.125     ,  52.56291   ,  83.546394  ,  61.77378   ,
        75.08614   ,   8.678797  ,  66.        , 414.69022   ,
         0.46453488,  10.87385   , 100.67253   ,  26.53238   ,
         4.        ], dtype=float32), array([102.125    ,  52.56291  ,  83.546394 ,  61.77378  ,  75.08614  ,
         8.678797 ,

## Training for Hand closed

In [13]:
# Collect "hand closed" training samples from the webcam.
# Keys in the video window: "r" toggles recording, "q" quits.
recording = False
training_data_feature_vectors__hand_closed = []

if __name__ == "__main__":
    # weight handed to run_avg() for each calibration frame
    accumWeight = 0.5

    # get the reference to the webcam
    camera = cv2.VideoCapture(0)

    # region of interest (ROI) coordinates
    top, right, bottom, left = 10, 350, 225, 590

    # initialize num of frames
    num_frames = 0

    # calibration indicator (assigned here but not read anywhere in this cell)
    calibrated = False

    # try/finally so the webcam and the windows are released even when a
    # frame raises half-way through the loop
    try:
        # keep looping, until interrupted
        while True:
            # get the current frame
            (grabbed, frame) = camera.read()

            if not grabbed:
                break

            # resize the frame
            frame = imutils.resize(frame, width=700)

            # flip the frame horizontally
            frame = cv2.flip(frame, 1)

            # clone the frame (all drawing happens on the clone)
            clone = frame.copy()

            # get the height and width of the frame
            (height, width) = frame.shape[:2]

            # get the ROI
            roi = frame[top:bottom, right:left]

            # convert the roi to grayscale and blur it
            gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
            gray = cv2.GaussianBlur(gray, (7, 7), 0)

            # the first 30 frames only feed the background model
            if num_frames < 30:
                run_avg(gray, accumWeight)
                if num_frames == 1:
                    print("[STATUS] Please wait, calibrating...")
                elif num_frames == 29:
                    print("[STATUS] Calibration successful! You can now put your hand in the box.")
            elif recording:
                # segment the hand region
                hand = segment(gray)

                # check whether hand region is segmented
                if hand is not None:
                    # unpack the thresholded image and segmented region
                    (thresholded, segmented) = hand

                    # draw the segmented region, shifted from ROI to frame coordinates
                    cv2.drawContours(clone, [segmented + (right, top)], -1, (0, 0, 255))

                    # create the feature vector from the hand
                    feature_vector = create_feature_vector(thresholded, segmented)

                    # add the feature vector to a list
                    training_data_feature_vectors__hand_closed.append(feature_vector)

                    # show the thresholded image; kept inside this branch
                    # because `thresholded` only exists when a hand was found
                    cv2.imshow("Thresholded", thresholded)

            # draw the ROI box
            cv2.rectangle(clone, (left, top), (right, bottom), (0,255,0), 2)

            # increment the number of frames
            num_frames += 1

            # display the frame with segmented hand
            cv2.imshow("Video Feed", clone)

            # observe the keypress by the user
            keypress = cv2.waitKey(1) & 0xFF

            # if the user pressed "q", then stop looping
            if keypress == ord("q"):
                break

            # if the user pressed "r", toggle recording on/off
            elif keypress == ord("r"):
                recording = not recording
    finally:
        # After the loop release the cap object
        camera.release()
        # Destroy all the windows
        cv2.destroyAllWindows()
        # a few extra waitKey calls; presumably lets the GUI process the
        # close events - TODO confirm this is still needed
        for i in range (1,5):
            cv2.waitKey(1)

[STATUS] Please wait, calibrating...
[STATUS] Calibration successful! You can now put your hand in the box.


In [14]:
print(training_data_feature_vectors__hand_closed)
print([a.shape for a in training_data_feature_vectors__hand_closed])

[array([101.5     ,  70.25489 ,  89.04493 ,  70.035706,  78.59262 ,
         6.797034,  71.      , 446.10617 ,   0.6375  ,  12.734053,
        95.76138 ,  48.79127 ,   1.      ], dtype=float32), array([102.        ,  70.098145  ,  89.04493   ,  70.00714   ,
        78.70826   ,   6.7830515 ,  71.        , 446.10617   ,
         0.64244187,  12.78319   ,  95.94552   ,  48.897423  ,
         1.        ], dtype=float32), array([102.25      ,  70.02991   ,  89.587944  ,  70.00714   ,
        78.77512   ,   6.9985723 ,  71.        , 446.10617   ,
         0.64738375,  12.832137  ,  96.27135   ,  48.698803  ,
         1.        ], dtype=float32), array([102.25     ,  69.99598  ,  89.587944 ,  70.724815 ,  79.020454 ,
         6.7651696,  71.       , 446.10617  ,   0.6523256,  12.880897 ,
        96.45783  ,  48.812176 ,   0.       ], dtype=float32), array([102.       ,  70.09101  ,  90.426765 ,  70.724815 ,  79.471016 ,
         7.0609765,  72.       , 452.38934  ,   0.6078488,  12.43511  ,


## Create SVM Model

In [15]:
# C-support vector classifier with a linear kernel; the termination
# criterion is an iteration cap of 100.
svm = cv2.ml.SVM_create()
svm.setKernel(cv2.ml.SVM_LINEAR)
svm.setType(cv2.ml.SVM_C_SVC)
svm.setTermCriteria(
    (cv2.TERM_CRITERIA_MAX_ITER, 100, 1e-6)
)

## Train SVM Model

In [16]:
# Build the training matrix (one feature vector per row) and the matching
# label column: 1 = hand open, 2 = hand closed.
num_open = len(training_data_feature_vectors__hand_open)
num_closed = len(training_data_feature_vectors__hand_closed)

# fail early with a readable message instead of an opaque OpenCV error
if num_open == 0 or num_closed == 0:
    raise ValueError(
        "Record at least one sample for each gesture before training "
        f"(hand open: {num_open}, hand closed: {num_closed})."
    )

# float32 samples and int32 class labels, stated explicitly rather than
# relying on the bindings to cast NumPy's default integer type
trainingData = np.array(
    training_data_feature_vectors__hand_open + training_data_feature_vectors__hand_closed,
    dtype=np.float32,
)
labels = np.array([1] * num_open + [2] * num_closed, dtype=np.int32)

# Fix the labels! One label per row, as a column vector.
labels = labels.reshape(-1, 1)

print (trainingData.shape)
print (labels.shape)

# Train the SVM using the training feature vectors
svm.train(trainingData, cv2.ml.ROW_SAMPLE, labels)

(1318, 13)
(1318, 1)


True

## Randomly test SVM

In [17]:
# Sanity check: classify one randomly chosen training sample.
num = random.randrange(trainingData.shape[0])

print ("Training Data index #", num, " is ", trainingData[num])
# stored as a column vector; transposed below into a single-row sample
test_data = trainingData[num].reshape(-1, 1)

print (test_data.T.shape)
svm.predict(test_data.T)

Training Data index # 1006  is  [105.5        77.012985   98.4124     59.39697    85.544365   15.5454645
  78.        490.08844     0.6968023  13.311614   82.88802    46.413136
   1.       ]
(1, 13)


(0.0, array([[2.]], dtype=float32))

## Initialize

In [18]:
## Initialize Selenium Browser

# https://sites.google.com/a/chromium.org/chromedriver/home
# Download ChromeDriver and extract. Then enter the fullpath here.
PATH_TO_CHROME = r"/Users/saadbazaz/Documents/FAST Studies/Digital Image Processing Lab/Project/Dino-Game-Hand-Gestures/t-rex-runner-gh-pages/chromedriver"

# Full path to the index.html of your local copy of the Dino Game
DINO_GAME_LINK = r"/Users/saadbazaz/Documents/FAST Studies/Digital Image Processing Lab/Project/Dino-Game-Hand-Gestures/t-rex-runner-gh-pages/index.html"

# Start Chrome through the driver binary and open the game from disk.
# NOTE(review): passing the driver path positionally may not be accepted by
# newer Selenium releases - confirm against the installed version.
browser = webdriver.Chrome(PATH_TO_CHROME)
res = browser.get(f"file://{DINO_GAME_LINK}")

In [19]:
## Initialize Selenium Action Chains
# Action chain bound to the game browser. The game loop further down
# rebinds `actions` to a fresh ActionChains for each key event it sends.

actions = ActionChains(browser)

## Run game (Predict hand gesture using SVM)

In [20]:

if __name__ == "__main__":
    # Play the Dino Game with hand gestures: every frame after calibration
    # is segmented, classified by the SVM, and turned into key events for
    # the Selenium browser. SVM label 1 triggers a jump (SPACE), label 2
    # holds the duck key (DOWN). Press "q" in the video window to quit.

    # weight handed to run_avg() for each calibration frame
    accumWeight = 0.5

    # get the reference to the webcam
    camera = cv2.VideoCapture(0)

    # region of interest (ROI) coordinates
    top, right, bottom, left = 10, 350, 225, 590

    # initialize num of frames
    num_frames = 0

    # calibration indicator (assigned here but not read anywhere in this cell)
    calibrated = False

    # try/finally so the webcam and the windows are released even when a
    # frame or a browser action raises half-way through the loop
    try:
        # keep looping, until interrupted
        while True:
            # get the current frame
            (grabbed, frame) = camera.read()

            # no frame delivered: stop instead of crashing in imutils.resize
            if not grabbed:
                break

            # resize the frame
            frame = imutils.resize(frame, width=700)

            # flip the frame horizontally
            frame = cv2.flip(frame, 1)

            # clone the frame (all drawing happens on the clone)
            clone = frame.copy()

            # get the height and width of the frame
            (height, width) = frame.shape[:2]

            # get the ROI
            roi = frame[top:bottom, right:left]

            # convert the roi to grayscale and blur it
            gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
            gray = cv2.GaussianBlur(gray, (7, 7), 0)

            # the first 30 frames only feed the background model
            if num_frames < 30:
                run_avg(gray, accumWeight)
                if num_frames == 1:
                    print("[STATUS] Please wait, calibrating...")
                elif num_frames == 29:
                    print("[STATUS] Calibration successful! You can now put your hand in the box.")
            else:
                # segment the hand region
                hand = segment(gray)

                # check whether hand region is segmented
                if hand is not None:
                    # unpack the thresholded image and segmented region
                    (thresholded, segmented) = hand

                    # draw the segmented region, shifted from ROI to frame coordinates
                    cv2.drawContours(clone, [segmented + (right, top)], -1, (0, 0, 255))

                    # describe the hand as a feature vector
                    feature_vector = create_feature_vector(thresholded, segmented)

                    # Fix the shape of the feature vector (column here,
                    # transposed into a single-row sample for predict)
                    feature_vector = np.reshape(feature_vector, (feature_vector.size,1))

                    # predicted class label of the single sample
                    fingers = svm.predict(feature_vector.T)[1][0][0]

                    if fingers == 1:
                        # Jump, unless the previous frame already jumped
                        if prev_pos != 1:
                            actions = ActionChains(browser)
                            if prev_pos == 2:
                                # stop ducking before jumping
                                actions.key_up(Keys.DOWN)
                            actions.send_keys(Keys.SPACE)
                            actions.perform()

                        prev_pos = 1

                    elif fingers == 2:
                        # Duck: press and hold DOWN unless it is already held
                        if prev_pos != 2:
                            actions = ActionChains(browser)
                            actions.key_down(Keys.DOWN)
                            actions.perform()

                        prev_pos = 2

                    else:
                        # Keep running: release DOWN if it was held.
                        # NOTE(review): the SVM in this notebook is trained on
                        # labels 1 and 2 only, so this branch is presumably
                        # never reached - confirm.
                        if prev_pos == 2:
                            actions = ActionChains(browser)
                            actions.key_up(Keys.DOWN)
                            actions.perform()

                        prev_pos = 0

                    # overlay the predicted label on the video feed
                    cv2.putText(clone, str(fingers), (70, 45), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,0,255), 2)

                    # show the thresholded image
                    cv2.imshow("Thesholded", thresholded)

            # draw the ROI box
            cv2.rectangle(clone, (left, top), (right, bottom), (0,255,0), 2)

            # increment the number of frames
            num_frames += 1

            # display the frame with segmented hand
            cv2.imshow("Video Feed", clone)

            # observe the keypress by the user
            keypress = cv2.waitKey(1) & 0xFF

            # if the user pressed "q", then stop looping
            if keypress == ord("q"):
                break
    finally:
        # After the loop release the cap object
        camera.release()
        # Destroy all the windows
        cv2.destroyAllWindows()
        # a few extra waitKey calls; presumably lets the GUI process the
        # close events - TODO confirm this is still needed
        for i in range (1,5):
            cv2.waitKey(1)

[STATUS] Please wait, calibrating...
[STATUS] Calibration successful! You can now put your hand in the box.
