In [3]:
# import the necessary libraries
import cv2
import numpy as np
import matplotlib.pyplot as plt
from math import hypot
import pyautogui
import dlib

###### Step 1: Face Detection

In [4]:
# network definition (layer architecture) of the Caffe face detector
model_arch = './deploy.prototxt.txt'

# weights file that belongs to the architecture above
model_weights = './res10_300x300_ssd_iter_140000.caffemodel'

# build the OpenCV DNN network from the architecture/weights pair;
# `net` is the module-level model used by face_detector
net = cv2.dnn.readNetFromCaffe(model_arch, model_weights)

In [8]:
def face_detector(image, threshold=0.7):
    """Locate the most confident face in ``image`` with the module-level ``net``.

    Parameters
    ----------
    image : numpy.ndarray
        Frame to search. Its first two dimensions are read as (height, width).
    threshold : float, optional
        A detection is accepted only if its confidence is greater than this
        value (default 0.7).

    Returns
    -------
    tuple
        ``(annotated_image, (x, y, x1, y1), True, confidence)`` when a face is
        accepted; ``annotated_image`` is a copy of ``image`` with the bounding
        box drawn on it and the coordinates are integer pixel positions of the
        top-left and bottom-right corners.
        ``(image, (), False, 0)`` when no detection passes the threshold (the
        input image is returned as-is, not copied).
    """
    # get the height, width of the image
    frame_h, frame_w = image.shape[:2]
    # apply mean subtraction and create a 4D blob (300x300) from the image
    blob = cv2.dnn.blobFromImage(image, 1.0, (300, 300), (104.0, 177.0, 123.0))
    # feed the blob to the network and run the forward pass
    net.setInput(blob)
    # one row per candidate: confidence at index 2, box corners at indexes 3-6
    detections = net.forward()[0, 0]

    # with zero candidates np.argmax would raise ValueError, so report
    # "no face" explicitly instead
    if detections.shape[0] == 0:
        return (image, (), False, 0)

    # keep only the candidate with the highest confidence
    best = detections[np.argmax(detections[:, 2])]
    confidence = best[2]
    if not confidence > threshold:
        return (image, (), False, 0)

    # the box values are scaled to the range 0-1; multiply the x/y values by
    # the image width/height to recover coordinates in pixels
    box = best[3:7] * np.array([frame_w, frame_h, frame_w, frame_h])
    # pixel positions are relative to the top-left corner and must be integers
    (x, y, x1, y1) = box.astype("int")
    # draw the bounding box on a copy so the caller's frame is left untouched
    ted_frame = cv2.rectangle(image.copy(), (x, y), (x1, y1), (0, 255, 255), 2)
    return (ted_frame, (x, y, x1, y1), True, confidence)

In [11]:
# test the face detector
# get the video feed from the webcam
cap = cv2.VideoCapture(0)

# set the window to a normal one so we can adjust it
cv2.namedWindow('face detection', cv2.WINDOW_NORMAL)

# try/finally guarantees the webcam and the window are released even when
# something inside the loop raises (otherwise the camera stays locked)
try:
    while True:
        # read the frame
        ret, frame = cap.read()

        # break if frame is not returned
        if not ret:
            break

        # detect face in the frame (the frame is shown un-mirrored here)
        annotated_frame, coords, status, conf = face_detector(frame)

        # display the frame
        cv2.imshow('face detection', annotated_frame)

        # break the loop if 'q' key is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # when everything is done, release the capture and destroy the window
    cap.release()
    cv2.destroyAllWindows()

###### Step 2: Landmarks Detection

In [12]:
# landmark model file; the predictor built from it is the module-level
# object that detect_landmarks calls
landmark_model_path = './shape_predictor_68_face_landmarks.dat'
predictor = dlib.shape_predictor(landmark_model_path)

In [13]:
def detect_landmarks(box,image):
    """Run the landmark predictor inside ``box`` and mark the points on ``image``.

    Parameters
    ----------
    box : sequence of 4 ints
        Face bounding box as ``(x, y, x1, y1)``.
    image : numpy.ndarray
        BGR frame containing the face. The landmark dots are drawn directly
        on this array (it is not copied).

    Returns
    -------
    tuple
        ``(annotated_image, landmarks)`` where ``landmarks`` is the array
        produced by ``shape_to_np``.
    """
    # the predictor is fed a gray-scale version of the frame
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # turn the box corners into the rectangle type the predictor expects
    left, top, right, bottom = box
    face_rect = dlib.rectangle(left, top, right, bottom)

    # predict the shape and convert it to an array of (x, y) rows
    landmarks = shape_to_np(predictor(gray_image := gray, face_rect)) if False else shape_to_np(predictor(gray, face_rect))

    # mark every landmark with a small filled circle
    for (px, py) in landmarks:
        annoted_image = cv2.circle(image, (px, py), 2, (0, 127, 255), -1)

    return annoted_image, landmarks

The helper function below converts the shape object returned by the predictor into a more convenient NumPy array. It is called by the `detect_landmarks` function defined above, so this cell must be run before `detect_landmarks` is used.

In [14]:
def shape_to_np(shape, num_landmarks=68):
    """Copy the landmark points of a predictor shape into a NumPy array.

    Parameters
    ----------
    shape : object
        Result of the landmark predictor; must provide ``part(i)`` returning
        an object with ``x`` and ``y`` attributes.
    num_landmarks : int, optional
        Number of points to copy (default 68, matching the 68-point model
        this notebook loads).

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(num_landmarks, 2)``; row ``i`` holds the
        ``(x, y)`` coordinates of landmark ``i``.
    """
    # one (x, y) row per landmark
    landmarks = np.zeros((num_landmarks, 2), dtype='int')

    # copy every point; stopping early would leave the remaining rows at (0, 0)
    for i in range(num_landmarks):
        point = shape.part(i)
        landmarks[i] = (point.x, point.y)

    return landmarks

In [26]:
# test the detect_landmark function with realtime feed
# get the video feed from webcam
cap = cv2.VideoCapture(0)

# set the window to a normal one so we can adjust it
cv2.namedWindow('landmark', cv2.WINDOW_NORMAL)

# try/finally guarantees the webcam and the window are released even when
# something inside the loop raises
try:
    while True:
        # read the frames
        ret, frame = cap.read()

        # break if frame is not returned
        if not ret:
            break

        # flip the frame horizontally
        frame = cv2.flip(frame, 1)

        # detect the face
        face_image, box_cord, status, conf = face_detector(frame)

        # default to the plain frame so there is always a current image to
        # show; without this a face-less first frame raises NameError and
        # later face-less frames would re-display a stale image
        lm_image = frame
        if status:
            # get the landmarks for the face region in the frame
            lm_image, landmarks = detect_landmarks(box_cord, frame)

        # display the frame
        cv2.imshow('landmark', lm_image)

        # break the loop if 'q' key pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # when done, release the capture and destroy the window
    cap.release()
    cv2.destroyAllWindows()

###### Step 3: Jump Control mechanism

In [32]:
def is_mouth_open(landmarks, ar_threshold =0.7):
    """Decide whether the mouth is open from its aspect ratio.

    Parameters
    ----------
    landmarks : sequence of (x, y)
        Landmark coordinates indexable by landmark number; points 48, 50,
        52, 54, 56 and 58 are read.
    ar_threshold : float, optional
        The mouth counts as open when the aspect ratio is greater than this
        value (default 0.7).

    Returns
    -------
    tuple
        ``(is_open, MAR)`` where ``MAR`` is the mouth aspect ratio
        ``(A + B) / (2 * C)``. When the horizontal distance ``C`` is zero the
        ratio is undefined and ``(False, 0.0)`` is returned.
    """
    def _distance(i, j):
        # euclidean distance between landmark i and landmark j
        return hypot(landmarks[i][0] - landmarks[j][0],
                     landmarks[i][1] - landmarks[j][1])

    # A, B are the two vertical distances, C is the horizontal distance
    A = _distance(50, 58)
    B = _distance(52, 56)
    C = _distance(48, 54)

    # coincident points 48/54 would make the ratio divide by zero
    if C == 0:
        return False, 0.0

    # mouth aspect ratio: the vertical distances A, B are averaged
    MAR = (A + B) / (2.0 * C)

    # open when the ratio is greater than the threshold
    return bool(MAR > ar_threshold), MAR

In [28]:
# test the mouth function with the realtime feed
cap = cv2.VideoCapture(0)

# try/finally guarantees the webcam and the window are released even when
# something inside the loop raises
try:
    while True:
        ret, frame = cap.read()
        # stop when no frame is returned
        if not ret:
            break
        # flip the frame horizontally
        frame = cv2.flip(frame, 1)
        face_image, box_coords, status, conf = face_detector(frame)
        if status:
            # detect_landmarks draws on `frame` itself, so only the landmark
            # coordinates are needed from its result
            _, landmarks = detect_landmarks(box_coords, frame)
            mouth_status, _ = is_mouth_open(landmarks, 0.55)
            cv2.putText(frame, 'Is mouth open: {}'.format(mouth_status), (20, 20),
                        cv2.FONT_HERSHEY_COMPLEX, 0.65, (0, 127, 255), 2)
        cv2.imshow('mouth', frame)
        # break the loop if 'q' key is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()

###### Step 4: Crouch Control Mechanism

In [46]:
def face_proximity(box,image, proximity_threshold = 325):
    """Check whether the face box diagonal exceeds ``proximity_threshold``.

    A guide rectangle is drawn on ``image`` (in place). It is centred on the
    face box, has the same aspect ratio, and its diagonal equals the
    threshold, so the result turns True once the face box outgrows it.

    Parameters
    ----------
    box : sequence of 4 numbers
        Face bounding box as ``(x, y, x1, y1)``.
    image : numpy.ndarray
        Frame on which the guide rectangle is drawn.
    proximity_threshold : number, optional
        Diagonal length above which the face counts as close (default 325).

    Returns
    -------
    tuple
        ``(is_close, diagonal)`` with ``diagonal`` the length of the face
        box diagonal.
    """
    # width and height of the face bounding box
    face_width = box[2] - box[0]
    face_height = box[3] - box[1]

    # angle of the box diagonal; arctan2 copes with a zero-width box, where
    # face_height / face_width would divide by zero
    theta = np.arctan2(face_height, face_width)

    # a rectangle with that diagonal angle and a diagonal of length
    # proximity_threshold has these side lengths
    guide_height = np.sin(theta) * proximity_threshold
    guide_width = np.cos(theta) * proximity_threshold

    # the guide rectangle shares its mid-point with the face bounding box
    mid_x, mid_y = (box[2] + box[0]) / 2, (box[3] + box[1]) / 2

    # top-left and bottom-right corners as integer pixel positions
    guide_topleft = int(mid_x - (guide_width / 2)), int(mid_y - (guide_height / 2))
    guide_bottomright = int(mid_x + (guide_width / 2)), int(mid_y + (guide_height / 2))

    # draw the guide rectangle for the user
    cv2.rectangle(image, guide_topleft, guide_bottomright, (0, 255, 255), 2)

    # diagonal length of the face bounding box
    diagonal = hypot(face_width, face_height)

    # close when the diagonal is greater than the threshold
    return bool(diagonal > proximity_threshold), diagonal

In [47]:
# Test the face proximity check with the realtime feed
# Get the video feed from webcam
cap = cv2.VideoCapture(0)
cv2.namedWindow('Face proximity', cv2.WINDOW_NORMAL)

# try/finally guarantees the webcam and the window are released even when
# something inside the loop raises
try:
    while True:
        ret, frame = cap.read()
        # Stop when no frame is returned
        if not ret:
            break
        # Flip the frame horizontally
        frame = cv2.flip(frame, 1)
        face_image, box_coords, status, conf = face_detector(frame)
        if status:
            # Draws the guide rectangle on face_image and reports closeness
            is_face_close, _ = face_proximity(box_coords, face_image,
                                              proximity_threshold=325)
            cv2.putText(face_image, 'Is Face Close: {}'.format(is_face_close),
                        (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.65, (0, 127, 255), 2)
        cv2.imshow('Face proximity', face_image)
        # Break the loop if 'q' key is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()


In [34]:
# Calibration: derive ar_threshold and prox_threshold from the user's current
# (resting) mouth aspect ratio and face size. The values from the last frame
# with a detected face remain in `ar_threshold` / `prox_threshold`.
cap = cv2.VideoCapture(0)

# try/finally guarantees the webcam and the window are released even when
# something inside the loop raises
try:
    while True:
        ret, frame = cap.read()
        # stop when no frame is returned
        if not ret:
            break
        # flip the frame horizontally
        frame = cv2.flip(frame, 1)
        face_image, box_cords, status, conf = face_detector(frame)
        if status:
            lam_image, landmarks = detect_landmarks(box_cords, frame)
            _, mouth_ar = is_mouth_open(landmarks)
            _, prox = face_proximity(box_cords, face_image)
            # calculate the threshold values as a margin above the current readings
            ar_threshold = mouth_ar * 1.4
            prox_threshold = prox * 1.3
            cv2.putText(frame, 'Aspect Ratio threshold: {:.2f}'.format(ar_threshold),
                        (20, 20), cv2.FONT_HERSHEY_COMPLEX, 0.65, (0, 127, 255), 2)
            # this line shows the proximity value, so label it accordingly
            cv2.putText(frame, 'Proximity threshold: {:.2f}'.format(prox_threshold),
                        (20, 50), cv2.FONT_HERSHEY_COMPLEX, 0.65, (0, 127, 255), 2)
        # show the frame and poll the keyboard on every iteration; when this
        # only happens for frames with a face, the window freezes and 'q'
        # cannot stop the loop while no face is visible
        cv2.imshow('cal', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()

###### Step 6: Automate the process

In [35]:
# Quick check that pyautogui can drive the mouse: send a right-button click.
# this will open a context menu
pyautogui.click(button='right')

In [36]:
# Quick check that pyautogui can drive the keyboard.
# press space bar. this will scroll down the page in some browsers
pyautogui.press('space')

In [50]:
# Control loop: open mouth -> hold 'space' (jump), face close -> hold 'down'
# (crouch). Uses `ar_threshold` and `prox_threshold` from the calibration cell.
# Get the video feed from webcam
cap = cv2.VideoCapture(0)
cv2.namedWindow('Dino with OpenCV', cv2.WINDOW_NORMAL)
# By default each key press is followed by a 0.1 second pause
pyautogui.PAUSE = 0.0
# The fail-safe triggers an exception in case mouse is moved to corner of the screen
#pyautogui.FAILSAFE = False

# try/finally guarantees the webcam and the window are released even when
# something inside the loop raises (e.g. the pyautogui fail-safe)
try:
    while True:
        ret, frame = cap.read()
        # Stop when no frame is returned
        if not ret:
            break
        # Flip the frame horizontally
        frame = cv2.flip(frame, 1)
        face_image, box_coords, status, conf = face_detector(frame)
        if status:
            # Detect landmarks if a face is found (they are drawn on `frame`)
            _, landmarks = detect_landmarks(box_coords, frame)
            # Check if mouth is open
            is_open, _ = is_mouth_open(landmarks, ar_threshold)
            # If the mouth is open trigger space key Down event to jump
            if is_open:
                pyautogui.keyDown('space')
                mouth_status = 'Open'
            else:
                # Else the space key is Up
                pyautogui.keyUp('space')
                mouth_status = 'Closed'
            # Display the mouth status on the frame
            cv2.putText(frame, 'Mouth: {}'.format(mouth_status),
                        (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.65, (0, 127, 255), 2)
            # Check the proximity of the face against the calibrated threshold
            # (otherwise the calibration result for proximity is never used)
            is_closer, _ = face_proximity(box_coords, frame, prox_threshold)
            if is_closer:
                pyautogui.keyDown('down')
            else:
                pyautogui.keyUp('down')
        cv2.imshow('Dino with OpenCV', frame)
        # Break the loop if 'q' key is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()
    # Do not leave a key held down after the loop ends
    for held_key in ('space', 'down'):
        pyautogui.keyUp(held_key)
