# Introduction to Artificial Intelligence 

## Benjamin Frost & Sophie Chalklin 
#### December 2020

This notebook uses a webcam to classify the gesture of a hand.

A fist and the index pointing finger can be classified.

It is recommended to point your webcam to a surface with a plain background, and hold your hand directly upward.

#### Prerequisites

##### Libraries

This notebook was developed with the following library versions:

opencv-contrib-python==4.4.0.46<br />
numpy==1.19.3<br />
Keras==2.4.3<br />
tensorflow-gpu==2.4.0<br />
imutils==0.5.3

##### Model

The model HandModelV# accompanies this notebook and must be placed in the folder "Model Versions/" relative to this notebook. By default, the notebook loads the most recent model in this folder; however, during development varying levels of success were found with the models, so I would recommend trying different models to see which works best.

Also accompanying this notebook is the notebook used to train the CNN classifier for this project.

#### Thanks and partial credit for some data filtering code is due to BhaskarP9 from https://www.instructables.com/Opencv-Python-Hand-Detection-and-Tracking/. Code taken from this website is referenced with the @BhaskarP9 tag.

In [30]:
import cv2
import numpy as np
import keras
import imutils
import matplotlib.pyplot as plt

In [31]:
def loadModel():
    """Load the most recent hand-gesture model from "Model Versions/".

    Model files are expected to be named HandModelV1.h5, HandModelV2.h5, ...
    The version numbers are probed upwards from 1 and the last one of the
    consecutive run that exists on disk is loaded.

    Returns:
        The Keras model loaded from the highest consecutive version found.

    Raises:
        FileNotFoundError: if not even HandModelV1.h5 exists.
    """
    import os

    modelDir = "Model Versions/HandModelV"

    # This method always gets the most up to date model: keep stepping
    # forward while the next version's file exists.
    version = 0
    while os.path.isfile(modelDir + str(version + 1) + ".h5"):
        version = version + 1

    # Fail with a clear message instead of asking Keras to open a
    # non-existent "HandModelV0.h5".
    if version == 0:
        raise FileNotFoundError(
            "No model found: expected at least " + modelDir + "1.h5"
        )

    modelPath = modelDir + str(version) + ".h5"
    model = keras.models.load_model(modelPath)

    print("Using model " + modelPath)

    return model

# Load the classifier once; classifyHand() reads this module-level `model`.
model = loadModel()

Using model Model Versions/HandModelV5.h5


In [32]:
# OpenCV runtime settings: turn on its optimised code and use 4 threads.
cv2.setUseOptimized(True);
cv2.setNumThreads(4);

# Index of the next training image; saveImage() advances it past files that
# already exist on disk.
version = 1
# Path prefix for saved training images; saveImage() appends the index and
# ".jpg" to it.
# NOTE(review): this name shadows the built-in dir().
dir = "../../../../HandsData/OpenCVHandsData/None/none"

def saveImage(handBox):
    """Save a hand crop as the next unused "<dir><version>.jpg" file.

    The images are processed at a later date to train the image classifier.
    Uses the module-level path prefix `dir` and advances the module-level
    counter `version` past every index that already has a file on disk.

    Saving is best-effort: a failed write is reported on stdout and the
    function returns normally so the capture loop keeps running.

    Args:
        handBox: image array to write.
    """
    import os

    global version

    # Skip indices that are already taken so existing images are never
    # overwritten.
    while os.path.isfile(dir + str(version) + ".jpg"):
        version = version + 1

    target = dir + str(version) + ".jpg"

    try:
        # imwrite signals most failures through its return value rather
        # than an exception, so check both.
        written = cv2.imwrite(target, handBox)
    except cv2.error as err:
        print("Could not save " + target + ": " + str(err))
        return

    if not written:
        print("Could not save " + target)
    

In [33]:
sizeX = 60
sizeY = 100

def resize(image):
    """Fit a BGR hand crop onto a black sizeX-by-sizeY RGB canvas.

    The box around the hand can vary a great deal from frame to frame, while
    the classifier needs every input image to have the same size. The crop is
    scaled with its aspect ratio preserved and placed in the top-left corner
    of the canvas.

    Args:
        image: BGR image array of any size.

    Returns:
        Array of shape (sizeY, sizeX, 3) holding the scaled RGB image; the
        part of the canvas the image does not cover stays zero (black).
    """

    # OpenCV frames are BGR; convert to RGB before scaling.
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # First try making the image exactly as tall as the canvas.
    scaled = imutils.resize(rgb, height=sizeY)

    # If that makes it wider than the canvas, scale to the canvas width
    # instead.
    if scaled.shape[1] > sizeX:
        scaled = imutils.resize(rgb, width=sizeX)

    # The scaled image goes in the top-left corner, so whichever option was
    # used leaves a black band either at the bottom or at the right hand side.
    canvas = np.zeros((sizeY, sizeX, 3))
    rows, cols = scaled.shape[:2]
    canvas[:rows, :cols] = scaled

    return canvas

In [34]:
outputText = ""
combinedOutput = []

def classifyHand(handBox):
    """Classify a hand crop as an index-finger point or a fist.

    The most recent result is kept in the module-level `outputText` and
    `combinedOutput`, so that a frame which cannot be resized simply repeats
    the previous frame's result.

    Args:
        handBox: BGR image crop around the hand.

    Returns:
        A tuple (outputText, combinedOutput): the short label of the winning
        class, and a list with one "<class> <score>" string per class.
    """

    # Accesses the globally declared variables
    global outputText, combinedOutput

    # resize() occasionally fails (presumably when the crop handed over by the
    # caller is empty). It only happens once every 100 or 200 frames, so the
    # error is reported and last frame's results are returned.
    # `Exception` rather than a bare except, so that KeyboardInterrupt and
    # SystemExit still stop the program.
    try:
        handBox = resize(handBox)
    except Exception:
        print("Resizing error")
        return outputText, combinedOutput

    # Using the resized image to predict the gesture of the hand.
    # expand_dims adds the leading batch axis; [0] takes the single result.
    pred_hot = model.predict(np.expand_dims(handBox, axis=0))[0]

    # Score 0 belongs to the index-finger class, score 1 to the fist class.
    if pred_hot[0] > pred_hot[1]:
        outputText = "Index "
    else:
        outputText = "Fist "

    combinedOutput = ["Fist: " + str(pred_hot[1]), "Index " + str(pred_hot[0])]

    return outputText, combinedOutput

In [35]:
def removeBackground(frame):
    """Return `frame` with everything outside the foreground mask blacked out.

    Args:
        frame: image to separate into foreground and background.

    Returns:
        Copy of `frame` in which only pixels inside the eroded foreground mask
        are kept.
    """

    # @BhaskarP9

    # NOTE(review): a brand-new subtractor is built on every call, so no
    # background model is carried over from one frame to the next - confirm
    # this is intended.
    subtractor = cv2.createBackgroundSubtractorMOG2(0,50)

    # Separate the foreground and the background in the webcam image, then
    # shrink the foreground mask slightly with a 3x3 erosion.
    foreground = cv2.erode(
        subtractor.apply(frame),
        np.ones((3,3), np.uint8),
        iterations=1,
    )

    return cv2.bitwise_and(frame, frame, mask = foreground)
    

In [36]:
def captureAndCleanData():
    """Grab a webcam frame and find the contours of skin-coloured regions.

    Reads from the module-level `camera`.

    Returns:
        A tuple (frame, contours): the unmodified BGR frame, and the contours
        found in the skin-tone mask of the background-removed frame.

    Raises:
        RuntimeError: if no frame could be read from the webcam.
    """

    # @BhaskarP9

    # Getting the current frame from the webcam
    ret, frame = camera.read()

    # A failed read would otherwise only surface as an obscure OpenCV error
    # further down.
    if not ret or frame is None:
        raise RuntimeError("Could not read a frame from the webcam")

    # Optional smoothing, currently disabled:
    #frame = cv2.bilateralFilter(frame, 10, 65, 120)  # Smoothing
    mask = removeBackground(frame)

    # Converting the masked image to HSV to be able to separate skin tones
    # from the background.
    hsv = cv2.cvtColor(mask, cv2.COLOR_BGR2HSV)

    lower = np.array([0,48,80], dtype="uint8")
    upper = np.array([20,255,255], dtype="uint8")

    # This mask only keeps the data within the bounds defined above.
    skinMask = cv2.inRange(hsv, lower, upper)

    # Find contours in the image. This creates a continuous line around the
    # hand. findContours returns (contours, hierarchy) in OpenCV 4 and
    # (image, contours, hierarchy) in OpenCV 3; [-2] is the contours in both.
    contours = cv2.findContours(skinMask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

    return frame, contours

In [37]:
camera = cv2.VideoCapture(0)

# saveImage() is used for collecting images for training the classifier.
# We only ever want to be either saving the images or classifying them directly.
save = False

# Main capture loop: find the hand in each webcam frame, then either save the
# crop or classify it and draw the result. try/finally makes sure the webcam
# and the windows are released even if a frame fails.
try:
    while True:

        # Exit key: press 'q' while an OpenCV window has focus.

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        frame, contours = captureAndCleanData()

        if len(contours) > 0:

            # Finding the largest area contour, which is most likely the hand.

            res = max(contours, key=cv2.contourArea)

            # Finding the rectangle that bounds the contour.

            x, y, w, h = cv2.boundingRect(res)

            # Adding a quarter of the box size as padding on every side.
            # The padded box is clamped to the frame: NumPy slicing never
            # raises for out-of-range bounds, and a negative start index would
            # wrap around and produce an empty crop near the top/left edge.

            frameHeight, frameWidth = frame.shape[:2]
            top = max(0, int(y - h / 4))
            bottom = min(frameHeight, int(y + h * 1.25))
            left = max(0, int(x - w / 4))
            right = min(frameWidth, int(x + w * 1.25))

            handBox = frame[top:bottom, left:right]

            try:

                cv2.imshow("Box", handBox)

            except cv2.error:

                # Kept as a safety net for crops OpenCV cannot display.

                print("Too close")

            # Adding the box around the hand.
            # NOTE(review): handBox is a view into frame, so this rectangle is
            # also visible in the crop that is saved / classified below. The
            # order is kept as it was because saveImage() collects training
            # images through this same path.

            cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)

            if save:

                saveImage(handBox)

            else:

                # Two outputs from the classifier function - a short label and
                # a verbose per-class version.

                outputText, combinedOutput = classifyHand(handBox)

                # Printing the results of the classifier in the top left corner.
                # This is looped so that more classes can be added to the model
                # in the future.

                spacing = 25
                for i, line in enumerate(combinedOutput):
                    cv2.putText(frame, line, (10, 25 + (i * spacing)), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 255, 0), 2, cv2.LINE_AA)

                # Adding the label on the hand itself.

                cv2.putText(frame, outputText, (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 255, 0), 2, cv2.LINE_AA)

        cv2.imshow("Output", frame)

finally:
    camera.release()
    cv2.destroyAllWindows()

