## Gesture detection

Make sure the notebook is running from the project's root directory before executing the following code cells.  

## Model loading

In [1]:
# Import all the required libraries
import os
import tensorflow as tf
from object_detection.utils import config_util
from object_detection.protos import pipeline_pb2
from google.protobuf import text_format
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.builders import model_builder
import numpy as np

# Utility definitions: workspace paths used to locate the model and its label map
WORKSPACE_PATH = 'Tensorflow/Workspace'
ANNOTATION_PATH = f'{WORKSPACE_PATH}/annotations'
MODEL_PATH = f'{WORKSPACE_PATH}/trained-models'
PRETRAINED_MODEL_PATH = f'{WORKSPACE_PATH}/pre-trained-model'
CONFIG_PATH = f'{MODEL_PATH}/custom_trained_model/pipeline.config'
CHECKPOINT_PATH = f'{MODEL_PATH}/custom_trained_model/'

# Lookup table from an action name to a pair of gesture labels.
# NOTE(review): judging by the label prefixes, the first entry looks like a
# left-hand gesture and the second a right-hand gesture -- confirm against the label map.
actionMap = {
    # Select* actions
    "SelectPrevious": ("LeftL", "RIndexUp"),
    "SelectNext": ("LeftL", "RIndexDown"),
    "SelectChild": ("LeftL", "RIndexRight"),
    "SelectParent": ("LeftL", "RIndexLeft"),

    # Move* actions
    "MoveZPositive": ("LeftA", "RIndexUp"),
    "MoveZNegative": ("LeftA", "RIndexDown"),
    "MoveXPositive": ("LeftA", "RIndexRight"),
    "MoveXNegative": ("LeftA", "RIndexLeft"),
    "MoveYPositive": ("LeftA", "RIndexFront"),
    "MoveYNegative": ("LeftA", "RThumbBack"),
    "MoveClear": ("LeftA", "RClearC"),

    # Rotate* actions
    "RotateZPositive": ("LeftO", "RIndexUp"),
    "RotateZNegative": ("LeftO", "RIndexDown"),
    "RotateXPositive": ("LeftO", "RIndexRight"),
    "RotateXNegative": ("LeftO", "RIndexLeft"),
    "RotateYPositive": ("LeftO", "RIndexFront"),
    "RotateYNegative": ("LeftO", "RThumbBack"),
    "RotateClear": ("LeftO", "RClearC"),

    # Transform* / View* actions
    "TransformGlobal": ("LeftV", "RIndexUp"),
    "TransformLocal": ("LeftV", "RIndexDown"),
    "ViewNext": ("LeftV", "RIndexRight"),
    "ViewPrevious": ("LeftV", "RIndexLeft"),

    # Hide* / Unhide* actions
    "HideCurrent": ("LeftOpenHand", "RIndexRight"),
    "UnhideCurrent": ("LeftOpenHand", "RIndexLeft"),
    "UnhideAll": ("LeftOpenHand", "RIndexUp"),
}

Be sure to set the _checkpoint\_name_ variable to the name of the checkpoint you want to load for your model.  

In [29]:
# Name of the checkpoint file (inside CHECKPOINT_PATH) to restore
checkpoint_name = 'ckpt-37'

# Build an inference-mode detection model from the 'model' section of pipeline.config
configs = config_util.get_configs_from_pipeline_file(CONFIG_PATH)
detection_model = model_builder.build(
    model_config=configs['model'],
    is_training=False,
)

# Restore the chosen checkpoint into the model (the checkpoint must exist on disk).
# expect_partial() is called on the restore status, as in the original one-liner.
ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
restore_status = ckpt.restore(os.path.join(CHECKPOINT_PATH, checkpoint_name))
restore_status.expect_partial()

# Computes the detections from the predictive model
@tf.function
def detect_fn(image):
    """Run preprocess, predict and postprocess of detection_model on image.

    image: input handed to detection_model.preprocess; the camera loop in this
        notebook passes a float32 tensor with a leading batch axis of size 1.
    Returns whatever detection_model.postprocess produces; the camera loop
    treats it as a dict that contains a 'num_detections' entry.
    """
    preprocessed, true_shapes = detection_model.preprocess(image)
    raw_predictions = detection_model.predict(preprocessed, true_shapes)
    return detection_model.postprocess(raw_predictions, true_shapes)

## Detect Gestures in Real-Time

The user should stand 1 to 1.5 meters away from the camera; otherwise the model won't be able to recognize gestures with high enough accuracy.  

In [30]:
import time
from queue import Queue
from threading import Thread
import sys
import cv2 
import numpy as np
from IPython.display import clear_output
import tkinter as tk
import ast
import matplotlib
matplotlib.use('Qt5Agg')
import matplotlib.pyplot as plt
from PyQt5 import QtGui

In [35]:
def handle_close(event, cap):
    """Release the capture device when the figure window is closed.

    event: the event object delivered with the callback (not used here).
    cap: the capture object to release; only its release() method is called.
    """
    cap.release()

# Keyboard interrupt handler
def on_press(event):
    """Stop the program when the 'q' key is pressed in the figure window.

    event: key-press event; only its `key` attribute is read.
    Any key other than 'q' is ignored. On 'q' a message is printed, the global
    capture `cap` is released and every matplotlib figure is closed.
    """
    if event.key != 'q':
        return
    print(f"You pressed {event.key}, the program exited")
    cap.release()
    plt.close('all')

In [36]:
# Load labels from the label map
category_index = label_map_util.create_category_index_from_labelmap(ANNOTATION_PATH+'/label_map.pbtxt')

# Setup camera capture on device index 0
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # The camera loop only runs while cap.isOpened() is true, so without this
    # message a missing or busy camera would make it exit silently.
    print("Warning: could not open camera 0; the camera loop will not run")
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Tk is only used here to query the screen size. The root window is hidden so
# that no blank Tk window is left on screen; the object is kept alive (not
# destroyed) in case other cells still reference `root`.
root = tk.Tk()
root.withdraw()
screen_width = root.winfo_screenwidth()
screen_height = root.winfo_screenheight()

The window is set up through the matplotlib library.  

In [37]:
# Setup output window (interactive mode lets the figure be refreshed from the camera loop)
plt.ion()
gui = plt.figure("Real-time Gesture Detection", facecolor='#1e1e1e', edgecolor='#1e1e1e')


def _release_on_close(event):
    """Forward the figure's close event to handle_close with the global capture."""
    handle_close(event, cap)


gui.canvas.mpl_connect("close_event", _release_on_close)
gui.canvas.mpl_connect('key_press_event', on_press)

# Handle of the displayed image; stays None until the first frame has been shown
result = None

# Title drawn in light gray (#d4d4d4) so it is readable on the dark figure background
title_obj = plt.title('Real-time Sign Detection')
title_obj.set_color('#d4d4d4')

# Check if a configuration file exists, else load a predefined value set
# Fallback bounds used when no local profile has been created
DEFAULT_HSV_CONFIG = {'HL': 0, 'SL': 29, 'VL': 24, 'HH': 40, 'SH': 255, 'VH': 255}
# Built with os.path.join: the previous literal 'Config\config.dat' relied on
# the invalid escape sequence '\c' and only named the right file on Windows.
HSV_CONFIG_PATH = os.path.join('Config', 'config.dat')


def load_hsv_config(config_path=HSV_CONFIG_PATH):
    """Return the HSV threshold dictionary stored at config_path.

    config_path: path of a text file holding a Python dict literal.
    Returns the parsed content when the file exists, otherwise a copy of
    DEFAULT_HSV_CONFIG. A message saying which case applied is printed.
    Raises ValueError or SyntaxError (from ast.literal_eval) when the file
    content is not a valid Python literal.
    """
    if os.path.isfile(config_path):
        print("Configuration file found")
        # The context manager closes the file even if parsing fails
        with open(config_path, "r") as config_file:
            return ast.literal_eval(config_file.read())
    print("Configuration file not found. Run CreateHSVProfile.py to create a local profile")
    return dict(DEFAULT_HSV_CONFIG)


config = load_hsv_config()

Configuration file found


The main camera loop.  

In [38]:
# Camera loop
# Values that do not change between frames are computed once, before the loop
hsv_lower = (config["HL"], config["SL"], config["VL"])
hsv_upper = (config["HH"], config["SH"], config["VH"])
opening_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
# Class ids are shifted by this offset before being looked up in category_index
label_id_offset = 1
max_boxes_to_draw = 5
min_score_thresh = .4

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret or frame is None:
            # A failed read yields no usable frame; stop here instead of
            # crashing inside cv2.cvtColor
            break
        image_np = np.array(frame)

        # Keep only the pixels whose HSV value lies inside the configured range
        HSV_Frame = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
        totalMask = cv2.inRange(HSV_Frame, hsv_lower, hsv_upper)
        totalMask = totalMask.astype(np.uint8)

        # The mask finally undergoes the Opening operator in order to remove pepper noise,
        # then gets applied as a bitwise operator to the frame
        totalMask = cv2.morphologyEx(totalMask, cv2.MORPH_OPEN, opening_kernel)
        output = cv2.bitwise_and(frame, frame, mask=totalMask)

        # The masked image is then converted to a tensor for object detection
        # The detections dictionary is formatted according to the objectdetectionAPI
        input_tensor = tf.convert_to_tensor(np.expand_dims(output, 0), dtype=tf.float32)
        detections = detect_fn(input_tensor)
        num_detections = int(detections.pop('num_detections'))
        # Drop the batch axis and keep only the first num_detections entries
        detections = {key: value[0, :num_detections].numpy()
                      for key, value in detections.items()}
        # Stored back under its proper key (was misspelled 'num_qdetections')
        detections['num_detections'] = num_detections
        detections['detection_classes'] = detections['detection_classes'].astype(np.int64)

        # The bounding boxes of the detected gestures are drawn on top of the original
        # (unmasked) frame with their corresponding label.
        # max_boxes_to_draw limits how many boxes are drawn on a single frame
        # min_score_thresh ignores all detections with a score below that value
        viz_utils.visualize_boxes_and_labels_on_image_array(
            image_np,
            detections['detection_boxes'],
            detections['detection_classes'] + label_id_offset,
            detections['detection_scores'],
            category_index,
            use_normalized_coordinates=True,
            max_boxes_to_draw=max_boxes_to_draw,
            min_score_thresh=min_score_thresh,
            agnostic_mode=False)

        # The output is displayed on an interactive window
        image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
        if result is None:
            # First frame: create the image artist once
            plt.axis("off")
            result = plt.imshow(image_np)
            plt.title("Real-time Gesture Detection")
            plt.show()
        else:
            # Later frames: update the existing artist and redraw the canvas
            result.set_data(image_np)
            gui.canvas.draw()
            gui.canvas.flush_events()
finally:
    # Release the camera even when the loop raises or the kernel is interrupted
    cap.release()

You pressed q, the program exited
