# Real-time Sign Language Detection

## Load the Trained Model

Be sure to be in the root directory of the project before running the following code boxes.

In [5]:
# Import all the required libraries
import os
import tensorflow as tf
from object_detection.utils import config_util
from object_detection.protos import pipeline_pb2
from google.protobuf import text_format
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.builders import model_builder

# Utility definitions
WORKSPACE_PATH = 'Tensorflow/workspace'
ANNOTATION_PATH = WORKSPACE_PATH+'/annotations'
MODEL_PATH = WORKSPACE_PATH+'/models'
PRETRAINED_MODEL_PATH = WORKSPACE_PATH+'/pre-trained-models'
CONFIG_PATH = MODEL_PATH+'/my_ssd_mobnet/pipeline.config'
CHECKPOINT_PATH = MODEL_PATH+'/my_ssd_mobnet/'

Be sure to change the argument of the "ckpt.restore()" function to the name of your resulting model's name

In [6]:
# Load the pipeline.config file and build a detection model
configs = config_util.get_configs_from_pipeline_file(CONFIG_PATH)
detection_model = model_builder.build(model_config=configs['model'], is_training=False)

# Restore the specified checkpoint (it must match an existing model)
ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
ckpt.restore(os.path.join(CHECKPOINT_PATH, 'ckpt-41')).expect_partial()

# Computes the detections from the predictive model
@tf.function
def detect_fn(image):
    image, shapes = detection_model.preprocess(image)
    prediction_dict = detection_model.predict(image, shapes)
    detections = detection_model.postprocess(prediction_dict, shapes)
    return detections

## Detect Gestures

In [7]:
import cv2 
import numpy as np
from IPython.display import clear_output

In [8]:
category_index = label_map_util.create_category_index_from_labelmap(ANNOTATION_PATH+'/label_map.pbtxt')

In [9]:
# Setup camera capture
cap = cv2.VideoCapture(0)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

In [10]:
# Camera loop
while True: 
    ret, frame = cap.read()
    image_np = np.array(frame)
    
    # Skin tone segmentation
    # The frame is converted to HSV, then thresholded according to the Hue value
    # according to the paper: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.718.1964&rep=rep1&type=pdf
    HSV_Frame = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    Hue,Sat,Val = [HSV_Frame[...,i] for i in range(3)]
    HSV_res = np.logical_or(Hue < 35, Hue > 140)
    totalMask = HSV_res.astype(np.uint8)
    
    # Face removal, in order to give less room for error to the gesture classifier
    # A Haar classifier detects the face, then adds its filled bounding box to the mask
    haar_face = cv2.CascadeClassifier()
    haar_face.load(cv2.samples.findFile("HaarClassifiers/HaarFrontalFaceAlt.xml"))
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    gray_frame = cv2.equalizeHist(gray_frame)
    faces = haar_face.detectMultiScale(gray_frame, minSize=(int(0.2*height), int(0.2*height)))
    for (x, y, w, h) in faces:
        vertices = np.array([[x,y-int(0.3*h)], [x+w, y-int(0.3*h)], [x+w, y+h], [x, y+h]])
        cv2.fillPoly(totalMask, pts = [vertices], color =(0,0,0))
    
    # The mask finally undergoes the Opening operator in order to remove pepper noise,
    # then gets applied as a bitwise operator to the frame
    totalMask = cv2.morphologyEx(totalMask, cv2.MORPH_OPEN, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(7, 7)))
    output = cv2.bitwise_and(frame, frame, mask = totalMask)
        
    # The masked image is then converted to a tensor for object detection
    input_tensor = tf.convert_to_tensor(np.expand_dims(output, 0), dtype=tf.float32)
    detections = detect_fn(input_tensor)
    
    num_detections = int(detections.pop('num_detections'))
    detections = {key: value[0, :num_detections].numpy()
                  for key, value in detections.items()}
    detections['num_qdetections'] = num_detections

    # detection_classes should be ints.
    detections['detection_classes'] = detections['detection_classes'].astype(np.int64)

    label_id_offset = 1
    image_np_with_detections = image_np.copy()
    
    # The bounding boxes of all detected gestures are drawn on top of the original frame
    # with their corresponding label.
    # max_boxes_to_draw=1 doesn't let two overlapping gestures to be recognized at once
    # min_score_thresh=.7 ignores all detections with an accuracy rate lower than 70%
    viz_utils.visualize_boxes_and_labels_on_image_array(
                image_np_with_detections,
                detections['detection_boxes'],
                detections['detection_classes']+label_id_offset,
                detections['detection_scores'],
                category_index,
                use_normalized_coordinates=True,
                max_boxes_to_draw=1,
                min_score_thresh=.7,
                agnostic_mode=False)

    # The output is displayed on an interactive window
    cv2.imshow('object detection',  image_np_with_detections)
    #cv2.imshow('Masked image', output)
    
    if cv2.waitKey(1) & 0xFF == ord('q'):
        cap.release()
        cv2.destroyWindow('object detection')
        #cv2.destroyWindow('Masked image')
        break

In [45]:
blur = 21
canny_low = 50
canny_high = 150
min_area = 0.0005
max_area = 0.5
mask_dilate_iter = 10
mask_erode_iter = 10
mask_color = (0.0,0.0,0.0)
image_area = width * height  
max_area = max_area * image_area
min_area = min_area * image_area

In [None]:
#preprocess input
    image_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(image_gray, canny_low, canny_high)
    contour_info = [(c, cv2.contourArea(c),) for c in cv2.findContours(edges, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)[0]]
    mask = np.zeros(edges.shape, dtype = np.uint8)
    for contour in contour_info:
        if contour[1] > min_area and contour[1] < max_area:
            mask = cv2.fillConvexPoly(mask, contour[0], (255))
    
    mask = cv2.dilate(mask, None, iterations=mask_dilate_iter)
    mask = cv2.erode(mask, None, iterations=mask_erode_iter)
    mask = cv2.GaussianBlur(mask, (blur, blur), 0)
    mask_stack = mask.astype('float32') / 255.0           
    frame = frame.astype('float32') / 255.0
    mask_stack = cv2.cvtColor(mask_stack, cv2.COLOR_GRAY2RGB)
    masked = (mask_stack * frame) + ((1-mask_stack) * mask_color)
    masked = (masked * 255).astype('uint8')

In [15]:
def nothing(x): #needed for createTrackbar to work in python.
    pass 

cap = cv2.VideoCapture(0)
cv2.namedWindow('temp')
cv2.createTrackbar('bl', 'temp', 0, 255, nothing)
cv2.createTrackbar('gl', 'temp', 0, 255, nothing)
cv2.createTrackbar('rl', 'temp', 0, 255, nothing)
cv2.createTrackbar('bh', 'temp', 255, 255, nothing)
cv2.createTrackbar('gh', 'temp', 255, 255, nothing)
cv2.createTrackbar('rh', 'temp', 255, 255, nothing)
while True:
        ret,img=cap.read()#Read from source
        hsv=cv2.cvtColor(img,cv2.COLOR_BGR2HSV)
        bl_temp=cv2.getTrackbarPos('bl', 'temp')
        gl_temp=cv2.getTrackbarPos('gl', 'temp')
        rl_temp=cv2.getTrackbarPos('rl', 'temp')
        bh_temp=cv2.getTrackbarPos('bh', 'temp')
        gh_temp=cv2.getTrackbarPos('gh', 'temp')
        rh_temp=cv2.getTrackbarPos('rh', 'temp')
        thresh=cv2.inRange(hsv,(bl_temp,gl_temp,rl_temp),(bh_temp,gh_temp,rh_temp))
        if(cv2.waitKey(10) & 0xFF == ord('b')):
            cap.release()
            cv2.destroyWindow('temp')
            break #break when b is pressed 
        cv2.imshow('Video', img)
        cv2.imshow('thresh', thresh)

error: OpenCV(4.5.3) C:\Users\runneradmin\AppData\Local\Temp\pip-req-build-sn_xpupm\opencv\modules\highgui\src\window_w32.cpp:1246: error: (-27:Null pointer) NULL window: 'object detection' in function 'cvDestroyWindow'


In [None]:
def Rule_A(BGR_Frame):
    B_Frame, G_Frame, R_Frame =  [BGR_Frame[...,BGR] for BGR in range(3)]# [...] is the same as [:,:]
    #you can use the split built-in method in cv2 library to get the b,g,r components
    #B_Frame, G_Frame, R_Frame  = cv2.split(BGR_Frame)
    #i am using reduce built in method to get the maximum of a 3 given matrices
    BRG_Max = np.maximum.reduce([B_Frame, G_Frame, R_Frame])
    BRG_Min = np.minimum.reduce([B_Frame, G_Frame, R_Frame])
    #at uniform daylight, The skin colour illumination's rule is defined by the following equation :
    Rule_1 = np.logical_and.reduce([R_Frame > 95, G_Frame > 40, B_Frame > 20 ,
                                 BRG_Max - BRG_Min > 15,abs(R_Frame - G_Frame) > 15, 
                                 R_Frame > G_Frame, R_Frame > B_Frame])
    #the skin colour under flashlight or daylight lateral illumination rule is defined by the following equation :
    Rule_2 = np.logical_and.reduce([R_Frame > 220, G_Frame > 210, B_Frame > 170,
                         abs(R_Frame - G_Frame) <= 15, R_Frame > B_Frame, G_Frame > B_Frame])
    #Rule_1 U Rule_2
    RGB_Rule = np.logical_or(Rule_1, Rule_2)
    #return the RGB mask
    return RGB_Rule
def lines(axis):
    #equation(3)
    line1 = 1.5862  * axis + 20
    #equation(4)
    line2 = 0.3448  * axis + 76.2069
    #equation(5)
    #the slope of this equation is not correct Cr ≥ -4.5652 × Cb + 234.5652
    #it should be around -1  
    line3 = -1.005 * axis + 234.5652
    #equation(6)
    line4 = -1.15   * axis + 301.75
    #equation(7)
    line5 = -2.2857 * axis + 432.85
    return [line1,line2,line3,line4,line5]
    #The five bounding rules of Cr-Cb 
def Rule_B(YCrCb_Frame):
    Y_Frame,Cr_Frame, Cb_Frame = [YCrCb_Frame[...,YCrCb] for YCrCb in range(3)]
    line1,line2,line3,line4,line5 = lines(Cb_Frame)
    YCrCb_Rule = np.logical_and.reduce([line1 - Cr_Frame >= 0,
                                        line2 - Cr_Frame <= 0,
                                        line3 - Cr_Frame <= 0,
                                        line4 - Cr_Frame >= 0,
                                        line5 - Cr_Frame >= 0])
    return YCrCb_Rule
def Rule_C(HSV_Frame):
    Hue,Sat,Val = [HSV_Frame[...,i] for i in range(3)]
    #Change values depending on experimental observation
    HSV_ = np.logical_or(Hue < 35, Hue > 140)
    return HSV_

    Ycbcr_Frame = cv2.cvtColor(output, cv2.COLOR_BGR2YCrCb)
    skin_ = np.logical_and.reduce([Rule_A(output), Rule_B(Ycbcr_Frame), Rule_C(HSV_Frame)])
    skin_frame = np.asarray(skin_, dtype=np.uint8)
    contours, hierarchy = cv2.findContours(skin_frame, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cv2.drawContours(frame, contours, -1, (0, 255, 0), 1)
    rects = []
    for c in contours:
        # get the bounding rect
        x, y, w, h = cv2.boundingRect(c)
        # draw a green rectangle to visualize the bounding rect
        if (w > 40 and h > 40) and (w < 300 and h < 300):
            #pinhole distance
            Distance1 = 11.5*(frame.shape[1]/float(w))
            #camera distance
            Distance2 = 15.0*((frame.shape[1] + 226.8)/float(w))
            #print("\npinhole distance = {:.2f} cm\ncamera distance = {:.2f} cm".format(Distance1,Distance2))
            #print("Width = {} \t Height = {}".format(w,h))
            rects.append(np.asarray([x,y,w,w*1.25], dtype=np.uint16))

    for i,r in enumerate(rects):
        x0,y0,w,h = r
        cv2.rectangle(frame, (x0,y0),(x0+w,y0+h),(0,255,0),1)
        font = cv2.FONT_HERSHEY_SIMPLEX