In [None]:
# load packages
import time
import os
import cv2
from PIL.Image import core as Image
from PIL import ImageTk
import pickle
from keras.models import model_from_json
from keras.preprocessing import image
from keras.applications.resnet50 import preprocess_input, decode_predictions
import numpy as np
import matplotlib.pyplot as plt
import tkinter as tk
from tkinter import messagebox
from datetime import datetime
import xgboost as xgb

# make sure matplotlib shows images inline
%matplotlib inline

In [None]:
# Record the working directory of the kernel. Every relative path used further
# down ('01 Models/...', 'Logo.png') is resolved against this directory.
# NOTE: os.getcwd() returns the process working directory, which is not
# necessarily the directory that contains this notebook.
current_directory = os.getcwd()

# show it, so a wrong working directory is noticed before files are loaded
print(current_directory)

In [None]:
# define the location of the cascade files we will use to detect hands
#fist_cascade_path = '00 Data/Haarcascades/fist.xml' 
#palm_cascade_path =  '00 Data/Haarcascades/palm.xml'
#closed_frontal_palm_cascade_path =  '00 Data/Haarcascades/closed_frontal_palm.xml'

In [None]:
# load cascade file
#fistCascade = cv2.CascadeClassifier(fist_cascade_path)
#palmCascade = cv2.CascadeClassifier(palm_cascade_path)
#closedFrontalPalmCascade = cv2.CascadeClassifier(closed_frontal_palm_cascade_path)

In [None]:
# Name of the model to load. The loading cell uses it to build the file names
# '01 Models/<modelname>.pickle.dat' (XGBoost) or
# '01 Models/<modelname>.json' plus '01 Models/<modelname>.h5' (Keras).
modelname = 'xgb_model_500loops'

# True: the model is an XGBoost model stored as a pickle.
# False: the model is a Keras model stored as JSON + weights.
xgboost_model = True

# Size every camera image is resized to before it is given to the model.
# It is passed to predict_letter() as target_size, which describes it as a
# (width, height) tuple.
# Presumably this has to match the size used for training (check modeling script).
resolution = (48,64)

In [None]:
# Load the trained classifier selected in the configuration cell.
if xgboost_model:
    # XGBoost model stored as a pickle.
    # SECURITY: unpickling executes code contained in the file, so only load
    # model files that come from a trusted source.
    # The with-block closes the file even when unpickling fails.
    with open('01 Models/' + modelname + '.pickle.dat', 'rb') as model_file:
        loaded_model = pickle.load(model_file)
else:
    # Keras model: read the architecture from JSON and create the model
    with open('01 Models/' + modelname + '.json', 'r') as json_file:
        loaded_model_json = json_file.read()
    loaded_model = model_from_json(loaded_model_json)

    # load weights into new model
    loaded_model.load_weights("01 Models/" + modelname + ".h5")

print("Loaded model from disk:")
print(loaded_model)

In [None]:
# Load the label dictionary. predict_letter() searches it for the key whose
# value equals the index of the most probable class and returns that key.
# SECURITY: unpickling executes code contained in the file, so only load a
# dictionary file that comes from a trusted source.
# The with-block closes the file even when unpickling fails.
with open('01 Models/LabelDictionary.pkl', 'rb') as label_file:
    label_dict = pickle.load(label_file)

In [None]:
# create a function which joins the rows of the first image in a batch
def im_to_wide(grey):
    """Join the rows of ``grey[0]`` into one wide array.

    Only the first element of ``grey`` is used; predict_letter() calls this
    with a batch that holds a single image. The rows of that element are
    placed one after another with ``np.hstack``, so an input of shape
    (1, rows, cols) yields a 1-D array of length rows * cols in row-major
    order.

    Parameters:
        grey: indexable batch (array or nested sequence); ``grey[0]`` is the
            matrix whose rows are joined.

    Returns:
        numpy array holding all rows of ``grey[0]`` side by side.
    """
    # Stack all rows in a single call. Appending the rows one at a time copies
    # the complete intermediate result for every row.
    return np.hstack(tuple(grey[0]))

In [None]:
# Minimum probability a class must reach to count as a prediction.
# predict_letter() sets every probability below this value to zero and
# returns "Unknown" when no probability is left.
thresh = .7

In [None]:
# define the function to predict which letter is shown with handsigns
def predict_letter(model, img, target_size, xgboost = False, threshold = None, labels = None):
    """Predict the label of the hand sign shown in ``img``.

    Parameters:
        model: classifier; ``predict_proba`` is called on it when ``xgboost``
            is true, ``predict`` otherwise.
        img: PIL format image (``size``, ``resize`` and conversion with
            ``np.array`` are used).
        target_size: (width, height) the image is resized to when its size
            differs.
        xgboost: True when ``model`` is an XGBoost model that expects one
            wide row per image.
        threshold: minimum probability a class must reach; defaults to the
            module-level ``thresh``.
        labels: dictionary whose values are class indices and whose keys are
            the labels to return; defaults to the module-level ``label_dict``.

    Returns:
        The key of ``labels`` that belongs to the most probable class, or the
        string "Unknown" when no probability reaches ``threshold``.

    Raises:
        ValueError: when the index of the most probable class is not a value
            of ``labels``.
    """
    # fall back to the notebook-wide settings when nothing is passed in
    if threshold is None:
        threshold = thresh
    if labels is None:
        labels = label_dict

    # img.size is a tuple, so a list passed as target_size would never compare equal
    target_size = tuple(target_size)
    if img.size != target_size:
        print("the original size of the image is: " + str(img.size))
        img = img.resize(target_size)
        print("the new size of the image is: " + str(img.size))

    # convert to a numpy array, add a leading axis for the number of images
    # (which is 1), convert to float and divide by 255
    x = np.expand_dims(np.array(img), axis = 0).astype(float) / 255

    if xgboost:
        # for xgboost: turn the image matrix into a single wide row
        x = np.array([im_to_wide(x)])
        pred = model.predict_proba(x)
    else:
        # add the channel axis when the image has only one channel
        if len(x.shape) == 3:
            x = x.reshape((x.shape[0], x.shape[1], x.shape[2], 1))
        pred = model.predict(x)

    # work on a copy, so the array returned by the model is left untouched
    pred = np.array(pred)

    # set all elements below the threshold to zero
    pred[pred < threshold] = 0

    # if the matrix contains all zeros, no prediction can be done
    if not np.any(pred):
        print("no prediction possible")
        return "Unknown"

    print("prediction can be done")

    # position of the highest remaining probability; there is one image in the
    # batch, so the flattened position is the class index
    best_index = np.argmax(pred)

    # translate the class index to its label, using the label dictionary
    for label, index in labels.items():
        if index == best_index:
            return label

    raise ValueError("class index " + str(best_index) + " is not in the label dictionary")

In [None]:
# define a function for showing the webcam screen
def show_frame():
    """Show the current webcam frame in ``lmain`` and schedule the next update.

    Reads one frame from the module-level ``camera``, converts it from BGR to
    RGBA and displays it on the ``lmain`` label. The function re-schedules
    itself with ``lmain.after`` every 10 ms, also when no frame could be read,
    so a single failed read does not stop the preview.
    """
    # The module-level name ``Image`` is bound to PIL.Image.core, which has no
    # fromarray(); import the real PIL.Image module locally instead.
    from PIL import Image as pil_image

    frame_ok, frame = camera.read()
    #frame = cv2.flip(frame, 1)

    # only convert and display when the camera actually delivered a frame
    if frame_ok:
        cv2image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGBA)
        imgtk = ImageTk.PhotoImage(image = pil_image.fromarray(cv2image))
        # store the photo on the label as well as configuring it as the image
        lmain.imgtk = imgtk
        lmain.configure(image = imgtk)

    # always schedule the next refresh
    lmain.after(10, show_frame)

In [None]:
# Open the webcam (capture device index 2) and request the frame size.
# width and height are passed to CAP_PROP_FRAME_WIDTH / CAP_PROP_FRAME_HEIGHT.
# NOTE(review): the result of opening the camera is not checked here.
width, height = 480, 640
camera = cv2.VideoCapture(2)
camera.set(cv2.CAP_PROP_FRAME_WIDTH, width)
camera.set(cv2.CAP_PROP_FRAME_HEIGHT, height)

# create the GUI screen
root = tk.Tk()
#root = tk.Toplevel()

# Initial text of the sentence (it starts with "de ", not empty) and a
# placeholder for the new letter. the_sentence is also used as the default
# argument of call_model() and as the initial content of the text box.
the_sentence = "de "
new_letter = ""

# set the background color of the GUI
root.configure(bg = '#%02x%02x%02x' % (101, 116, 129))

# add padding to grid column 3 and grid row 5
root.columnconfigure(3, pad = 7)
root.rowconfigure(5, pad = 7)

# give column 1 and rows 1 and 4 a weight, so they can expand
root.columnconfigure(1, weight = 1)
root.rowconfigure(1, weight = 1)
root.rowconfigure(4, weight = 1)

# make sure the GUI is in fullscreen mode
# NOTE(review): Tk documents this window state as "zoomed"; "zoom" presumably
# works as an abbreviation of it -- confirm on the target platform.
root.state("zoom")

# Define the title of the GUI
root.title("Gebarentaal herkenning met behulp van Artificial Intelligence")

# put a logo on the GUI; the image is read from 'Logo.png' in the working
# directory and is also stored on the label as img.image
logo = tk.PhotoImage(file = 'Logo.png')
img = tk.Label(root, image = logo, bg = '#%02x%02x%02x' % (101, 116, 129))
img.image = logo
img.grid(row = 0, column = 0, padx = 5, pady = 5)

# create a placeholder for the webcam screen; show_frame() puts the frames in it
lmain = tk.Label(root)
lmain.grid(row = 1, column = 1)

# create a placeholder textbox for the complete sentence
text = tk.Text(root
               , bd = 0 # size of the border
               , bg = '#%02x%02x%02x' % (218, 218, 222) # background color
               , height = 5 # number of lines
               , padx = 5 # left and right padding in pixels
               , pady = 5 # top and bottom padding in pixels
               , relief = "solid" # 3D appearance of widget: flat, groove, raised, ridge, solid, or sunken
               , wrap = "word" # truncate line after last whole word that fits
               , font = ('Verdana', 20, 'bold')
               , fg = '#%02x%02x%02x' % (4, 55, 133) # textcolor (Cmotions darkblue)
               , width = 40 # the number of characters that fit on a single line
              )
text.grid(row = 4, column = 1)

# show the initial sentence in the textbox
text.insert(tk.INSERT, the_sentence)


# create a function that calls the model and processes the outcome
def call_model(the_sentence = the_sentence):
    """Capture one webcam frame, predict its letter and offer to add it.

    The frame is mirrored, saved as a PNG in '00 Data/pictures' (relative to
    the working directory, like the other data paths in this notebook),
    converted to grayscale and passed to predict_letter(). The user confirms
    in a dialog whether the recognized letter is inserted into the text box.

    Parameters:
        the_sentence: kept so existing calls keep working; its value is not
            used. The default is evaluated once, when the function is defined,
            so it cannot carry the sentence from one button press to the
            next. The sentence is read back from the text box instead.

    Returns:
        None.
    """
    # The module-level name ``Image`` is bound to PIL.Image.core, which has no
    # fromarray(); import the real PIL.Image module locally instead.
    from PIL import Image as pil_image

    # get image from camera
    return_value, image = camera.read()

    # print starttime
    print(datetime.now())

    # without an image from the camera there is nothing to classify
    if not return_value:
        print('no image received from the camera')
        return

    # flip the image
    image = cv2.flip(image, 1)

    # convert the BGR frame delivered by OpenCV to RGBA
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGBA)

    # save the image to disk; the folder is created when it does not exist yet
    picture_dir = os.path.join('00 Data', 'pictures')
    os.makedirs(picture_dir, exist_ok = True)
    picture_name = 'image_' + str(round(time.time())) + '.png'
    pil_image.fromarray(image).save(os.path.join(picture_dir, picture_name))

    # set to grayscale
    image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # binarize (image to black and white)
    #thr, image = cv2.threshold(image, 135, 300, cv2.THRESH_BINARY)

    # convert opencv image to PIL
    pil_im = pil_image.fromarray(image)

    # try to define the letter from the handsign
    new_letter = predict_letter(loaded_model, pil_im, resolution, xgboost_model)
    print(new_letter)

    # let the user decide if the letter should be added to the sentence
    if new_letter == 'Unknown':
        add_letter = False
        # NOTE(review): the answer to "try again?" is not acted upon; the user
        # has to press the button again to retry.
        messagebox.askyesno(title = "Oeps"
                            , message = "Sorry, de letter is niet herkend" +
                              "\n\nWil je het nog een keer proberen?")
    else:
        add_letter = messagebox.askyesno(title = "Bevestig toevoegen van letter aan zin"
                                         , message = "Ik heb de nieuwe letter herkend als een:\n\n" + str(new_letter)
                                         + "\n\nWil je deze toevoegen aan de zin?")

    # check if the letter should be added and act accordingly
    if add_letter:
        # show the complete sentence that has been created so far
        text.insert(tk.INSERT, new_letter)
        root.update_idletasks()

        # the text box holds the sentence built so far; 'end-1c' leaves out
        # the newline the widget keeps at the end of its content
        the_sentence = text.get('1.0', 'end-1c')
        print('the sentence is updated to: ' + the_sentence)
    else:
        print('WRONG!')

# create a button that calls the model
startButton = tk.Button(root, text = 'Bepaal letter', command = call_model
                        , height = 5, width = 20, bg = 'red', foreground = 'white'
                       , relief = 'raised', justify = 'center', font = ('Verdana', 15, 'bold'))
startButton.grid(row = 1, column = 0)

try:
    # show the webcam stream
    show_frame()

    # start the GUI; this call returns when the window is closed
    root.mainloop()
finally:
    # turn off the camera, also when the GUI stops because of an error
    camera.release()
    cv2.destroyAllWindows()

In [None]:
# Turn off the camera and close any OpenCV windows.
# This repeats the cleanup at the end of the GUI cell, so it can be run by hand
# when that cell was interrupted before it reached its own cleanup.
camera.release()
cv2.destroyAllWindows()