In [1]:
# load packages
import os
import cv2
import pickle
from keras.models import model_from_json
from PIL import Image, ImageTk
from keras.preprocessing import image
from keras.applications.resnet50 import preprocess_input, decode_predictions
import numpy as np
import matplotlib.pyplot as plt
import tkinter as tk
from tkinter import messagebox

# make sure matplotlib shows images inline
%matplotlib inline

Using TensorFlow backend.


In [2]:
# set directory
# All data paths used further down ('00 Data/...') are relative, so the working
# directory has to be the project root. The location can be overridden with the
# HANDSIGN_PROJECT_DIR environment variable; the original path is the default.
project_dir = os.environ.get("HANDSIGN_PROJECT_DIR", "D:/Documents/GitHub/HandSign_Recognition")
os.chdir(project_dir)

In [3]:
# locations of the Haar cascade files used to detect hands
# (one file per hand pose, all stored in the same folder)
fist_cascade_path, palm_cascade_path, closed_frontal_palm_cascade_path = (
    '00 Data/Haarcascades/' + cascade_file
    for cascade_file in ('fist.xml', 'palm.xml', 'closed_frontal_palm.xml')
)

In [4]:
# load cascade file
def _load_cascade(cascade_path):
    """Create a cv2.CascadeClassifier from cascade_path and verify it loaded.

    cv2.CascadeClassifier does not raise when the file is missing or invalid;
    it returns an empty classifier instead. Checking here makes the problem
    visible at load time instead of at the first detectMultiScale call.

    Raises:
        IOError: if the classifier is empty after loading.
    """
    cascade = cv2.CascadeClassifier(cascade_path)
    if cascade.empty():
        raise IOError("Could not load Haar cascade file: " + str(cascade_path))
    return cascade


fistCascade = _load_cascade(fist_cascade_path)
palmCascade = _load_cascade(palm_cascade_path)
closedFrontalPalmCascade = _load_cascade(closed_frontal_palm_cascade_path)

In [5]:
# load json and create model
# (the with-block closes the file even when reading fails)
with open('00 Data/my_model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)

# load weights into new model
loaded_model.load_weights("00 Data/my_model.h5")
print("Loaded model from disk")

Loaded model from disk


In [6]:
# load the label dictionary; predict() looks up the key whose value equals the
# index of the highest model output
# NOTE: unpickling executes code stored in the file, so only load a pickle
# that comes from a trusted source.
with open('00 Data/LabelDictionary.pkl', 'rb') as label_file:
    label_dict = pickle.load(label_file)

In [7]:
# define the function to predict which letter is shown with handsigns
def predict(model, img, target_size, labels = None):
    """Predict which letter is shown by the hand sign in img.

    Parameters
    ----------
    model : keras model
        Only model.predict(batch) is called on it; the result is reduced with
        np.argmax(..., axis = 1), so it must be 2-dimensional.
    img : PIL format image
        Resized to target_size when img.size differs from it.
    target_size : tuple
        (width, height), passed to img.resize.
    labels : dict, optional
        Mapping from letter to class index. Defaults to the notebook-level
        label_dict, which is what the original implementation always used.

    Returns
    -------
    The key of the label dictionary whose value equals the predicted class index.

    Raises
    ------
    ValueError
        If no key in the label dictionary maps to the predicted class index.
    """
    if labels is None:
        labels = label_dict

    if img.size != target_size:
        print("the original size of the image is: " + str(img.size))
        img = img.resize(target_size)
        print("the new size of the image is: " + str(img.size))

    # convert to numpy array, add the batch axis and preprocess
    x = np.array(img)
    x = np.expand_dims(x, axis = 0)
    x = preprocess_input(x.astype(float))

    # make a prediction
    pred = model.predict(x)

    # index of the highest score for the single image in the batch;
    # converted to a plain int so the lookup below compares scalars
    pred_index = int(np.argmax(pred, axis = 1)[0])

    # translate the index to the letter, using the label dictionary
    for letter, class_index in labels.items():
        if class_index == pred_index:
            return letter

    raise ValueError("predicted class index " + str(pred_index)
                     + " is not in the label dictionary")

In [8]:
# define a function for showing the webcam screen
def show_frame():
    """Show the current webcam frame in lmain and schedule the next refresh.

    Reads one frame from the notebook-level camera, mirrors it horizontally,
    converts it from OpenCV's BGR order to RGBA and displays it on the lmain
    label. The function re-arms itself through lmain.after, so a single call
    starts a continuous stream once the Tk event loop is running.
    """
    frame_ok, frame = camera.read()

    # a failed read returns no usable frame; skip drawing but keep the stream alive
    if frame_ok:
        frame = cv2.flip(frame, 1)
        cv2image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGBA)
        img = Image.fromarray(cv2image)
        imgtk = ImageTk.PhotoImage(image = img)
        # keep a reference on the widget; Tkinter does not hold one itself,
        # so the image would otherwise be garbage-collected
        lmain.imgtk = imgtk
        lmain.configure(image = imgtk)

    # refresh again in 10 milliseconds
    lmain.after(10, show_frame)

In [None]:
# define the webcam screen
width, height = 850, 640
camera = cv2.VideoCapture(0)
if not camera.isOpened():
    raise RuntimeError("Could not open webcam 0")
camera.set(cv2.CAP_PROP_FRAME_WIDTH, width)
camera.set(cv2.CAP_PROP_FRAME_HEIGHT, height)

# time in milliseconds between two attempts to recognise a hand sign
detect_interval_ms = 250

# create the GUI screen
root = tk.Tk()

# create an empty variable to save the sentence and the new letter
the_sentence = ""
new_letter = ""

# configure the GUI
root.configure(bg = '#%02x%02x%02x' % (101, 116, 129))

# set the padding of column 3 and row 5 of the grid
root.columnconfigure(3, pad = 7)
root.rowconfigure(5, pad = 7)

# make column(s) and/or row(s) expandable
root.columnconfigure(1, weight = 1)
root.rowconfigure(1, weight = 1)
root.rowconfigure(4, weight = 1)

# make sure the GUI is maximized ("zoomed" is the documented name of this state)
root.state("zoomed")

# Define the title of the GUI
root.title("Gebarentaal herkenning met behulp van Artificial Intelligence")

# put a logo on the GUI; the logo is decoration only, so a missing file
# must not prevent the application from starting
logo_path = 'C:/Users/j.schoonemann/Downloads/logo.png'
if os.path.exists(logo_path):
    logo = tk.PhotoImage(file = logo_path)
    img = tk.Label(root, image = logo, bg = '#%02x%02x%02x' % (101, 116, 129))
    img.image = logo
    img.grid(row = 0, column = 0, padx = 5, pady = 5)
else:
    print("logo not found, continuing without it: " + logo_path)

# create a placeholder for the webcam screen
lmain = tk.Label(root)
lmain.grid(row = 1, column = 1)

# create a placeholder textbox for the complete sentence
text = tk.Text(root
               , bd = 0 # size of the border
               , bg = '#%02x%02x%02x' % (218, 218, 222) # background color
               , height = 5 # number of lines
               , padx = 5 # left and right padding in pixels
               , pady = 5 # top and bottom padding in pixels
               , relief = "solid" # 3D appearance of widget: flat, groove, raised, ridge, solid, or sunken
               , wrap = "word" # truncate line after last whole word that fits
               , font = ('Verdana', 20, 'bold')
               , fg = '#%02x%02x%02x' % (4, 55, 133) # textcolor (Cmotions darkblue)
               , width = 40 # the number of characters that fit on a single line
              )
text.grid(row = 4, column = 1)

# show the sentence
text.insert(tk.INSERT, the_sentence)


def detect_letter():
    """Look for a hand in the current webcam frame and offer the recognised letter.

    Runs as a Tk 'after' callback, so it executes inside root.mainloop() while
    the GUI is alive. (A loop placed after root.mainloop() would only start
    once the window has been closed.) When one of the three cascades detects a
    hand, the frame is passed to predict() and the user is asked whether the
    letter should be appended to the sentence. The function re-arms itself
    every detect_interval_ms milliseconds.
    """
    global the_sentence, new_letter

    try:
        return_value, frame = camera.read()

        # check if the camera gives an image
        if not return_value:
            return

        # detect hands; all three cascades use the same settings
        cascade_args = dict(scaleFactor = 1.1, minNeighbors = 5
                            , minSize = (85, 64), flags = cv2.CASCADE_SCALE_IMAGE)
        fists = fistCascade.detectMultiScale(frame, **cascade_args)
        palms = palmCascade.detectMultiScale(frame, **cascade_args)
        front_palms = closedFrontalPalmCascade.detectMultiScale(frame, **cascade_args)

        # check if there are hands in the image
        if len(fists) > 0 or len(palms) > 0 or len(front_palms) > 0:

            print("fists: " + str(len(fists)) + ", palms: " + str(len(palms)) + ", front_palms: " + str(len(front_palms)))

            # convert opencv image to PIL
            # NOTE(review): no channel conversion is done here, so the model receives
            # the frame in OpenCV's BGR order - confirm this matches the training data.
            pil_im = Image.fromarray(frame)

            # try to define the letter from the handsign
            # NOTE(review): predict() documents target_size as (width, height), while
            # the cascades use minSize = (85, 64) - confirm (64, 85) is the intended order.
            new_letter = predict(loaded_model, pil_im, (64,85))
            print(new_letter)

            # let the user decide if the letter should be added to the sentence
            add_letter = messagebox.askyesno(title = "Bevestig toevoegen van letter aan zin"
                                     , message = "Ik heb de nieuwe letter herkend als een:\n\n" + str(new_letter)
                                     + "\n\nWil je deze toevoegen aan de zin?")

            # check if the letter should be added and act accordingly
            if add_letter:
                # update the value of the complete sentence
                the_sentence = the_sentence + str(new_letter)
                # show the complete sentence that has been created so far
                text.insert(tk.INSERT, str(new_letter))
    finally:
        # keep detecting, also after a frame that could not be processed
        root.after(detect_interval_ms, detect_letter)


def stop_app(event = None):
    """Close the GUI, which makes root.mainloop() return."""
    # an 's' typed into the sentence textbox is text, not the stop command
    if event is not None and event.widget is text:
        return
    print ('stop!')
    root.destroy()


# stop with the 's' key or by closing the window; cv2.waitKey only receives
# keys while an OpenCV window has focus, and this GUI has none
root.bind('s', stop_app)
root.protocol("WM_DELETE_WINDOW", stop_app)

# show the webcam stream and start looking for hand signs
show_frame()
root.after(detect_interval_ms, detect_letter)

try:
    # start the GUI; returns when the window has been closed
    root.mainloop()
finally:
    # turn off the camera, also when the cell is interrupted
    camera.release()
    cv2.destroyAllWindows()

Exception in Tkinter callback
Traceback (most recent call last):
  File "D:\Programs\Anaconda3\lib\tkinter\__init__.py", line 1699, in __call__
    return self.func(*args)
  File "D:\Programs\Anaconda3\lib\tkinter\__init__.py", line 745, in callit
    func(*args)
  File "<ipython-input-8-8855870214e2>", line 9, in show_frame
    lmain.configure(image = imgtk)
  File "D:\Programs\Anaconda3\lib\tkinter\__init__.py", line 1479, in configure
    return self._configure('configure', cnf, kw)
  File "D:\Programs\Anaconda3\lib\tkinter\__init__.py", line 1470, in _configure
    self.tk.call(_flatten((self._w, cmd)) + self._options(cnf))
KeyboardInterrupt


In [None]:
# turn off the camera
# Stand-alone cleanup cell: releases the webcam handle and closes any OpenCV
# windows. Useful when the GUI cell above was interrupted and the camera is
# still held by this kernel.
camera.release()
cv2.destroyAllWindows()