In [2]:
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import cv2

In [3]:
def __draw_label(img, text, pos, bg_color):
   font_face = cv2.FONT_HERSHEY_SIMPLEX
   scale = 1
   color = (0, 0, 0)
   thickness = cv2.FILLED
   margin = 2
   txt_size = cv2.getTextSize(text, font_face, scale, thickness)

   end_x = pos[0] + txt_size[0][0] + margin
   end_y = pos[1] - txt_size[0][1] - margin

   cv2.rectangle(img, pos, (end_x, end_y), bg_color, thickness)
   cv2.putText(img, text, pos, font_face, scale, color, 1, cv2.LINE_AA)




def preprocess_Input(X):
    np_X = np.array(X)
    normalised_X = np_X.astype('float32')/255.0
    normalised_X = np.expand_dims(normalised_X, axis=0)
    return normalised_X

<font size="6">If you want to load and use mobile net TFLite model 
Run the next 2 cells</font>

In [None]:
# Load the TFLite model and allocate tensors.
interpreter = tf.lite.Interpreter(model_path='ASL_words_mobilenet_v2_130_224.tflite')
interpreter.allocate_tensors()

# Get input and output tensors.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

classes = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 
           'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 
           'W', 'X', 'Y', 'Z', 'del', 'nothing', 'space']

In [None]:
cap = cv2.VideoCapture(0)

#Check if camera was opened correctly
if not (cap.isOpened()):
    print("Could not open video device")

# 2) fetch one frame at a time from your camera
while(True):
    
    # frame is a numpy array, that you can predict on 
    ret, frame = cap.read()
    frame = cv2.flip(frame, 1)
    # 3) obtain the prediction
    # depending on your model, you may have to reshape frame
    frame1 = cv2.resize(frame, (224, 224), interpolation=cv2.INTER_CUBIC)
    frame1 = cv2.cvtColor(frame1, cv2.COLOR_BGR2RGB)

    try:
        image_pixels = tf.keras.utils.img_to_array(frame1)
        image_pixels = preprocess_Input(image_pixels)
        interpreter.set_tensor(input_details[0]['index'], image_pixels)
        interpreter.invoke()

        # Get the result 
        output_data = interpreter.get_tensor(output_details[0]['index'])
        predicted_index = np.argmax(output_data)
        print(predicted_index)

        print("Predicted label: " + classes[predicted_index])
        res = classes[predicted_index]
        #sequence = categories[mx][1]
    except:
        continue 
    # you may need then to process prediction to obtain a label of your data, depending on your model. Probably you'll have to apply an argmax to prediction to obtain a label.
    
    # 4) Adding the label on your frame
    __draw_label(frame, res, (20,20), (255,0,0))


    # 5) Display the resulting frame
    cv2.imshow("preview",frame)
    #Waits for a user input to quit the application
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# When everything done, release the capture
cap.release()
cv2.destroyAllWindows()

<font size="6">If you want to load and use mobile net Model 
Run the next 2 cells</font>

In [None]:
# Load the Keras Model
model = tf.keras.models.load_model('ASL_words.h5', custom_objects={'KerasLayer':hub.KerasLayer})
classes = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 
           'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 
           'W', 'X', 'Y', 'Z', 'del', 'nothing', 'space']

In [7]:
cap = cv2.VideoCapture(0)

#Check if camera was opened correctly
if not (cap.isOpened()):
    print("Could not open video device")

# 2) fetch one frame at a time from your camera
while(True):
    
    # frame is a numpy array, that you can predict on 
    ret, frame = cap.read()
    frame = cv2.flip(frame, 1)
    # 3) obtain the prediction
    # depending on your model, you may have to reshape frame
    frame1 = cv2.resize(frame, (224, 224), interpolation=cv2.INTER_CUBIC)
    frame1 = cv2.cvtColor(frame1, cv2.COLOR_BGR2RGB)

    try:
        image_pixels = tf.keras.utils.img_to_array(frame1)
        #image_pixels = np.expand_dims(image_pixels, axis = 0)
        image_pixels = preprocess_Input(image_pixels)

        # Get the result 
        output_data = model.predict(image_pixels)
        predicted_index = np.argmax(output_data)
        print(predicted_index)

        print("Predicted label: " + classes[predicted_index])
        res = classes[predicted_index]
        #sequence = categories[mx][1]
    except:
        continue 
    # you may need then to process prediction to obtain a label of your data, depending on your model. Probably you'll have to apply an argmax to prediction to obtain a label.
    
    # 4) Adding the label on your frame
    __draw_label(frame, res, (20,20), (255,0,0))


    # 5) Display the resulting frame
    cv2.imshow("preview",frame)
    #Waits for a user input to quit the application
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# When everything done, release the capture
cap.release()
cv2.destroyAllWindows()

1175
1175
1233
1233
160
972
972
972
1233
827
1233
972
972
972
1233
972
363
1233
972
1233
972
972
363
1030
972
15
Predicted label: nothing
972
363
1233
1233
972
972
624
972
972
972
972
972
566
972
972
972
1146
1030
566
1233
566
769
566
1175
363
421
972
1030
972
160
972
769
566
566
363
566
160
160
1030
1030
566
1175
1175
972
972
421
972
1146
972
1146
1146
1175
972
1146
1146
1146
972
1146
1146
1146
1146
1146
1146
1146
1146
1146
1146
1146
1175
1175
972
972
15
Predicted label: nothing


<font size="6">If you want to load and use ASL_Kaggle Model 
Run the next 2 cells</font>

In [12]:
# Load the Keras Model
model = tf.keras.models.load_model('words_model_kaggle.h5', custom_objects={'KerasLayer':hub.KerasLayer})
classes = ['A', 'B', 'C', 'D','del', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 
           'L', 'M', 'N', 'nothing', 'O', 'P', 'Q', 'R', 'S', 'space', 'T', 'U', 'V', 
           'W', 'X', 'Y', 'Z']

In [9]:
# Open the device at the ID 0
# Use the camera ID based on
# /dev/videoID needed
cap = cv2.VideoCapture(0)

#Check if camera was opened correctly
if not (cap.isOpened()):
    print("Could not open video device")

# 2) fetch one frame at a time from your camera
while(True):
    
    # frame is a numpy array, that you can predict on 
    ret, frame = cap.read()
    frame = cv2.flip(frame, 1)
    # 3) obtain the prediction
    # depending on your model, you may have to reshape frame
    frame1 = cv2.resize(frame, (32, 32), interpolation=cv2.INTER_CUBIC)
    frame1 = cv2.cvtColor(frame1, cv2.COLOR_BGR2RGB)

    try:
        image_pixels = tf.keras.utils.img_to_array(frame1)
        #image_pixels = np.expand_dims(image_pixels, axis = 0)
        image_pixels = preprocess_Input(image_pixels)
        proba = model.predict(image_pixels)
        mx = np.argmax(proba)

        res = classes[mx]
        print(res)
        #sequence = categories[mx][1]
    except:
        continue 
    # you may need then to process prediction to obtain a label of your data, depending on your model. Probably you'll have to apply an argmax to prediction to obtain a label.
    
    # 4) Adding the label on your frame
    __draw_label(frame, res, (20,20), (255,0,0))


    # 5) Display the resulting frame
    cv2.imshow("preview",frame)
   
    #Waits for a user input to quit the application
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# When everything done, release the capture
cap.release()
cv2.destroyAllWindows()

nothing
nothing
nothing
nothing
