In [6]:
import cv2 
import numpy as np
import tensorflow as tf
from utils.mp_helper import mediapipe_detection, draw_landmarks, get_keypoints
from utils.config import SEQUENCE_LENGTH, ACTIONS, mp_holistic

In [7]:
# Minimum class probability the top prediction must exceed to be reported as a gesture.
PREDICTION_THRESHOLD = 0.6

# Location of the saved Keras model, loaded once here and reused by the cells below.
PATH_TO_MODEL = 'Models/gesture2.keras'
model = tf.keras.models.load_model(PATH_TO_MODEL)

In [8]:
colors = [(245, 117, 16), (117, 245, 16), (16, 117, 245)]


def prob_viz(res, ACTIONS, input_Frame, colors):
    """Draw one labelled probability bar per entry of ``res`` on a copy of the frame.

    Bars are stacked vertically, 40 px apart, starting 60 px from the top; each
    bar is ``int(prob * 100)`` px wide and is labelled with ``ACTIONS[i]``.

    Args:
        res: Iterable of per-action probabilities; entry ``i`` is labelled
            with ``ACTIONS[i]``. An empty iterable draws nothing.
        ACTIONS: Sequence of label strings, indexed in step with ``res``.
        input_Frame: Image to annotate. It is copied first, so the caller's
            frame is never drawn on.
        colors: Non-empty sequence of colour tuples handed to ``cv2.rectangle``.
            The palette is cycled, so it may be shorter than ``res``.

    Returns:
        The annotated copy of ``input_Frame``.
    """
    output_frame = input_Frame.copy()
    for num, prob in enumerate(res):
        top = 60 + num * 40
        # Wrap around the palette instead of raising IndexError when there
        # are more actions than colours.
        bar_color = colors[num % len(colors)]
        cv2.rectangle(output_frame, (0, top), (int(prob * 100), top + 30), bar_color, -1)
        cv2.putText(output_frame, ACTIONS[num], (0, top + 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
    return output_frame

In [11]:
# Real-time loop: grab a webcam frame, extract keypoints, classify the last
# SEQUENCE_LENGTH frames with the loaded model, smooth the label over recent
# frames and draw everything on the preview window. Press "q" to quit.

SMOOTHING_WINDOW = 10  # number of most recent per-frame labels that vote on the displayed label

curr_kp = []       # keypoints of the most recent frames, newest first, capped at SEQUENCE_LENGTH
past_actions = []  # per-frame labels of the last SMOOTHING_WINDOW frames
res = []           # latest model output; stays empty until a full sequence has been collected

vc = cv2.VideoCapture(0)  # open up device camera

try:
    # set mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

        # read the feed from the device camera while it is open
        while vc.isOpened():

            # Read video capture
            ret, frame = vc.read()
            if not ret:
                # No frame was grabbed; stop instead of handing an empty
                # frame to the detection helper.
                break

            # Make detection
            image, result = mediapipe_detection(frame, holistic)

            # Draw landmarks in real time and display them
            draw_landmarks(image, result)

            # Prediction logic: keep only the newest SEQUENCE_LENGTH frames so
            # the buffer does not grow for as long as the camera runs.
            kp = get_keypoints(result)
            curr_kp.insert(0, kp)
            del curr_kp[SEQUENCE_LENGTH:]
            action = curr_kp[::-1]  # reorder to past -> present for the model

            # make a prediction once enough frames have been collected
            if len(action) == SEQUENCE_LENGTH:
                # extra leading dimension: the model is called on a batch of one sequence;
                # verbose=0 keeps the per-call progress bar out of the notebook output
                res = model.predict(np.array([action]), verbose=0)[0]

            # accept the top class only if its probability clears the threshold
            curr_prediction = 'None'
            if len(res) > 0:
                best = int(np.argmax(res))
                if res[best] > PREDICTION_THRESHOLD:
                    curr_prediction = ACTIONS[best]

            # Stabilise the prediction: show the most frequent label among the
            # last SMOOTHING_WINDOW per-frame labels (majority vote).
            past_actions.append(curr_prediction)
            if len(past_actions) > SMOOTHING_WINDOW:
                past_actions = past_actions[-SMOOTHING_WINDOW:]
            final_pred = max(set(past_actions), key=past_actions.count)

            # Show the probability bar for every action
            image = prob_viz(res, ACTIONS, image, colors)

            # Banner with the smoothed final prediction
            cv2.rectangle(image, (0, 0), (640, 40), (245, 117, 16), -1)
            cv2.putText(image, final_pred, (3, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            # Show image
            cv2.imshow("Camera Feed", image)

            # break when the q key is pressed; the mask keeps only the low
            # byte of the key code before comparing
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the preview window, even if a frame
    # failed to process, so the device is not left locked by the kernel.
    vc.release()
    cv2.destroyAllWindows()

