In [1]:
import numpy as np
import cv2
import mediapipe as mp
import tensorflow as tf
import time
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands

In [2]:
# load model
tflite_save_path = 'model/model.tflite'
interpreter = tf.lite.Interpreter(model_path=tflite_save_path)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()


In [3]:
def gesture_preprocess(landmark):
    """
    convert landmarks for trainable data
    66 features
    X (21): 0-20
    Y (21): 21-41
    Z (21): 42-62
    X,Y,Z range (3): 63-65
    
    params landmark: mediapipe landmark for 1 hand
    params label: str
    return: np.array (1,66)
    """ 
    lm_x = np.array([])
    lm_y = np.array([])
    lm_z = np.array([])
    for hlm in landmark.landmark:
        lm_x = np.append(lm_x, hlm.x)
        lm_y = np.append(lm_y, hlm.y)
        lm_z = np.append(lm_z, hlm.z)
    data_gest = [lm_x, lm_y, lm_z]
    x_rng, y_rng, z_rng = lm_x.max()-lm_x.min(), lm_y.max()-lm_y.min(), lm_z.max()-lm_z.min()
    data_gest = np.ravel([(k-k.min())/(k.max()-k.min()) for i, k in enumerate(data_gest)])
    data_gest = np.append(data_gest, [x_rng, y_rng, z_rng])
    return data_gest.astype('float32')

In [6]:
def gesture_inference(data):
    """
    inference
    
    param data: np.array
    return: int class
    """
    interpreter.set_tensor(input_details[0]['index'], np.array([data]))
    interpreter.invoke()
    tflite_results = interpreter.get_tensor(output_details[0]['index'])
    inf_class_idx = np.argmax(np.squeeze(tflite_results))
    if np.squeeze(tflite_results)[inf_class_idx] < 0.95:
        return 4
    return inf_class_idx

In [8]:
# For webcam input:
detect_time = time.time()
inf_class = {0: 'Hit', 1: 'Stand', 2: 'Split', 3: 'Reset' , 4: 'None'}
inf_class_idx = 4

cap = cv2.VideoCapture(0)
with mp_hands.Hands(
    max_num_hands=1,
    model_complexity=1,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5) as hands:
    while cap.isOpened():            
        success, image = cap.read()
        if not success:
            print("Ignoring empty camera frame.")
            continue
        # To improve performance, optionally mark the image as not writeable to
        # pass by reference.
        image.flags.writeable = False
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = hands.process(image)
        
        # Draw + infer: the hand annotations on the image.
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        if results.multi_hand_landmarks:
            if (time.time() - detect_time) > 0.5:
                print("detected hand")
                for hand_landmarks in results.multi_hand_landmarks:
                    # inference
                    gest_data = gesture_preprocess(hand_landmarks)
                    inf_class_idx = gesture_inference(gest_data)

                    # draw
                    mp_drawing.draw_landmarks(
                        image,
                        hand_landmarks,
                        mp_hands.HAND_CONNECTIONS,
                        mp_drawing_styles.get_default_hand_landmarks_style(),
                        mp_drawing_styles.get_default_hand_connections_style())
        else:
            detect_time = time.time()
        image_height, image_width, _ = image.shape
        cv2.putText(image, f"{inf_class[inf_class_idx]}", (0, 25), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36,255,12), 2)
        cv2.imshow('MediaPipe Hands', image)
        if cv2.waitKey(5) & 0xFF == 27:
            break
cap.release()
cv2.destroyAllWindows()

detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detect

detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detected hand
detect