# Model Application

### Imports

In [1]:
import mediapipe as mp
import cv2
import csv
import copy
import numpy as np
import os
import itertools
from model.keypoint_classifier.keypoint_classifier import KeyPointClassifier

### Model Inference

In [2]:
def load_model(use_static_image_mode=False, min_detection_confidence=0.7, min_tracking_confidence=0.5):
        # Model load #############################################################
        mp_hands = mp.solutions.hands
        hands = mp_hands.Hands(
            static_image_mode=use_static_image_mode,
            max_num_hands=1,
            min_detection_confidence=min_detection_confidence,
            min_tracking_confidence=min_tracking_confidence,
        )

        keypoint_classifier = KeyPointClassifier()

        # Read labels ###########################################################
        with open('model/keypoint_classifier/keypoint_classifier_label.csv',
                  encoding='utf-8-sig') as f:
            keypoint_classifier_labels = csv.reader(f)
            keypoint_classifier_labels = [
                row[0] for row in keypoint_classifier_labels
            ]

        return hands, keypoint_classifier, keypoint_classifier_labels

def _calc_landmark_list(image, landmarks):
    image_width, image_height = image.shape[1], image.shape[0]
    print(image_width)
    print(image_height)

    landmark_point = []

    # Keypoint
    for _, landmark in enumerate(landmarks.landmark):
        landmark_x = min(int(landmark.x * image_width), image_width - 1)
        landmark_y = min(int(landmark.y * image_height), image_height - 1)
        # landmark_z = landmark.z

        landmark_point.append([landmark_x, landmark_y])
    
    return landmark_point

def _pre_process_landmark(landmark_list):
    temp_landmark_list = copy.deepcopy(landmark_list)

    # Convert to relative coordinates
    base_x, base_y = 0, 0
    for index, landmark_point in enumerate(temp_landmark_list):
        if index == 0:
            base_x, base_y = landmark_point[0], landmark_point[1]

        temp_landmark_list[index][0] = temp_landmark_list[index][0] - base_x
        temp_landmark_list[index][1] = temp_landmark_list[index][1] - base_y

    # Convert to a one-dimensional list
    temp_landmark_list = list(
        itertools.chain.from_iterable(temp_landmark_list))

    # Normalization
    max_value = max(list(map(abs, temp_landmark_list)))

    def normalize_(n):
        return n / max_value

    temp_landmark_list = list(map(normalize_, temp_landmark_list))
    
    return temp_landmark_list
    
def recognize(image, hands, keypoint_classifier, keypoint_classifier_labels, number=-1, mode=0):
    # TODO: Move constants to other place
    USE_BRECT = True

    image = cv2.flip(image, 1)  # Mirror display
    debug_image = copy.deepcopy(image)

    # Saving gesture id for drone controlling
    gesture_id = -1

    # Detection implementation #############################################################
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    image.flags.writeable = False
    results = hands.process(image)
    image.flags.writeable = True

    #  ####################################################################
    if results.multi_hand_landmarks is not None:
        for hand_landmarks, handedness in zip(results.multi_hand_landmarks,
                                              results.multi_handedness):
            # Bounding box calculation
            # brect = self._calc_bounding_rect(debug_image, hand_landmarks)
            # Landmark calculation
            landmark_list = _calc_landmark_list(debug_image, hand_landmarks)

            # Conversion to relative coordinates / normalized coordinates
            pre_processed_landmark_list = _pre_process_landmark(landmark_list)
            # pre_processed_point_history_list = self._pre_process_point_history(
            #     debug_image, self.point_history)

            # Write to the dataset file
            # self._logging_csv(number, mode, pre_processed_landmark_list, pre_processed_point_history_list)

            # Hand sign classification
            hand_sign_id = keypoint_classifier(pre_processed_landmark_list)
            # print("Hand Sign: {}".format(keypoint_classifier_labels[hand_sign_id]))
            # if hand_sign_id == 2:  # Point gesture
            #     self.point_history.append(landmark_list[8])
            # else:
            #     self.point_history.append([0, 0])

            # Finger gesture classification
#             finger_gesture_id = 0
#             point_history_len = len(pre_processed_point_history_list)
#             if point_history_len == (self.history_length * 2):
#                 finger_gesture_id = self.point_history_classifier(
#                     pre_processed_point_history_list)

#             # Calculates the gesture IDs in the latest detection
#             self.finger_gesture_history.append(finger_gesture_id)
#             most_common_fg_id = Counter(
#                 self.finger_gesture_history).most_common()

#             # Drawing part
#             debug_image = self._draw_bounding_rect(USE_BRECT, debug_image, brect)
#             debug_image = self._draw_landmarks(debug_image, landmark_list)
#             debug_image = self._draw_info_text(
#                 debug_image,
#                 brect,
#                 handedness,
#                 self.keypoint_classifier_labels[hand_sign_id],
#                 self.point_history_classifier_labels[most_common_fg_id[0][0]]
#             )

            # Saving gesture
            gesture_id = hand_sign_id
    # else:
    #     self.point_history.append([0, 0])

    # debug_image = self.draw_point_history(debug_image, self.point_history)
    
    if gesture_id == -1:
        hand_pose = "NO HAND"
    else:
        hand_pose = keypoint_classifier_labels[gesture_id]

    return debug_image, gesture_id, hand_pose, results

In [3]:
cap = cv2.VideoCapture(0)
hands, keypoint_classifier, keypoint_classifier_labels = load_model()

# Hand model
mp_hands = mp.solutions.hands

# Drawing util
mp_drawing = mp.solutions.drawing_utils

while cap.isOpened():
    # ret = return value (not needed)
    # frame = frame of footage
    ret, frame = cap.read()
    
    image = frame
    
    # Retrieve prediction for hand
    debug, gesture_id, hand_sign, results = recognize(image = frame, hands = hands, keypoint_classifier = keypoint_classifier, 
                                  keypoint_classifier_labels = keypoint_classifier_labels)
    
    # Read an image, flip it around y-axis for correct handedness output
    image = cv2.flip(image, 1)

    # Set flag to true
    image.flags.writeable = True

    # Detections
    # print(results)

    # Rendering results
    if results.multi_hand_landmarks:
        for num, hand in enumerate(results.multi_hand_landmarks):
            # mp_hands.HAND_CONNECTIONS -> telling Mediapipe the relationship between the landmarks of the hand
            # mp_drawing.DrawingSpec -> telling Mediapipe how to draw the hands onto a given frame. Note: colour 
            # here is in BGR format
            mp_drawing.draw_landmarks(image, hand, mp_hands.HAND_CONNECTIONS, 
                                    mp_drawing.DrawingSpec(color=(157, 212, 76), thickness=2, circle_radius=2),
                                    mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=2),
                                     )

    cv2.putText(image, hand_sign, (10,450), cv2.FONT_HERSHEY_SIMPLEX, 3, (0, 255, 0), 2, cv2.LINE_AA)
    
    # Display frame
    cv2.imshow('Hand Tracking', image)

    # Breaks loop if we enter 'q'
    if cv2.waitKey(10) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

640
480
[[388, 464], [336, 451], [296, 417], [268, 388], [240, 369], [332, 343], [313, 289], [301, 253], [292, 222], [367, 335], [354, 277], [344, 238], [337, 205], [402, 340], [399, 285], [394, 249], [388, 218], [438, 356], [447, 315], [448, 287], [445, 261]]
[0.0, 0.0, -0.20077220077220076, -0.05019305019305019, -0.3552123552123552, -0.18146718146718147, -0.46332046332046334, -0.29343629343629346, -0.5714285714285714, -0.3667953667953668, -0.21621621621621623, -0.4671814671814672, -0.28957528957528955, -0.6756756756756757, -0.3359073359073359, -0.8146718146718147, -0.37065637065637064, -0.9343629343629344, -0.08108108108108109, -0.4980694980694981, -0.13127413127413126, -0.722007722007722, -0.16988416988416988, -0.8725868725868726, -0.1969111969111969, -1.0, 0.05405405405405406, -0.47876447876447875, 0.04247104247104247, -0.6911196911196911, 0.023166023166023165, -0.8301158301158301, 0.0, -0.9498069498069498, 0.19305019305019305, -0.416988416988417, 0.2277992277992278, -0.57528957528