In [None]:
import cv2
import numpy as np
import mediapipe as mp
from IPython.display import clear_output
import IPython.display

In [None]:
# Short aliases for the MediaPipe sub-modules used by the cells below.
mp_drawing = mp.solutions.drawing_utils  # supplies draw_landmarks(), called in main()
mp_hands = mp.solutions.hands  # supplies Hands and HAND_CONNECTIONS

In [None]:
def classify_landmark(landmark):
    """Split a flat landmark sequence into wrist and five finger groups.

    Args:
        landmark: Indexable, sliceable sequence of hand landmarks; indices
            0..20 are read.

    Returns:
        A six-element list ``[wrist, thumb, index, middle, ring, pinky]``
        where ``wrist`` is ``landmark[0]`` and every finger is a slice of
        four consecutive landmarks (1-4, 5-8, 9-12, 13-16, 17-20).
    """
    points_per_finger = 4
    groups = [landmark[0]]
    # Fingers start right after the wrist and are laid out back to back.
    for first in range(1, 21, points_per_finger):
        groups.append(landmark[first:first + points_per_finger])
    return groups

In [None]:
def is_on(idx, finger, landmark_label):
    """Decide whether a single finger counts as extended.

    Args:
        idx: Position of the finger in the thumb-first finger list; ``0``
            selects the thumb rule, anything else the generic finger rule.
        finger: Sequence of landmark points exposing ``.x`` and ``.y``.
        landmark_label: Handedness label; only the exact string ``"Right"``
            is treated specially, every other value takes the opposite rule.

    Returns:
        ``True`` when the finger is considered extended, else ``False``.
    """
    if idx != 0:
        # Non-thumb fingers: the last point must have a smaller y than the
        # first one (presumably "higher" in image coordinates, where y grows
        # downward -- confirm against the landmark convention).
        return bool(finger[0].y > finger[-1].y)

    # Thumb: compare the x of the last two points; the meaning of the
    # comparison flips with the handedness label.
    last_left_of_previous = bool(finger[-1].x < finger[-2].x)
    if landmark_label == "Right":
        return last_left_of_previous
    return not last_left_of_previous

In [None]:
def gesture_recognition(finger_is_on):
    """Translate five finger states into a digit gesture.

    Args:
        finger_is_on: Sequence of five truthy/falsy flags ordered
            thumb, index, middle, ring, pinky.

    Returns:
        An int 0-9 for the recognised patterns, otherwise the string
        ``"Wrong Gesture"``. Only "staircase" patterns of the four non-thumb
        fingers are accepted (index..pinky raised contiguously starting
        from the index finger); the thumb selects between two digit sets.
    """
    thumb_raised = finger_is_on[0]
    pattern = (
        bool(finger_is_on[1]),
        bool(finger_is_on[2]),
        bool(finger_is_on[3]),
        bool(finger_is_on[4]),
    )

    # pattern of (index, middle, ring, pinky) -> (digit without thumb, digit with thumb)
    digits = {
        (True, True, True, True): (4, 5),
        (True, True, True, False): (3, 9),
        (True, True, False, False): (2, 8),
        (True, False, False, False): (1, 7),
        (False, False, False, False): (0, 6),
    }

    if pattern not in digits:
        return "Wrong Gesture"

    without_thumb, with_thumb = digits[pattern]
    return with_thumb if thumb_raised else without_thumb

In [None]:
def recognition(landmark, handness):
    """Recognise the gesture shown by one detected hand.

    Args:
        landmark: Object whose ``.landmark`` attribute holds the hand's
            landmark sequence.
        handness: Object whose ``.classification[0].label`` is the
            handedness label passed on to ``is_on``.

    Returns:
        Whatever ``gesture_recognition`` returns for the per-finger states.
    """
    points = landmark.landmark
    label = handness.classification[0].label

    # Drop the wrist entry; only the five finger groups are classified.
    fingers = classify_landmark(points)[1:]
    states = [is_on(position, finger, label) for position, finger in enumerate(fingers)]
    return gesture_recognition(states)

In [None]:
def get_output(idx, min_count=10):
    """Flush the gesture buffer of one hand slot into the global ``output``.

    Every distinct gesture that was recorded strictly more than
    ``min_count`` times is kept, in order of first appearance; the
    ``"Wrong Gesture"`` marker is skipped. The kept gestures are concatenated
    as text and appended to ``output`` as a single string. Nothing is
    appended when no gesture qualifies.

    The slot's buffer is always emptied, even when nothing is emitted, so
    samples of a hand that has vanished cannot be counted towards the next
    hand that occupies the same slot.

    Args:
        idx: Index into the global ``_output`` list of per-hand buffers.
        min_count: A gesture must occur more than this many times to be
            kept. Defaults to 10.

    Returns:
        None.
    """
    global _output, output

    samples = _output[idx]
    _output[idx] = []

    # Single pass count; dict insertion order keeps first-appearance order.
    occurrences = {}
    for gesture in samples:
        occurrences[gesture] = occurrences.get(gesture, 0) + 1

    floor = "".join(
        str(gesture)
        for gesture, seen in occurrences.items()
        if seen > min_count and gesture != "Wrong Gesture"
    )
    if floor:
        output.append(floor)
    return None

In [None]:
def get_euclidean_distance(a, b):
    """Return the Euclidean (L2) distance between two points.

    Both arguments are converted to float arrays first, so plain lists and
    tuples are accepted in addition to NumPy arrays, and unsigned integer
    arrays cannot wrap around during the subtraction.

    Args:
        a: First point, any array-like of numbers.
        b: Second point, array-like broadcast-compatible with ``a``.

    Returns:
        The norm of ``a - b`` as computed by ``np.linalg.norm``.
    """
    first = np.asarray(a, dtype=float)
    second = np.asarray(b, dtype=float)
    return np.linalg.norm(first - second)

In [None]:
def main(image, results):
    """Annotate one frame and update the per-hand tracking state.

    For every detected hand the landmarks, a bounding box, the slot number,
    the recognised gesture and the handedness label are drawn onto ``image``.
    The number of detected hands is then compared with ``current_hand`` (the
    count from the previous call) to decide what to do with the global state:

    * more hands than before: only the stored box centres are refreshed;
    * fewer hands than before: the buffer of the hand that vanished is
      flushed through ``get_output``;
    * same number: centres are refreshed and the gestures of this frame are
      appended to the per-slot buffers in ``_output``.

    Args:
        image: Frame to draw on (modified in place); must expose ``.shape``
            as ``(height, width, channels)`` when hands are present.
        results: Detection result exposing ``multi_hand_landmarks`` and
            ``multi_handedness`` (both ``None`` when no hand is found).

    Returns:
        The same ``image`` object that was passed in.
    """
    global current_hand
    global output, _output
    global mean_xy

    multi_hand_landmarks = results.multi_hand_landmarks
    multi_handedness = results.multi_handedness
    # Treat ``None`` and an empty list alike: no hands in this frame.
    detected = len(multi_hand_landmarks) if multi_hand_landmarks else 0

    # A change is only meaningful relative to a non-empty previous frame;
    # the first appearance (0 -> n) is handled by the steady-state branch.
    is_increased = current_hand != 0 and detected > current_hand
    is_decreased = detected < current_hand

    _mean_xy = []
    _gesture = []

    if detected:
        h, w, _ = image.shape
        text_colour = (0, 255, 255)
        # Hands are visited last-to-first, so entry 0 of the per-frame lists
        # belongs to the last detected hand.
        for idx in reversed(range(detected)):
            hand = multi_hand_landmarks[idx]
            label = multi_handedness[idx].classification[0].label
            mp_drawing.draw_landmarks(image, hand, mp_hands.HAND_CONNECTIONS)

            # Landmark coordinates are scaled by the frame size to get pixels.
            xs = [point.x for point in hand.landmark]
            ys = [point.y for point in hand.landmark]
            min_x, max_x = int(min(xs) * w), int(max(xs) * w)
            min_y, max_y = int(min(ys) * h), int(max(ys) * h)

            cv2.rectangle(image, (min_x-10, min_y-10), (max_x+10, max_y+10), text_colour, 2)

            gesture = recognition(hand, multi_handedness[idx])

            captions = (
                "No.{} hand".format(idx),
                "Gesture: {}".format(gesture),
                "This is {} hand".format(label),
            )
            # Captions are stacked below the box, 30 px apart.
            for row, caption in enumerate(captions, start=1):
                cv2.putText(image, caption, (min_x-10, max_y + 30 * row),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, text_colour, 2)

            _mean_xy.append(np.array([(min_x+max_x)/2, (min_y+max_y)/2]))
            _gesture.append(gesture)

    if is_increased:
        mean_xy[0] = _mean_xy[0]
        if current_hand == 1:
            mean_xy[1] = _mean_xy[1]

    elif is_decreased:
        if current_hand == 1:
            get_output(0)
        elif current_hand == 2:
            if not _mean_xy:
                # Both hands disappeared in the same frame: there is no
                # survivor to compare against, so flush both slots instead
                # of indexing into the empty centre list.
                get_output(0)
                get_output(1)
            else:
                vanishing_index = find_vanishing(_mean_xy)
                get_output(vanishing_index)
                if vanishing_index == 0:
                    # find_vanishing moved the survivor's centre into slot 0;
                    # move its gesture buffer along so its samples stay
                    # together and slot 1 starts empty for the next hand.
                    _output[0] = _output[1]
                    _output[1] = []
    else:
        if detected:
            mean_xy[0] = _mean_xy[0]
            _output[0].append(_gesture[0])

            if current_hand == 2:
                mean_xy[1] = _mean_xy[1]
                _output[1].append(_gesture[1])

    current_hand = detected

    return image

In [None]:
def find_appearing(_mean_xy):
    """Pick a hand slot by comparing stored centres with the first new centre.

    NOTE(review): the logic is the same as ``find_vanishing`` and this
    function is not called anywhere in the visible notebook -- confirm
    whether it is still needed.

    Args:
        _mean_xy: Per-frame list of box centres; only element 0 is read.

    Returns:
        ``0`` if the stored centre of slot 0 is farther from ``_mean_xy[0]``
        than the stored centre of slot 1, otherwise ``1``.

    Side effects:
        Slot 1 of the global ``mean_xy`` is always reset to ``[]``; when
        ``0`` is returned slot 0 first receives the old slot 1 value.
    """
    global mean_xy

    reference = _mean_xy[0]
    gap_to_slot0 = get_euclidean_distance(mean_xy[0], reference)
    gap_to_slot1 = get_euclidean_distance(mean_xy[1], reference)

    slot0_is_farther = gap_to_slot0 > gap_to_slot1
    if slot0_is_farther:
        mean_xy[0] = mean_xy[1]
    mean_xy[1] = []
    return 0 if slot0_is_farther else 1

In [None]:
def find_vanishing(_mean_xy):
    """Work out which of the two tracked hand slots has disappeared.

    The centre of the remaining hand (``_mean_xy[0]``) is compared with the
    centres stored for both slots in the global ``mean_xy``; the slot whose
    stored centre is farther away is reported as the one that vanished.

    Args:
        _mean_xy: Per-frame list of box centres; only element 0 is read.

    Returns:
        ``0`` if slot 0 is the farther one, otherwise ``1`` (ties give ``1``).

    Side effects:
        Slot 1 of ``mean_xy`` is always reset to ``[]``; when ``0`` is
        returned slot 0 first takes over the old slot 1 centre.
    """
    global mean_xy

    survivor = _mean_xy[0]
    distance_slot0 = get_euclidean_distance(mean_xy[0], survivor)
    distance_slot1 = get_euclidean_distance(mean_xy[1], survivor)

    if distance_slot0 > distance_slot1:
        vanished = 0
        mean_xy[0] = mean_xy[1]
    else:
        vanished = 1
    mean_xy[1] = []
    return vanished

In [None]:
# Camera with device index 1 is opened; adjust the index for other setups.
cap = cv2.VideoCapture(1)

# Global state shared with main(), get_output() and find_vanishing().
output = []            # strings committed by get_output(), shown on screen
_output = [[], []]     # per-slot buffers of gestures seen frame by frame
mean_xy = [[], []]     # last bounding-box centre stored for each slot
current_hand = 0       # number of hands detected in the previous frame

# try/finally guarantees the camera and the preview window are released even
# when the loop raises or the kernel is interrupted.
try:
    with mp_hands.Hands(
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5,
        max_num_hands = 2) as hands:
        while cap.isOpened():
            success, image = cap.read()
            if not success:
                print("Ignoring empty camera frame.")
                continue

            # Flip the image horizontally for a later selfie-view display, and convert the BGR image to RGB.
            image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)

            # To improve performance, optionally mark the image as not writeable to pass by reference.
            image.flags.writeable = False
            results = hands.process(image)

            # Draw the hand annotations on the image.
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # Best effort: a failure while annotating one frame is reported
            # and the frame is shown as it is.
            try:
                image = main(image, results)
            except Exception as e:
                print(e)

            number_text = str(output)
            cv2.putText(image, number_text, (10, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 2)
            cv2.imshow('MediaPipe Hands', image)

            # Esc (key code 27) ends the session.
            if cv2.waitKey(5) & 0xFF == 27:
                break
finally:
    cap.release()
    cv2.destroyAllWindows()