In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import mediapipe as mp

from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard
import math

In [2]:
# Shortcuts to the MediaPipe Holistic solution and its drawing helpers.
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Single Holistic instance that the live loop passes to mediapipe_detection
# for every captured frame.
holistic = mp_holistic.Holistic(
    min_detection_confidence=0.8,
    min_tracking_confidence=0.5,
)

In [3]:
# Folder names, relative to the working directory. None of the cells visible
# in this notebook read them.
IMAGESET_FOLDER = 'Imageset'
KEYPOINT_FOLDER = 'Extracted Hand Landmarks'
DATASET_FOLDER = 'Dataset'

# Class labels: the model's output layer has one unit per entry and the live
# loop maps the arg-max index back to a label through this array.
actions = np.array(['Kol', 'live'])

# no_frames matches the 30-frame window used by the model input and live loop.
# NOTE(review): no_sequences is not used in the visible cells - presumably the
# number of recorded sequences per action; confirm.
no_sequences, no_frames = 120, 30

In [4]:
def mediapipe_detection(image, holistic):
    """Run a holistic landmark model on one BGR frame.

    Args:
        image: Frame in BGR channel order (as produced by OpenCV capture).
        holistic: Object exposing ``process(rgb_image)``; the notebook passes
            its shared MediaPipe ``Holistic`` instance.

    Returns:
        tuple: ``(bgr_image, result)`` - the frame converted to RGB and back
        to BGR, and whatever ``holistic.process`` returned for it.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The buffer is marked read-only only for the duration of the model call.
    rgb_frame.flags.writeable = False
    detection = holistic.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), detection

In [5]:
def render_landmarks(image, results):
    """Draw the left- and right-hand landmark skeletons on ``image``.

    Args:
        image: Frame handed to ``mp_drawing.draw_landmarks`` as the canvas.
        results: Detection result exposing ``left_hand_landmarks`` and
            ``right_hand_landmarks``.

    Returns:
        None.
    """
    # Left hand first, then right - same order as the two explicit calls.
    for hand_landmarks in (results.left_hand_landmarks, results.right_hand_landmarks):
        mp_drawing.draw_landmarks(image, hand_landmarks, mp_holistic.HAND_CONNECTIONS)

In [6]:
def get_true_coords(results):
    """Collect the raw (x, y, z) hand-landmark coordinates of both hands.

    Args:
        results: Object with ``left_hand_landmarks`` and
            ``right_hand_landmarks`` attributes. Each is either falsy or has a
            ``landmark`` iterable whose items expose ``x``, ``y`` and ``z``.

    Returns:
        tuple: ``(l_coords, r_coords)``, two float arrays of shape (21, 3).
        A hand without landmarks is returned as all zeros.
    """
    def _hand_to_array(hand):
        # Start from zeros so a missing hand yields an all-zero block.
        coords = np.zeros(shape=(21, 3), dtype=float)
        if hand:
            for row, point in enumerate(hand.landmark):
                coords[row] = (point.x, point.y, point.z)
        return coords

    left = _hand_to_array(results.left_hand_landmarks)
    right = _hand_to_array(results.right_hand_landmarks)
    return left, right

In [7]:
def get_shifted_coords(coords_array, num):
    """Translate a hand so that its first landmark sits on a fixed anchor.

    The same offset (``anchor - coords_array[0]``) is applied to every other
    landmark, so the hand's shape is preserved and only its position changes.

    Args:
        coords_array: Array-like of (x, y, z) rows, shape (21, 3). Row 0 is the
            reference landmark.
        num: ``0`` selects the left-hand anchor; any other value selects the
            right-hand anchor.

    Returns:
        numpy.ndarray: Float array of shape (21, 3). Row 0 equals the chosen
        anchor; row ``i`` equals ``coords_array[i] + (anchor - coords_array[0])``.
    """
    # Fixed target positions for landmark 0; the two hands differ only in x.
    left_anchor = np.array([8.24e-01, 6.7e-01, 2.25e-07])
    right_anchor = np.array([1.39e-01, 6.7e-01, 2.25e-07])
    anchor = left_anchor if num == 0 else right_anchor

    coords = np.asarray(coords_array, dtype=float)
    offset = anchor - coords[0]

    shifted_coords_array = np.zeros(shape=(21, 3), dtype=float)
    # Row 0 is written as the anchor itself rather than coords[0] + offset so
    # it is exact and free of floating-point round-off.
    shifted_coords_array[0] = anchor
    shifted_coords_array[1:len(coords)] = coords[1:] + offset

    return shifted_coords_array

In [8]:
def hand_wrist_distance(coords_array):
    """Compute the 3-D distance of every landmark from landmark 0.

    Args:
        coords_array: Array-like of (x, y, z) rows (21 rows in this notebook).

    Returns:
        numpy.ndarray: Float array of shape (21, 1). Entry ``i`` is the
        Euclidean distance between row ``i`` and row 0, so entry 0 is 0.
    """
    wrist_distance = np.zeros(shape=(21, 1), dtype=float)

    points = np.asarray(coords_array, dtype=float)
    if points.size:
        # Offsets of every landmark from landmark 0, x/y/z columns only.
        offsets = points[:, :3] - points[0, :3]
        squared = offsets[:, 0] ** 2 + offsets[:, 1] ** 2 + offsets[:, 2] ** 2
        wrist_distance[:len(points), 0] = squared ** 0.5

    return wrist_distance

In [9]:
def left_right_hand_distance(l_true_coords, r_true_coords):
    """Compute the planar distance between matching left/right landmarks.

    Only the x and y columns are used; z is ignored.

    Args:
        l_true_coords: Array-like of left-hand (x, y, z) rows.
        r_true_coords: Array-like of right-hand (x, y, z) rows, with at least
            as many rows as ``l_true_coords``.

    Returns:
        numpy.ndarray: Float array of shape (21, 1). Entry ``i`` is the x/y
        distance between left row ``i`` and right row ``i``.
    """
    l_r_hand_distance = np.zeros(shape=(21, 1), dtype=float)

    left = np.asarray(l_true_coords, dtype=float)
    right = np.asarray(r_true_coords, dtype=float)
    if left.size:
        rows = len(left)
        delta_x = left[:, 0] - right[:rows, 0]
        delta_y = left[:, 1] - right[:rows, 1]
        l_r_hand_distance[:rows, 0] = (delta_x ** 2 + delta_y ** 2) ** 0.5

    return l_r_hand_distance

In [10]:
def calculate_angles(results):
    """Measure four arm-joint angles from the pose landmarks.

    For each (p1, p2, p3) landmark triple the angle at ``p2`` is the difference
    between the directions ``p2 -> p3`` and ``p2 -> p1`` in the x/y plane.

    Args:
        results: Detection result exposing ``pose_landmarks``, which is either
            falsy or has an indexable ``landmark`` sequence whose items expose
            ``x`` and ``y``.

    Returns:
        numpy.ndarray: Float array of shape (4, 1) holding angles in degrees,
        ordered right elbow, right shoulder, left shoulder, left elbow.
        Negative differences are wrapped by adding 360. All zeros when
        ``results.pose_landmarks`` is falsy.
    """
    # right_elbow, right_shoulder, left_shoulder, left_elbow
    four_joint_angles = np.zeros(shape=(4, 1), dtype=float)

    if not results.pose_landmarks:
        return four_joint_angles

    landmarks = results.pose_landmarks.landmark
    # Each triple is (first end, vertex, second end) as pose-landmark indices.
    angle_joints = ((16, 14, 12), (11, 12, 14), (13, 11, 12), (11, 13, 15))

    for joint_index, (first, vertex, second) in enumerate(angle_joints):
        p1, p2, p3 = landmarks[first], landmarks[vertex], landmarks[second]

        angle = math.degrees(
            math.atan2(p3.y - p2.y, p3.x - p2.x)
            - math.atan2(p1.y - p2.y, p1.x - p2.x)
        )
        if angle < 0:
            angle += 360

        # Stored in degrees on purpose: the original never used its radian or
        # /360 variants, so the feature scale must stay as it is.
        four_joint_angles[joint_index] = angle

    return four_joint_angles

In [11]:
def nose_to_joint(results):
    """Compute the x/y distance from pose landmark 0 to six arm joints.

    Args:
        results: Detection result exposing ``pose_landmarks``, which is either
            falsy or has an indexable ``landmark`` sequence whose items expose
            ``x`` and ``y``.

    Returns:
        numpy.ndarray: Float array of shape (6, 1), ordered right wrist, right
        elbow, right shoulder, left shoulder, left elbow, left wrist. All zeros
        when ``results.pose_landmarks`` is falsy.
    """
    nose_to_joints_distance = np.zeros(shape=(6, 1), dtype=float)

    pose = results.pose_landmarks
    if pose:
        nose = pose.landmark[0]
        # right_wrist, right_elbow, right_shoulder, left_shoulder, left_elbow, left_wrist
        for slot, joint_id in enumerate((16, 14, 12, 11, 13, 15)):
            joint = pose.landmark[joint_id]
            nose_to_joints_distance[slot] = (
                (nose.x - joint.x) ** 2 + (nose.y - joint.y) ** 2
            ) ** 0.5

    return nose_to_joints_distance

In [12]:
def extract_feature_points(results):
    """Build the flat per-frame feature vector fed to the sequence model.

    Layout of the 199 values, in order:
        * 63 - left-hand coordinates after ``get_shifted_coords`` (21 x 3)
        * 63 - right-hand coordinates after ``get_shifted_coords`` (21 x 3)
        * 21 - left-hand landmark-to-landmark-0 distances
        * 21 - right-hand landmark-to-landmark-0 distances
        * 21 - left/right matching-landmark distances (only when both hands
          are present)
        *  4 - joint angles from ``calculate_angles``
        *  6 - distances from ``nose_to_joint``

    Any group whose inputs are missing stays at zero.

    Args:
        results: Detection result (or a falsy value). When truthy it must
            expose ``left_hand_landmarks``, ``right_hand_landmarks`` and
            ``pose_landmarks``.

    Returns:
        numpy.ndarray: 1-D float array of length 199. A falsy ``results``
        yields an all-zero vector.
    """
    l_shift_coords = np.zeros(shape=(21, 3), dtype=float)
    r_shift_coords = np.zeros(shape=(21, 3), dtype=float)

    l_h_wrist_distance = np.zeros(shape=(21, 1), dtype=float)
    r_h_wrist_distance = np.zeros(shape=(21, 1), dtype=float)

    l_r_hand_distance = np.zeros(shape=(21, 1), dtype=float)

    nose_joint_distances = np.zeros(shape=(6, 1), dtype=float)

    four_joint_angles = np.zeros(shape=(4, 1), dtype=float)

    if results:
        l_true_coords, r_true_coords = get_true_coords(results)

        four_joint_angles = calculate_angles(results)

        nose_joint_distances = nose_to_joint(results)

        if results.left_hand_landmarks:
            l_shift_coords = get_shifted_coords(l_true_coords, 0)
            l_h_wrist_distance = hand_wrist_distance(l_shift_coords)

        if results.right_hand_landmarks:
            r_shift_coords = get_shifted_coords(r_true_coords, 1)
            r_h_wrist_distance = hand_wrist_distance(r_shift_coords)

        if results.left_hand_landmarks and results.right_hand_landmarks:
            # Uses the unshifted coordinates: shifting moves each hand to its
            # own anchor and would erase the real separation between them.
            l_r_hand_distance = left_right_hand_distance(l_true_coords, r_true_coords)

    # Returned outside the `if` so callers always get a vector, never None.
    return np.concatenate((
        l_shift_coords.flatten(),
        r_shift_coords.flatten(),
        l_h_wrist_distance.flatten(),
        r_h_wrist_distance.flatten(),
        l_r_hand_distance.flatten(),
        four_joint_angles.flatten(),
        nose_joint_distances.flatten(),
    ))

In [13]:
# Stacked-LSTM classifier over windows of 30 frames x 199 features per frame
# (199 is the length of the vector built by extract_feature_points).
# The softmax layer has one unit per entry in `actions`.
model = Sequential([
    LSTM(64, return_sequences=True, activation='relu', input_shape=(30, 199)),
    LSTM(128, return_sequences=True, activation='relu'),
    LSTM(64, return_sequences=False, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(actions.shape[0], activation='softmax'),
])
model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
)

In [14]:
# Load previously saved weights into the architecture built in the cell above.
# The relative path is resolved against the current working directory.
# NOTE(review): the final Dense layer is sized from `actions` (2 labels here),
# while a recorded output further down shows a 27-value probability vector.
# Confirm that `actions` matches the label set this weights file was trained on.
model.load_weights('DualFixed.h5')

In [16]:
# Live recognition loop: grab webcam frames, extract features, classify the
# last `no_frames` frames and overlay the label once it has been stable.

# Rolling window of per-frame feature vectors (newest last).
sequence = []
sentence = []
old_pred_char = ''
new_pred_char = ''
display_pred_char = ''
pred_char = ''
# Number of consecutive windows that produced the same label.
count = 0
# NOTE(review): `threshold` is defined but never applied to the prediction
# confidence; only the stability counter gates the overlay. Confirm intent.
threshold = 0.9

k = 0

res2 = []

cap = cv2.VideoCapture(0)

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered (camera unplugged, busy or stream ended);
            # passing the empty frame on would crash inside cv2.cvtColor.
            break

        image, results = mediapipe_detection(frame, holistic)

        render_landmarks(image, results)

        keypoints = extract_feature_points(results)
        sequence.append(keypoints)
        sequence = sequence[-no_frames:]

        if len(sequence) == no_frames:
            old_pred_char = new_pred_char
            if results.left_hand_landmarks or results.right_hand_landmarks:
                res2 = model.predict(np.expand_dims(sequence, axis = 0))[0]
                best_idx = np.argmax(res2)
                print(res2[best_idx])
                new_pred_char = actions[best_idx]
                print(new_pred_char)
            else:
                # No hand in the current frame: emit the placeholder label.
                new_pred_char = '-'
                print(new_pred_char)
            if new_pred_char != '-':
                if old_pred_char == new_pred_char:
                    count += 1
                else:
                    count = 0

        # Mirror the frame for display, then draw the label box on top.
        flipImg = cv2.flip(image, 1)
        cv2.rectangle(flipImg, (0,0), (100,80), (245, 117, 16), -1)

        if count >= 15:
            cv2.putText(flipImg, new_pred_char, (30,60), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 2, cv2.LINE_AA)

        cv2.imshow("Live Feed", flipImg)
        k += 1
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if a frame fails
    # mid-loop; otherwise the device stays locked until the kernel restarts.
    cap.release()
    cv2.destroyAllWindows()

1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
0.9999999
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
0.99999857
Kol
0.63920134
Kol
1.0
live
0.99999905
live
0.99999285
live
0.99999726
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
-
-
1.0
live
-
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1.0
live
1

0.9999945
live
0.99770385
live
0.96652913
live
0.9564718
live
0.9790758
live
0.9994667
live
0.99946827
live
0.9872312
live
0.9785544
live
0.9818709
live
0.97316426
live
0.9815819
live
0.9964619
live
0.9990631
live
0.99309945
live
0.994991
live
0.9915113
live
0.9478778
live
0.84931624
live
0.90536416
live
0.991223
live
0.98288083
live
0.9890104
live
0.9970536
live
0.9994504
live
0.99959236
live
0.999624
live
0.99966776
live
0.99991703
live
0.99996436
live
0.99987006
live
0.9701269
live
0.75082695
live
0.99154824
live
0.9555167
live
0.99999416
live
0.9999994
live
0.99999964
live
0.9999974
live
0.9999665
live
0.99847037
live
1.0
live
1.0
live
0.99467105
live
0.9999902
live
0.9999993
live
0.99999917
live
0.9999778
live
0.99859923
live
0.9520419
live
1.0
live
0.99993956
live
0.99669814
live
0.99978167
live
0.9998097
live
0.99943537
live
0.9893526
live
0.9651438
live
1.0
live
1.0
live
0.9998888
live
0.99905294
live
0.99998724
live
0.9999876
live
0.8069667
live
1.0
live
1.0
live
1.0
live
-
0.

In [52]:
# Ad-hoc check: classify the frame window left in `sequence` by the live loop
# and display the model's output vector for that single window.
# NOTE(review): the recorded output below has 27 values, but `actions` in this
# notebook lists 2 labels and the execution counter is out of order. This
# output was presumably produced with different kernel state; re-run from a
# fresh kernel to confirm.
model.predict(np.expand_dims(sequence, axis = 0))[0]

array([2.7543650e-25, 4.9682480e-13, 2.8047471e-12, 2.4476992e-18,
       1.0827762e-25, 4.6328294e-26, 2.1591019e-11, 6.3763430e-12,
       1.9572707e-01, 4.9124755e-31, 5.1900216e-13, 2.3983701e-18,
       1.1483107e-21, 1.8021233e-03, 2.8595546e-11, 3.6872138e-23,
       2.5746452e-19, 4.4249836e-07, 1.6651842e-12, 6.6458467e-19,
       1.8910617e-24, 2.2940356e-28, 1.7729408e-16, 8.0238372e-01,
       3.6692164e-13, 1.1962736e-15, 8.6629218e-05], dtype=float32)

In [47]:
# Displays the full rolling window of per-frame feature vectors left behind by
# the live loop (a list of 1-D arrays), printed in its entirety.
sequence

[array([ 8.24000000e-01,  6.70000000e-01,  2.25000000e-07,  1.39000000e-01,
         6.70000000e-01,  2.25000000e-07,  0.00000000e+00,  0.00000000e+00,
         7.66440748e-01,  6.41275687e-01,  2.08571111e-02,  1.74996586e-01,
         6.19896157e-01, -6.85045227e-04,  6.76251677e-02,  6.16978031e-02,
         7.19456791e-01,  6.40505059e-01,  2.60217102e-02,  1.86962070e-01,
         5.61276062e-01, -2.39204696e-04,  1.11697591e-01,  1.18833128e-01,
         6.87686591e-01,  6.47061272e-01,  2.52043070e-02,  1.90208645e-01,
         5.18397851e-01, -3.41436828e-03,  1.40508990e-01,  1.60053730e-01,
         6.64145737e-01,  6.57169385e-01,  2.33503436e-02,  1.79943503e-01,
         4.86354632e-01, -7.03304682e-03,  1.62059366e-01,  1.88285577e-01,
         6.86218171e-01,  6.25522120e-01,  1.79619720e-02,  1.61980601e-01,
         4.96586723e-01,  1.56739712e-02,  1.45892901e-01,  1.75630119e-01,
         6.25064849e-01,  6.36481447e-01,  1.21110534e-02,  1.62204535e-01,
         4.3