In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import mediapipe as mp

from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard
import tensorflow as tf

In [2]:
# MediaPipe modules used by render_landmarks (drawing helper + hand connection table).
mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic

# Single Holistic tracker instance; the live-feed cell passes it to mediapipe_detection.
holistic = mp_holistic.Holistic(
    min_detection_confidence=0.8,
    min_tracking_confidence=0.5,
)

In [3]:
# Directory names for the image set, extracted landmarks and dataset.
# NOTE(review): none of the three is read by the visible cells -- presumably
# used by data-collection/training code elsewhere.
IMAGESET_FOLDER = 'Imageset'
KEYPOINT_FOLDER = 'Extracted Hand Landmarks'
DATASET_FOLDER = 'Dataset'

# Class labels; the classifier's softmax layer has one unit per entry, in this order.
# '--' is a class label of its own and is distinct from the single '-' placeholder
# that the live-feed cell uses when both hands are not detected.
actions = np.array([
    '--',
    'A', 'B', 'I_lett', 'J', 'N', 'W',
    'language', 'sign', 'teacher',
])

# NOTE(review): presumably the number of recorded sequences per action -- confirm.
no_sequences = 120
# Frames per sequence; matches the 30-step input window of the model.
no_frames = 30

In [4]:
def mediapipe_detection(image, holistic):
    """Run a MediaPipe model on one BGR frame.

    Args:
        image: frame in OpenCV's BGR channel order.
        holistic: object exposing ``process(rgb_image)`` (the Holistic tracker).

    Returns:
        ``(bgr_image, result)`` -- the frame converted back to BGR and
        whatever ``holistic.process`` returned for it.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The frame is flagged read-only only for the duration of process();
    # MediaPipe's own examples do this so the image can be passed by reference.
    rgb_frame.flags.writeable = False
    detection = holistic.process(rgb_frame)
    rgb_frame.flags.writeable = True

    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, detection

In [5]:
def render_landmarks(image, results):
    """Draw the left- and right-hand landmarks with their connections on ``image``.

    Nothing is returned; the drawing is done by ``mp_drawing.draw_landmarks``
    on the image object that is passed in.
    """
    both_hands = (results.left_hand_landmarks, results.right_hand_landmarks)
    for hand_landmarks in both_hands:
        mp_drawing.draw_landmarks(image, hand_landmarks, mp_holistic.HAND_CONNECTIONS)

In [6]:
def get_true_coords(results):
    """Collect the raw (x, y, z) landmark coordinates of both hands.

    Args:
        results: detection result exposing ``left_hand_landmarks`` and
            ``right_hand_landmarks``; each is either falsy (hand not detected)
            or has a ``landmark`` sequence of points with ``x``/``y``/``z``.

    Returns:
        ``(left, right)`` -- two float arrays of shape (21, 3). Row ``i`` holds
        landmark ``i``; a hand that was not detected is returned as all zeros.
    """
    def _landmark_matrix(hand):
        # Start from zeros so a missing hand (or missing rows) stays zero-filled.
        matrix = np.zeros(shape=(21, 3), dtype=float)
        if hand:
            for row, point in enumerate(hand.landmark):
                matrix[row] = (point.x, point.y, point.z)
        return matrix

    left_matrix = _landmark_matrix(results.left_hand_landmarks)
    right_matrix = _landmark_matrix(results.right_hand_landmarks)
    return left_matrix, right_matrix

In [7]:
def get_shifted_coords(coords_array, num):
    """Translate a hand so that its first landmark (row 0) lands on a fixed anchor.

    Every landmark is moved by the same offset ``anchor - coords_array[0]``,
    so the hand's shape is preserved while its absolute position in the frame
    is discarded. Row 0 is the point ``hand_wrist_distance`` measures from.

    Args:
        coords_array: (n, 3) array-like of x, y, z landmark coordinates
            (n is 21 for the hands used in this notebook). Not modified.
        num: ``0`` selects the left-hand anchor; any other value selects the
            right-hand anchor.

    Returns:
        New float array of shape (n, 3) with row 0 exactly equal to the anchor.

    Raises:
        IndexError: if ``coords_array`` has no rows.
    """
    # Fixed target positions for row 0 of each hand.
    # NOTE(review): the values look empirically chosen; their origin is not
    # visible in this notebook.
    left_anchor = (8.24e-01, 6.7e-01, 2.25e-07)
    right_anchor = (1.39e-01, 6.7e-01, 2.25e-07)
    anchor = np.array(left_anchor if num == 0 else right_anchor, dtype=float)

    coords = np.asarray(coords_array, dtype=float)[:, :3]

    # One vectorised translation instead of a per-row Python loop.
    shifted = coords + (anchor - coords[0])
    # Pin row 0 explicitly so it is bit-exact rather than subject to rounding.
    shifted[0] = anchor
    return shifted

In [8]:
def hand_wrist_distance(coords_array):
    """Euclidean (x, y, z) distance of every landmark from row 0 of the same hand.

    Args:
        coords_array: (n, 3) array-like of landmark coordinates; row 0 is the
            reference point (n is 21 for the hands used in this notebook).

    Returns:
        Float array of shape (n, 1); entry 0 is always 0.0.

    Raises:
        IndexError: if ``coords_array`` has no rows.
    """
    coords = np.asarray(coords_array, dtype=float)[:, :3]
    offsets = coords - coords[0]
    # Output is sized from the input instead of a hard-coded 21 rows.
    return np.sqrt((offsets ** 2).sum(axis=1)).reshape(-1, 1)

In [9]:
def left_right_hand_distance(l_true_coords, r_true_coords):
    """Distance in the x/y plane between matching landmarks of the two hands.

    Only the x and y columns are used; z is ignored.
    NOTE(review): presumably because each hand's z is reported relative to
    that hand rather than in a shared frame -- confirm against MediaPipe docs.

    Args:
        l_true_coords: (n, >=2) array-like of left-hand coordinates.
        r_true_coords: array-like with at least n rows of right-hand
            coordinates; rows beyond n are ignored.

    Returns:
        Float array of shape (n, 1) where row ``i`` is the distance between
        landmark ``i`` of the left hand and landmark ``i`` of the right hand.

    Raises:
        IndexError: if ``r_true_coords`` has fewer rows than ``l_true_coords``.
    """
    left_xy = np.asarray(l_true_coords, dtype=float)[:, :2]
    right_xy = np.asarray(r_true_coords, dtype=float)[:, :2]

    rows = len(left_xy)
    if len(right_xy) < rows:
        # Fail loudly instead of letting NumPy broadcast mismatched hands.
        raise IndexError(
            "r_true_coords has {} rows but l_true_coords has {}".format(len(right_xy), rows)
        )

    gaps = left_xy - right_xy[:rows]
    return np.sqrt((gaps ** 2).sum(axis=1)).reshape(-1, 1)

In [10]:
def extract_feature_points(results):
    """Build the flat per-frame feature vector from a detection result.

    The vector is the concatenation, in this order, of:
      1. left-hand coordinates shifted by ``get_shifted_coords(..., 0)``
      2. right-hand coordinates shifted by ``get_shifted_coords(..., 1)``
      3. left-hand ``hand_wrist_distance`` values
      4. right-hand ``hand_wrist_distance`` values
      5. ``left_right_hand_distance`` between the raw (unshifted) hands

    With 21 landmarks per hand this is 63 + 63 + 21 + 21 + 21 = 189 values,
    matching the model's per-frame input width. A part stays all-zero when
    the hand(s) it needs were not detected, or when ``results`` is falsy.

    Args:
        results: detection result with ``left_hand_landmarks`` and
            ``right_hand_landmarks`` attributes, or a falsy value.

    Returns:
        1-D float array of features.
    """
    def _blank(columns):
        return np.zeros(shape=(21, columns), dtype=float)

    # Zero-filled defaults for every part of the feature vector.
    l_shift_coords, r_shift_coords = _blank(3), _blank(3)
    l_h_wrist_distance, r_h_wrist_distance = _blank(1), _blank(1)
    l_r_hand_distance = _blank(1)

    if results:
        l_true_coords, r_true_coords = get_true_coords(results)
        left_seen = bool(results.left_hand_landmarks)
        right_seen = bool(results.right_hand_landmarks)

        if left_seen:
            l_shift_coords = get_shifted_coords(l_true_coords, 0)
            l_h_wrist_distance = hand_wrist_distance(l_shift_coords)

        if right_seen:
            r_shift_coords = get_shifted_coords(r_true_coords, 1)
            r_h_wrist_distance = hand_wrist_distance(r_shift_coords)

        # The inter-hand distance is only meaningful when both hands are present.
        if left_seen and right_seen:
            l_r_hand_distance = left_right_hand_distance(l_true_coords, r_true_coords)

    feature_parts = (
        l_shift_coords,
        r_shift_coords,
        l_h_wrist_distance,
        r_h_wrist_distance,
        l_r_hand_distance,
    )
    return np.concatenate([part.ravel() for part in feature_parts])

In [11]:
# Stacked-LSTM classifier over windows of 30 frames x 189 features per frame
# (189 = 63 + 63 + 21 + 21 + 21, the layout produced by extract_feature_points).
model = Sequential([
    LSTM(64, return_sequences=True, activation='relu', input_shape=(30, 189)),
    LSTM(128, return_sequences=True, activation='relu'),
    # Last recurrent layer returns only its final step, feeding the dense head.
    LSTM(64, return_sequences=False, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    # One softmax unit per entry of `actions`.
    Dense(actions.shape[0], activation='softmax'),
])
model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
)

In [12]:
# Restore trained weights from an HDF5 file into the model built above.
# NOTE(review): the path is relative to the working directory, and the file
# presumably was saved from this same architecture (10 output classes) -- confirm.
model.load_weights('DualFlexible.h5')

In [13]:
# y_pred = model.predict(x_train)

In [14]:
# temp = tf.Variable(initial_value=1.0, trainable=True, dtype=tf.float32) 

In [15]:
# def compute_loss():
#     y_pred_model_w_temp = tf.math.divide(y_pred, temp)
#     loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(\
#                                 tf.convert_to_tensor(y_train), y_pred_model_w_temp))
#     return loss

In [16]:
# optimizer = tf.optimizers.Adam(learning_rate=0.01)

# print('Temperature Initial value: {}'.format(temp.numpy()))

# for i in range(300):
#     opts = optimizer.minimize(compute_loss, var_list=[temp])


# print('Temperature Final value: {}'.format(temp.numpy()))

In [17]:
# Live sign recognition from the default webcam.
#
# Per frame: detect landmarks -> build the feature vector -> keep a rolling
# window of the last `no_frames` vectors -> classify the window once it is full
# and both hands are visible. A label is drawn only after it has been repeated
# often enough in a row. Press 'q' in the preview window to stop.

sequence = []            # rolling window of per-frame feature vectors
old_pred_char = ''       # label predicted for the previous frame
new_pred_char = ''       # label predicted for the current frame
count = 0                # consecutive repeats of the same label
res2 = np.zeros(len(actions))   # most recent class-probability vector

# NOTE(review): `threshold` is defined but never applied to `res2` in this
# cell, so low-confidence predictions still count towards `count`.
threshold = 0.9

# Not read by this cell; kept because other cells may rely on them.
sentence = []
display_pred_char = ''
pred_char = ''
k = 0                    # number of frames shown so far

cap = cv2.VideoCapture(0)

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret or frame is None:
            # No frame was delivered (camera unplugged, busy or stream ended);
            # stop instead of passing None to cv2.cvtColor.
            break

        image, results = mediapipe_detection(frame, holistic)
        render_landmarks(image, results)

        sequence.append(extract_feature_points(results))
        sequence = sequence[-no_frames:]

        if len(sequence) == no_frames:
            old_pred_char = new_pred_char
            if results.left_hand_landmarks and results.right_hand_landmarks:
                # verbose=0 keeps Keras from printing a progress bar per frame.
                res2 = model.predict(np.expand_dims(sequence, axis=0), verbose=0)[0]
                best = np.argmax(res2)
                print(res2[best])
                new_pred_char = actions[best]
                print(new_pred_char)
            else:
                # Placeholder used whenever both hands are not visible.
                new_pred_char = '-'
                print(new_pred_char)

            # Track how many times in a row the same label was predicted.
            # The counter is left untouched while the placeholder is active.
            if new_pred_char != '-':
                if old_pred_char == new_pred_char:
                    count += 1
                else:
                    count = 0

        # Mirror the preview so it behaves like a mirror for the user.
        flipImg = cv2.flip(image, 1)
        cv2.rectangle(flipImg, (0, 0), (100, 80), (245, 117, 16), -1)

        # Only show a label after 20 consecutive identical predictions.
        if count >= 20:
            cv2.putText(flipImg, new_pred_char, (30, 60), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 2, cv2.LINE_AA)

        cv2.imshow("Live Feed", flipImg)
        k += 1
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if a frame failed.
    cap.release()
    cv2.destroyAllWindows()

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
0.47544807
--
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
1.0
language
1.0
language
1.0
language
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
0.4357704
A
0.49259666
A
0.41941303
A
-
-
0.60967046
N
0.6913347
N
0.75162446
N
-
-
-
-
0.90132856
N
0.9157137
N
0.9280752
N
0.9391597
N
0.9421363
N
0.93609065
N
-
-
0.5184
N
0.81610626
teacher
0.9842317
teacher
0.9991372
teacher
0.9999505
teacher
0.99998736
teacher
-
0.8523241
teacher
0.9987143
J
0.99938166
J
-
-
-
-
-
-
-
-
-
-
-
-
0.9998698
J
0.9999044
J
0.9998803
J
0.9994955
J
0.9869034
J
-
-
-
-
0.64112604
A
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
0.9999887
A
-
-
-
-
-
-
-
-
-
-
-
-
-
0.99999833
A
0.99999833
A
-
0.99999654
A
0.9999931
A

In [66]:
# NOTE(review): `first_prediction` is only assigned inside commented-out code in
# the visible cells, so this raises NameError on a fresh kernel. The saved
# output has 5 classes, while `actions` above has 10 -- presumably stale.
first_prediction.shape

(5,)

In [68]:
# NOTE(review): `y_test` is not defined in any visible cell (train_test_split is
# imported but never called), so this depends on hidden kernel state. The saved
# output is one-hot over 5 classes, not the 10 entries of `actions`.
y_test

array([[0, 0, 1, 0, 0],
       [1, 0, 0, 0, 0],
       [0, 0, 0, 0, 1],
       [0, 0, 0, 1, 0],
       [0, 0, 1, 0, 0],
       [0, 0, 0, 0, 1],
       [0, 0, 0, 0, 1],
       [0, 0, 0, 0, 1],
       [0, 0, 0, 0, 1],
       [0, 0, 0, 0, 1],
       [0, 0, 0, 1, 0],
       [0, 0, 0, 1, 0],
       [0, 0, 0, 0, 1],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [0, 1, 0, 0, 0],
       [0, 0, 0, 0, 1],
       [0, 1, 0, 0, 0],
       [0, 0, 1, 0, 0],
       [0, 1, 0, 0, 0],
       [0, 0, 1, 0, 0],
       [0, 0, 0, 1, 0],
       [0, 1, 0, 0, 0],
       [0, 0, 0, 0, 1],
       [0, 0, 0, 1, 0],
       [0, 0, 0, 0, 1],
       [0, 0, 0, 1, 0],
       [0, 0, 1, 0, 0],
       [0, 0, 0, 1, 0],
       [1, 0, 0, 0, 0]])

In [28]:
# Sanity check: the live-feed cell trims `sequence` to its last 30 frames.
len(sequence)

30

In [52]:
# Class probabilities for the current window (batch axis added, first row taken).
# NOTE(review): the saved output has 27 values, but the model built in this
# notebook ends in Dense(actions.shape[0]) with 10 actions -- the output appears
# to come from a different model/session.
model.predict(np.expand_dims(sequence, axis = 0))[0]

array([2.7543650e-25, 4.9682480e-13, 2.8047471e-12, 2.4476992e-18,
       1.0827762e-25, 4.6328294e-26, 2.1591019e-11, 6.3763430e-12,
       1.9572707e-01, 4.9124755e-31, 5.1900216e-13, 2.3983701e-18,
       1.1483107e-21, 1.8021233e-03, 2.8595546e-11, 3.6872138e-23,
       2.5746452e-19, 4.4249836e-07, 1.6651842e-12, 6.6458467e-19,
       1.8910617e-24, 2.2940356e-28, 1.7729408e-16, 8.0238372e-01,
       3.6692164e-13, 1.1962736e-15, 8.6629218e-05], dtype=float32)

In [42]:
# Same prediction as above but keeping the batch axis (no [0]).
# NOTE(review): `qw` is not read by any visible cell.
qw = model.predict(np.expand_dims(sequence, axis = 0))