In [21]:
import cv2
import numpy as np
import os
import mediapipe as mp

In [22]:
# Short aliases for the MediaPipe modules used throughout the notebook.
mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic

# One shared Holistic model instance; it is passed to `mediapipe_detection`
# for every frame processed further down.
holistic = mp_holistic.Holistic(
    min_detection_confidence=0.8,
    min_tracking_confidence=0.5,
)

In [23]:
# Folder names, relative to the working directory.
IMAGESET_FOLDER = 'Imageset'                  # input frames: <action>/<sequence>/<frame>.jpg
KEYPOINT_FOLDER = 'Extracted Hand Landmarks'  # output: frames with landmarks drawn on them
DATASET_FOLDER = 'Dataset'                    # output: per-frame feature vectors (.npy)

# Labels to process; each label is also used as a sub-folder name.
actions = np.array([
    'I',
    'jayga',
    'hello',
    'my',
    'india',
])
# Alternative label set (disabled):
# actions = np.array(['A', 'B', 'J', 'N', 'W', "teacher", "live", "Kol", "sign", "language"])

# Number of sequences per label, and number of frames per sequence.
no_sequences, no_frames = 30, 30

In [24]:
def mediapipe_detection(image, holistic):
    """Run a holistic model on a BGR frame.

    Args:
        image: Frame in BGR channel order (as produced by ``cv2.imread``).
        holistic: Object exposing ``process(rgb_image)``.

    Returns:
        A ``(image, result)`` tuple: a BGR frame obtained by converting the
        input to RGB and back, and whatever ``holistic.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The frame is flagged read-only only for the duration of the model call.
    rgb_frame.flags.writeable = False
    detection = holistic.process(rgb_frame)
    rgb_frame.flags.writeable = True

    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, detection

In [25]:
def render_landmarks(image, results):
    """Draw the left-hand and then the right-hand landmarks onto ``image``.

    Args:
        image: Frame handed to ``mp_drawing.draw_landmarks``.
        results: Object exposing ``left_hand_landmarks`` and
            ``right_hand_landmarks``; each is forwarded as-is.
    """
    both_hands = (results.left_hand_landmarks, results.right_hand_landmarks)
    for hand_landmarks in both_hands:
        mp_drawing.draw_landmarks(image, hand_landmarks, mp_holistic.HAND_CONNECTIONS)

In [26]:
def get_coords(results):
    """Collect both hands' landmark coordinates and shift them via ``coord_shift``.

    Args:
        results: Object exposing ``left_hand_landmarks`` and
            ``right_hand_landmarks``. Each is either falsy (hand not detected)
            or has a ``landmark`` iterable whose items carry ``x``, ``y``, ``z``.

    Returns:
        ``(left, right)``. For a detected hand the entry is the output of
        ``coord_shift(raw, 0)`` (left) or ``coord_shift(raw, 1)`` (right), where
        ``raw`` is a ``(21, 3)`` float array of the landmark coordinates; for a
        missing hand it is a ``(21, 3)`` array of zeros.
    """
    shifted_per_hand = []
    hands = ((results.left_hand_landmarks, 0), (results.right_hand_landmarks, 1))

    for hand_landmarks, side in hands:
        if hand_landmarks:
            raw = np.zeros((21, 3), dtype=float)
            for row, point in enumerate(hand_landmarks.landmark):
                raw[row] = (point.x, point.y, point.z)
            shifted_per_hand.append(coord_shift(raw, side))
        else:
            # Hand not detected: keep an all-zero placeholder.
            shifted_per_hand.append(np.zeros((21, 3), dtype=float))

    left_shifted, right_shifted = shifted_per_hand
    return left_shifted, right_shifted

In [27]:
# Fixed (x, y, z) positions onto which the first landmark of each hand is moved.
_LEFT_HAND_ANCHOR = (8.24e-01, 6.7e-01, 2.25e-07)
_RIGHT_HAND_ANCHOR = (1.39e-01, 6.7e-01, 2.25e-07)


def coord_shift(coords_array, num):
    """Translate a hand so that its first landmark lands on a fixed anchor.

    Every row is moved by the same offset, so the positions of the landmarks
    relative to row 0 are unchanged; only the absolute position of the hand
    is normalised.

    Args:
        coords_array: Array-like of shape ``(N, 3)`` holding ``x, y, z`` per
            landmark (callers in this notebook pass ``(21, 3)``). Row 0 is the
            reference landmark. Only the first three columns are used.
        num: ``0`` selects the left-hand anchor; any other value selects the
            right-hand anchor.

    Returns:
        A new ``(N, 3)`` float array; the input is not modified.
    """
    coords = np.asarray(coords_array, dtype=float)[:, :3]
    anchor = np.array(_LEFT_HAND_ANCHOR if num == 0 else _RIGHT_HAND_ANCHOR)

    # One broadcast replaces the per-row loop: add (anchor - row 0) to every row.
    shifted_coords_array = coords + (anchor - coords[0])

    # Write row 0 explicitly so it equals the anchor exactly rather than
    # `a + (t - a)`, which can differ from `t` by floating-point rounding.
    shifted_coords_array[0] = anchor
    return shifted_coords_array

In [28]:
def hand_wrist_distance(coords_array):
    """Euclidean distance from the first landmark (row 0) to every landmark.

    Args:
        coords_array: Array-like of shape ``(N, 3)`` holding ``x, y, z`` per
            landmark (callers in this notebook pass ``(21, 3)``). Only the
            first three columns are used.

    Returns:
        Float array of shape ``(N, 1)``; entry ``i`` is the distance between
        row ``i`` and row 0, so the first entry is always ``0``.
    """
    coords = np.asarray(coords_array, dtype=float)[:, :3]

    # Vectorised replacement for the per-row sqrt(dx^2 + dy^2 + dz^2) loop.
    distances = np.linalg.norm(coords - coords[0], axis=1)

    # Keep the column-vector layout expected by `np.column_stack` consumers.
    return distances.reshape(-1, 1)

In [29]:
def extract_feature_points(results):
    """Build the flat per-frame feature vector from a detection result.

    For each of the 21 landmark rows the layout is
    ``[left x, y, z, right x, y, z, left distance, right distance]``, where the
    coordinates come from ``get_coords`` and the distances from
    ``hand_wrist_distance``. Rows are concatenated into a 1-D array of
    ``21 * 8 = 168`` floats.

    Args:
        results: Detection result exposing ``left_hand_landmarks`` and
            ``right_hand_landmarks``. A falsy ``results`` yields all zeros.

    Returns:
        1-D float array of length 168. The distance column of a hand that was
        not detected is zero.
    """
    if not results:
        return np.zeros(21 * 8, dtype=float)

    left_xyz, right_xyz = get_coords(results)

    left_dist = (
        hand_wrist_distance(left_xyz)
        if results.left_hand_landmarks
        else np.zeros((21, 1), dtype=float)
    )
    right_dist = (
        hand_wrist_distance(right_xyz)
        if results.right_hand_landmarks
        else np.zeros((21, 1), dtype=float)
    )

    per_landmark = np.column_stack((left_xyz, right_xyz, left_dist, right_dist))
    return per_landmark.flatten()

In [30]:
# For every (action, sequence, frame) image: detect hand landmarks, save a copy
# of the frame with the landmarks drawn on it, and save the frame's feature
# vector as a .npy file.
for action, action_name in enumerate(actions):
    for seqNo in range(no_sequences):
        source_image_path = os.path.join(IMAGESET_FOLDER, action_name, str(seqNo))
        saved_image_path = os.path.join(KEYPOINT_FOLDER, action_name, str(seqNo))
        npy_path = os.path.join(DATASET_FOLDER, action_name, str(seqNo))

        # Create the output folders once per sequence. `exist_ok=True` tolerates
        # re-runs without hiding real failures (e.g. permission errors), which
        # a bare `except: pass` would swallow.
        os.makedirs(saved_image_path, exist_ok=True)
        os.makedirs(npy_path, exist_ok=True)

        for frameNo in range(no_frames):
            frame_file = os.path.join(source_image_path, str(frameNo) + '.jpg')
            image = cv2.imread(frame_file)
            if image is None:
                # cv2.imread signals a missing/unreadable file by returning
                # None; fail here with the path instead of deep inside cv2.flip.
                raise FileNotFoundError(f"Could not read input frame: {frame_file}")

            # Detection and feature extraction run on the horizontally
            # mirrored frame (presumably to match a mirrored capture view --
            # TODO confirm).
            image = cv2.flip(image, 1)

            image, results = mediapipe_detection(image, holistic)

            render_landmarks(image, results)

            frame_features = extract_feature_points(results)

            # Undo the mirroring so the saved frame has the source orientation.
            image = cv2.flip(image, 1)

            cv2.imwrite(os.path.join(saved_image_path, str(frameNo) + '.jpg'), image)

            # np.save appends the ".npy" extension to the frame number.
            np.save(os.path.join(npy_path, str(frameNo)), frame_features)