## Feature extraction
Using the mediapipe framework

In [2]:
%pip install mediapipe

Note: you may need to restart the kernel to use updated packages.


In [3]:
# import libraries
import numpy as np
import matplotlib.pyplot as plt
import cv2
import mediapipe as mp
from glob import glob
import os
import copy
import itertools

In [4]:
# load dataset
root_path = "D:/NLP internship/ArSL dataset/RGB ArSL dataset/"
# load labels
# Collect all alphapets
alphabets = os.listdir(root_path)
# load image paths
filepaths = [path  for name in alphabets for path in glob(root_path + name + '/*')]

In [5]:
# create labels list
labels = np.empty(shape=(len(filepaths), 1), dtype=np.int32)
for idx, path in enumerate(filepaths):
    try:
        # get the label of the image from its path: which would be the directory name
        label = [name for name in alphabets if name in path][0]
        # ensure uniform capitalization
        label = alphabets.index(label.title())
        # add it to list of labels
        labels[idx] = label
    except OSError:
        print(f"Error loading image: {path}     \nSkipping...")
# clip to range between 0-30 (total 29 labels)
labels = np.clip(labels, 0, 30)
labels

array([[ 0],
       [ 0],
       [ 0],
       ...,
       [30],
       [30],
       [30]])

In [6]:
# # Create dictionary to store the label as key and file paths as values
# def alphabet_dict(root_path, alphabets):
#     """ A method to load the file path of each 
#     letter as values and the letter label as key
    
#     Parameters:
#     root_path (string): containg the path to the dataset folder
#     alpahbets (array-like sequence): a list of the available labels/ letters

#     Returns:
#     filepaths (dict): key -> label, value -> list of file paths of images corresponding to that label 
#     """
#     # create empty dict to be returned
#     filepaths = dict()

#     for letter in alphabets:
#         # load image paths at their corresponding key
#         filepaths[letter] = [path  for name in alphabets for path in glob(root_path + name + '/*')]

#     return filepaths

In [14]:
all_hands_landmarks =[]
skipped_images = []
# using mediapipe to get the coordinnates of the hands landmark
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands
with mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=1,
    min_detection_confidence=0.5) as hands:
  for idx, file in enumerate(filepaths):
    # Read an image, flip it around y-axis for correct handedness output (see
    # above).
    image = cv2.flip(cv2.imread(file), 1)
    # Convert the BGR image to RGB before processing.
    results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    # Print handedness and draw hand landmarks on the image. (left or right)
    #print('Handedness:', results.multi_handedness)
    if not results.multi_hand_landmarks:
      skipped_images.append(file)
      continue
    image_height, image_width, _ = image.shape
    annotated_image = image.copy()

    all_hands_landmarks.append(results.multi_hand_landmarks)

    # for hand_landmarks in results.multi_hand_landmarks:
    #   print('hand_landmarks:', hand_landmarks)
    #   print(
    #       f'Index finger tip coordinates: (',
    #       f'{hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * image_width}, '
    #       f'{hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * image_height})'
    #   )
    #   mp_drawing.draw_landmarks(
    #       annotated_image,
    #       hand_landmarks,
    #       mp_hands.HAND_CONNECTIONS,
    #       mp_drawing_styles.get_default_hand_landmarks_style(),
    #       mp_drawing_styles.get_default_hand_connections_style())
    # cv2.imwrite(
    #     '/tmp/annotated_image' + str(idx) + '.png', cv2.flip(annotated_image, 1))
    # # Draw hand world landmarks.
    # if not results.multi_hand_world_landmarks:
    #   continue
    # for hand_world_landmarks in results.multi_hand_world_landmarks:
    #   mp_drawing.plot_landmarks(
    #     hand_world_landmarks, mp_hands.HAND_CONNECTIONS, azimuth=5)

KeyboardInterrupt: 

In [8]:
def pre_process_landmark(landmark_list):
    temp_landmark_list = copy.deepcopy(landmark_list)

    # Convert to relative coordinates
    base_x, base_y = 0, 0
    for index, landmark_point in enumerate(temp_landmark_list):
        if index == 0:
            base_x, base_y = landmark_point[0], landmark_point[1]

        temp_landmark_list[index][0] = temp_landmark_list[index][0] - base_x
        temp_landmark_list[index][1] = temp_landmark_list[index][1] - base_y

    # Convert to a one-dimensional list
    temp_landmark_list = list(
        itertools.chain.from_iterable(temp_landmark_list))

    # Normalization
    max_value = max(list(map(abs, temp_landmark_list)))

    def normalize_(n):
        return n / max_value

    temp_landmark_list = list(map(normalize_, temp_landmark_list))

    return temp_landmark_list

In [9]:
def calc_landmark_list(image, landmarks):
    image_width, image_height = image.shape[1], image.shape[0]

    landmark_point = []

    # Keypoint
    for _, landmark in enumerate(landmarks.landmark):
        landmark_x = min(int(landmark.x * image_width), image_width - 1)
        landmark_y = min(int(landmark.y * image_height), image_height - 1)
        # landmark_z = landmark.z

        landmark_point.append([landmark_x, landmark_y])

    return landmark_point

In [10]:
all_hands_landmarks[0][0]

landmark {
  x: 0.722896814
  y: 0.767472386
  z: -3.02403976e-007
}
landmark {
  x: 0.681494355
  y: 0.588606
  z: 0.0386095792
}
landmark {
  x: 0.588793695
  y: 0.475019753
  z: -0.000455980509
}
landmark {
  x: 0.474425554
  y: 0.44633767
  z: -0.037453644
}
landmark {
  x: 0.376007676
  y: 0.448455
  z: -0.0809956715
}
landmark {
  x: 0.579672337
  y: 0.377137393
  z: -0.188864887
}
landmark {
  x: 0.369623721
  y: 0.374690562
  z: -0.211965725
}
landmark {
  x: 0.260349274
  y: 0.383481801
  z: -0.211963952
}
landmark {
  x: 0.160177797
  y: 0.385234296
  z: -0.221360758
}
landmark {
  x: 0.568345189
  y: 0.48718971
  z: -0.221175492
}
landmark {
  x: 0.32528609
  y: 0.48816222
  z: -0.275250733
}
landmark {
  x: 0.178635687
  y: 0.495432079
  z: -0.294293195
}
landmark {
  x: 0.0686121
  y: 0.510011554
  z: -0.32496506
}
landmark {
  x: 0.539507806
  y: 0.608280182
  z: -0.225175709
}
landmark {
  x: 0.364411294
  y: 0.600304604
  z: -0.206843406
}
landmark {
  x: 0.427899122
  

In [11]:
all_normalized_landmarks = []
for path, landmark_list in zip(filepaths, all_hands_landmarks):
    # print(path)
    # print(landmark_list[0])
    # for hand_landmarks in landmark_list[0]:
    normalized_landmarks = calc_landmark_list(cv2.imread(path), landmark_list[0])
    normalized_landmarks = pre_process_landmark(normalized_landmarks)
    all_normalized_landmarks.append(normalized_landmarks)
len(all_normalized_landmarks[0])

42

In [12]:
print(filepaths)

['D:/NLP internship/ArSL dataset/RGB ArSL dataset/Ain\\Ain_0.jpg', 'D:/NLP internship/ArSL dataset/RGB ArSL dataset/Ain\\Ain_1.jpg', 'D:/NLP internship/ArSL dataset/RGB ArSL dataset/Ain\\Ain_10.jpeg', 'D:/NLP internship/ArSL dataset/RGB ArSL dataset/Ain\\Ain_100.jpg', 'D:/NLP internship/ArSL dataset/RGB ArSL dataset/Ain\\Ain_101.jpg', 'D:/NLP internship/ArSL dataset/RGB ArSL dataset/Ain\\Ain_102.jpg', 'D:/NLP internship/ArSL dataset/RGB ArSL dataset/Ain\\Ain_103.jpg', 'D:/NLP internship/ArSL dataset/RGB ArSL dataset/Ain\\Ain_104.jpg', 'D:/NLP internship/ArSL dataset/RGB ArSL dataset/Ain\\Ain_105.jpg', 'D:/NLP internship/ArSL dataset/RGB ArSL dataset/Ain\\Ain_106.jpg', 'D:/NLP internship/ArSL dataset/RGB ArSL dataset/Ain\\Ain_107.jpeg', 'D:/NLP internship/ArSL dataset/RGB ArSL dataset/Ain\\Ain_108.jpg', 'D:/NLP internship/ArSL dataset/RGB ArSL dataset/Ain\\Ain_109.jpg', 'D:/NLP internship/ArSL dataset/RGB ArSL dataset/Ain\\Ain_11.jpg', 'D:/NLP internship/ArSL dataset/RGB ArSL dataset/Ai