In [29]:
import csv
import copy
import argparse
import itertools
import os
from collections import Counter
from collections import deque


import cv2 as cv
import numpy as np
import mediapipe as mp

In [30]:
def pre_process_landmark(landmark_list):
    """Convert landmark points into a flat, normalized feature vector.

    Each point's first two coordinates are made relative to the first
    point in the list, the points are flattened into a one-dimensional
    list, and every value is divided by the largest absolute value so
    the result lies in [-1, 1].

    Args:
        landmark_list: Sequence of points, each an indexable ``[x, y, ...]``.
            The input is not modified.

    Returns:
        A flat list of floats. An empty input yields ``[]``. If every
        value is zero after the shift (all points coincide with the
        first one), a list of ``0.0`` is returned instead of dividing
        by zero.
    """
    if not landmark_list:
        return []

    # The first landmark is the origin for the relative coordinates.
    base_x, base_y = landmark_list[0][0], landmark_list[0][1]

    # Shift x/y and flatten in one pass; any extra per-point components
    # beyond x and y are carried through unchanged.
    flat = []
    for point in landmark_list:
        flat.append(point[0] - base_x)
        flat.append(point[1] - base_y)
        flat.extend(point[2:])

    # Normalization
    max_value = max(abs(value) for value in flat)
    if max_value == 0:
        # Degenerate case: nothing to scale, and dividing would raise.
        return [0.0 for _ in flat]

    return [value / max_value for value in flat]

In [31]:
def calc_landmark_list(image, landmarks):
    """Map normalized landmarks to integer pixel coordinates of ``image``.

    Args:
        image: Object exposing ``shape`` as ``(height, width, ...)``.
        landmarks: Object whose ``landmark`` attribute is an iterable of
            items with ``x`` and ``y`` attributes, scaled here by the
            image width and height respectively.

    Returns:
        A list of ``[x, y]`` integer pairs, one per landmark. Each value
        is capped at the last valid column/row index; no lower bound is
        applied.
    """
    height = image.shape[0]
    width = image.shape[1]
    last_col = width - 1
    last_row = height - 1

    # Only x and y are kept; the z component is ignored.
    return [
        [
            min(int(point.x * width), last_col),
            min(int(point.y * height), last_row),
        ]
        for point in landmarks.landmark
    ]

In [32]:
def logging_csv(number, landmark_list, csv_path='./keypoint.csv'):
    """Append one labelled landmark row to a CSV file.

    Args:
        number: Integer class label. Rows are written only when
            ``0 <= number <= 30``; other values are silently ignored.
        landmark_list: Iterable of feature values written after the label.
        csv_path: Destination file, opened in append mode. Defaults to
            ``'./keypoint.csv'``, the path this function always used
            before the parameter existed.

    Returns:
        None.
    """
    if not 0 <= number <= 30:
        return

    # newline="" lets the csv module control line endings itself.
    with open(csv_path, 'a', newline="") as f:
        csv.writer(f).writerow([number, *landmark_list])

In [33]:
# Shortcuts to the MediaPipe solution modules used by this notebook.
# NOTE(review): mp_drawing_styles and mp_drawing are not referenced in the
# cells visible here — confirm they are needed by a later cell.
mp_drawing_styles = mp.solutions.drawing_styles

mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
# Opens capture device index 0 as soon as this cell runs.
# NOTE(review): no visible cell reads from or releases `webcam`; if it is
# only needed for live inference, consider opening it there instead.
webcam=cv.VideoCapture(0)
# Hand detector limited to a single hand per frame.
# NOTE(review): static_image_mode=False configures the solution for a video
# stream; confirm this is intended wherever `hands` is fed unrelated still
# images.
hands = mp_hands.Hands(
        static_image_mode=False,
        max_num_hands=1,
        min_detection_confidence=0.4,
        min_tracking_confidence=0.5,
    )

# Root folder expected to contain one sub-directory of images per class.
dataset_train = './dataset/asl_alphabet_train'

In [34]:
# Build the landmark dataset: for every class directory under
# `dataset_train`, detect a hand in each image and append its normalized
# landmarks to the CSV via logging_csv, labelled with the directory's
# position in `list_dir`.
#
# NOTE: logging_csv appends, so re-running this cell adds duplicate rows
# unless the CSV is removed first.
# NOTE: os.listdir returns entries in arbitrary order, so the label index
# assigned to each class depends on the filesystem listing.

# Upper bound on the number of images with a detected hand used per class.
MAX_SAMPLES_PER_CLASS = 2000

imgs=[]
labels=[]
count=0
i=0
list_dir=os.listdir(dataset_train)

# The dataset consists of independent still images, so a dedicated detector
# in static-image mode is used: every image gets a fresh detection instead
# of being tracked from the previous, unrelated one. It is a separate
# instance so the module-level `hands` object is left untouched and open.
with mp_hands.Hands(
        static_image_mode=True,
        max_num_hands=1,
        min_detection_confidence=0.4,
    ) as static_hands:
    for label in range(len(list_dir)):
        class_name = list_dir[label]
        print("Loading images form ", class_name)
        class_dir = os.path.join(dataset_train, class_name)

        # Reset for every class; previously the counter was only reset when
        # the cap was reached, so a class with fewer detections reduced the
        # quota of the class after it.
        count = 0
        for img_name in os.listdir(class_dir):
            if count >= MAX_SAMPLES_PER_CLASS:
                break

            img = cv.imread(os.path.join(class_dir, img_name))
            if img is None:
                # Unreadable or non-image file: cv.imread signals failure by
                # returning None, which cv.cvtColor cannot process.
                continue

            results = static_hands.process(cv.cvtColor(img, cv.COLOR_BGR2RGB))
            if results.multi_hand_landmarks:
                count += 1
                for hand_landmarks in results.multi_hand_landmarks:
                    # Only the image shape is read here, so no copy is needed.
                    landmark_list = calc_landmark_list(img, hand_landmarks)
                    pre_processed_landmark_list = pre_process_landmark(landmark_list)
                    logging_csv(i, pre_processed_landmark_list)
        i += 1

Loading images form  A
Loading images form  B
Loading images form  C
Loading images form  D
Loading images form  del
Loading images form  E
Loading images form  F
Loading images form  G
Loading images form  H
Loading images form  I
Loading images form  J
Loading images form  K
Loading images form  L
Loading images form  M
Loading images form  N
Loading images form  O
Loading images form  P
Loading images form  Q
Loading images form  R
Loading images form  S
Loading images form  space
Loading images form  T
Loading images form  U
Loading images form  V
Loading images form  W
Loading images form  X
Loading images form  Y
Loading images form  Z


: 