In [29]:
import cv2
import numpy as np
import os
import mediapipe as mp
import pandas as pd
from IPython.display import clear_output

# Short aliases for the MediaPipe solution modules used by the cells below.
mp_hands = mp.solutions.hands  # hand landmark model (instantiated as mp_hands.Hands)
mp_faces = mp.solutions.face_detection  # face detector (instantiated as mp_faces.FaceDetection)
mp_drawing = mp.solutions.drawing_utils  # not referenced in the visible part of this file

def mediapipe_detection(image, hand_model, face_model):
    """Run the hand and face models on a BGR frame.

    The frame is converted to RGB for the models and converted back to BGR
    before being returned.

    Args:
        image: BGR image array (as returned by ``cv2.imread``).
        hand_model: object exposing ``process(rgb_image)``; called first.
        face_model: object exposing ``process(rgb_image)``; called second.

    Returns:
        Tuple ``(bgr_image, hand_results, face_results)``.
    """
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The array is read-only while the models look at it
    # (presumably so it can be handed over without a copy - confirm).
    rgb.flags.writeable = False
    hand_results, face_results = hand_model.process(rgb), face_model.process(rgb)
    rgb.flags.writeable = True

    return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR), hand_results, face_results

def extract_keypoints_v1(results):
    """Collect raw (x, y, z) hand landmarks into one array.

    Two slots are kept, each defaulting to a (21, 3) block of zeros. A
    detected hand whose handedness classification index is 0 fills the first
    slot; any other index fills the second. If several hands map to the same
    slot, the last one wins.

    Args:
        results: hand-model output exposing ``multi_hand_landmarks`` and
            ``multi_handedness`` (index-aligned sequences).

    Returns:
        Tuple ``(keypoints, visible)``: the two slots stacked along axis 0
        (shape (42, 3) when every hand has 21 landmarks) and a bool telling
        whether any hand was detected.
    """
    first_slot = np.zeros((21, 3))
    second_slot = np.zeros((21, 3))

    detected_hands = results.multi_hand_landmarks
    if not detected_hands:
        # Nothing detected: both slots stay zero-filled.
        return np.concatenate([first_slot, second_slot], axis=0), False

    for i, hand in enumerate(detected_hands):
        goes_first = results.multi_handedness[i].classification[0].index == 0
        coords = [[point.x, point.y, point.z] for point in hand.landmark]
        if goes_first:
            first_slot = coords
        else:
            second_slot = coords

    return np.concatenate([first_slot, second_slot], axis=0), True

def extract_keypoints_v2(results):
    """Return hand landmarks normalised to each hand's own position and extent.

    For every detected hand the landmarks are expressed relative to landmark 0
    and divided, per axis, by the hand's extent (max - min of the raw
    coordinates on that axis). Two slots are kept, each defaulting to a
    (21, 3) block of zeros: handedness classification index 0 fills the first
    slot, any other index the second. If several hands map to the same slot,
    the last one wins.

    Args:
        results: hand-model output exposing ``multi_hand_landmarks`` and
            ``multi_handedness`` (index-aligned sequences).

    Returns:
        Tuple ``(keypoints, found)``: the two slots stacked along axis 0
        (shape (42, 3) when every hand has 21 landmarks) and a bool telling
        whether any hand was detected.
    """
    lh = np.zeros((21, 3))
    rh = np.zeros((21, 3))
    found = False
    if results.multi_hand_landmarks:
        found = True

        for index, hand_landmarks in enumerate(results.multi_hand_landmarks):
            coords = np.array(
                [[res.x, res.y, res.z] for res in hand_landmarks.landmark]
            )
            # Landmark 0 is the origin of the hand-local frame.
            relative = coords - coords[0]
            size = coords.max(axis=0) - coords.min(axis=0)
            # A hand that is perfectly flat along an axis has zero extent
            # there; every relative coordinate on that axis is 0 as well, so
            # dividing would yield 0/0 = NaN. Use a unit divisor instead so
            # the axis stays at 0.
            size = np.where(size == 0, 1.0, size)
            normalised = relative / size
            if results.multi_handedness[index].classification[0].index == 0:
                lh = normalised
            else:
                rh = normalised
    return np.concatenate([lh, rh], axis=0), found

def extract_keypoints_v3(hand_results, face_results, w, h):
    """Return normalised hand landmarks plus one face-relative feature row.

    Hand part: for every detected hand, x and y are expressed relative to
    landmark 0 (z is kept as reported) and each axis is divided by the hand's
    extent (max - min of the raw coordinates). Handedness classification
    index 0 fills the first (21, 3) slot, any other index the second; unused
    slots stay zero.

    Face part (the last output row): ``[dx, dy, approx_z]`` divided by
    ``[face_width, face_height, 1]`` where

    * ``dx, dy`` is the offset from the hand centre (mean landmark x/y) to the
      centre of the face keypoints, for the closest hand/face pairing seen;
      it stays at the sentinel 1000 when no face is detected,
    * ``approx_z`` is ``face_width * face_height * 100 - hand_size`` computed
      for the last hand processed,
    * the face extent is taken over the keypoints of *all* detections.

    Args:
        hand_results: hand-model output exposing ``multi_hand_landmarks`` and
            ``multi_handedness`` (index-aligned sequences).
        face_results: face-model output exposing ``detections`` whose items
            carry ``location_data.relative_keypoints``.
        w: image width in pixels, used to scale the hand-size estimate.
        h: image height in pixels, used to scale the hand-size estimate.

    Returns:
        Tuple ``(keypoints, found)``: the two hand slots and the face row
        stacked along axis 0 (shape (43, 3) when every hand has 21 landmarks)
        and a bool telling whether any hand was detected.
    """
    lh = np.zeros((21, 3))
    rh = np.zeros((21, 3))
    face_hand_dif = [1000, 1000, 1000]
    face_size = np.ones(2)
    found = False
    if hand_results.multi_hand_landmarks:
        found = True

        for index, hand_landmarks in enumerate(hand_results.multi_hand_landmarks):
            coords = np.array(
                [[res.x, res.y, res.z] for res in hand_landmarks.landmark]
            )
            ref = coords[0]
            # Only x and y are made relative to landmark 0; z is left as-is.
            all_pos = coords - np.array([ref[0], ref[1], 0.0])
            hand_center = coords[:, :2].mean(axis=0)

            # Face bounding extent is rebuilt for every hand from all
            # detections; the starting values assume coordinates in [0, 1].
            max_face_pos = [0, 0]
            min_face_pos = [1, 1]
            if face_results.detections:
                for detection in face_results.detections:
                    keypoints = detection.location_data.relative_keypoints
                    face_center = [0, 0]
                    for keypoint in keypoints:
                        point = np.array([keypoint.x, keypoint.y])
                        face_center = [pos + val for pos, val in zip(face_center, point)]
                        min_face_pos = np.minimum(min_face_pos, point)
                        max_face_pos = np.maximum(max_face_pos, point)
                    face_center = [pos / len(keypoints) for pos in face_center]
                    dif = [face - hand for face, hand in zip(face_center, hand_center)]
                    # Placeholder z so the L1 comparison below has three terms.
                    dif.append(1000)
                    if sum(abs(val) for val in dif) < sum(abs(val) for val in face_hand_dif):
                        face_hand_dif = dif
            # NOTE(review): with no face detected this is [0, 0] - [1, 1] =
            # [-1, -1], which flips the sign of the 1000 sentinels in the
            # final division. Kept as-is because already-extracted data may
            # depend on it - confirm whether that is intended.
            face_size = np.array(max_face_pos) - np.array(min_face_pos)

            size = coords.max(axis=0) - coords.min(axis=0)
            # Pixel-scaled distance between landmarks 1 and 2; the factors
            # 2000, 10 and 100 look like empirical scale constants.
            hand_size = (
                abs((all_pos[1][0] - all_pos[2][0]) * w)
                + abs((all_pos[1][1] - all_pos[2][1]) * h)
                + abs((all_pos[1][2] - all_pos[2][2]) * 2000)
            ) / 10
            approx_z = face_size[0] * face_size[1] * 100 - hand_size
            face_hand_dif[2] = approx_z

            # A hand with zero extent along an axis would divide by zero and
            # put NaN/inf into the features; use a unit divisor there.
            size = np.where(size == 0, 1.0, size)
            normalised = all_pos / size
            if hand_results.multi_handedness[index].classification[0].index == 0:
                lh = normalised
            else:
                rh = normalised
    face_hand_dif = np.array(face_hand_dif)
    face_size = np.append(face_size, [1])
    # Same guard for a degenerate (zero-width or zero-height) face extent.
    face_size = np.where(face_size == 0, 1, face_size)
    face_hand_dif = face_hand_dif / face_size
    face_hand_dif = face_hand_dif.reshape(1, face_hand_dif.shape[0])
    return np.concatenate([lh, rh, face_hand_dif], axis=0), found

# Root directory the extracted keypoint arrays are written to.
KEYPOINT_PATH = "Keypoint Dataset"
# Root directory of the image dataset; its sub-folders are read below.
DATA_PATH = "terbisa.v3i.multiclass"

# exist_ok makes re-running the cell harmless while still surfacing real
# failures (e.g. missing permissions) instead of silently ignoring them.
os.makedirs(KEYPOINT_PATH, exist_ok=True)

In [30]:
# Sub-directories of KEYPOINT_PATH, one per keypoint extraction variant.
_KEYPOINT_VERSIONS = ("V1", "V2", "V3")


def _save_next(version, name, keypoints):
    """Save `keypoints` as the next numbered file of class `name` under `version`."""
    class_dir = os.path.join(KEYPOINT_PATH, version, name)
    # The file name is the current number of entries in the class directory;
    # np.save appends the ".npy" suffix.
    np.save(os.path.join(class_dir, str(len(os.listdir(class_dir)))), keypoints)


def _extract_and_save(frame, name, hands, faces):
    """Run detection on `frame` and save every keypoint variant that found a hand."""
    image, hand_results, face_results = mediapipe_detection(frame, hands, faces)
    height, width = image.shape[:2]
    extracted = (
        ("V1", extract_keypoints_v1(hand_results)),
        ("V2", extract_keypoints_v2(hand_results)),
        ("V3", extract_keypoints_v3(hand_results, face_results, width, height)),
    )
    for version, (keypoints, use) in extracted:
        if use:
            _save_next(version, name, keypoints)


with mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.1) as hands, mp_faces.FaceDetection(min_detection_confidence=0.5) as faces:
    for folder in os.listdir(DATA_PATH):
        folder_path = os.path.join(DATA_PATH, folder)
        # Skip stray files next to the split folders; only directories can
        # hold a _classes.csv.
        if not os.path.isdir(folder_path):
            continue
        print("Current folder: " + folder)
        df = pd.read_csv(os.path.join(folder_path, "_classes.csv"))
        # The first column holds the file name; every other column is a
        # 0/1 class indicator.
        for className in df.columns[1:]:
            # Column headers may carry surrounding whitespace.
            name = className.split()[0]
            print("Current class: " + name)
            # Create each output directory independently so that one already
            # existing directory cannot prevent the others from being made.
            for version in _KEYPOINT_VERSIONS:
                os.makedirs(os.path.join(KEYPOINT_PATH, version, name), exist_ok=True)
            for _, data in df[df[className] == 1].iterrows():
                image_path = os.path.join(folder_path, data['filename'])
                frame = cv2.imread(image_path)
                if frame is None:
                    # cv2.imread signals a missing/unreadable file with None.
                    print("Skipping unreadable image: " + image_path)
                    continue

                _extract_and_save(frame, name, hands, faces)

                # NOTE(review): mirrored copies are added for every folder
                # except 'train' - presumably because the train split is
                # already augmented; confirm this is intended.
                if folder != 'train':
                    _extract_and_save(cv2.flip(frame, 1), name, hands, faces)
                    


Current folder: test
Current class: Anda
Current class: Apa
Current class: Berhenti
Current class: Bodoh
Current class: Cantik
Current class: Halo
Current class: Hati-hati
Current class: Lelah
Current class: Maaf
Current class: Makan
Current class: Mau-Ingin
Current class: Membaca
Current class: Nama
Current class: Sama-sama
Current class: Saya
Current class: Siapa
Current class: Sombong
Current class: Takut
Current class: Terima
Current folder: train
Current class: Anda
Current class: Apa
Current class: Berhenti
Current class: Bodoh
Current class: Cantik
Current class: Halo
Current class: Hati-hati
Current class: Lelah
Current class: Maaf
Current class: Makan
Current class: Mau-Ingin
Current class: Membaca
Current class: Nama
Current class: Sama-sama
Current class: Saya
Current class: Siapa
Current class: Sombong
Current class: Takut
Current class: Terima
Current folder: valid
Current class: Anda
Current class: Apa
Current class: Berhenti
Current class: Bodoh
Current class: Cantik
Cur