코드 출처
- https://www.youtube.com/watch?v=eHxDWhtbRCk&list=LL
- https://github.com/kairess/gesture-recognition

In [1]:
import cv2
import mediapipe as mp
import numpy as np
import time, os

In [2]:
# 학습 시킬 동작
# 아무런 동작을 하지 않는 손동작
# x1 : 손등
# x2 : 손바닥
# x3 : 
actions = ['come', 'away', 'spin', 'stop']#, 'x1', 'x2', 'x3']
# window 사이즈 지정
seq_length = 50
# 녹화할 시간 (녹화할 시간을 조정하여 데이터 사이즈 조절)
secs_for_action = 120
date = '0611+1'

# MediaPipe hands model
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(
    # 인식할 최대 손의 수
    max_num_hands=1,
    # 감지에 성공한 것으로 간주될 손모델의 최소 신뢰도 값
    min_detection_confidence=0.5,
    # tracking 감지에 성공한 것으로 간주될 손모델의 최소 신뢰도 값
    min_tracking_confidence=0.5)

In [3]:
cap = cv2.VideoCapture(0)

created_time = int(time.time())
# 저장할 디렉터리 생성
os.makedirs('dataset/{}'.format(date), exist_ok=True)

while cap.isOpened():
    for idx, action in enumerate(actions):
        data = []

        ret, img = cap.read()

        # 화면 반전
        img = cv2.flip(img, 1)

        cv2.putText(img, f'Waiting for collecting {action.upper()} action...', org=(10, 30),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, color=(255, 255, 255), thickness=2)
        cv2.imshow('img', img)
        # 3초간 대기
        cv2.waitKey(3000)

        start_time = time.time()

        while time.time() - start_time < secs_for_action:
            ret, img = cap.read()

            img = cv2.flip(img, 1)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            result = hands.process(img)
            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

            if result.multi_hand_landmarks is not None:
                for res in result.multi_hand_landmarks:
                    joint = np.zeros((21, 4))
                    for j, lm in enumerate(res.landmark):
                        # visibility 추가                       
                        joint[j] = [lm.x, lm.y, lm.z, lm.visibility]

                    # Compute angles between joints
                    v1 = joint[[0,1,2,3,0,5,6,7,0,9,10,11,0,13,14,15,0,17,18,19], :3] # Parent joint
                    v2 = joint[[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20], :3] # Child joint
                    v = v2 - v1 # [20, 3]
                    # Normalize v
                    v = v / np.linalg.norm(v, axis=1)[:, np.newaxis]

                    # Get angle using arcos of dot product
                    angle = np.arccos(np.einsum('nt,nt->n',
                        v[[0,1,2,4,5,6,8,9,10,12,13,14,16,17,18],:], 
                        v[[1,2,3,5,6,7,9,10,11,13,14,15,17,18,19],:]))

                    angle = np.degrees(angle) # Convert radian to degree

                    angle_label = np.array([angle], dtype=np.float32)
                    # 학습 시킬 동작의 라벨을 지정
                    angle_label = np.append(angle_label, idx)

                    d = np.concatenate([joint.flatten(), angle_label])

                    data.append(d)

                    mp_drawing.draw_landmarks(img, res, mp_hands.HAND_CONNECTIONS)

            cv2.imshow('img', img)
            if cv2.waitKey(1) == ord('q'):
                break

        data = np.array(data)
        print(action, data.shape)
        # raw 파일 저장
        np.save(os.path.join('dataset/{}'.format(date), f'raw_{action}_{created_time}'), data)

        # Create sequence data
        full_seq_data = []
        for seq in range(len(data) - seq_length):
            full_seq_data.append(data[seq:seq + seq_length])

        full_seq_data = np.array(full_seq_data)
        print(action, full_seq_data.shape)
        # seq 파일 저장
        np.save(os.path.join('dataset/{}'.format(date), f'seq_{action}_{created_time}'), full_seq_data)

    break

# Release video capture and close windows
cap.release()
cv2.destroyAllWindows()

come (664, 100)
come (614, 50, 100)
away (0,)
away (0,)
spin (0,)
spin (0,)
stop (0,)
stop (0,)
