In [3]:
import cv2
import mediapipe as mp
import numpy as np

# Extract per-frame hand feature vectors from a video file and save them as CSV.
#
# For every detected hand in every frame, one row of 100 values is produced:
#   84 values : 21 landmarks x (x, y, z, visibility), flattened
#   15 values : angles (degrees) between adjacent bones of each finger
#    1 value  : the class label (HAND_LABEL)

mp_hands = mp.solutions.hands
hands = mp_hands.Hands()

# Input video file.
# NOTE(review): absolute, machine-specific path — consider a path relative to
# the notebook so the cell runs on other machines.
video_path = r'G:\내 드라이브\Sign_Remaster\Sign_Language_Remaster\test_hands.mp4'

# Class label written as the last column of every row.
# The original code hard-coded this value ("Assuming idx is 1, change it accordingly").
HAND_LABEL = 1

# Landmark index pairs (parent -> child) defining the 20 bones of the hand.
PARENT_JOINTS = [0, 1, 2, 3, 0, 5, 6, 7, 0, 9, 10, 11, 0, 13, 14, 15, 0, 17, 18, 19]
CHILD_JOINTS = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]

# Pairs of bone indices between which an angle is measured (15 angles in total).
ANGLE_BONES_A = [0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 16, 17, 18]
ANGLE_BONES_B = [1, 2, 3, 5, 6, 7, 9, 10, 11, 13, 14, 15, 17, 18, 19]

# Open the video file.
cap = cv2.VideoCapture(video_path)
data = []

try:
    # Fail loudly: otherwise an unreadable path would silently overwrite the
    # CSV below with an empty file.
    if not cap.isOpened():
        raise IOError(f"Could not open video file: {video_path}")

    while cap.isOpened():
        ret, img = cap.read()
        if not ret:
            # End of the video (or a read error).
            break

        # Convert the BGR frame to RGB before handing it to MediaPipe.
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = hands.process(img_rgb)

        if results.multi_hand_landmarks:
            print(f"Number of hands detected: {len(results.multi_hand_landmarks)}")

            for res in results.multi_hand_landmarks:
                # One row per landmark: (x, y, z, visibility).
                joint = np.zeros((21, 4))
                for j, lm in enumerate(res.landmark):
                    joint[j] = [lm.x, lm.y, lm.z, lm.visibility]

                # Bone vectors (child - parent), normalised to unit length.
                v = joint[CHILD_JOINTS, :3] - joint[PARENT_JOINTS, :3]
                v = v / np.linalg.norm(v, axis=1)[:, np.newaxis]

                # Angle between adjacent bones = arccos of their dot product.
                # Rounding can push the dot product of two unit vectors
                # marginally outside [-1, 1], which would make arccos return
                # NaN, so clip first.
                cosines = np.einsum('nt,nt->n', v[ANGLE_BONES_A, :], v[ANGLE_BONES_B, :])
                angle = np.degrees(np.arccos(np.clip(cosines, -1.0, 1.0)))

                angle_label = np.array([angle], dtype=np.float32)
                angle_label = np.append(angle_label, HAND_LABEL)

                d = np.concatenate([joint.flatten(), angle_label])
                data.append(d)
        else:
            print("No hands detected in this frame")
        cv2.imshow('Hand Vector Data', img)

        # Press 'q' to stop early.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture, the preview window and the MediaPipe graph,
    # even when processing fails part-way through.
    cap.release()
    cv2.destroyAllWindows()
    hands.close()

# Inspect the collected data.
data = np.array(data)
print("Shape of data:", data.shape)

# Persist the feature rows as CSV (file name and format can be changed).
np.savetxt('hand_vector_data.csv', data, delimiter=',')


No hands detected in this frame
No hands detected in this frame
No hands detected in this frame
No hands detected in this frame
No hands detected in this frame
No hands detected in this frame
No hands detected in this frame
No hands detected in this frame
No hands detected in this frame
No hands detected in this frame
No hands detected in this frame
No hands detected in this frame
No hands detected in this frame
No hands detected in this frame
No hands detected in this frame
No hands detected in this frame
No hands detected in this frame
No hands detected in this frame
No hands detected in this frame
No hands detected in this frame
No hands detected in this frame
No hands detected in this frame
No hands detected in this frame
No hands detected in this frame
Number of hands detected: 1
Number of hands detected: 1
Number of hands detected: 1
Number of hands detected: 1
Number of hands detected: 1
Number of hands detected: 2
Number of hands detected: 1
Number of hands detected: 2
Number o

In [3]:
### 동영상으로 벡터 추출
import cv2
import mediapipe as mp
import numpy as np
import time, os

# Extract hand feature vectors from a video file and save them, together with
# sliding-window sequences, as .npy files under ./dataset.
#
# Each row has 100 values: 21 landmarks x (x, y, z, visibility) flattened (84),
# 15 joint angles in degrees, and the numeric class label (1).

seq_length = 30        # number of consecutive rows in one sequence window
secs_for_action = 30   # maximum amount of footage to process, in seconds of video
# Numeric class label stored in the last column of every row.
# The action *name* typed by the user is only used in the output file names:
# appending the string itself to the feature vector would make NumPy promote
# the whole row to a string dtype.
# TODO(review): set this to the index of `action` in the training label list.
action_label = 0

mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    max_num_hands=1,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5)

video_path = 'test_hands.mp4'  # path of the input video

action = input("녹화할 동작을 입력하세요: ")  # action name, entered by the user

# Landmark index pairs (parent -> child) defining the 20 bones of the hand.
PARENT_JOINTS = [0, 1, 2, 3, 0, 5, 6, 7, 0, 9, 10, 11, 0, 13, 14, 15, 0, 17, 18, 19]
CHILD_JOINTS = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]

# Pairs of bone indices between which an angle is measured (15 angles in total).
ANGLE_BONES_A = [0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 16, 17, 18]
ANGLE_BONES_B = [1, 2, 3, 5, 6, 7, 9, 10, 11, 13, 14, 15, 17, 18, 19]

cap = cv2.VideoCapture(video_path)  # open the video file

created_time = int(time.time())
os.makedirs('dataset', exist_ok=True)

data = []

try:
    if not cap.isOpened():
        raise IOError(f"Could not open video file: {video_path}")

    # Limit processing by *video* time rather than wall-clock time, so the
    # number of frames used does not depend on how fast this machine runs.
    # If the container does not report a frame rate, process the whole file.
    fps = cap.get(cv2.CAP_PROP_FPS)
    max_frames = int(secs_for_action * fps) if fps > 0 else None
    frames_read = 0

    while max_frames is None or frames_read < max_frames:
        ret, img = cap.read()
        if not ret:  # end of the video
            break
        frames_read += 1

        # MediaPipe expects RGB input; OpenCV delivers BGR.
        result = hands.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

        if result.multi_hand_landmarks is not None:
            for res in result.multi_hand_landmarks:
                # One row per landmark: (x, y, z, visibility).
                joint = np.zeros((21, 4))
                for j, lm in enumerate(res.landmark):
                    joint[j] = [lm.x, lm.y, lm.z, lm.visibility]

                # Bone vectors (child - parent), normalised to unit length.
                v = joint[CHILD_JOINTS, :3] - joint[PARENT_JOINTS, :3]
                v = v / np.linalg.norm(v, axis=1)[:, np.newaxis]

                # Clip before arccos: rounding can push the dot product of two
                # unit vectors slightly outside [-1, 1], which would give NaN.
                cosines = np.einsum('nt,nt->n', v[ANGLE_BONES_A, :], v[ANGLE_BONES_B, :])
                angle = np.degrees(np.arccos(np.clip(cosines, -1.0, 1.0)))

                angle_label = np.array([angle], dtype=np.float32)
                angle_label = np.append(angle_label, action_label)

                d = np.concatenate([joint.flatten(), angle_label])
                data.append(d)
finally:
    # Always free the capture and the MediaPipe graph, even on failure.
    cap.release()
    cv2.destroyAllWindows()
    hands.close()

# Nothing is written when no hand was detected in the processed footage.
if len(data) > 0:
    data = np.array(data)
    np.save(os.path.join('dataset', f'raw_{action}_{created_time}'), data)

    # Build overlapping windows of `seq_length` consecutive rows.
    full_seq_data = []
    for seq in range(len(data) - seq_length):
        full_seq_data.append(data[seq:seq + seq_length])

    full_seq_data = np.array(full_seq_data)
    np.save(os.path.join('dataset', f'seq_{action}_{created_time}'), full_seq_data)


In [1]:
import cv2
import mediapipe as mp
import numpy as np
import time, os

# Record hand feature vectors from the webcam for each action in `actions` and
# save them, together with sliding-window sequences, as .npy files under ./dataset.
#
# Each row has 100 values: 21 landmarks x (x, y, z, visibility) flattened (84),
# 15 joint angles in degrees, and the action's index in `actions` (1).

actions = ['NOISE']
seq_length = 30        # number of consecutive rows in one sequence window
secs_for_action = 30   # recording time per action, in seconds

# MediaPipe hands model
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(
    max_num_hands=1,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5)

# Landmark index pairs (parent -> child) defining the 20 bones of the hand.
PARENT_JOINTS = [0, 1, 2, 3, 0, 5, 6, 7, 0, 9, 10, 11, 0, 13, 14, 15, 0, 17, 18, 19]
CHILD_JOINTS = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]

# Pairs of bone indices between which an angle is measured (15 angles in total).
ANGLE_BONES_A = [0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 16, 17, 18]
ANGLE_BONES_B = [1, 2, 3, 5, 6, 7, 9, 10, 11, 13, 14, 15, 17, 18, 19]

cap = cv2.VideoCapture(0)

created_time = int(time.time())
os.makedirs('dataset', exist_ok=True)

try:
    if not cap.isOpened():
        raise RuntimeError("Could not open webcam (device 0)")

    for idx, action in enumerate(actions):
        data = []

        # Show a 3-second "get ready" frame before recording starts.
        ret, img = cap.read()
        if not ret:
            raise RuntimeError("Failed to read a frame from the webcam")

        img = cv2.flip(img, 1)

        cv2.putText(img, f'Waiting for collecting {action.upper()} action...', org=(10, 30), fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, color=(255, 255, 255), thickness=2)
        cv2.imshow('img', img)
        cv2.waitKey(3000)

        start_time = time.time()

        while time.time() - start_time < secs_for_action:
            ret, img = cap.read()
            if not ret:
                # The camera stopped delivering frames; keep what was collected.
                break

            # Mirror the frame, then convert BGR -> RGB for MediaPipe and back
            # to BGR for drawing/display.
            img = cv2.flip(img, 1)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            result = hands.process(img)
            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

            if result.multi_hand_landmarks is not None:
                for res in result.multi_hand_landmarks:
                    # One row per landmark: (x, y, z, visibility).
                    joint = np.zeros((21, 4))
                    for j, lm in enumerate(res.landmark):
                        joint[j] = [lm.x, lm.y, lm.z, lm.visibility]

                    # Bone vectors (child - parent), normalised to unit length. [20, 3]
                    v = joint[CHILD_JOINTS, :3] - joint[PARENT_JOINTS, :3]
                    v = v / np.linalg.norm(v, axis=1)[:, np.newaxis]

                    # Angle between adjacent bones via arccos of the dot product. [15,]
                    # Clip first: rounding can push the dot product of two unit
                    # vectors slightly outside [-1, 1], which would give NaN.
                    cosines = np.einsum('nt,nt->n', v[ANGLE_BONES_A, :], v[ANGLE_BONES_B, :])
                    angle = np.degrees(np.arccos(np.clip(cosines, -1.0, 1.0)))

                    angle_label = np.array([angle], dtype=np.float32)
                    angle_label = np.append(angle_label, idx)

                    d = np.concatenate([joint.flatten(), angle_label])

                    data.append(d)

                    mp_drawing.draw_landmarks(img, res, mp_hands.HAND_CONNECTIONS)

            cv2.imshow('img', img)
            # 'q' ends the recording of the current action early; whatever was
            # collected so far is still saved below.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        data = np.array(data)
        print(action, data.shape)
        np.save(os.path.join('dataset', f'raw_{action}_{created_time}'), data)

        # Create sequence data: overlapping windows of `seq_length` rows.
        full_seq_data = []
        for seq in range(len(data) - seq_length):
            full_seq_data.append(data[seq:seq + seq_length])

        full_seq_data = np.array(full_seq_data)
        print(action, full_seq_data.shape)
        np.save(os.path.join('dataset', f'seq_{action}_{created_time}'), full_seq_data)
finally:
    # Always free the camera, the preview window and the MediaPipe graph;
    # otherwise the webcam stays locked until the kernel is restarted.
    cap.release()
    cv2.destroyAllWindows()
    hands.close()

NOISE (437, 100)
NOISE (407, 30, 100)


: 