In [None]:
!pip install mediapipe

## 1. 前置套件與函式
將 MediaPipe 所偵測到的 keypoints 繪製到空白圖像，經過標準化後儲存。Keypoints 包含 pose、right_hand、left_hand（不含 face）。

In [None]:
import cv2
import mediapipe as mp
import os
import numpy as np

mp_holistic = mp.solutions.holistic # MediaPipe holistic solution module: provides the Holistic model and the POSE/HAND connection sets used below
mp_drawing = mp.solutions.drawing_utils # MediaPipe drawing helpers: draw_landmarks and DrawingSpec

def mediapipe_detection(image, model): # run detection on one frame
    """Run ``model`` on a single frame and hand back the frame plus the detections.

    Args:
        image: frame in BGR channel order (it is converted with COLOR_BGR2RGB
            before being processed).
        model: object exposing ``process(rgb_image)``.

    Returns:
        Tuple ``(bgr_image, results)``: the frame converted back to BGR after
        processing, and whatever ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # The converted frame is read-only only for the duration of the model call.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True
    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, results

def draw_landmarks_and_styled_noface(image, results): # draws onto `image` in place; nothing is displayed here
    """Draw pose, left-hand and right-hand landmarks onto ``image``.

    Face landmarks are deliberately not drawn. The function returns nothing;
    ``image`` is modified in place by ``mp_drawing.draw_landmarks``.

    Args:
        image: array to draw on.
        results: detection result exposing ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.
    """
    # (results attribute, connection set, landmark colour, connection colour, thickness)
    # Listed in drawing order: pose first, then left hand, then right hand.
    layers = (
        ('pose_landmarks', mp_holistic.POSE_CONNECTIONS, (80, 22, 10), (80, 44, 121), 1),
        ('left_hand_landmarks', mp_holistic.HAND_CONNECTIONS, (121, 22, 76), (121, 44, 250), 2),
        ('right_hand_landmarks', mp_holistic.HAND_CONNECTIONS, (245, 117, 66), (245, 66, 230), 2),
    )
    for attr_name, connections, dot_color, line_color, thickness in layers:
        mp_drawing.draw_landmarks(
            image,
            getattr(results, attr_name),
            connections,
            mp_drawing.DrawingSpec(color=dot_color, thickness=thickness, circle_radius=1),
            mp_drawing.DrawingSpec(color=line_color, thickness=thickness, circle_radius=1),
        )

def extract_keypoints_1darray_noface(results): # collect landmark values
  '''
  Flatten the pose and hand landmarks of one detection into a single vector.

  Layout of the returned 1d array (258 values in total):
    pose:       33*4 values (x, y, z, visibility per landmark)
    left hand:  21*3 values (x, y, z per landmark)
    right hand: 21*3 values (x, y, z per landmark)
  Any part that was not detected is filled with zeros.
  '''
  pose_landmarks = results.pose_landmarks
  if pose_landmarks:
    pose_vec = np.array([[p.x, p.y, p.z, p.visibility] for p in pose_landmarks.landmark]).flatten()
  else:
    pose_vec = np.zeros(33*4)

  # Left hand first, then right hand, to keep the documented layout.
  hand_vecs = []
  for hand_landmarks in (results.left_hand_landmarks, results.right_hand_landmarks):
    if hand_landmarks:
      hand_vecs.append(np.array([[p.x, p.y, p.z] for p in hand_landmarks.landmark]).flatten())
    else:
      hand_vecs.append(np.zeros(21*3))

  return np.concatenate([pose_vec, hand_vecs[0], hand_vecs[1]])

def extract_keypoints_on_image_noface(results, resolution):
  """Render the landmarks of ``results`` onto a blank square image.

  Args:
    results: detection result passed on to ``draw_landmarks_and_styled_noface``.
    resolution: side length of the square canvas, in pixels.

  Returns:
    The grayscale version of the drawn canvas, divided by 255.0.
  """
  # Start from an all-zero float32 canvas with three channels.
  canvas = np.zeros((resolution, resolution, 3), dtype=np.float32)
  draw_landmarks_and_styled_noface(canvas, results)
  # Collapse to a single channel, then scale by 1/255.
  return cv2.cvtColor(canvas, cv2.COLOR_RGB2GRAY) / 255.0

def is_keypoints_valid(results, action, single_hand_actions):
    """Decide whether a frame contains enough hand keypoints for ``action``.

    A keypoint counts as detected when all of its x, y and z values are
    non-zero. A hand that was not detected contributes 0 keypoints.

    Args:
        results: detection result exposing ``left_hand_landmarks`` and
            ``right_hand_landmarks`` (each falsy or an object with a
            ``landmark`` sequence of points having ``x``, ``y``, ``z``).
        action: name of the action being recorded.
        single_hand_actions: collection of action names performed with one hand.

    Returns:
        bool: for a single-hand action, True when at least one hand has
        ``min_single_hand_keypoints`` detected keypoints; for any other action,
        True when both hands together have ``min_double_hand_keypoints``.
    """
    # Minimum detected keypoints for single-hand and two-hand actions.
    min_single_hand_keypoints = 16  # out of 21 on one hand
    min_double_hand_keypoints = 35  # out of 42 across both hands

    def _count_detected(hand_landmarks):
        """Number of landmarks of one hand whose x, y and z are all non-zero."""
        if not hand_landmarks:
            return 0
        # reshape keeps the array 2-D even when the landmark list is empty,
        # so the axis=1 reduction below cannot fail.
        coords = np.array([[p.x, p.y, p.z] for p in hand_landmarks.landmark], dtype=float).reshape(-1, 3)
        return int(np.count_nonzero(np.all(coords != 0, axis=1)))

    lh_count = _count_detected(results.left_hand_landmarks)
    rh_count = _count_detected(results.right_hand_landmarks)

    if action in single_hand_actions:
        # Single-hand action: either hand reaching the minimum is enough.
        return bool(lh_count >= min_single_hand_keypoints or rh_count >= min_single_hand_keypoints)

    # Two-hand action: the threshold of 35 refers to the 42 keypoints of both
    # hands combined. Comparing it against each hand separately (max 21) could
    # never succeed, so the combined count is checked instead; since one hand
    # supplies at most 21, both hands must be present to reach it.
    return bool(lh_count + rh_count >= min_double_hand_keypoints)

## 2. 變數設置
每個動作一次錄製 30 部影片（`no_sequences`），每部影片 45 個 frame（`sequence_length`）。

In [None]:
# Output roots for the exported numpy arrays
DATA_PATH_1d_nf = os.path.join(
    '1Darray', 'MP_Data_1d_nf'
)
DATA_PATH_150_nf = os.path.join(
    'image', 'MP_Data_Image_150_nf'
)

# Every action the dataset should cover
actions = np.array([
    'apple', 'bad', 'eat', 'exercise', 'very',
    'mood', 'then', 'plan to', 'you guys', 'we',
    'suit to', 'good', 'tomorrow', 'weather', 'today',
])

# Subset of `actions` that is validated with the single-hand rule
single_hand_action = np.array([
    'you guys', 'weather', 'bad', 'very',
    'good', 'apple', 'tomorrow',
])

no_sequences = 30     # number of videos recorded per action
sequence_length = 45  # number of frames in each video

## 3. 資料夾創建

In [None]:
# Create one output folder per (action, sequence) under both data roots.
# exist_ok=True makes this cell safe to re-run. Each directory is created
# independently, so an already existing folder in the first tree no longer
# prevents the matching folder in the second tree from being created, and
# genuine failures (e.g. permission errors) are no longer silently swallowed.
for action in actions:
    for sequence in range(no_sequences):
        for data_root in (DATA_PATH_1d_nf, DATA_PATH_150_nf):
            os.makedirs(os.path.join(data_root, action, str(sequence)), exist_ok=True)

## 4. 開始抓取

In [None]:
# Record `no_sequences` videos of `sequence_length` frames each for one action.
# For every frame whose hand keypoints pass is_keypoints_valid, two .npy files
# are saved: the flattened keypoint vector and the 150x150 keypoint image.
# A video containing any invalid frame is recorded again under the same index.
#
# Before changing the next two lines, make sure the output folders already
# exist (run the folder-creation cell first).
cur_action = 'bad' # action to record
start_from_here = 0 # index of the first video to record
sequence = start_from_here
is_sequence_invalid = False

cap = cv2.VideoCapture(0) # grab the feed from device 0
try:
    if not cap.isOpened():
        raise RuntimeError('Cannot open camera device 0')

    # Set mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while sequence < no_sequences:
            for frame_num in range(sequence_length):
                # read feed
                ret, frame = cap.read() # grab current frame from webcam
                if not ret:
                    # Without this check a failed read would surface as an
                    # opaque OpenCV error inside cvtColor.
                    raise RuntimeError(f'Failed to read frame {frame_num} of video {sequence} from the camera')

                # make detection
                image, results = mediapipe_detection(frame, holistic)

                # draw landmarks
                draw_landmarks_and_styled_noface(image, results)

                # On the first frame of a video, announce it and pause for 2 seconds.
                if frame_num == 0:
                    cv2.putText(image, 'STARTING COLLECTION', (120, 200), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 4, cv2.LINE_AA)
                cv2.putText(image, 'Collecting frames for {} Videos Number {}'.format(cur_action, sequence), (15, 12),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                if frame_num == 0:
                    cv2.imshow('OpenCV Feed', image)
                    cv2.waitKey(2000)     # take 2 seconds break

                if not is_keypoints_valid(results, cur_action, single_hand_action):
                    # Remember the failure; the whole video is redone after the frame loop.
                    is_sequence_invalid = True
                    cv2.putText(image, f'frame {frame_num} keypoints invalid', (15, 30),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                else:
                    # export keypoints
                    # to 1darray no face
                    keypoints = extract_keypoints_1darray_noface(results)
                    npy_path = os.path.join(DATA_PATH_1d_nf, cur_action, str(sequence), str(frame_num))
                    np.save(npy_path, keypoints)

                    # to image no face
                    keypoints_image = extract_keypoints_on_image_noface(results, 150)
                    image_path = os.path.join(DATA_PATH_150_nf, cur_action, str(sequence), str(frame_num))
                    np.save(image_path, keypoints_image)

                # show to screen
                cv2.imshow('OpenCV Feed', image)

                # 'q' stops the current video early and moves on
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break

            if is_sequence_invalid:
                # Keep the same sequence index so this video is recorded again.
                is_sequence_invalid = False
                # NOTE(review): x=850 is outside a 640-pixel-wide frame; confirm the
                # camera resolution, otherwise this message is not visible.
                cv2.putText(image, 'keypoints lost, do it again', (850, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 127, 0), 4, cv2.LINE_AA)
                cv2.imshow('OpenCV Feed', image)
                cv2.waitKey(1000)
            else:
                sequence += 1
finally:
    # Always free the webcam and close the preview window, even on errors.
    cap.release() # release webcam
    cv2.destroyAllWindows() # close all windows
    cv2.waitKey(1) # make sure window close