In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as pyplot
import time
import mediapipe as mp

**Variable setup and MediaPipe helper functions**

In [2]:
# Short aliases for the MediaPipe solution modules used by the cells below.
mp_drawing = mp.solutions.drawing_utils # helpers that render landmarks onto an image
mp_holistic = mp.solutions.holistic # holistic module (Holistic model class and connection sets)

def mediapipe_detection(image,model):
  """Run `model` on a BGR frame and return the frame together with the results.

  Args:
    image: frame in OpenCV's BGR channel order (converted with COLOR_BGR2RGB
      before processing).
    model: object exposing `process(rgb_image)`; the notebook passes a
      MediaPipe Holistic instance.

  Returns:
    (bgr_image, results): a BGR copy of the frame produced by converting to
    RGB and back, and whatever `model.process` returned.
  """
  rgb_frame = cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
  # Read-only while the model runs; presumably lets MediaPipe use the buffer
  # without copying it - TODO confirm against the MediaPipe docs.
  rgb_frame.flags.writeable = False
  results = model.process(rgb_frame)
  rgb_frame.flags.writeable = True
  return cv2.cvtColor(rgb_frame,cv2.COLOR_RGB2BGR),results

def draw_landmarks(image,result):
  """Draw the face, pose and hand landmarks from `result` onto `image` in place.

  Args:
    image: BGR frame that the landmarks are rendered on.
    result: detection result exposing `face_landmarks`, `pose_landmarks`,
      `left_hand_landmarks` and `right_hand_landmarks`.
  """
  # Face: the first DrawingSpec styles the landmark points, the second styles
  # the connecting lines. Colour channels are 8-bit, so the maximum is 255
  # (the previous value 256 was out of range).
  mp_drawing.draw_landmarks(image,result.face_landmarks,mp_holistic.FACEMESH_CONTOURS,
                            mp_drawing.DrawingSpec(color=(80,110,10),thickness=1,circle_radius=1),
                            mp_drawing.DrawingSpec(color=(80,255,10),thickness=1,circle_radius=1))
  # Pose and hands use MediaPipe's default drawing style.
  mp_drawing.draw_landmarks(image,result.pose_landmarks,mp_holistic.POSE_CONNECTIONS)
  mp_drawing.draw_landmarks(image,result.left_hand_landmarks,mp_holistic.HAND_CONNECTIONS)
  mp_drawing.draw_landmarks(image,result.right_hand_landmarks,mp_holistic.HAND_CONNECTIONS)

def extract_keypoint(result):
  """Flatten all landmarks of one detection result into a single 1-D vector.

  Layout, in order: pose (33 landmarks x [x, y, z, visibility]), face
  (468 x [x, y, z]), left hand (21 x [x, y, z]), right hand (21 x [x, y, z]).
  A landmark group that is missing (falsy) is replaced by zeros of the same
  length, so a full result always yields 33*4 + 468*3 + 21*3 + 21*3 values.

  Args:
    result: object exposing `pose_landmarks`, `face_landmarks`,
      `left_hand_landmarks` and `right_hand_landmarks`; each is either falsy
      or has a `.landmark` sequence of points.

  Returns:
    numpy 1-D array with the concatenated keypoints.
  """
  def _flatten(group, fields, count):
    # Zero placeholder keeps the vector layout fixed when a part is not detected.
    if not group:
      return np.zeros(count*len(fields))
    rows = [[getattr(point, name) for name in fields] for point in group.landmark]
    return np.array(rows).flatten()

  xyz = ("x","y","z")
  parts = [
    _flatten(result.pose_landmarks, xyz + ("visibility",), 33),
    _flatten(result.face_landmarks, xyz, 468),
    _flatten(result.left_hand_landmarks, xyz, 21),
    _flatten(result.right_hand_landmarks, xyz, 21),
  ]
  return np.concatenate(parts)

In [3]:
# Read the vocabulary (one word per line) and pack it into an array.
DATA_PATH = "DATA_LIB" # root folder of the collected keypoint data
with open("word_list.txt") as file:
  # strip() removes the trailing newline and stray surrounding whitespace.
  # Blank lines are skipped: an empty action name would resolve to DATA_PATH
  # itself and make the folder-creation cell fail.
  action = [line.strip() for line in file if line.strip()]
actions = np.array(action) # meaning of the pose
# The number of sequences to record is chosen per run in the capture cell.
sequence_length = 30 # 30 frames to detect what is going on

In [4]:
# Create the dataset folder layout: DATA_PATH plus one sub-folder per action.
# exist_ok=True makes this cell safe to re-run; plain os.mkdir raised
# FileExistsError as soon as any of the folders already existed.
os.makedirs(DATA_PATH, exist_ok=True)
for action in actions:
  os.makedirs(os.path.join(DATA_PATH,action), exist_ok=True)


*Video Feed*

In [5]:
# Record `no_sequences` new sequences of `sequence_length` frames for one action.
# Every frame's keypoints are saved to DATA_PATH/<action>/<sequence>/<frame>.npy.
# Press "q" in the preview window to stop recording.

#select action
action = "test"
no_sequences = 10
if no_sequences>0:
  cap = cv2.VideoCapture(0) #default camera
  try:
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
      # Continue numbering after the sequences that already exist on disk.
      ids = 0
      while os.path.exists(os.path.join(DATA_PATH,action,str(ids))):
        ids+=1

      stop_requested = False
      for sequence in range(ids,no_sequences+ids):
        # Countdown before recording starts. Each digit is drawn on a fresh
        # frame and shown for one second, so the user sees it and frame 0 is
        # captured after the countdown rather than before it.
        for count in ("3","2","1"):
          ret,frame = cap.read() #read frame from webcam
          if not ret:
            print("camera read failed - stopping capture")
            stop_requested = True
            break
          image,result = mediapipe_detection(frame, holistic)
          draw_landmarks(image,result)
          cv2.putText(image,count,(120,200),cv2.FONT_HERSHEY_SIMPLEX,0.5,(0,0,255),4,cv2.LINE_AA)
          cv2.imshow("Camera",image)
          if cv2.waitKey(1000) & 0xff == ord('q'):
            stop_requested = True
            break
        if stop_requested:
          break

        os.makedirs(os.path.join(DATA_PATH,action,str(sequence)))
        for frame_num in range(sequence_length):
          ret,frame = cap.read() #read frame from webcam
          if not ret:
            print("camera read failed - stopping capture")
            stop_requested = True
            break

          #make pipe detection
          image,result = mediapipe_detection(frame, holistic)
          draw_landmarks(image,result)

          #show which sequence is being collected
          cv2.putText(image,f"{action} : {sequence}",(15,12),cv2.FONT_HERSHEY_SIMPLEX,0.5,(0,0,255),1,cv2.LINE_4)
          cv2.imshow("Camera",image) #show on screen

          # write data to path (np.save appends the .npy extension)
          keypoint = extract_keypoint(result)
          npy_path = os.path.join(DATA_PATH,action,str(sequence),str(frame_num))
          np.save(npy_path,keypoint)

          if cv2.waitKey(10) & 0xff == ord('q'): # q stops the whole recording run
            # NOTE: the current sequence folder may hold fewer than
            # sequence_length frames when recording is stopped early.
            stop_requested = True
            break
        if stop_requested:
          break
  finally:
    # Always free the camera and close the window, also when an error
    # propagates out of the loop (errors are no longer silently swallowed).
    cap.release()  # disconnect webcam
    cv2.destroyAllWindows()
else:
  print("input sequences")

   #close window

In [None]:
# Manual cleanup: run this if the capture cell was interrupted.
# `cap` only exists once the capture cell has opened the camera, so guard the
# release to avoid a NameError on a fresh kernel.
if "cap" in globals():
  cap.release()  # disconnect webcam
cv2.destroyAllWindows()