In [2]:
import cv2
import mediapipe as mp
import numpy as np
import os
from data_capture import process_image
from camera_utils import check_camera, check_hand_landmarks

In [3]:
# Short aliases for the mediapipe solution modules.
# mp_draw and mp_hand are handed to the helper functions in later cells.
mp_draw, mp_style, mp_hand = (
    mp.solutions.drawing_utils,
    mp.solutions.drawing_styles,
    mp.solutions.hands,
)

## A. Check if the utilities are working correctly before image capture

### Check if camera is working correctly

In [3]:
# Run the camera sanity check imported from camera_utils before capturing any data.
# NOTE(review): what the helper displays or returns is not visible in this notebook — confirm in camera_utils.
check_camera()

### Check the frames per second to determine speed of video capture
We need to capture 1 s of video for each sample. Since cv2 and mediapipe process one frame at a time, we need to know the camera's frame rate to determine how many frames make up one sample.

In [4]:
# Probe the default camera (index 0) for its reported frame rate.
cap=cv2.VideoCapture(0)
try:
    if cap.isOpened():
        print('FPS of laptop cam is',cap.get(cv2.CAP_PROP_FPS), 'fps')
    else:
        # Without this guard a camera that failed to open would be queried anyway
        # and the printed value would not reflect a real frame rate.
        print('Could not open camera 0 - FPS unavailable')
finally:
    # Always hand the device back, even if the query above raised.
    cap.release()
    cv2.destroyAllWindows()

FPS of laptop cam is 30.0 fps


### Check if mediapipe is correctly detecting landmarks in the hands

In [5]:
# Run the hand-landmark check from camera_utils and keep its return value in `marks`
# so the following cell can inspect it.
# NOTE(review): the exact structure of the returned object is defined in camera_utils — confirm there.
marks=check_hand_landmarks(mp_draw,mp_hand)

### Explore the landmarks captured

In [10]:
# Summarise what the landmark check returned; only the first entry of `marks` is inspected.
print('No. of frames captured:', len(marks))

# NOTE(review): the label below says "left hand", but nothing in this cell checks handedness —
# it simply reports on marks[0]. Confirm against check_hand_landmarks.
first_entry=marks[0]
hand_points=first_entry.landmark
print('No. of landmarks captured for the left hand:',len(hand_points))
print('\n')

# Show a single landmark to see its fields.
print(f'What does each landmark look like? \n{hand_points[0]}')

No. of frames captured: 1
No. of landmarks captured for the left hand: 21


What does each landmark look like? 
x: 0.08354503661394119
y: 0.7452123761177063
z: 4.6253799723672273e-07



<b> So mediapipe captures 21 landmarks for each hand. Each of these 21 landmarks is described by its 3 coordinates (x, y, z). </b>

## B. Image Capture

In [None]:
# Gesture labels to record; each label is captured `samples` times.
actions = [
    'ascend',
    'descend',
    'ok',
    'not ok',
    'stop',
]

# Number of recordings per gesture.
samples = 30

# Frames per recording: at the camera's reported 30 fps this corresponds to one second of video.
frame_count = 30

In [None]:
# Folder (relative to the working directory) under which the capture cell builds its per-frame .npy paths.
SAVE_PATH='data'

# exist_ok=True makes this cell safe to re-run and avoids the separate
# "list the directory, then create" check.
os.makedirs(SAVE_PATH, exist_ok=True)

In [None]:
# Per-frame record of the "no landmark" value reported by process_image, indexed as
# [action, sample, frame]. Despite the name this is a numpy array, not a dict.
# It helps decide whether more recordings of an action are needed.
no_landmarks_dict = np.zeros(shape=(len(actions), samples, frame_count))

In [None]:
# Record `samples` recordings of `frame_count` frames for every action in `actions`.
# Each frame is passed to process_image together with the .npy path built for it, and the
# "no landmark" value it returns is stored in no_landmarks_dict[action, sample, frame].
# Press 'q' in the preview window to abort the whole capture.
cap=cv2.VideoCapture(0)
act_dict={}  # not filled in this cell; kept in case a later cell expects the name
stop_requested=False  # set once 'q' is pressed so that every loop level exits

try:
    if not cap.isOpened():
        print('Could not open camera 0 - no samples were captured')
    else:
        with mp_hand.Hands(min_detection_confidence=0.5, min_tracking_confidence=0.5) as hands:
            for action_idx,a in enumerate(actions):
                for s in range(samples):
                    for f in range(frame_count):
                        if f == 0:
                            # Start of a new sample: show a banner on a throw-away frame and
                            # pause for half a second so the user can get into position.
                            isframe, frame=cap.read()
                            if isframe:  # a failed read yields no image to draw on
                                cv2.putText(frame, f'Action {a}, Sample {s}, Initiating...', (120,200), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
                                cv2.imshow('Data', frame)
                                cv2.waitKey(500)
                                cv2.putText(frame, f'Action {a}, Sample {s}, Collecting....', (15,12),cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 255), 1, cv2.LINE_AA)
                                cv2.imshow('Data', frame)

                        # Exactly one camera read per recorded frame.
                        isframe, frame=cap.read()
                        if isframe:
                            name=a+'_'+str(s)+'_'+str(f)+'.npy'
                            path=os.path.join(SAVE_PATH,name)
                            # The frame is converted to RGB for process_image and back to BGR for display.
                            frame=cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
                            # NOTE(review): process_image presumably writes the landmarks to `path` — confirm in data_capture.
                            frame,no_landmarks=process_image(path,frame,hands,mp_draw,mp_hand)
                            no_landmarks_dict[action_idx,s,f]=no_landmarks
                            frame=cv2.cvtColor(frame,cv2.COLOR_RGB2BGR)
                            cv2.imshow('Data',frame)

                        # Waits up to 100 ms for a key press; this also refreshes the preview window.
                        if cv2.waitKey(100) & 0xFF == ord('q'):
                            stop_requested=True
                            break
                    if stop_requested:
                        break
                if stop_requested:
                    break
finally:
    # Release the camera and close the preview even if the capture was aborted or raised.
    cap.release()
    cv2.destroyAllWindows()
                         

In [None]:
# Manual cleanup: release the camera and close any OpenCV windows.
# This repeats the cleanup done by the capture cell, so it is safe to run on its own
# if that cell was interrupted before it finished.
cap.release()
cv2.destroyAllWindows()   