In [7]:
import mediapipe as mp
import cv2
import numpy as np
import os
from extract import mediapipe_landmarks

## Getting started

In [8]:
# Root directory under which the captured landmark arrays are stored
PATH = 'Dataset'
# Directory holding one tutorial image per action (loaded later as '<class>.png')
path_action_types = 'types of actions'

# Classes: one per tutorial image, named after the file without its extension.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# keep the class indices stable between runs and machines. Entries that are
# not PNG files are skipped, because every class must have a '<class>.png'.
classes = np.array(sorted(
    os.path.splitext(file_name)[0]
    for file_name in os.listdir(path_action_types)
    if os.path.splitext(file_name)[1].lower() == '.png'
))

# Number of frames per video
len_sequence = 30

# Number of videos per action
num_of_sequences = 50

classes

array(['climb', 'cry', 'drink', 'eat', 'fall', 'give', 'jump', 'kick',
       'look', 'push', 'run', 'sit', 'sleep', 'wait', 'walk', 'wash'],
      dtype='<U5')

# Collecting data

In [11]:
# Creating directories for saving the dataset: <PATH>/<class>/<sequence index>/
for clas in classes:
    for cut in range(num_of_sequences):
        # Build the path from the PATH variable with os.path.join so the folders
        # land where np.save() writes and the separator is right on every OS.
        # exist_ok=True tolerates re-runs while still surfacing real failures
        # (e.g. permission errors) instead of silently swallowing them.
        os.makedirs(os.path.join(PATH, clas, str(cut)), exist_ok=True)
        
# Collecting dataset
# For every class: show its tutorial image, then record `num_of_sequences`
# sequences of `len_sequence` webcam frames each, saving the landmarks of every
# frame to <PATH>/<class>/<sequence>/<frame>.npy.
# Press 'q' in the OpenCV window to stop the whole collection early.
# (The original author reports that a full run takes roughly 54 minutes.)
WINDOW_NAME = 'OpenCV Feed'
TEXT_COLOR = (255, 0, 255)
TUTORIAL_DELAY_MS = 2500   # how long the full-size tutorial image is shown
PREPARE_DELAY_MS = 1400    # pause on the 'Prepare' screen before each sequence


def _read_frame(capture):
    """Return the next camera frame, raising RuntimeError if none is delivered."""
    ok, frame = capture.read()
    if not ok or frame is None:
        raise RuntimeError('Could not read a frame from the camera')
    return frame


def _load_tutorial_image(clas):
    """Load the tutorial picture of `clas`, raising FileNotFoundError if unreadable."""
    tutorial_path = os.path.join(path_action_types, clas + '.png')
    tutorial_image = cv2.imread(tutorial_path, cv2.IMREAD_COLOR)
    if tutorial_image is None:   # cv2.imread reports failure by returning None
        raise FileNotFoundError(f'Cannot read tutorial image: {tutorial_path}')
    return tutorial_image


cap = cv2.VideoCapture(0)
try:
    if not cap.isOpened():
        raise RuntimeError('Could not open the default camera (index 0)')

    with mp.solutions.holistic.Holistic(min_detection_confidence=0.5,
                                        min_tracking_confidence=0.5) as model:
        stop_requested = False

        for i, clas in enumerate(classes):
            # Load the tutorial image once per class and derive both sizes from it
            # (large preview shown first, small thumbnail overlaid while recording).
            tutorial_image = _load_tutorial_image(clas)
            tutorial_preview = cv2.resize(tutorial_image, (450, 360))
            tutorial_thumb = cv2.resize(tutorial_image, (150, 120))
            thumb_h, thumb_w = tutorial_thumb.shape[:2]

            cv2.imshow(WINDOW_NAME, tutorial_preview)
            cv2.waitKey(TUTORIAL_DELAY_MS)

            for cut in range(num_of_sequences):                    # For every sequence (video):
                # Make sure the target folder exists so np.save() below cannot fail on it.
                sequence_dir = os.path.join(PATH, clas, str(cut))
                os.makedirs(sequence_dir, exist_ok=True)

                # 'Prepare' screen: gives the performer time to get into position.
                image, _ = mediapipe_landmarks(_read_frame(cap), model)
                cv2.putText(image, 'Prepare: ', (100, 200),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, TEXT_COLOR, 4, cv2.LINE_AA)
                cv2.putText(image, f'({i}) {clas}...', (40, 25),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, TEXT_COLOR, 2, cv2.LINE_AA)
                cv2.imshow(WINDOW_NAME, image)
                cv2.waitKey(PREPARE_DELAY_MS)

                for frame_num in range(len_sequence):              # For every frame of the sequence:
                    # mediapipe_landmarks returns the image to display and the landmarks to store.
                    image, landmarks = mediapipe_landmarks(_read_frame(cap), model)

                    # Overlay the tutorial thumbnail in the bottom-right corner. On a
                    # 640x480 frame this is the region [360:480, 490:640].
                    image[-thumb_h:, -thumb_w:] = tutorial_thumb

                    # Show the class index, class name and current sequence index.
                    cv2.putText(image, f'({i}) {clas}: {cut}', (40, 25),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, TEXT_COLOR, 2, cv2.LINE_AA)
                    cv2.imshow(WINDOW_NAME, image)

                    # np.save appends the '.npy' extension to the frame index.
                    np.save(os.path.join(sequence_dir, str(frame_num)), landmarks)

                    if cv2.waitKey(10) & 0xFF == ord('q'):         # 'q' aborts the whole collection
                        stop_requested = True
                        break

                if stop_requested:
                    break
            if stop_requested:
                break
finally:
    # Always free the webcam and close the preview window, even after an error.
    cap.release()
    cv2.destroyAllWindows()

In [10]:
# Release the webcam handle and close every OpenCV window.
# NOTE(review): this repeats the cleanup performed by the collection cell; presumably
# kept as a manual fallback for when that cell is interrupted before cleaning up - confirm.
cap.release()
cv2.destroyAllWindows()