# Notebook 1
# POSE Landmark acquisition and storage


This notebook is designed to capture video and detect the pose using MediaPipe

It will capture poses for the actions [TL, TR, BL, BR, CENTER] and store them in a directory structure:

person_xxxx/action/sequence_nr/frame_nr.npy

where xxxx is the person's ID, a sequence is a series of frames, and sequence_nr and frame_nr are the corresponding IDs of the sequence and of the frame within it.

<div>
<img src="pose_landmarks_index.png" width="400"/>
</div>

The pose consists of 33 landmarks, each with the values (X, Y, Z, VISIBILITY). Although all of these values are stored in the npy file, only landmarks 0-12 are used in the models.

pose_ml.py contains helper functions for loading, saving, displaying pose keypoints/landmarks and for handling data.

In [None]:
!pip install mediapipe

In [2]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp
import winsound
import sklearn
import random
from pathlib import Path

# routines for this assignment
import pose_ml  as pml  





# Get the features and actions data structure

In [3]:
# Feature pairs and their descriptions, as defined by the pose_ml helper module
features = pml.FEATURES
feature_descriptions = pml.FEATURE_DESCRIPTIONS

# Action codes and their descriptions, as defined by the pose_ml helper module
actions = pml.ACTIONS
action_descriptions = pml.ACTION_DESCRIPTIONS

# Show one "key description" line per feature, preceded by the feature count
print(len(feature_descriptions), " Features : ")
for key, feature in feature_descriptions.items():
    print(f"{key} {feature}")

# Show one "key description" line per action
print("\nActions: ")
for key, action in action_descriptions.items():
    print(f"{key} {action}")


20  Features : 
(0, 2) nose, left eye
(0, 5) nose, right eye
(0, 7) nose, left ear
(0, 8) nose, right ear
(0, 9) nose, left mouth
(0, 10) nose, right mouth
(0, 11) nose, left shoulder
(0, 12) nose, right shoulder
(2, 5) left eye, right eye
(2, 7) left eye, left ear
(5, 8) right eye, right ear
(7, 8) left ear, right ear
(7, 9) left ear, left mouth
(7, 11) Left ear, left shoulder
(8, 10) right ear, right mouth
(8, 12) right ear, right shoulder
(9, 10) left mouth, right mouth
(9, 11) left mouth, left shoulder
(10, 12) right mouth, right shoulder
(11, 12) left shoulder, right shoulder

Actions: 
C MIDDEN
TL LINKS BOVEN
TR RECHTS BOVEN
BL LINKS ONDER
BR RECHTS ONDER


Set the initial paths and other parameters.

In [1]:
# Folder locations.
# The data folder was originally ./data; it was switched to ./data1 so that new
# recordings do not pollute the dataset used for the assignment.
project_data = Path(r"./data1")
graph_path = Path(r"./graphs")
model_path = Path(r"./models")

# Recording plan: number of videos (sequences) recorded per action for each
# participant, and the number of frames in every video.
no_sequences, sequence_length = 5, 40

# Index of the webcam to use; change this if another camera should be used.
web_cam = 0

NameError: name 'Path' is not defined

This routine builds the folder structure for the acquisition of data for one participant. It checks for existing data and assigns the new participant a new folder.

In [12]:
# Short aliases for the MediaPipe modules used in this notebook:
# the drawing utilities and the holistic solution.
mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic


In [6]:
# detection of the landmarks from the MediaPipe model
def mediapipe_detection(image, model):
    """Run `model` on a BGR frame and return the frame together with the results.

    The frame is converted from BGR to RGB before it is handed to
    `model.process`, and converted back to BGR afterwards.

    Parameters
    ----------
    image : BGR frame accepted by `cv2.cvtColor`.
    model : object exposing a `process(image)` method.

    Returns
    -------
    tuple
        `(bgr_image, results)` where `results` is whatever `model.process`
        returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The frame is marked read-only for the duration of the model call and
    # made writeable again right after it.
    rgb_frame.flags.writeable = False
    detection = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), detection
    

Create a folder structure to store the sample data

In [5]:
def create_person_structure(actions, data_root=None):
    """Build the folder paths for a new participant.

    Existing `person_<number>` directories inside the data folder are
    inspected and the new participant gets the highest existing number plus
    one (or 0 when there are none). Directories with other names are ignored.

    No directory is created here; only the paths are returned.

    Parameters
    ----------
    actions : iterable of str
        Action labels; one sub-folder path is produced per action.
    data_root : str or Path, optional
        Folder holding the participant folders. Defaults to the notebook-level
        `project_data` setting.

    Returns
    -------
    dict
        `{'PERSON': <participant folder>, <action>: <participant folder>/<action>, ...}`
    """
    root = Path(project_data if data_root is None else data_root)

    # Collect the numbers of all existing participant folders. Taking the
    # maximum (instead of "the last entry") is required because iterdir()
    # yields entries in arbitrary order.
    used_ids = []
    if root.is_dir():
        for entry in root.iterdir():
            prefix, _, suffix = entry.name.rpartition("_")
            if entry.is_dir() and prefix == "person" and suffix.isdecimal():
                used_ids.append(int(suffix))

    # A single existing participant must already push the counter to 1,
    # otherwise person_0000 would be reused and its data overwritten.
    seq_nr = max(used_ids) + 1 if used_ids else 0

    person_folder = root / f"person_{seq_nr:04d}"
    folder_list = {'PERSON': person_folder}
    for action in actions:
        folder_list[action] = person_folder / action
    return folder_list

This routine checks the webcam and determines its frame rate.
The frame rate should be higher than 20 fps.

In [14]:
def test_webcam():
    # determine frame rate of camera
    cap = cv2.VideoCapture(web_cam, cv2.CAP_DSHOW)

    if cap.isOpened():
        num_frames = 60
        start = time.time()
        for i in range(0, num_frames):
            ret, frame = cap.read()
        end = time.time()
        fps = num_frames / (end - start)
        if fps >20:
            print(f"Good frame rate: {fps:.1f} fps!")
        else:
            print(f"This webcam has a low frame rate: {fps:.1f} fps! Please consider another webcam!")
    else:
        print("Cannot open webcam")

    cap.release()
    cv2.destroyAllWindows()

# Run the check now; it prints the measured frame rate or reports that the webcam cannot be opened.
test_webcam()

Good frame rate: 26.8 fps!


This cell tests the acquisition and the landmark detection. Press `q` in the camera window to stop.

In [13]:
# The webcam used for the assignment delivers at most 1920 x 1080.
# That is quite large, so the capture size is reduced to half of it.
# Change these values for other formats.
cap = cv2.VideoCapture(web_cam, cv2.CAP_DSHOW)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1920/2)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080/2)
try:
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # Without this check a failed read hands None to the
                # colour conversion, which raises.
                print("No frame received from the webcam; stopping the preview.")
                break

            image, results = mediapipe_detection(frame, holistic)
            pml.draw_landmarks(image, results)

            cv2.imshow("Camera feed:", image)
            # Press 'q' in the camera window to stop the preview.
            if cv2.waitKey(10) & 0xff == ord('q'):
                break
finally:
    # Free the camera and close the window even when an error occurred.
    cap.release()
    cv2.destroyAllWindows()

Sounds that signal the start and the stop of an action to the participant.

In [15]:
def beep_start():
    """Play the start signal: a 2500 Hz tone lasting 500 ms (half a second)."""
    winsound.Beep(2500, 500)

def beep_stop():
    """Play the stop signal: a 1200 Hz tone lasting 500 ms (half a second)."""
    winsound.Beep(1200, 500)
# beep_start()
# beep_stop()


Selects random actions such that all classes are sampled 5 times.

In [10]:
def create_random_action(actions, repeats, rng=None):
    """Return a shuffled list in which every action occurs `repeats` times.

    Parameters
    ----------
    actions : iterable
        Action labels. The input itself is never modified.
    repeats : int
        How often each action appears in the result.
    rng : random.Random, optional
        Generator used for shuffling. Defaults to the module-level `random`
        state; pass `random.Random(seed)` to get a reproducible order.

    Returns
    -------
    list
        All labels, each repeated `repeats` times, in random order.
    """
    # list(...) builds a fresh list from any iterable (list, tuple, dict keys),
    # so only the new list is shuffled.
    random_list = list(actions) * repeats
    shuffler = random if rng is None else rng
    shuffler.shuffle(random_list)
    return random_list

print(create_random_action(['C','TL','TR','BL','BR'],5))

['BR', 'TL', 'C', 'C', 'TL', 'BR', 'BR', 'BL', 'C', 'BL', 'TR', 'BL', 'TL', 'C', 'BL', 'TR', 'BL', 'TL', 'TL', 'TR', 'BR', 'BR', 'C', 'TR', 'TR']


# Main sample acquisition routine
# Run this routine for every participant 

1. Creates a folder structure for the new participant.
2. Randomly orders the actions so that each class is sampled 5 times.
3. Presents the action on the screen.
4. Starts with a high-frequency beep to indicate the start of an action.
5. Beeps at a low frequency to indicate the end of an action.

In [11]:
cap = cv2.VideoCapture(web_cam, cv2.CAP_DSHOW)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1920/2)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080/2)

folder_structure = create_person_structure(actions)
random_actions = create_random_action(actions, no_sequences)
print(random_actions)
break_flag = False
sequences = {key: 0 for key in actions}  # number of sequences recorded so far, per action
counter = 0  # total number of frames saved in this session

try:
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        # Loop through the shuffled actions; every entry is one sequence (video).
        for action in random_actions:
            sequence = sequences[action]
            npy_path = folder_structure[action] / str(sequence)

            for frame_num in range(sequence_length):
                ret, frame = cap.read()
                if not ret:
                    # Without this check a failed read hands None to the
                    # colour conversion, which raises.
                    print("No frame received from the webcam; stopping the acquisition.")
                    break_flag = True
                    break

                # Make detections
                image, results = mediapipe_detection(frame, holistic)
                pml.draw_landmarks(image, results)

                status_text = f'Collecting frames for {action} Video Number {sequence}/{frame_num}/   {counter}'

                # Write text for the participant and beep.
                if frame_num == 0:
                    cv2.putText(image, f'KIJK NAAR {action_descriptions[action]}', (120, 200),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 4, cv2.LINE_AA)
                    cv2.putText(image, status_text, (20, 20),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 255, 0), 1, cv2.LINE_AA)
                    cv2.imshow("Camera feed:", image)

                    # Give the window time to show the instruction before the start beep.
                    cv2.waitKey(150)
                    beep_start()
                elif frame_num == sequence_length - 1:
                    beep_stop()
                else:
                    cv2.putText(image, status_text, (20, 20),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 1, cv2.LINE_AA)
                    cv2.imshow("Camera feed:", image)

                key_points = pml.extract_keypoints(results)

                # Save the key points as <action folder>/<sequence>/<frame_num>.npy.
                # The folder is created only once there is a frame to store.
                npy_path.mkdir(parents=True, exist_ok=True)
                np.save(npy_path / f"{frame_num}.npy", key_points)
                counter += 1

                # Press 'q' in the camera window to abort the acquisition.
                if cv2.waitKey(10) & 0xff == ord('q'):
                    break_flag = True
                    break

            # The next recording of this action goes into the next sequence folder.
            sequences[action] = sequence + 1
            if break_flag:
                break
finally:
    # Free the camera and close the window even when an error occurred.
    cap.release()
    cv2.destroyAllWindows()

['TR', 'TR', 'BR', 'C', 'TL', 'BR', 'BL', 'TL', 'BR', 'C', 'C', 'TL', 'C', 'BR', 'TL', 'TL', 'BR', 'BL', 'BL', 'TR', 'BL', 'TR', 'C', 'BL', 'TR']
