In [1]:
import numpy as np
import cv2 as cv
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

def mediapipe_detection(image, model):
	image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
	image.flags.writeable = False
	results = model.process(image)
	image.flags.writeable = True
	image = cv.cvtColor(image, cv.COLOR_RGB2BGR)
	return image, results

def draw_landmarks(image, results):
	mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION, mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1),mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1))
	mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS, mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4),mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2))
	mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4),mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2))
	mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2))

def extract_keypoints(results):
	pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
	face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
	left_hand = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
	left_hand = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
	return np.concatenate([pose, face, left_hand, left_hand])



objc[26499]: Class CaptureDelegate is implemented in both /Users/d_f_i/miniconda3/envs/tensorflow/lib/python3.10/site-packages/cv2/cv2.abi3.so (0x159fb25a0) and /Users/d_f_i/miniconda3/envs/tensorflow/lib/python3.10/site-packages/mediapipe/.dylibs/libopencv_videoio.3.4.16.dylib (0x125174860). One of the two will be used. Which one is undefined.
objc[26499]: Class CVWindow is implemented in both /Users/d_f_i/miniconda3/envs/tensorflow/lib/python3.10/site-packages/cv2/cv2.abi3.so (0x159fb25f0) and /Users/d_f_i/miniconda3/envs/tensorflow/lib/python3.10/site-packages/mediapipe/.dylibs/libopencv_highgui.3.4.16.dylib (0x115dd8a68). One of the two will be used. Which one is undefined.
objc[26499]: Class CVView is implemented in both /Users/d_f_i/miniconda3/envs/tensorflow/lib/python3.10/site-packages/cv2/cv2.abi3.so (0x159fb2618) and /Users/d_f_i/miniconda3/envs/tensorflow/lib/python3.10/site-packages/mediapipe/.dylibs/libopencv_highgui.3.4.16.dylib (0x115dd8a90). One of the two will be used.

In [4]:
# Path for exported data, numpy arrays
DATA_PATH = os.path.join('Dataset') 

# Actions that we try to detect
actions = np.array(['Thank_you', 'Hello', 'Good'])

# Thirty videos worth of data
no_sequences = 50

# Videos are going to be 30 frames in length
sequence_length = 30

# Folder start
start_folder = 1

In [5]:
for action in actions:
    #dirmax = np.max(np.array(os.listdir(os.path.join(DATA_PATH, action))).astype(int))
    for sequence in range(1,no_sequences+1):
        try: 
            os.makedirs(os.path.join(DATA_PATH, action, str(sequence)))
        except:
            pass

In [6]:
cap = cv.VideoCapture(0)
# Set mediapipe model
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    
    # NEW LOOP
    # Loop through actions
    for action in actions:
        # Loop through sequences aka videos
        for sequence in range(start_folder, start_folder+no_sequences):
            if sequence > 50:
                break
            # Loop through video length aka sequence length
            for frame_num in range(sequence_length):

                # Read feed
                ret, frame = cap.read()

                # Make detections
                image, results = mediapipe_detection(frame, holistic)

                # Draw landmarks
                draw_landmarks(image, results)
                
                # NEW Apply wait logic
                if frame_num == 0: 
                    cv.putText(image, 'STARTING COLLECTION', (120,200), 
                               cv.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv.LINE_AA)
                    cv.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12), 
                               cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv.LINE_AA)
                    # Show to screen
                    cv.imshow('OpenCV Feed', image)
                    cv.waitKey(500)
                else: 
                    cv.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12), 
                               cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv.LINE_AA)
                    # Show to screen
                    image = cv.flip(image, 1)
                    cv.imshow('OpenCV Feed', image)
                
                # NEW Export keypoints
                keypoints = extract_keypoints(results)
                npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
                try:
                    np.save(npy_path, keypoints)
                except:
                    break
                    
                # Break gracefully
                if cv.waitKey(10) & 0xFF == ord('q'):
                    print('break')
                    
    cap.release()
    cv.destroyAllWindows()

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [1]:
print('running')
import cv2 as cv
import sys
cap = cv.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        print('cant do it anymore')
        break
        
    cv.imshow('frame', cv.flip(frame, 1))
    if cv.waitKey(1) & 0xFF == ord('q'):
        print('should break')
        break

running
should break


In [8]:
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils
#cap = cv.VideoCapture(0)

with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    samples_path = os.path.join('Clips')
    should_break = False
    for sample in os.listdir(samples_path):
        if should_break:
            break

        #print(sample)
        clips_path = os.path.join(samples_path, sample)
        if not os.path.isdir(clips_path):
            continue

        for clip_number, clip in enumerate(os.listdir(clips_path)):
            print(f'{sample}: {clip_number}')
            if should_break:
                break

            file = os.path.join(clips_path, clip)
            if not (os.path.isfile(file) and file.endswith('.mp4')):
                print(f'{file}: continue')
                continue

            cap = cv.VideoCapture(file)
            #cap.set(cv.CV_CAP_PROP_SETTINGS, 1)
            print(int(cap.get(cv.CAP_PROP_FRAME_COUNT)))

            if not cap.isOpened(): 
                print("Cannot open Video feed")
                should_break = True
                break
                
            frame_number = 0
            while True:
                if frame_number == 30:
                    print(f'frames: {frame_number}')
                    break
                # Capture frame-by-frame
                ret, frame = cap.read()

                # if frame is read correctly ret is True
                if not ret:
                    #print("Can't receive frame (stream end?). Exiting ...")
                    print("Next...")
                    break

                # Make detections
                image, results = mediapipe_detection(frame, holistic)

                key_points = extract_keypoints(results)
                #print(key_points)
                npy_path = os.path.join('Dataset', str(sample), str(clip_number), str(frame_number))
                try:
                    np.save(npy_path, key_points)
                except:
                    continue

                draw_landmarks(image, results)

                # Display the resulting frame 
                image = cv.flip(image, 1)
                print('show')
                cv.imshow('frame', image)

                if cv.waitKey(10) & 0xFF == ord('n'):
                    break

                if cv.waitKey(10) & 0xFF == ord('q'):
                    should_break = True
                    break

                frame_number += 1

            # When everything done, release the capture
            cap.release() 
            cv.destroyAllWindows()


S: 0
Clips/S/.DS_Store: continue
S: 1
61
Next...
S: 2
35
Next...
S: 3
53
Next...
S: 4
56
Next...
S: 5
48
Next...
S: 6
71
Next...
S: 7
40
Next...
S: 8
54
Next...
S: 9
74
Next...
S: 10
54
Next...
S: 11
62


KeyboardInterrupt: 