In [None]:
%pip install opencv-python mediapipe

In [None]:
import numpy as np
import cv2
import os
import mediapipe as mp

In [None]:
# Short aliases for the two MediaPipe "solutions" modules used by the cells below.
mp_drawing = mp.solutions.drawing_utils  # helpers that render landmarks onto an image
mp_holistic = mp.solutions.holistic  # Holistic model; also provides the face-mesh connection set

In [None]:
def mediapipe_detection(input_image, model):
    """Run ``model`` on a BGR image and return the image together with the results.

    Args:
        input_image: Image in BGR channel order (what ``cv2.VideoCapture.read`` yields).
        model: Object exposing ``process(image)``; it is handed the RGB version
            of the image.

    Returns:
        A ``(image, detection_results)`` tuple. ``image`` is the frame after the
        BGR -> RGB -> BGR round trip, and ``detection_results`` is whatever
        ``model.process`` returned.
    """
    # The model is fed RGB, whereas the caller supplies BGR.
    rgb_image = cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB)
    detection_results = model.process(rgb_image)

    # Hand the caller a BGR image again so it can be drawn on and displayed with OpenCV.
    bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
    return bgr_image, detection_results

In [None]:
def draw_landmarks(input_image, detection_results):
    """Draw the face mesh onto ``input_image`` using MediaPipe's default styling.

    Args:
        input_image: Image to draw on; passed straight to
            ``mp_drawing.draw_landmarks``.
        detection_results: Detection output whose ``face_landmarks`` attribute
            is drawn.

    Returns:
        None.
    """
    face_landmarks = detection_results.face_landmarks
    face_connections = mp_holistic.FACEMESH_TESSELATION
    mp_drawing.draw_landmarks(input_image, face_landmarks, face_connections)

In [None]:
def draw_styled_landmarks(input_image, detection_results):
    """Draw the face mesh onto ``input_image`` with custom colours.

    Landmark points use colour ``(255, 0, 0)`` and the connecting lines use
    ``(255, 255, 0)``; both have thickness 1 and circle radius 1.

    Args:
        input_image: Image to draw on; passed straight to
            ``mp_drawing.draw_landmarks``.
        detection_results: Detection output whose ``face_landmarks`` attribute
            is drawn.

    Returns:
        None.
    """
    # Styling for the individual landmark points
    landmark_spec = mp_drawing.DrawingSpec(color=(255, 0, 0), thickness=1, circle_radius=1)
    # Styling for the lines connecting the landmarks
    connection_spec = mp_drawing.DrawingSpec(color=(255, 255, 0), thickness=1, circle_radius=1)

    mp_drawing.draw_landmarks(
        input_image,
        detection_results.face_landmarks,
        mp_holistic.FACEMESH_TESSELATION,
        landmark_spec,
        connection_spec,
    )

In [None]:
# Flatten the detected face landmarks into a single 1-D feature vector
def extract_keypoints(detection_results):
    """Return the face landmark coordinates as a flat NumPy array.

    Args:
        detection_results: Detection output with a ``face_landmarks`` attribute.
            When that attribute is truthy, its ``landmark`` sequence is read and
            each entry must provide ``x``, ``y`` and ``z``.

    Returns:
        A 1-D array laid out as ``[x0, y0, z0, x1, y1, z1, ...]``. If
        ``face_landmarks`` is falsy (no face found), an all-zero array of
        length ``468 * 3`` is returned instead.
    """
    face_landmarks = detection_results.face_landmarks
    if face_landmarks:
        coordinates = [[point.x, point.y, point.z] for point in face_landmarks.landmark]
        face = np.array(coordinates).flatten()
    else:
        # Placeholder of fixed size used when no face was detected
        face = np.zeros(468 * 3)

    return np.concatenate([face])

In [None]:
# Root folder under which the collected keypoint arrays are stored
DATA_PATH = os.path.join('EmotionData')

# Emotion labels for which samples are collected
emotions = np.array(('Happy', 'Sad', 'Angry', 'Neutral', 'Surprised'))

# Number of frames captured in each sequence
length_frames = 3
# Number of sequences (videos) recorded per emotion; despite its name this is
# a count of sequences, not of frames
n_frames = 20

In [None]:
import contextlib
# Create one output folder per (emotion, sequence) pair: DATA_PATH/<emotion>/<sequence index>.
for emotion in emotions:
    for video in range(n_frames):
        # exist_ok=True keeps re-running this cell harmless, while genuine
        # failures (permissions, a regular file blocking the path, ...) still
        # raise here instead of being silently swallowed.
        os.makedirs(os.path.join(DATA_PATH, emotion, str(video)), exist_ok=True)

In [None]:
# Record keypoints from the webcam: for every emotion, `n_frames` sequences of
# `length_frames` frames each are captured, and the face keypoints of every
# frame are saved under DATA_PATH/<emotion>/<sequence>/<frame>.
cap = cv2.VideoCapture(0)

# Becomes True when the user presses 'q' or the camera stops delivering frames.
# It is re-checked after each inner loop so that every nesting level is left,
# not just the innermost one.
stop_collection = False

try:
    # Set mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

        # Loop through emotions
        for emotion in emotions:
            # Loop through sequences aka videos
            for video in range(n_frames):
                # Loop through the frames of one sequence
                for frame_num in range(length_frames):

                    # Read feed
                    ret, input_frame = cap.read()
                    if not ret:
                        # No frame was returned, so there is nothing to process;
                        # stop instead of passing an invalid frame on.
                        print('Could not read a frame from the camera; stopping collection.')
                        stop_collection = True
                        break

                    # Make detections
                    input_image, results = mediapipe_detection(input_frame, holistic)

                    # Draw landmarks
                    draw_styled_landmarks(input_image, results)

                    # The first frame of each sequence gets an extra banner
                    if frame_num == 0:
                        cv2.putText(input_image, 'STARTING COLLECTION', (120, 200),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 1, cv2.LINE_AA)

                    # Status line shown on every frame
                    cv2.putText(
                        input_image,
                        f'Collecting frames for {emotion} Video Number {video}',
                        (15, 12),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.5,
                        (0, 0, 255),
                        1,
                        cv2.LINE_AA,
                    )

                    # Show to screen
                    cv2.imshow('OpenCV Feed', input_image)
                    if frame_num == 0:
                        # Wait up to 2 seconds at the start of a sequence
                        cv2.waitKey(2000)

                    # Export keypoints
                    keypoints = extract_keypoints(results)
                    npy_path = os.path.join(DATA_PATH, emotion, str(video), str(frame_num))
                    np.save(npy_path, keypoints)

                    # Break gracefully
                    if cv2.waitKey(10) & 0xFF == ord('q'):
                        stop_collection = True
                        break

                if stop_collection:
                    break
            if stop_collection:
                break
finally:
    # Always free the camera and close the preview window, including when an
    # exception interrupts the collection.
    cap.release()
    cv2.destroyAllWindows()