In [2]:
import mediapipe as mp
import cv2
import numpy as np

In [3]:
# Short aliases for the MediaPipe sub-modules used throughout this notebook:
# - mp_drawing: drawing utilities (draw_landmarks, DrawingSpec) used to render the skeleton on frames
# - mp_pose: pose solution (Pose, POSE_CONNECTIONS, PoseLandmark)
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

### 1. Make some detection

In [4]:
def rescale_frame(frame, percent=50):
    '''
    Rescale a frame to a given percentage of its original size.

    Args:
        frame: Image to resize. Only ``frame.shape[0]`` (height) and
            ``frame.shape[1]`` (width) are read here; the object itself is
            handed to ``cv2.resize``.
        percent: Target size as a percentage of the original dimensions
            (50 halves both width and height). Must be positive.

    Returns:
        Whatever ``cv2.resize`` returns for the computed (width, height).

    Raises:
        ValueError: If ``percent`` is not a positive number.
    '''
    if percent <= 0:
        raise ValueError(f"percent must be positive, got {percent}")

    # int() truncates, so a small frame or a small percentage could yield a
    # 0-pixel dimension, which cv2.resize rejects. Clamp to at least 1 pixel.
    width = max(1, int(frame.shape[1] * percent / 100))
    height = max(1, int(frame.shape[0] * percent / 100))

    # cv2.resize expects the target size as (width, height)
    return cv2.resize(frame, (width, height), interpolation=cv2.INTER_AREA)

In [10]:
cap = cv2.VideoCapture("../data/squat_right_4.mp4")

# try/finally guarantees the capture and the OpenCV windows are released even
# if the loop is interrupted (e.g. kernel interrupt) or a frame raises.
try:
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        while cap.isOpened():
            ret, image = cap.read()

            # No frame returned: stop processing
            if not ret:
                break

            # Recolor image from BGR to RGB for mediapipe
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image.flags.writeable = False

            results = pose.process(image)

            # Recolor image back from RGB to BGR for OpenCV drawing/display
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # Draw landmarks and connections
            mp_drawing.draw_landmarks(
                image,
                results.pose_landmarks,
                mp_pose.POSE_CONNECTIONS,
                mp_drawing.DrawingSpec(color=(244, 117, 66), thickness=2, circle_radius=4),
                mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2),
            )

            # NOTE(review): both windows show the same annotated frame
            cv2.imshow("CV2", image)
            cv2.imshow("Media Pipe feed", image)

            # Press Q to close cv2 window
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    cap.release()
    cv2.destroyAllWindows()

    # (Optional) Work around windows not closing on macOS (https://stackoverflow.com/questions/6116564/destroywindow-does-not-close-window-on-mac-using-python-and-opencv)
    for i in range(1, 5):
        cv2.waitKey(1)

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


### 2. Save video from CV2 to a file

In [16]:
# Optional, use it to save live video from webcam
cap = cv2.VideoCapture(0) # Capture video from camera

# Get the width and height of frame (rounded to the nearest integer)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) + 0.5)
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) + 0.5)

# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'mp4v') # Be sure to use the lower case
out = cv2.VideoWriter('../data/webcam.mp4', fourcc, 20.0, (width, height))

# try/finally guarantees the camera, the writer and the windows are released
# even if the loop is interrupted (e.g. kernel interrupt) or a frame raises.
try:
    while cap.isOpened():
        ret, frame = cap.read()

        # Stop on a failed read instead of spinning on a dead camera
        # (same behaviour as the other capture loops in this notebook).
        if not ret:
            break

        # flipCode 0 flips the frame vertically
        frame = cv2.flip(frame, 0)

        # write the flipped frame
        out.write(frame)

        cv2.imshow('frame', frame)

        # Press Q to close cv2 window
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    # Release the writer as well so the output file is closed properly
    out.release()
    cv2.destroyAllWindows()

    # (Optional) Work around windows not closing on macOS (https://stackoverflow.com/questions/6116564/destroywindow-does-not-close-window-on-mac-using-python-and-opencv)
    for i in range(1, 5):
        cv2.waitKey(1)

### 3. Capture landmarks and Export to CSV

In [5]:
import csv
import os
from matplotlib import pyplot as plt

#### Generate Data Frame

The data frame will be saved in a .csv file.

A data frame will contain a "label" column which represents the label of a data point.
There are another 9 x 4 = 36 columns representing the 9 landmarks of a human pose that are important for a squat, with 4 values (x, y, z, visibility) per landmark.

In other words, each landmark's info is flattened into 4 consecutive columns.

According to the [Mediapipe documentation](https://google.github.io/mediapipe/solutions/pose#python-solution-api),
Each landmark consists of the following:
- x and y: Landmark coordinates normalized to [0.0, 1.0] by the image width and height respectively.
- z: Represents the landmark depth with the depth at the midpoint of hips being the origin, and the smaller the value the closer the landmark is to the camera. The magnitude of z uses roughly the same scale as x.
- visibility: A value in [0.0, 1.0] indicating the likelihood of the landmark being visible (present and not occluded) in the image.

In [13]:
# Landmarks that are important for a squat.
# Each entry is used as a key into mp_pose.PoseLandmark when extracting
# keypoints, and lower-cased to build the csv column names.

IMPORTANT_LMS = [
    "NOSE",
    "LEFT_SHOULDER",
    "RIGHT_SHOULDER",
    "LEFT_HIP",
    "RIGHT_HIP",
    "LEFT_KNEE",
    "RIGHT_KNEE",
    "LEFT_ANKLE",
    "RIGHT_ANKLE"
]

In [28]:
# Build the csv header: the "label" column followed by four columns
# (<name>_x, <name>_y, <name>_z, <name>_v) for every important landmark.

landmarks = ["label"]

for lm in IMPORTANT_LMS:
    landmarks.extend(f"{lm.lower()}_{suffix}" for suffix in "xyzv")

In [29]:
# Preview the feature columns (everything after the leading "label" column)
landmarks[1:]

['nose_x',
 'nose_y',
 'nose_z',
 'nose_v',
 'left_shoulder_x',
 'left_shoulder_y',
 'left_shoulder_z',
 'left_shoulder_v',
 'right_shoulder_x',
 'right_shoulder_y',
 'right_shoulder_z',
 'right_shoulder_v',
 'left_hip_x',
 'left_hip_y',
 'left_hip_z',
 'left_hip_v',
 'right_hip_x',
 'right_hip_y',
 'right_hip_z',
 'right_hip_v',
 'left_knee_x',
 'left_knee_y',
 'left_knee_z',
 'left_knee_v',
 'right_knee_x',
 'right_knee_y',
 'right_knee_z',
 'right_knee_v',
 'left_ankle_x',
 'left_ankle_y',
 'left_ankle_z',
 'left_ankle_v',
 'right_ankle_x',
 'right_ankle_y',
 'right_ankle_z',
 'right_ankle_v']

In [35]:
def init_csv(dataset_path: str = "data_frame.csv") -> None:
    '''
    Create a blank csv file that contains only the header row.

    The header is the module-level ``landmarks`` list of column names. The file
    is opened in "w" mode, so an existing file at ``dataset_path`` is
    overwritten.

    Args:
        dataset_path: Where to write the csv. Defaults to "data_frame.csv" in
            the current working directory.
    '''

    # Write all the columns to a file
    with open(dataset_path, mode="w", newline="") as f:
        csv_writer = csv.writer(f, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
        csv_writer.writerow(landmarks)

In [None]:
# landmarks = results.pose_landmarks.landmark
# for res in landmarks:
#     for lm in IMPORTANT_LMS:
#         keypoints = landmarks[mp_pose.PoseLandmark[lm].value]
#         print(lm, keypoints)

In [49]:
def export_landmark_to_csv(results, action: str, dataset_path: str = "data_frame.csv") -> None:
    '''
    Append one labeled row of pose landmarks to the csv file.

    The row is ``action`` followed by x, y, z and visibility for every name in
    ``IMPORTANT_LMS``, in that order.

    This is best-effort: if no pose was detected, or extraction/writing fails,
    a message is printed and nothing is raised, so a labeling loop keeps running.

    Args:
        results: Object returned by ``pose.process(...)``; its
            ``pose_landmarks.landmark`` sequence is indexed with
            ``mp_pose.PoseLandmark[name].value``.
        action: Label stored in the first column (e.g. "up" or "down").
        dataset_path: csv file to append to. Defaults to "data_frame.csv".
    '''
    # Without this guard, a frame with no detected pose raises AttributeError
    # on `.landmark` and aborts the caller.
    pose_landmarks = getattr(results, "pose_landmarks", None)
    if pose_landmarks is None:
        print(f"No pose landmarks detected, skipped saving '{action}'")
        return

    try:
        # Named `detected` so it does not shadow the module-level `landmarks`
        # list of column names.
        detected = pose_landmarks.landmark

        # Label first, then the flattened coordinates of the important landmarks
        row = [action]
        for lm in IMPORTANT_LMS:
            keypoint = detected[mp_pose.PoseLandmark[lm].value]
            row.extend([keypoint.x, keypoint.y, keypoint.z, keypoint.visibility])

        # Append new row to .csv file
        with open(dataset_path, mode="a", newline="") as f:
            csv_writer = csv.writer(f, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
            csv_writer.writerow(row)

    except Exception as e:
        # Deliberately best-effort: report the failure and keep the caller running
        print(f"Failed to export '{action}' landmarks: {e}")

Extract and label data from the video dataset

In [50]:
cap = cv2.VideoCapture("../data/squat_right_2.mp4")
state_of_save = ""

# NOTE: init_csv() opens the csv in "w" mode, so re-running this cell
# discards any rows saved by a previous run.
init_csv()

# try/finally guarantees the capture and the OpenCV windows are released even
# if the loop is interrupted (e.g. kernel interrupt) or a frame raises.
try:
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        while cap.isOpened():
            ret, image = cap.read()

            # No frame returned: stop processing
            if not ret:
                break

            # Recolor image from BGR to RGB for mediapipe
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image.flags.writeable = False

            results = pose.process(image)

            # Recolor image back from RGB to BGR for OpenCV drawing/display
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # Draw landmarks and connections
            mp_drawing.draw_landmarks(
                image,
                results.pose_landmarks,
                mp_pose.POSE_CONNECTIONS,
                mp_drawing.DrawingSpec(color=(244, 117, 66), thickness=2, circle_radius=4),
                mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2),
            )

            cv2.putText(image, f"Just saved {state_of_save}", (50, 150), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 1, cv2.LINE_AA)

            # NOTE(review): both windows show the same annotated frame
            cv2.imshow("CV2", image)
            cv2.imshow("Media Pipe feed", image)

            # Poll the keyboard exactly once per frame. Calling waitKey twice
            # would let one call swallow a key press meant for the other check.
            # The 0xFF mask keeps only the low byte of the key code.
            pressed_key = cv2.waitKey(1) & 0xFF

            # Press Q to close cv2 window
            if pressed_key == ord('q'):
                break

            # Press U / D to label the frame as UP / DOWN position
            label = None
            if pressed_key == ord('u'):
                label = "up"
            elif pressed_key == ord('d'):
                label = "down"

            # Only save (and only report "Just saved ...") when a pose was
            # actually detected in this frame.
            if label is not None and results.pose_landmarks is not None:
                export_landmark_to_csv(results, label)
                state_of_save = label
finally:
    cap.release()
    cv2.destroyAllWindows()

    # (Optional) Work around windows not closing on macOS (https://stackoverflow.com/questions/6116564/destroywindow-does-not-close-window-on-mac-using-python-and-opencv)
    for i in range(1, 5):
        cv2.waitKey(1)