# Install

In [None]:
# Install the third-party packages imported further down in this notebook.
# `-q` keeps pip's output quiet; `%pip` is the IPython pip magic.
# Note: the OpenCV package chosen here is the "headless" distribution.
%pip install -q opencv-python-headless
%pip install -q mediapipe
%pip install -q numpy
%pip install -q rembg

# Hyperparameter

In [None]:
import os

In [None]:
# Root folder that receives the generated training images
output_folder = "./training_pose2img"

# Input video the frames are read from -- change this to the path of your video
video_path = "./video/fish.mov"

# Create the training folder

- train: contains the pose skeleton and image side by side
- trainA: contains the pose skeleton
- trainB: contains the original image but without background

In [None]:
# Create the output folder and its three subfolders (train, trainA, trainB).
# Each folder is created independently with exist_ok=True, so re-running the
# cell is harmless and a pre-existing output folder that is missing some of
# its subfolders gets repaired instead of being skipped entirely.
os.makedirs(output_folder, exist_ok=True)
for subfolder in ('train', 'trainA', 'trainB'):
    os.makedirs(os.path.join(output_folder, subfolder), exist_ok=True)

# Prepare MediaPipe for Pose Estimation

In [None]:
import cv2
import mediapipe as mp
import csv
import numpy as np
import os
import numpy as np
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
from rembg import remove

## Initialize MediaPipe Pose and Drawing utilities

In [None]:
# Shortcuts to MediaPipe's drawing helpers and its predefined drawing styles
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Pose solution: partial body pose landmarks
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(
    static_image_mode=False,
    model_complexity=2,
    smooth_landmarks=True,
    enable_segmentation=True,
    min_detection_confidence=0.1,
)

# Holistic solution: full body pose landmarks (its results are read below
# for the body pose and for the left and right hands)
mp_holistic = mp.solutions.holistic
holistic = mp_holistic.Holistic(
    static_image_mode=False,
    model_complexity=2,
    smooth_landmarks=True,
    enable_segmentation=True,
    min_detection_confidence=0.1,
)

# One drawing spec per hand so the two hands are drawn in different colours
right_hand_landmark_style = mp_drawing.DrawingSpec(
    color=(0, 0, 255), thickness=2, circle_radius=2
)
left_hand_landmark_style = mp_drawing.DrawingSpec(
    color=(255, 0, 0), thickness=2, circle_radius=2
)

## Inferencing on video

In [None]:
# Read the video frame by frame and, for every frame, write
#   trainA/<n>.jpg : the pose/hand skeleton drawn on a black canvas
#   trainB/<n>.jpg : the frame with its background removed by rembg
# where <n> is the zero-based frame index.

# Open the video file
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Fail loudly: otherwise the loop below is skipped and no frames are written.
    cap.release()
    raise FileNotFoundError(f"Could not open video: {video_path}")

# cv2.imwrite reports failure through its return value rather than raising,
# so make sure both target folders exist before the first write.
for subfolder in ("trainA", "trainB"):
    os.makedirs(os.path.join(output_folder, subfolder), exist_ok=True)

frame_number = 0

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # End of the video (or a read error): stop processing.
            break

        # Convert the frame to RGB
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Create a black background image with the same shape as the frame
        black_background = np.zeros_like(frame_rgb)

        # Process the frame with MediaPipe Holistic
        result = holistic.process(frame_rgb)

        # Draw the landmarks on the black background (only when a body pose was found)
        if result.pose_landmarks:
            # Right hand
            mp_drawing.draw_landmarks(black_background, result.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, landmark_drawing_spec=right_hand_landmark_style)
            # Left hand
            mp_drawing.draw_landmarks(black_background, result.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, landmark_drawing_spec=left_hand_landmark_style)
            # Body
            mp_drawing.draw_landmarks(black_background, result.pose_landmarks, mp_holistic.POSE_CONNECTIONS, landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())

        # remove background
        removed_background = remove(frame_rgb)  # output shape: (height, width, RGBA)
        # Swap the red and blue channels so that cv2.imwrite, which expects BGR
        # channel order, stores the colours correctly.
        # NOTE(review): this conversion is expected to drop the alpha channel as
        # well (JPEG cannot store one) -- confirm against the OpenCV version in use.
        removed_background = cv2.cvtColor(removed_background, cv2.COLOR_BGR2RGB)

        # write out the images
        # NOTE(review): the skeleton canvas is written without a channel swap, so
        # the colour tuples used for drawing end up interpreted as BGR in the
        # file -- confirm this is intended.
        skeleton_file = f"{output_folder}/trainA/{frame_number}.jpg"
        cutout_file = f"{output_folder}/trainB/{frame_number}.jpg"
        if not cv2.imwrite(skeleton_file, black_background):  # pose skeleton
            raise IOError(f"Could not write {skeleton_file}")
        if not cv2.imwrite(cutout_file, removed_background):  # image without background
            raise IOError(f"Could not write {cutout_file}")

        # The former `cv2.waitKey(1)` "press q to quit" check was removed: no
        # window is ever opened here to receive key presses, and this notebook
        # installs the headless OpenCV distribution. Interrupt the kernel to
        # stop early; the `finally` below still releases the capture.

        frame_number += 1
finally:
    # Always release the capture, even if a frame fails or the run is interrupted.
    cap.release()

# Align the pose skeleton with the extracted image

In [None]:
# `align_images` comes from the project-local `align_dataset` module.
# It is handed the dataset root folder as a string.
# NOTE(review): presumably it fills the `train` subfolder from trainA/trainB --
# confirm in align_dataset.py.
from align_dataset import align_images

dataset_root = str(output_folder)
align_images(source_folder=dataset_root)

# Write the aligned images to a video (optional)

In [None]:
# Extra packages used only by the optional video export cells below.
# NOTE(review): `--upgrade` pulls the newest moviepy release, while the export
# cell imports from `moviepy.editor` -- confirm that module is available in
# the version that gets installed.
%pip install -q moviepy --upgrade
%pip install -q Pillow

In [None]:
import os
from PIL import Image
import numpy as np

# Load the aligned frames from disk, ordered by frame number, into `img_array`
# (a list of numpy arrays) so they can be written out as a video.
images_path = './training_pose2img/train'
output_path = './training_pose2img/'

# make sure the output folder exists (no-op when it is already there)
os.makedirs(output_path, exist_ok=True)

# Keep only files named "<frame number>.<extension>". Stray entries such as
# ".DS_Store" have a non-numeric stem and would make the numeric sort below
# raise ValueError.
image_array = [
    name for name in os.listdir(images_path)
    if name.split('.')[0].isdecimal()
]
image_array.sort(key=lambda x: int(x.split('.')[0]))

img_array = []
size = None
for filename in image_array:
    # Open inside a context manager so the file handle is closed right away;
    # convert to RGB so every frame is a (height, width, 3) array, which also
    # keeps the three-way shape unpacking below valid for greyscale files.
    with Image.open(os.path.join(images_path, filename)) as pil_image:
        img = np.array(pil_image.convert('RGB'))
    height, width, layers = img.shape
    size = (width, height)
    img_array.append(img)

if not img_array:
    # Previously this surfaced as a NameError on `size` in the print below.
    raise FileNotFoundError(f"No numbered frame images found in {images_path}")

print(f"Number of frames: {len(img_array)}")
print(f"Frame size: {size}")

In [None]:
# Assemble the loaded frames into an H.264 video file.
# The `moviepy.editor` module exists in moviepy 1.x but was removed in 2.x,
# where the same classes are exported from the top-level package. Try the
# original import first and fall back so both major versions work.
try:
    from moviepy.editor import VideoFileClip, AudioFileClip, ImageSequenceClip
except ImportError:
    from moviepy import VideoFileClip, AudioFileClip, ImageSequenceClip

fps = 30  # Adjust fps as needed; used for both the clip and the written file

clip = ImageSequenceClip(img_array, fps=fps)

# os.path.join avoids the doubled slash that plain concatenation produces
# when output_path already ends with '/'.
clip.write_videofile(os.path.join(output_path, 'pose2img.mp4'), codec='libx264', audio_codec="aac", fps=fps)