# Install

In [None]:
# Install the third-party packages that the cells below import.
# `%pip` targets the environment of the running kernel; `-q` keeps pip's output short.
# NOTE(review): no versions are pinned here, so a fresh run may resolve different releases.
%pip install -q opencv-python-headless
%pip install -q mediapipe
%pip install -q numpy
%pip install -q rembg

# Hyperparameters (input and output paths)

In [None]:
import os

In [None]:
# Input video to run pose estimation on -- change this to the path of your video
video_path = './video/magnetic.mp4'
# Root folder under which the testA/ and testB/ frame folders are created
output_folder = './test_pose'

# Create the testing folders

- testA: contains the pose-skeleton image of each frame
- testB: contains the original image of each frame

In [None]:
import shutil

# Start every run from empty testA/ and testB/ folders under `output_folder`.
# Any frames left over from a previous run are deleted first, so re-running the
# notebook never mixes old and new frames.
for subfolder in ('testA', 'testB'):
    subfolder_path = os.path.join(output_folder, subfolder)
    # shutil.rmtree replaces the shell `rm -rf`: it works on every platform and
    # is safe for paths containing spaces or shell metacharacters.
    # ignore_errors=True makes a not-yet-existing folder a no-op.
    shutil.rmtree(subfolder_path, ignore_errors=True)
    # makedirs also creates `output_folder` itself when it is missing.
    os.makedirs(subfolder_path)

# Prepare MediaPipe for Pose Estimation

In [None]:
import cv2
import mediapipe as mp
import csv
import numpy as np
import os
import numpy as np
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
from rembg import remove

## Initialize MediaPipe Pose and Drawing utilities

In [None]:
# MediaPipe drawing helpers
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Pose solution: partial body pose landmarks
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(
    static_image_mode=False,
    model_complexity=2,
    enable_segmentation=True,
    min_detection_confidence=0.1,
    smooth_landmarks=True,
)

# Holistic solution: full body pose landmarks (this is the model the inference loop calls)
mp_holistic = mp.solutions.holistic
holistic = mp_holistic.Holistic(
    static_image_mode=False,
    model_complexity=2,
    enable_segmentation=True,
    min_detection_confidence=0.1,
    smooth_landmarks=True,
)

# Per-hand drawing specs so the two hands are drawn in different colours
left_hand_landmark_style = mp_drawing.DrawingSpec(
    color=(255, 0, 0),
    thickness=2,
    circle_radius=2,
)
right_hand_landmark_style = mp_drawing.DrawingSpec(
    color=(0, 0, 255),
    thickness=2,
    circle_radius=2,
)

## Running inference on the video

In [None]:
# Open the video file and fail loudly if it cannot be read; otherwise the loop
# below would silently produce no frames at all.
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    cap.release()
    raise IOError(f"Could not open video: {video_path}")

frame_number = 0

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # End of stream (or a read error): stop processing
            break

        # MediaPipe expects RGB input, OpenCV decodes frames as BGR
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Black canvas with the same shape as the frame; the skeleton is drawn on it
        black_background = np.zeros_like(frame_rgb)

        # Run the MediaPipe Holistic model on the frame
        result = holistic.process(frame_rgb)

        # Draw the landmarks only when a body pose was detected
        if result.pose_landmarks:
            # Right hand
            mp_drawing.draw_landmarks(black_background, result.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, landmark_drawing_spec=right_hand_landmark_style)
            # Left hand
            mp_drawing.draw_landmarks(black_background, result.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, landmark_drawing_spec=left_hand_landmark_style)
            # Body
            mp_drawing.draw_landmarks(black_background, result.pose_landmarks, mp_holistic.POSE_CONNECTIONS, landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())

        # NOTE(review): cv2.imwrite treats the array as BGR, so the colours drawn
        # above are stored with that channel order -- confirm this is intended.
        # Write out the pose skeleton to testA
        cv2.imwrite(f"{output_folder}/testA/{frame_number}.jpg", black_background)
        # Write out the original (BGR) frame to testB
        cv2.imwrite(f"{output_folder}/testB/{frame_number}.jpg", frame)

        # No cv2.waitKey() poll here: this cell never opens a window, so a key
        # press could never be received, and GUI calls can fail on headless builds.
        frame_number += 1
finally:
    # Always free the capture handle, even if a frame fails to process
    cap.release()

# Write the pose_dance images to a video

In [None]:
# Write a video out from the images in the output folder

# NOTE(review): no cell in this notebook writes to './output' (the skeleton
# frames go to '<output_folder>/testA') -- confirm this is the intended folder.
images_path = './output'
output_path = './output_video'

# Create the destination folder if it does not exist yet
os.makedirs(output_path, exist_ok=True)

# Keep only entries named "<frame number>.<ext>"; anything else (for example a
# '.ipynb_checkpoints' folder) would break the numeric sort below.
image_array = [name for name in os.listdir(images_path) if name.split('.')[0].isdecimal()]
image_array.sort(key=lambda x: int(x.split('.')[0]))

img_array = []
for filename in image_array:
    img = cv2.imread(os.path.join(images_path, filename))
    if img is None:
        # cv2.imread returns None instead of raising when a file cannot be decoded
        print(f"Skipping unreadable image: {filename}")
        continue
    img_array.append(img)

if not img_array:
    raise RuntimeError(f"No readable frames found in {images_path}")

# Frame size is taken from the last loaded image, as (width, height)
height, width, layers = img_array[-1].shape
size = (width, height)

print(f"Number of frames: {len(img_array)}")
print(f"Frame size: {size}")

# Create the video file (mp4; for AVI use the 'DIVX' fourcc and an .avi file name)
out = cv2.VideoWriter(f'{output_path}/output.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps=30, frameSize=size)
try:
    # Write the images to the video file in frame order
    for img in img_array:
        out.write(img)
finally:
    # Finalise the file even if writing a frame fails
    out.release()

# Align the pose skeleton with the original frame

In [None]:
!python make_dataset_aligned.py --dataset-path test_pose

# Write the aligned images to a video

In [None]:
%pip install -q moviepy --upgrade
%pip install -q Pillow

In [None]:
import os
from PIL import Image
import numpy as np

# Load the aligned images, in frame order, so they can be written to a video

images_path = './test_pose/test'
output_path = './test_pose/'

# Create the destination folder if it does not exist yet
os.makedirs(output_path, exist_ok=True)

# Keep only entries named "<frame number>.<ext>"; anything else (for example a
# '.ipynb_checkpoints' folder) would break the numeric sort below.
image_array = [name for name in os.listdir(images_path) if name.split('.')[0].isdecimal()]
image_array.sort(key=lambda x: int(x.split('.')[0]))

img_array = []
for filename in image_array:
    # The context manager closes the file as soon as the pixels are copied out
    with Image.open(os.path.join(images_path, filename)) as pil_image:
        img = np.array(pil_image)
    img_array.append(img)

if not img_array:
    raise RuntimeError(f"No frames found in {images_path}")

# Frame size is taken from the last loaded image, as (width, height);
# slicing the shape also works for images without a channel axis.
height, width = img_array[-1].shape[:2]
size = (width, height)

print(f"Number of frames: {len(img_array)}")
print(f"Frame size: {size}")

In [None]:
from moviepy.editor import VideoFileClip, AudioFileClip, ImageSequenceClip

# Turn the in-memory frames into a clip (adjust fps as needed)
clip = ImageSequenceClip(img_array, fps=30)

# Encode the clip as H.264 next to the aligned frames
clip.write_videofile(
    f'{output_path}/pose2img.mp4',
    codec='libx264',
    fps=30,
)

# Combine video and audio

In [None]:
from moviepy.editor import VideoFileClip, AudioFileClip

output_video_path = "./video/magnetic_pose2img.mp4"
input_video_path = "./test_pose/pose2img.mp4"
input_audio_path = "./video/magnetic.mp4"

# Open both clips in a `with` block so the readers behind them are closed
# even when encoding fails.
with VideoFileClip(input_video_path) as video, AudioFileClip(input_audio_path) as audio:
    # Print both durations so a length mismatch between the tracks is visible
    print(f"Video duration: {video.duration}")
    print(f"Audio duration: {audio.duration}")

    # Attach the audio track to the generated video
    dubbed = video.set_audio(audio)

    # Write the result to a file
    dubbed.write_videofile(output_video_path, codec="libx264", audio_codec="aac")