In [None]:
# convert video to images
import cv2
import os

def video_to_frames(video_path, output_folder):
    """Decode a video and store every frame as a numbered JPG file.

    Frames are written in decoding order as ``frame_0000.jpg``,
    ``frame_0001.jpg``, ... inside ``output_folder``. The number in the file
    name is always the index of the frame inside the video, even if an
    earlier frame could not be written.

    Args:
        video_path: Path of the video file to decode.
        output_folder: Directory receiving the frames; created when missing.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(output_folder, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Unable to open video file {video_path}")
        cap.release()
        return

    frame_index = 0   # index of the frame inside the video
    saved_count = 0   # number of frames that actually reached the disk
    try:
        while True:
            ret, frame = cap.read()
            # A failed read marks the end of the stream.
            if not ret:
                break

            frame_filename = os.path.join(output_folder, f"frame_{frame_index:04d}.jpg")
            # cv2.imwrite signals failure through its return value, not an exception.
            if cv2.imwrite(frame_filename, frame):
                saved_count += 1
            else:
                print(f"Failed to write {frame_filename}")

            frame_index += 1
    finally:
        # Release the capture even when decoding or writing raises.
        cap.release()

    print(f"Saved {saved_count} frames to folder {output_folder}")

# Sequence identifier; later cells read this kernel-level variable to build
# their own paths under data/aist/<seq_name>/.
seq_name = "gBR_sBM_c01_d05_mBR0_ch06"
# Example usage: extract all frames of the sequence video into
# data/aist/<seq_name>/image
video_path = f"data/aist/videos/{seq_name}.mp4"
output_folder = f"data/aist/{seq_name}/image"
video_to_frames(video_path, output_folder)


In [None]:
# crop images
import json
import cv2
import os

# Input and output directories (seq_name comes from the frame-extraction cell)
input_dir = f'data/aist/{seq_name}/image/'  # Path to the original image folder
output_dir = f'data/aist/{seq_name}/image_crop/'  # Path to the folder where cropped images will be saved

# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Define the square cropping region. Rows y_start .. y_start+cropped_pixel-1
# and columns x_start .. x_start+cropped_pixel-1 are retained.
y_start = 320        # First retained row    (rows 320-939 with the values below)
x_start = 630        # First retained column (columns 630-1249 with the values below)
cropped_pixel = 620  # Side length of the square crop in pixels
data = {
    "y_start": y_start,
    "x_start": x_start,
    "cropped_pixel": cropped_pixel
}
# Store the crop geometry next to the cropped frames so later steps can undo
# the crop. Note that setting.json therefore lives inside the image folder:
# anything that lists this folder has to filter by image extension.
with open(os.path.join(output_dir, "setting.json"), "w") as f:
    json.dump(data, f, indent=4)

# Process all images (sorted so that progress messages follow frame order)
for filename in sorted(os.listdir(input_dir)):
    if not filename.endswith(('.png', '.jpg')):  # Check file format
        continue

    # Read the image
    image_path = os.path.join(input_dir, filename)
    image = cv2.imread(image_path)

    # Ensure the image was loaded correctly
    if image is None:
        print(f"Failed to load {filename}, skipping...")
        continue

    # Crop the specified region of the image
    cropped_image = image[y_start:y_start+cropped_pixel, x_start:x_start+cropped_pixel]

    # NumPy slicing silently truncates at the image border; make that visible
    # because downstream steps expect cropped_pixel x cropped_pixel frames.
    if cropped_image.shape[:2] != (cropped_pixel, cropped_pixel):
        print(f"Warning: crop of {filename} is {cropped_image.shape[1]}x{cropped_image.shape[0]}, "
              f"expected {cropped_pixel}x{cropped_pixel}")

    # Save the cropped image; cv2.imwrite reports failure via its return value
    output_path = os.path.join(output_dir, filename)
    if not cv2.imwrite(output_path, cropped_image):
        print(f"Failed to write {output_path}")

print("All images have been processed.")


In [None]:
# convert images to video
import cv2
import os

def images_to_video(image_folder, output_video, fps=30):
    """Assemble the ``.jpg``/``.png`` files of a folder into an mp4 video.

    Images are taken in lexicographic file-name order. The first image defines
    the frame size; later images of a different size are resized to it because
    the video writer requires every frame to match the size it was opened with.

    Args:
        image_folder: Directory containing the input images.
        output_video: Path of the video file to create.
        fps: Frame rate of the resulting video.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    # Get list of images in the folder, sorted by filename
    images = sorted(name for name in os.listdir(image_folder)
                    if name.endswith((".jpg", ".png")))

    if not images:
        print("No images found in the folder.")
        return

    # Read the first image to get the frame size
    first_image_path = os.path.join(image_folder, images[0])
    frame = cv2.imread(first_image_path)
    if frame is None:
        # cv2.imread returns None instead of raising on unreadable files.
        print(f"Unable to read {first_image_path}, cannot determine the frame size.")
        return
    height, width = frame.shape[:2]

    # Initialize the video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for .mp4
    video = cv2.VideoWriter(output_video, fourcc, fps, (width, height))
    if not video.isOpened():
        print(f"Unable to open {output_video} for writing.")
        video.release()
        return

    try:
        for image in images:
            img_path = os.path.join(image_folder, image)
            frame = cv2.imread(img_path)

            # Check if the image was properly read
            if frame is None:
                print(f"Skipping {img_path}, unable to read.")
                continue

            if frame.shape[:2] != (height, width):
                print(f"Resizing {img_path} to {width}x{height} to match the first frame.")
                frame = cv2.resize(frame, (width, height))

            video.write(frame)
    finally:
        # Finalize the container even if reading or writing raised.
        video.release()

    print(f"Video saved as {output_video}")

# Example usage
# Alternative input (kept for reference): encode the cropped frames of the
# current sequence instead of the rendered blend images.
# image_folder = f'data/aist/{seq_name}/image_crop'  # Replace with the path to your image folder
# output_video = f'data/aist/{seq_name}/image_crop.mp4'       # Desired output video file name
# NOTE(review): the two paths below hard-code the sequence name instead of
# using seq_name, so they do not follow a change of seq_name in another cell.
image_folder = f'logs/aist_gt_15000step/seq=gBR_sBM_c01_d05_mBR0_ch06_prof=aist_data=aist/viz_only_human_viz/blend_img_opti'  # Replace with the path to your image folder
output_video = f'gBR_sBM_c01_d05_mBR0_ch06_opti.mp4'       # Desired output video file name
fps = 30  # Frames per second

images_to_video(image_folder, output_video, fps)

In [None]:
# convert mask video to mask images
import cv2
import os

def video_to_mask(video_path, output_dir, threshold=100):
    """Convert every frame of a (black and white) video into a binary mask image.

    Each frame is converted to grayscale and thresholded: pixels whose gray
    value is greater than ``threshold`` become 255, all others 0. Masks are
    written as ``frame_00000.png``, ``frame_00001.png``, ... in decoding order.

    Args:
        video_path: Path of the mask video to decode.
        output_dir: Directory receiving the mask images; created when missing.
        threshold: Gray level above which a pixel counts as foreground.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    # Ensure the output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Open the video file
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Unable to open video file {video_path}")
        cap.release()
        return

    frame_count = 0
    try:
        while True:
            # Read the next frame; a failed read marks the end of the video
            ret, frame = cap.read()
            if not ret:
                break

            # Convert the frame to grayscale (assuming black and white video)
            gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Binarize: values above `threshold` -> 255 (white), the rest -> 0
            mask = cv2.threshold(gray_frame, threshold, 255, cv2.THRESH_BINARY)[1]

            # Save the mask image; cv2.imwrite reports failure via its return value
            mask_filename = os.path.join(output_dir, f"frame_{frame_count:05d}.png")
            if not cv2.imwrite(mask_filename, mask):
                print(f"Failed to write {mask_filename}")

            frame_count += 1
    finally:
        # Release the capture even when decoding or writing raises
        cap.release()

    print(f"Finished processing {frame_count} frames.")

# Example usage: split the mask video of the current sequence (seq_name is set
# in another cell) into per-frame binary masks.
# NOTE(review): the output folder name suggests pha.mp4 was produced from the
# cropped frames — confirm.
video_path = f'data/aist/{seq_name}/pha.mp4'
output_dir = f'data/aist/{seq_name}/mask_crop'
video_to_mask(video_path, output_dir)


In [None]:
# pad mask images
import cv2
import os
import numpy as np

import json  # imported here so the cell also runs on a fresh kernel

# Input and output directories (seq_name comes from an earlier cell)
input_dir = f'data/aist/{seq_name}/mask_crop/'  # Path to the folder with cropped masks
output_dir = f'data/aist/{seq_name}/mask/'  # Path to the folder where padded masks will be saved

# Define the original image size (1920x1080)
original_height, original_width = 1080, 1920

# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Position and size of the cropped region. Prefer the values stored by the
# crop step in setting.json over whatever happens to be left in the kernel;
# when the file is missing, the kernel variables y_start / x_start /
# cropped_pixel are used as before.
crop_setting_path = f'data/aist/{seq_name}/image_crop/setting.json'
if os.path.exists(crop_setting_path):
    with open(crop_setting_path) as f:
        crop_setting = json.load(f)
    y_start = crop_setting["y_start"]
    x_start = crop_setting["x_start"]
    cropped_pixel = crop_setting["cropped_pixel"]

# Process all cropped masks
for filename in sorted(os.listdir(input_dir)):
    if not filename.endswith(('.png', '.jpg')):  # Check file format
        continue

    # Read the cropped mask image (in grayscale mode)
    image_path = os.path.join(input_dir, filename)
    cropped_mask = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)  # Ensure it's single-channel

    # Ensure the image was loaded correctly
    if cropped_mask is None:
        print(f"Failed to load {filename}, skipping...")
        continue

    # The paste below needs an exact size match; fail with a readable message
    # instead of NumPy's generic broadcast error.
    if cropped_mask.shape != (cropped_pixel, cropped_pixel):
        raise ValueError(
            f"{filename} has shape {cropped_mask.shape}, "
            f"expected {(cropped_pixel, cropped_pixel)} from the crop settings"
        )

    # Create an all-black mask of the original size (1080x1920)
    padded_mask = np.zeros((original_height, original_width), dtype=np.uint8)

    # Place the cropped mask at the specified position on the all-black image
    padded_mask[y_start:y_start+cropped_pixel, x_start:x_start+cropped_pixel] = cropped_mask

    # Save the padded mask; cv2.imwrite reports failure via its return value
    output_path = os.path.join(output_dir, filename)
    if not cv2.imwrite(output_path, padded_mask):
        print(f"Failed to write {output_path}")

print("All mask images have been processed.")


In [None]:
# transform world coordinate to camera

import cv2
import torch
import pickle
import os
import json
import numpy as np
import trimesh

from lib_gart.hmr2.models import load_hmr2
from pytorch3d.transforms import axis_angle_to_matrix, matrix_to_axis_angle
from lib_gart.hmr2.models.smpl_wrapper import SMPL

def copy2cpu(tensor):
    """Return ``tensor`` as a host-side NumPy array.

    A torch tensor is detached and copied to the CPU before conversion, a
    NumPy array is handed back untouched, and any other input yields None.
    """
    if isinstance(tensor, torch.Tensor):
        return tensor.detach().cpu().numpy()
    return tensor if isinstance(tensor, np.ndarray) else None

def generator(points=None, pred_vertices=None, opti_vertices=None, gt_vertices=None, faces=None):
    """Yield one visualization dict per batch element.

    The batch size is the length of the first argument that is not None, in
    the order ``points``, ``pred_vertices``, ``opti_vertices``,
    ``gt_vertices``. When all of them are None nothing is yielded.

    Args:
        points: Batch of point sets; the first three columns of each set are
            used as coordinates.
        pred_vertices: Batch of predicted mesh vertices.
        opti_vertices: Batch of optimized mesh vertices.
        gt_vertices: Batch of ground-truth mesh vertices.
        faces: Face indices shared by all meshes.

    Yields:
        dict with the keys ``points`` (``pcl``, ``color``) and ``pred_verts``,
        ``opti_verts``, ``label_verts`` (each ``mesh``, ``color``). ``pcl`` and
        ``mesh`` are None for inputs that were not given; a mesh is the list
        ``[vertices_i, faces]`` converted with ``copy2cpu``.
    """
    # First provided input defines the batch size; 0 when nothing was given
    # (previously this case failed on an unbound local).
    batch_size = next(
        (len(batch) for batch in (points, pred_vertices, opti_vertices, gt_vertices)
         if batch is not None),
        0,
    )
    if batch_size == 0:
        return

    # Convert each batch once instead of once per yielded element.
    pred_host = copy2cpu(pred_vertices) if pred_vertices is not None else None
    opti_host = copy2cpu(opti_vertices) if opti_vertices is not None else None
    gt_host = copy2cpu(gt_vertices) if gt_vertices is not None else None
    has_mesh = pred_host is not None or opti_host is not None or gt_host is not None
    faces_host = copy2cpu(faces) if has_mesh or faces is not None else None

    for i in range(batch_size):
        yield dict(
            points = dict(
                pcl = copy2cpu(points[i][:,:3]) if points is not None else None,
                color = [0,0,0.8],
            ),
            pred_verts = dict(
                mesh = [pred_host[i], faces_host] if pred_vertices is not None else None,
                color = np.asarray([143, 240, 166]) / 255
            ),
            opti_verts = dict(
                mesh = [opti_host[i], faces_host] if opti_vertices is not None else None,
                color = np.asarray([158, 219, 251]) / 255
            ),
            label_verts = dict(
                mesh = [gt_host[i], faces_host] if gt_vertices is not None else None,
                color = np.asarray([235, 189, 191]) / 255,
            ),
        )

seq_name = "gBR_sBM_c01_d05_mBR0_ch06"
# Characters 9-10 of the sequence name hold the two-digit camera number
# ("c01" -> "01"); subtract 1 to get a 0-based index into the camera list.
cam = int(seq_name[9:11]) - 1
import re
# Motion / keypoint files are stored per sequence with the camera tag
# replaced by "cAll".
seq_label = re.sub(r"c\d{2}", "cAll", seq_name)

smpl_fn = f"data/aist/motions/{seq_label}.pkl"
# NOTE(review): allow_pickle=True unpickles the file, which can execute
# arbitrary code — only load motion files from a trusted source.
data = np.load(smpl_fn, allow_pickle=True)
gt_pose = torch.from_numpy(data['smpl_poses']).reshape(-1, 24, 3).cuda()  # per frame: 24 axis-angle vectors
trans = torch.from_numpy(data['smpl_trans']).cuda()
scale = torch.from_numpy(data['smpl_scaling']).cuda()
hmr_model, _ = load_hmr2('data/hmr/epoch=35-step=1000000.ckpt')
hmr_model = hmr_model.cuda()
smpl = SMPL(model_path='data/smpl-meta', num_body_joints=23, mean_params='data/smpl-meta/smpl_mean_params.npz').cuda()
with open('data/aist/cameras/setting1.json', 'rt') as f:
    setting = json.load(f)
# Parameters of the selected camera: intrinsic matrix, rotation (stored as
# axis-angle, converted to a matrix) and translation.
K = np.array(setting[cam]['matrix'])
R = axis_angle_to_matrix(torch.tensor(setting[cam]['rotation'])[None])[0].cuda()
t = torch.tensor(setting[cam]['translation']).cuda()
beta_list = []

with open(f"data/aist/keypoints3d/{seq_label}.pkl", "rb") as f:
    kp = pickle.load(f)
keypoints3d = torch.tensor(kp['keypoints3d']).float().cuda()

from lib_render.visualization import StreamVisualization
o3d_viz = StreamVisualization()

image_crop_dir = f'data/aist/{seq_name}/image_crop'
# The crop step stores setting.json inside this folder; keep only image files,
# otherwise cv2.imread returns None for the JSON and torch.from_numpy fails.
frame_names = sorted(name for name in os.listdir(image_crop_dir) if name.endswith(('.png', '.jpg')))

with torch.no_grad():
    for i, img in enumerate(frame_names):
        rgb = cv2.imread(os.path.join(image_crop_dir, img))
        rgb = torch.from_numpy(rgb).cuda().permute(2,0,1)[None]/255.
        res_rgb = torch.nn.functional.interpolate(rgb, size=(256, 256), mode='bilinear')
        # The HMR prediction is only used for its shape parameters (betas) and
        # for visualizing the predicted mesh.
        hmr_output = hmr_model(res_rgb)
        beta_list.append(hmr_output['pred_smpl_params']['betas'])
        # SMPL body with the original (world-frame) pose
        gt_smpl_params = {}
        gt_smpl_params['global_orient'] = axis_angle_to_matrix(gt_pose[i][:1][None])
        gt_smpl_params['body_pose'] = axis_angle_to_matrix(gt_pose[i][1:][None])
        gt_smpl_params['betas'] = hmr_output['pred_smpl_params']['betas']
        gt_smpl_output = smpl(**{k: v.float() for k,v in gt_smpl_params.items()}, pose2rot=False)
        root_joint = gt_smpl_output.joints[0, 0]
        # Rotate the root orientation and the translation with the camera extrinsics
        root_rota = R @ axis_angle_to_matrix(gt_pose[i][:1])
        opti_root_pose = matrix_to_axis_angle(root_rota)[0]
        opti_trans = R @ trans[i]/scale + t/scale 
        # SMPL body with the rotated root orientation
        opti_smpl_params = {}
        opti_smpl_params['global_orient'] = axis_angle_to_matrix(opti_root_pose[None])
        opti_smpl_params['body_pose'] = axis_angle_to_matrix(gt_pose[i][1:][None])
        opti_smpl_params['betas'] = hmr_output['pred_smpl_params']['betas']
        opti_smpl_output = smpl(**{k: v.float() for k,v in opti_smpl_params.items()}, pose2rot=False)
        # Difference between the root joint of the rigidly transformed body and
        # the root joint of the re-posed body; adding it to the translation
        # makes both root joints coincide.
        offset = (gt_smpl_output.joints[0,0] + trans[i]/scale) @ R.T + t/scale - (opti_smpl_output.joints[0,0] + opti_trans)
        gen = generator(
            points=(keypoints3d[i]/scale @ R.T + t/scale)[None],
            # points=(gt_smpl_output.joints + trans[i]/scale) @ R.T + t/scale,
            pred_vertices=hmr_output['pred_vertices'],
            opti_vertices=opti_smpl_output.vertices + opti_trans + offset,
            gt_vertices=(gt_smpl_output.vertices + trans[i]/scale) @ R.T + t/scale,
            faces=smpl.faces
        )
        o3d_viz.show(gen)
        # Overwrite the per-frame parameters in place with the transformed values
        trans[i] = opti_trans + offset
        gt_pose[i][:1] = opti_root_pose

o3d_viz.close_view()
data['smpl_beta'] = torch.cat(beta_list, dim=0).cpu().numpy()
# NOTE(review): the last pose / translation is dropped here while smpl_beta
# keeps one entry per processed image — confirm the lengths are meant to match.
data['smpl_poses'] = gt_pose[:-1].cpu().numpy().reshape(-1, 72)
data['smpl_trans'] = trans[:-1].cpu().numpy()

# with open(f"data/aist/{seq_name}/smpl.pkl", "wb") as f:
#     pickle.dump(data, f)

In [1]:
# render mesh to image

import cv2
import torch
import pickle
import json
import os
import numpy as np
import trimesh
import pyrender
import matplotlib.pyplot as plt

from lib_gart.hmr2.models import load_hmr2
from pytorch3d.transforms import axis_angle_to_matrix, matrix_to_axis_angle
from lib_gart.hmr2.models.smpl_wrapper import SMPL

def render_mesh(img, v, f, K):
    """Render a mesh with pyrender and alpha-blend it over an image.

    Args:
        img: Background image as an (H, W, 3) array in OpenCV channel order
            (BGR), e.g. the result of ``cv2.imread``.
        v: Mesh vertices, passed to ``trimesh.Trimesh``.
        f: Mesh faces, passed to ``trimesh.Trimesh``.
        K: Camera intrinsic matrix; ``K[0,0]``, ``K[1,1]``, ``K[0,2]`` and
            ``K[1,2]`` are used as fx, fy, cx and cy.

    Returns:
        Float array of the same height and width as ``img`` (BGR) in which the
        rendered mesh covers the image with 80% opacity.
    """
    mm = trimesh.Trimesh(vertices=v,faces=f)
    mesh = pyrender.Mesh.from_trimesh(mm)
    scene = pyrender.Scene()
    scene.add(mesh)
    camera = pyrender.IntrinsicsCamera(fx=K[0,0], fy=K[1,1], cx=K[0,2], cy=K[1,2], znear=0.05, zfar=100000.0, name=None)
    # Flip the y and z axes of the camera frame.
    # NOTE(review): presumably converts the OpenCV camera convention of the
    # vertices (y down, z forward) to pyrender's OpenGL convention — confirm.
    camera_pose = np.array([
       [1.0,  0.0, 0.0, 0.0],
       [0.0,  -1.0, 0.0, 0.0],
       [0.0,  0.0, -1.0, 0.0],
       [0.0,  0.0, 0.0, 1.0],
    ])
    scene.add(camera, pose=camera_pose)
    light = pyrender.SpotLight(color=np.asarray([158, 219, 251]) / 255, intensity=128,
                               innerConeAngle=np.pi/16.0,
                               outerConeAngle=np.pi/6.0)
    scene.add(light, pose=camera_pose)

    # Render at the size of the background image (1920x1080 for the AIST
    # frames) so the blend below always has matching shapes.
    height, width = img.shape[:2]
    r = pyrender.OffscreenRenderer(width, height)
    try:
        color, depth = r.render(scene)
    finally:
        # Free the offscreen GL context; this function runs once per frame.
        r.delete()

    # Pure white pixels are treated as background (nothing rendered there).
    blend = (color==255)
    blend = blend[:,:,0] & blend[:,:,1] & blend[:,:,2]
    blend = (1-blend.astype(np.float32)) * 0.8
    blend = blend[:,:,None]
    # pyrender returns RGB while img is BGR; reorder the channels so the mesh
    # keeps its intended color once the result is written with OpenCV.
    color_bgr = color[:, :, ::-1]
    blended_image = color_bgr*blend + img*(1-blend)
    return blended_image


def copy2cpu(tensor):
    """Convert ``tensor`` to a NumPy array living in host memory.

    NumPy arrays pass through unchanged; torch tensors are detached from the
    graph, moved to the CPU and converted; every other type results in None.
    """
    if isinstance(tensor, np.ndarray):
        return tensor
    if not isinstance(tensor, torch.Tensor):
        return None
    host_tensor = tensor.detach().cpu()
    return host_tensor.numpy()

# NOTE(review): this cell switches to a different sequence than the cells
# above (d04/ch05 instead of d05/ch06) — confirm this is intended.
seq_name = "gBR_sBM_c01_d04_mBR0_ch05"
# Characters 9-10 of the sequence name hold the two-digit camera number;
# subtract 1 to get a 0-based index into the camera list.
cam = int(seq_name[9:11]) - 1
import re
seq_label = re.sub(r"c\d{2}", "cAll", seq_name)

smpl_fn = f"data/aist/{seq_name}/smpl.pkl"
# NOTE(review): allow_pickle=True unpickles the file, which can execute
# arbitrary code — only load files from a trusted source.
data = np.load(smpl_fn, allow_pickle=True)
gt_pose = torch.from_numpy(data['smpl_poses']).reshape(-1, 24, 3).cuda()
trans = torch.from_numpy(data['smpl_trans']).cuda()
scale = torch.from_numpy(data['smpl_scaling']).cuda()
hmr_model, _ = load_hmr2('data/hmr/epoch=35-step=1000000.ckpt')
hmr_model = hmr_model.cuda()
smpl = SMPL(model_path='data/smpl-meta', num_body_joints=23, mean_params='data/smpl-meta/smpl_mean_params.npz').cuda()
with open('data/aist/cameras/setting1.json', 'rt') as f:
    setting = json.load(f)
K = np.array(setting[cam]['matrix'])
R = axis_angle_to_matrix(torch.tensor(setting[cam]['rotation'])[None])[0].cuda()
t = torch.tensor(setting[cam]['translation']).cuda()

image_crop_dir = f'data/aist/{seq_name}/image_crop'
# The crop step stores setting.json inside this folder; keep only image files,
# otherwise cv2.imread returns None for the JSON and torch.from_numpy fails.
frame_names = sorted(name for name in os.listdir(image_crop_dir) if name.endswith(('.png', '.jpg')))

# cv2.imwrite does not create missing folders (it just returns False).
os.makedirs('test_gt', exist_ok=True)

with torch.no_grad():
    for i, img_fname in enumerate(frame_names):
        # if i != 0:
        #     continue
        rgb = cv2.imread(os.path.join(image_crop_dir, img_fname))
        rgb = torch.from_numpy(rgb).cuda().permute(2,0,1)[None]/255.
        res_rgb = torch.nn.functional.interpolate(rgb, size=(256, 256), mode='bilinear')
        # The HMR prediction is only used for its shape parameters (betas).
        hmr_output = hmr_model(res_rgb)
        gt_smpl_params = {}
        # rota_pose = R @ axis_angle_to_matrix(gt_pose[i])
        # gt_pose[i][0] = matrix_to_axis_angle(rota_pose)[0]
        gt_smpl_params['global_orient'] = axis_angle_to_matrix(gt_pose[i][:1][None])
        gt_smpl_params['body_pose'] = axis_angle_to_matrix(gt_pose[i][1:][None])
        gt_smpl_params['betas'] = hmr_output['pred_smpl_params']['betas']
        gt_smpl_output = smpl(**{k: v.float() for k,v in gt_smpl_params.items()}, pose2rot=False)

        gt_verts = copy2cpu(gt_smpl_output.vertices[0] + trans[i])
        # The mesh is drawn over the uncropped frame with the same file name.
        img = cv2.imread(os.path.join(f'data/aist/{seq_name}/image', img_fname))
        if img is None:
            print(f"Failed to load full frame for {img_fname}, skipping...")
            continue
        blended_gt = render_mesh(img, gt_verts, smpl.faces, K)
        # img_fname already carries an extension, so outputs are named
        # e.g. frame_0000.jpg.jpg (kept as-is for existing consumers).
        if not cv2.imwrite(f'test_gt/{img_fname}.jpg', blended_gt):
            print(f"Failed to write test_gt/{img_fname}.jpg")
        # pred_verts = copy2cpu(hmr_output["pred_vertices"][0] + trans[i])
        # blended_pred = render_mesh(img, pred_verts, smpl.faces, K)
        # cv2.imwrite(f'test_pred/{img_fname}.jpg', blended_pred)
        # plt.imshow(cv2.cvtColor(blended_image, cv2.COLOR_BGR2RGB)/255)
        # plt.axis("off")
        # plt.show()



Please either pass the dim explicitly or simply use torch.linalg.cross.
The default value of dim will change to agree with that of linalg.cross in a future release. (Triggered internally at ../aten/src/ATen/native/Cross.cpp:62.)
  b3 = torch.cross(b1, b2)



NameError: name 'blended_image' is not defined

In [None]:
# image to gif
import imageio
import os

# Source folder holding the frames and destination path of the animation
input_dir = 'test_gt'   # Folder with your images (e.g., 'images/')
output_gif = 'test_gt.gif'       # Name for the output GIF

# Collect the paths of all image files, then order them by name
images = [
    os.path.join(input_dir, entry)
    for entry in os.listdir(input_dir)
    if entry.endswith(('.png', '.jpg', '.jpeg'))
]
images.sort()

# Append the frames one by one to the GIF writer
with imageio.get_writer(output_gif, mode='I', duration=0.1) as writer:
    for filename in images:
        image = imageio.imread(filename)
        writer.append_data(image)

print("GIF created successfully!")

In [None]:
# video to gif
import cv2
import imageio

# Define the input video path and output GIF path
input_video = 'test_gt.mp4'  # Replace with your video file
output_gif = 'test_gt.gif'
fallback_fps = 30  # Used when the container does not report a frame rate

# Open the video using OpenCV
video = cv2.VideoCapture(input_video)
try:
    # An unopened capture reports 0 fps, which previously surfaced as a
    # ZeroDivisionError; fail with a message that names the actual problem.
    if not video.isOpened():
        raise IOError(f"Unable to open video file {input_video}")

    # Get video frame rate and duration per frame for the GIF
    fps = video.get(cv2.CAP_PROP_FPS)
    if fps <= 0:
        print(f"Video reports no frame rate, assuming {fallback_fps} fps.")
        fps = fallback_fps
    duration_per_frame = 1 / fps  # Duration per frame in seconds for the GIF

    # Read frames and write them to the GIF
    with imageio.get_writer(output_gif, mode='I', duration=duration_per_frame) as writer:
        while True:
            ret, frame = video.read()
            if not ret:
                break
            # Convert the frame from BGR (OpenCV) to RGB (imageio)
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # Append the frame to the GIF
            writer.append_data(frame_rgb)
finally:
    # Release the video capture even when reading or writing fails
    video.release()

print("GIF created successfully!")

In [None]:
# Scratch check: print the SMPL root joint (joint 0) of the first 100 frames
# when the body is posed without an explicit global orientation.
# NOTE(review): relies on gt_pose, beta_list, smpl and axis_angle_to_matrix
# being left in the kernel by earlier cells, and on at least 100 frames being
# available — this cell does not run on a fresh kernel.
gt_smpl_params = {}
for i in range(100):
    # gt_smpl_params['global_orient'] = axis_angle_to_matrix(gt_pose[i][:1][None])
    gt_smpl_params['body_pose'] = axis_angle_to_matrix(gt_pose[i][1:][None])
    gt_smpl_params['betas'] = beta_list[i]
    gt_smpl_output = smpl(**{k: v.float() for k,v in gt_smpl_params.items()}, pose2rot=False)
    # Root rotation of the frame; only used by the commented-out experiments below.
    root_R = axis_angle_to_matrix(gt_pose[i][:1])[0]
    j0 = gt_smpl_output.joints[0, 0]
    # j0 = root_R @ (j0)
    print(j0)
    # offset = R @ (j0 - root_R @ j0)
    # print(offset)

In [None]:
import numpy as np

# Inspect the poses stored in a training checkpoint. The .pth file is opened
# with np.load and its "training_poses/data.pkl" entry is displayed.
# NOTE(review): allow_pickle=True unpickles the content, which can execute
# arbitrary code — only open checkpoints from a trusted source.
data = dict(np.load("logs/aist_whmr_15000step/seq=gBR_sBM_c01_d05_mBR0_ch06_prof=aist_data=aist/training_poses.pth", allow_pickle=True))
pose = data["training_poses/data.pkl"]
pose