In [1]:
import os
os.environ["OPENCV_IO_ENABLE_OPENEXR"]="1"
from smplx import SMPL
import os
import h5py
import torch
import imageio
import numpy as np
np.float = float
np.int = int
import glob
import skvideo.io
import json
import matplotlib.pyplot as plt
import cv2
import wget
import requests
import io

from core.datasets.preprocess.process_spin import SMPL_JOINT_MAPPER, write_to_h5py
from core.utils.skeleton_utils import *

%load_ext autoreload
%autoreload 2
np.set_printoptions(suppress=True)

np.set_printoptions(suppress=True)
np.float = float
np.int = int
np.bool = bool
np.complex = complex
np.object = object
np.unicode = None
np.str = str

In [2]:
smpl_rest_pose = np.array([[ 0.00000000e+00,  2.30003661e-09, -9.86228770e-08],
                           [ 1.63832515e-01, -2.17391014e-01, -2.89178602e-02],
                           [-1.57855421e-01, -2.14761734e-01, -2.09642015e-02],
                           [-7.04505108e-03,  2.50450850e-01, -4.11837511e-02],
                           [ 2.42021069e-01, -1.08830070e+00, -3.14962119e-02],
                           [-2.47206554e-01, -1.10715497e+00, -3.06970738e-02],
                           [ 3.95125849e-03,  5.94849110e-01, -4.03754264e-02],
                           [ 2.12680623e-01, -1.99382353e+00, -1.29327580e-01],
                           [-2.10857525e-01, -2.01218796e+00, -1.23002514e-01],
                           [ 9.39484313e-03,  7.19204426e-01,  2.06931755e-02],
                           [ 2.63385147e-01, -2.12222481e+00,  1.46775618e-01],
                           [-2.51970559e-01, -2.12153077e+00,  1.60450473e-01],
                           [ 3.83779174e-03,  1.22592449e+00, -9.78838727e-02],
                           [ 1.91201791e-01,  1.00385976e+00, -6.21964522e-02],
                           [-1.77145526e-01,  9.96228695e-01, -7.55542740e-02],
                           [ 1.68482102e-02,  1.38698268e+00,  2.44048554e-02],
                           [ 4.01985168e-01,  1.07928419e+00, -7.47655183e-02],
                           [-3.98825467e-01,  1.07523870e+00, -9.96334553e-02],
                           [ 1.00236952e+00,  1.05217218e+00, -1.35129794e-01],
                           [-9.86728609e-01,  1.04515052e+00, -1.40235111e-01],
                           [ 1.56646240e+00,  1.06961894e+00, -1.37338534e-01],
                           [-1.56946480e+00,  1.05935931e+00, -1.53905824e-01],
                           [ 1.75282109e+00,  1.04682994e+00, -1.68231070e-01],
                           [-1.75758195e+00,  1.04255080e+00, -1.77773550e-01]], dtype=np.float32)

In [3]:
def process_RANA(subject_folder):
    json_files = sorted(glob.glob(os.path.join(subject_folder, '*.json')))
    img_paths = sorted(glob.glob(os.path.join(subject_folder, '*[0-9].png')))
    mask_paths = sorted(glob.glob(os.path.join(subject_folder, '*semantic.png')))
    num_frames = len(json_files)
    print('number of frames:', num_frames)
    print(json_files)
    print(img_paths)
    print(mask_paths)


    #################################################################### INDICES #######################################################################
    cam_idxs = np.zeros(num_frames) # maps from image idx to camera idx (they all use the same camera with idx 0)
    kp_idxs = np.arange(num_frames) # maps from image idx to pose idx (monocular data so just arange num frames)

    ################################################################## CAMERA STUFF #####################################################################
    temp_data = json.load(open(json_files[0]))['skeleton_0']['smpl_data']
    W = 1280
    H = 720
    
    K = []
    focals = []
    centers = []
    c2ws = []

    # for camera in camera_names:
    K.append(np.array(temp_data['K']))
    focals.append([K[-1][0, 0], K[-1][1, 1]])
    centers.append([K[-1][0, 2], K[-1][1, 2]])
    ext = np.eye(4)
    c2w = np.linalg.inv(ext)
    c2w = swap_mat(c2w)
    c2ws.append(c2w)

    K = np.stack(K)
    focals = np.stack(focals)
    centers = np.stack(centers)
    c2ws = np.stack(c2ws)

    print("K", K.shape)
    print("focals", focals.shape)
    print("centers", centers.shape)
    print("c2ws", c2ws.shape)

    ##################################################################### HDRis ####################################################################
    list_of_hdris = []
    hdri_indices = []
    hdris = []
    for json_file in json_files:
        hdri_fn = json.load(open(json_file))['bg_file']
        exr_fn = hdri_fn[:-3] + 'exr'
        if exr_fn in list_of_hdris:
            hdri_indices.append(list_of_hdris.index(exr_fn))
        else:
            hdri_folder = './data/HDRis/'
            # download hdri
            image_url =  'https://dl.polyhaven.org/file/ph-assets/HDRIs/exr/4k/'+exr_fn
            response = requests.get(image_url, stream=True, headers={'User-agent': 'Mozilla/5.0'})
            with open(hdri_folder+exr_fn, 'wb') as f:
                f.write(response.content)
            # open hdri and append to list
            hdri = cv2.cvtColor(cv2.imread(hdri_folder+exr_fn,  cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH), cv2.COLOR_BGR2RGB)
            hdris.append(hdri)
            list_of_hdris.append(exr_fn)
            hdri_indices.append(list_of_hdris.index(exr_fn))
            
    hdri_indices = np.array(hdri_indices).astype(int)
    hdris = np.array(hdris)

    ##################################################################### POSE #####################################################################
    smpl = SMPL(model_path=smpl_neutral_pkl_file, gender='neutral', joint_mapper=SMPL_JOINT_MAPPER)
    bones, betas, root_bones, root_locs = [], [], [], []
    for json_file in json_files:
        smpl_data = json.load(open(json_file))['skeleton_0']['smpl_data']
        joints = np.array(smpl_data['joints'])
        verts = np.array(smpl_data['vertices'])
        pose = np.array(smpl_data['pose']).reshape(-1,3)[None,...]
        beta = np.array(smpl_data['betas'])[None, ...]
        scale = smpl_data['scale']
        global_orient = np.array(smpl_data['global_orient']).reshape(-1,3)[None,...]
        global_trans = np.array(smpl_data['global_trans']).reshape(-1,3)

        smpl_output = smpl(betas=torch.Tensor(beta),
                           body_pose=torch.Tensor(pose),
                           global_orient=torch.Tensor(global_orient),
                           pose2rot=True
                        )
        
        root_loc = global_trans/scale

        full_pose = np.concatenate((global_orient, pose), axis=1)

        bones.append(full_pose)
        betas.append(beta)
        root_locs.append(root_loc)

    bones = np.concatenate(bones, axis=0)
    betas = np.concatenate(betas, axis=0)
    root_locs = np.concatenate(root_locs, axis=0)
    print('bones:',bones.shape)
    print('betas:',betas.shape)

    #################################################################### REST_POSE #################################################################
    betas = torch.DoubleTensor(betas)
    dummy = torch.eye(3).view(1, 1, 3, 3).expand(-1, 24, -1, -1)

    rest_info = smpl(
        betas=betas.mean(0)[None].float(),
        body_pose=dummy[:, 1:].float(),
        global_orient=dummy[:, :1].float(),
        pose2rot=False
    )

    rest_verts = rest_info.vertices[0].detach().numpy()
    rest_pose = rest_info.joints[0]
    rest_pose = rest_pose.detach().numpy()
    rest_verts -= rest_pose[0]
    rest_pose -= rest_pose[0] # center rest pose

    ext_scale=1.0
    scale_to_ref=False
    # scale the rest pose if needed
    if scale_to_ref:
        ref_pose = smpl_rest_pose * ext_scale
        bone_len = calculate_bone_length(rest_pose).mean()
        ref_bone_len = calculate_bone_length(ref_pose).mean()
        pose_scale = ref_bone_len / bone_len
    else:
        pose_scale = 1.0
    rest_pose = rest_pose * pose_scale
    rest_verts = rest_verts * pose_scale

    ########################### POSE Pt.2 ###################
    
    l2ws = np.array([get_smpl_l2ws(bone, rest_pose=rest_pose) for bone in bones])
    l2ws[..., :3, -1] += root_locs[:, None]
    print('l2ws:',l2ws.shape)
    kp3d = l2ws[..., :3, -1]
    skts = np.array([np.linalg.inv(l2w) for l2w in l2ws])
    print('kp3d:',kp3d.shape)
    print('skts:',skts.shape)

    ###################################################################### CYLS ######################################################################
    cyls = get_kp_bounding_cylinder(
        kp3d,
        ext_scale=0.001,
        skel_type=SMPLSkeleton,
        extend_mm=250,
        top_expand_ratio=1.00,
        bot_expand_ratio=0.25,
        head='y'
    )

    ################################################################## IMAGES, MASKS, BGs ##############################################################

    imgs = []
    bkgds = []
    masks = []
    sampling_masks = []
    bkgd_masks = []

    for img_path in img_paths:
        imgs.append(np.asarray(Image.open(img_path))[:,:,:3])

    for mask_path in mask_paths:
        mask = np.copy(np.asarray(Image.open(mask_path))[:,:,3])
        mask[mask < 128] = 0
        mask[mask > 128] = 1
        kernel = np.ones((11, 11), np.uint8)
        sampling_mask = cv2.dilate(mask, kernel=kernel, iterations=2)
        kernel = np.ones((3, 3), np.uint8)
        bkgd_mask = cv2.dilate(mask, kernel=kernel, iterations=2)
        masks.append(mask.astype(np.uint8)[..., None])
        sampling_masks.append(sampling_mask.astype(np.uint8)[..., None])
        bkgd_masks.append(bkgd_mask.astype(np.uint8)[..., None])
    
    imgs = np.stack(imgs)
    print('imgs', imgs.shape)


    masks = np.stack(masks)
    sampling_masks = np.stack(sampling_masks).astype(np.uint8)
    bkgd_masks = np.stack(bkgd_masks).astype(np.uint8)
    print('masks', masks.shape)
    print('sampling masks', sampling_masks.shape)

    bkgds.append(np.max(imgs * (1-bkgd_masks), axis=0))

    bkgds = np.stack(bkgds).astype(np.uint8)
    print('bkgds', bkgds.shape)

    ################################################################### FINAL OUTPUT ##################################################################
    data = {
        'imgs': np.array(imgs),
        'bkgds': np.array(bkgds),
        'bkgd_idxs': cam_idxs.astype(int),
        'masks': np.array(masks).reshape(-1, H, W, 1),
        'sampling_masks': np.array(sampling_masks).reshape(-1, H, W, 1),
        'c2ws': c2ws.astype(np.float32),
        'img_pose_indices': cam_idxs.astype(int),
        'kp_idxs': np.array(kp_idxs).astype(int),
        'centers': centers.astype(np.float32),
        'focals': focals.astype(np.float32),
        'kp3d': kp3d.astype(np.float32),
        'betas': betas.numpy().astype(np.float32),
        'bones': bones.astype(np.float32),
        'skts': skts.astype(np.float32),
        'cyls': cyls.astype(np.float32),
        'rest_pose': rest_pose.astype(np.float32),
        'hdri_indices': hdri_indices,
        'hdris': hdris,
    }

    return data

In [4]:
smpl_neutral_pkl_file = './data/RelightingHumans-release-v0.1/SMPL_NEUTRAL.pkl'
data = process_RANA('./data/RelightingHumans-release-v0.1/train_p2_p3/subject_01/')

number of frames: 150
['./data/RelightingHumans-release-v0.1/train_p2_p3/subject_01\\frame_000000.json', './data/RelightingHumans-release-v0.1/train_p2_p3/subject_01\\frame_000001.json', './data/RelightingHumans-release-v0.1/train_p2_p3/subject_01\\frame_000002.json', './data/RelightingHumans-release-v0.1/train_p2_p3/subject_01\\frame_000003.json', './data/RelightingHumans-release-v0.1/train_p2_p3/subject_01\\frame_000004.json', './data/RelightingHumans-release-v0.1/train_p2_p3/subject_01\\frame_000005.json', './data/RelightingHumans-release-v0.1/train_p2_p3/subject_01\\frame_000006.json', './data/RelightingHumans-release-v0.1/train_p2_p3/subject_01\\frame_000007.json', './data/RelightingHumans-release-v0.1/train_p2_p3/subject_01\\frame_000008.json', './data/RelightingHumans-release-v0.1/train_p2_p3/subject_01\\frame_000009.json', './data/RelightingHumans-release-v0.1/train_p2_p3/subject_01\\frame_000010.json', './data/RelightingHumans-release-v0.1/train_p2_p3/subject_01\\frame_000011.

In [5]:
write_to_h5py(os.path.join("./data/RANA/subject_01_train.h5"), data)
print('done!')

imgs: img to chunk in size (1, 4096, 3), flatten as (150, 921600, 3)
bkgds: img to chunk in size (1, 4096, 3), flatten as (1, 921600, 3)
bkgd_idxs: data to store as <class 'numpy.int64'>
masks: img to chunk in size (1, 4096, 1), flatten as (150, 921600, 1)
sampling_masks: img to chunk in size (1, 921600, 1), flatten as (150, 921600, 1)
c2ws: data to store as <class 'numpy.float32'>
img_pose_indices: data to store as <class 'numpy.int64'>
kp_idxs: data to store as <class 'numpy.int64'>
centers: data to store as <class 'numpy.float32'>
focals: data to store as <class 'numpy.float32'>
kp3d: data to store as <class 'numpy.float32'>
betas: data to store as <class 'numpy.float32'>
bones: data to store as <class 'numpy.float32'>
skts: data to store as <class 'numpy.float32'>
cyls: data to store as <class 'numpy.float32'>
rest_pose: data to store as <class 'numpy.float32'>
hdri_indices: data to store as <class 'numpy.int64'>
hdris: data to store as <class 'numpy.float32'>
done!
