# Data loading

# RTMV

In [131]:
from torch.utils.data import Dataset
import os
import json
import numpy as np
import torch
import math
import imageio
import cv2
from tqdm.notebook import tqdm
from torchvision import transforms

In [99]:
# Directory on the author's machine that holds the RTMV scene folders.
root_dir = '/home/rliu/Desktop/cvfiler04/datasets/RTMV/google_scanned'

In [100]:
# Names of the immediate sub-directories of root_dir (first os.walk entry, dirs field).
objects = next(os.walk(root_dir))[1]

In [101]:
# Load the metadata of one example scene ('00000') for inspection.
# The path is derived from root_dir so it cannot drift from the dataset root.
json_path = os.path.join(root_dir, '00000', 'transforms.json')
with open(json_path, "r") as f:
    meta = json.load(f)

In [102]:
# Show the first frame's 'transform_matrix' as an array.
np.array(meta['frames'][0]['transform_matrix'])

array([[-8.36230278e-01, -2.53794730e-01,  4.86114502e-01,
         9.35986042e-01],
       [ 5.48378587e-01, -3.87015104e-01,  7.41282940e-01,
         1.43672696e+00],
       [-1.49011612e-08,  8.86457920e-01,  4.62809265e-01,
         1.02960816e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         1.00000000e+00]])

In [110]:
class RTMV(Dataset):
    """Multi-view scenes stored as one sub-directory per scene.

    Every immediate sub-directory of ``root_dir`` is treated as a scene and is
    expected to contain a ``transforms.json`` whose ``frames`` list provides a
    ``file_path`` and a ``transform_matrix`` for each frame.

    Args:
        root_dir: Directory containing the scene folders.
        first_K: Number of leading frames whose poses are returned per scene.
        resolution: Side length (pixels) images are resized to.
        load_target: If True, load the image of every one of the ``first_K``
            frames; otherwise only the first frame's image is loaded.

    Raises:
        FileNotFoundError: If ``root_dir`` is not an existing directory.
    """

    def __init__(self, root_dir='/home/rliu/Desktop/cvfiler04/datasets/RTMV/google_scanned',
                 first_K=64, resolution=256, load_target=False):
        # os.walk silently yields nothing for a missing directory, which would
        # otherwise surface as a bare StopIteration from next().
        if not os.path.isdir(root_dir):
            raise FileNotFoundError('RTMV root directory not found: %r' % (root_dir,))
        self.root_dir = root_dir
        # Sorted so that an index always refers to the same scene; the raw
        # directory listing order depends on the filesystem.
        self.scene_list = sorted(next(os.walk(root_dir))[1])
        self.resolution = resolution
        self.first_K = first_K
        self.load_target = load_target

    def __len__(self):
        """Return the number of scenes."""
        return len(self.scene_list)

    def __getitem__(self, idx):
        """Load one scene.

        Returns:
            imgs: float32 tensor, channel-last RGB composited on white and
                rescaled with ``x * 2 - 1`` (i.e. [-1, 1] for 8-bit inputs).
                Holds one image, or ``first_K`` images when ``load_target``.
            poses: float32 tensor stacking the ``transform_matrix`` of the
                first ``first_K`` frames.

        Raises:
            IndexError: If the scene has fewer than ``first_K`` frames.
        """
        scene_dir = os.path.join(self.root_dir, self.scene_list[idx])
        with open(os.path.join(scene_dir, 'transforms.json'), "r") as f:
            frames = json.load(f)['frames']
        if len(frames) < self.first_K:
            raise IndexError('scene %r has %d frames, but first_K=%d were requested'
                             % (self.scene_list[idx], len(frames), self.first_K))
        frames = frames[:max(self.first_K, 0)]

        # Poses are needed for every frame; pixels only for the conditioning
        # frame unless target images were requested too.
        poses = [frame['transform_matrix'] for frame in frames]
        image_frames = frames if self.load_target else frames[:1]
        imgs = [self._load_image(scene_dir, frame) for frame in image_frames]

        imgs = (np.array(imgs) / 255.).astype(np.float32)  # (RGBA) imgs in [0, 1]
        imgs = self._to_sd_range(torch.tensor(self.blend_rgba(imgs)))
        poses = torch.tensor(np.array(poses).astype(np.float32))
        return imgs, poses

    def _load_image(self, scene_dir, frame):
        """Read one frame's image and resize it to ``resolution`` x ``resolution``."""
        img = imageio.imread(os.path.join(scene_dir, frame['file_path']))
        return cv2.resize(img, (self.resolution, self.resolution),
                          interpolation=cv2.INTER_LINEAR)

    @staticmethod
    def _to_sd_range(imgs):
        """Map values from [0, 1] to the [-1, 1] range used by Stable Diffusion."""
        return imgs * 2. - 1.

    def blend_rgba(self, img):
        """Composite RGBA (last axis, values in [0, 1]) over a white background."""
        img = img[..., :3] * img[..., -1:] + (1. - img[..., -1:])  # blend A to RGB
        return img

In [166]:
# load_target=False: only the first frame's image is read; poses are still collected for every frame.
dataset = RTMV(load_target=False)

In [167]:
# Fetch the first scene: its image tensor and the stacked pose matrices.
input_img, poses = dataset[0]

In [120]:
# One pose matrix per frame, first_K frames in total.
poses.shape

torch.Size([64, 4, 4])

In [128]:
# Split one scene into the conditioning view (frame 0) and the target views (all later frames).
input_im, poses = dataset[0]
input_im = input_im[0]
input_pose, target_poses = poses[0], poses[1:]

In [133]:
# Pose matrix of the first target view (frame 1 of the scene).
target_poses[0]

tensor([[ 6.6184e+00,  7.5305e-01, -2.2068e+00, -5.0729e-01],
        [-2.3318e+00,  2.1374e+00, -6.2638e+00, -1.4399e+00],
        [-5.2282e-08,  6.6411e+00,  2.2662e+00,  5.2094e-01],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  1.0000e+00]])

In [143]:
def cartesian_to_spherical(xyz):
    """Convert Cartesian points to spherical coordinates.

    Args:
        xyz: Array-like of shape (N, 3); each row is an (x, y, z) point.

    Returns:
        np.ndarray of shape (3, N) whose rows are, in order:
        theta   - polar angle in radians, measured from the +Z axis down,
        azimuth - ``arctan2(y, x)`` in radians,
        radius  - Euclidean distance of the point from the origin.
    """
    xyz = np.asarray(xyz)  # also accept plain nested sequences
    xy_sq = xyz[:, 0] ** 2 + xyz[:, 1] ** 2
    radius = np.sqrt(xy_sq + xyz[:, 2] ** 2)
    theta = np.arctan2(np.sqrt(xy_sq), xyz[:, 2])
    azimuth = np.arctan2(xyz[:, 1], xyz[:, 0])
    return np.array([theta, azimuth, radius])


def get_T(T_target, T_cond):
    """Encode the relative viewpoint change from a conditioning to a target position.

    Args:
        T_target: 1-D array (x, y, z) for the target view.
        T_cond: 1-D array (x, y, z) for the conditioning view.

    Returns:
        torch tensor with four entries:
        [d_theta, sin(d_azimuth), cos(d_azimuth), d_radius], where each delta
        is target minus conditioning in spherical coordinates.
    """
    cond_theta, cond_azimuth, cond_radius = cartesian_to_spherical(T_cond[None, :])
    tgt_theta, tgt_azimuth, tgt_radius = cartesian_to_spherical(T_target[None, :])

    theta_delta = (tgt_theta - cond_theta).item()
    # Wrap the azimuth difference into [0, 2*pi) before taking sin/cos.
    azimuth_delta = ((tgt_azimuth - cond_azimuth) % (2 * math.pi)).item()
    radius_delta = (tgt_radius - cond_radius).item()

    return torch.tensor([
        theta_delta,
        math.sin(azimuth_delta),
        math.cos(azimuth_delta),
        radius_delta,
    ])

In [163]:
# Build the relative-pose embedding of every target view w.r.t. the conditioning view.
x_T = []
for i_frame, target_pose in enumerate(target_poses):
    # The last column of the top three rows is the translation part of the pose.
    T = get_T(target_pose[:3, -1].numpy(), input_pose[:3, -1].numpy()).unsqueeze(0)
    x_T.append(T)
# Stack to (num_targets, 4), then insert a singleton middle axis.
x_T = torch.cat(x_T).unsqueeze(1)

In [164]:
# T is the embedding from the last loop iteration, with a leading singleton axis.
T.shape

torch.Size([1, 4])

In [165]:
# (number of target poses, 1, 4)
x_T.shape

torch.Size([4, 1, 4])

# GSO

In [122]:
class GSO(Dataset):
    """Google Scanned Objects renders, indexed through a split file.

    ``root_dir`` must contain ``<split>.json`` listing the scene folder names.
    Each scene folder is expected to hold ``transforms_render_mvs.json`` whose
    ``frames`` list provides a ``file_path`` and a ``transform_matrix`` for
    each frame.

    Args:
        root_dir: Directory containing the split file and the scene folders.
        split: Name of the split file (without the ``.json`` suffix).
        first_K: Number of leading frames whose poses are returned per scene.
        resolution: Side length (pixels) images are resized to.
        load_target: If True, load the image of every one of the ``first_K``
            frames; otherwise only the first frame's image is loaded.
    """

    def __init__(self, root_dir='/home/rliu/Desktop/cvfiler04/datasets/GoogleScannedObjects',
                 split='val', first_K=5, resolution=256, load_target=False):
        self.root_dir = root_dir
        with open(os.path.join(root_dir, '%s.json' % split), "r") as f:
            self.scene_list = json.load(f)
        self.resolution = resolution
        self.first_K = first_K
        self.load_target = load_target

    def __len__(self):
        """Return the number of scenes in the split."""
        return len(self.scene_list)

    def __getitem__(self, idx):
        """Load one scene.

        Returns:
            imgs: float32 tensor, channel-last RGB composited on white and
                rescaled with ``x * 2 - 1`` (i.e. [-1, 1] for 8-bit inputs).
                Holds one image, or ``first_K`` images when ``load_target``.
            poses: float32 tensor stacking the ``transform_matrix`` of the
                first ``first_K`` frames.

        Raises:
            IndexError: If the scene has fewer than ``first_K`` frames.
        """
        scene_dir = os.path.join(self.root_dir, self.scene_list[idx])
        with open(os.path.join(scene_dir, 'transforms_render_mvs.json'), "r") as f:
            frames = json.load(f)['frames']
        if len(frames) < self.first_K:
            raise IndexError('scene %r has %d frames, but first_K=%d were requested'
                             % (self.scene_list[idx], len(frames), self.first_K))
        frames = frames[:max(self.first_K, 0)]

        # Poses are needed for every frame; pixels only for the conditioning
        # frame unless target images were requested too.
        poses = [frame['transform_matrix'] for frame in frames]
        image_frames = frames if self.load_target else frames[:1]
        imgs = [self._load_image(scene_dir, frame) for frame in image_frames]

        imgs = (np.array(imgs) / 255.).astype(np.float32)  # (RGBA) imgs in [0, 1]
        imgs = self._to_sd_range(torch.tensor(self.blend_rgba(imgs)))
        poses = torch.tensor(np.array(poses).astype(np.float32))
        return imgs, poses

    def _load_image(self, scene_dir, frame):
        """Read one frame's image and resize it to ``resolution`` x ``resolution``."""
        img = imageio.imread(os.path.join(scene_dir, frame['file_path']))
        return cv2.resize(img, (self.resolution, self.resolution),
                          interpolation=cv2.INTER_LINEAR)

    @staticmethod
    def _to_sd_range(imgs):
        """Map values from [0, 1] to the [-1, 1] range used by Stable Diffusion."""
        return imgs * 2. - 1.

    def blend_rgba(self, img):
        """Composite RGBA (last axis, values in [0, 1]) over a white background."""
        img = img[..., :3] * img[..., -1:] + (1. - img[..., -1:])  # blend A to RGB
        return img

In [124]:
# GSO with its defaults: 'val' split, first 5 frames, 256 px, conditioning image only.
dataset = GSO('/home/rliu/Desktop/cvfiler04/datasets/GoogleScannedObjects')

In [125]:
# Fetch the first scene of the split: image tensor and stacked pose matrices.
imgs, poses = dataset[0]

In [126]:
# A single image (load_target is False), resized to resolution x resolution, RGB after alpha blending.
imgs.shape

torch.Size([1, 256, 256, 3])

In [127]:
# One pose matrix per frame, first_K frames in total.
poses.shape

torch.Size([5, 4, 4])