In [24]:
import numpy as np
import imageio
import os
import glob
from tqdm import tqdm
import json
import ffmpeg
from PIL import Image
%matplotlib inline
import matplotlib.pyplot as plt
from multiprocessing import Process
import torch


In [3]:
1024*1024

1048576

In [4]:
128*128*128

2097152

In [17]:
20000*8192  - 263454720

-99614720

In [16]:
536*960

263454720

In [20]:
data_path = '/home/loyot/workspace/Datasets/NeRF/3d_vedio_datasets/coffee_martini'

## load data from video

In [21]:
def _load_data(basedir, factor=None, width=None, height=None, split='train'):
    
    poses_arr = np.load(os.path.join(basedir, 'poses_bounds.npy'))
    poses = poses_arr[:, :-2].reshape([-1, 3, 5]).transpose([1,2,0])
    bds = poses_arr[:, -2:].transpose([1,0])
    
    vid = imageio.get_reader(basedir +'/cam00.mp4',  'ffmpeg')
    img0 = vid.get_data(0)
    sh = img0.shape
    r_w, r_h = sh[1], sh[0]
    r_w, r_h = r_w / factor, r_h / factor
    print("poses size:", poses.shape[-1])
    print("original image shape:", (sh[0], sh[1]))
    print("dst image shape:", (r_w, r_h))

    images = []
    timestamps = []
    video_list = glob.glob(os.path.join(basedir, '*.mp4'))
    print("lodading video:")
    for video in tqdm.tqdm(video_list):
        if 'cam00.mp4' in video and split == 'train':
            continue

        if 'cam00.mp4' not in video and split == 'test':
            continue

        vid = imageio.get_reader(video,  'ffmpeg')
        for i, im in enumerate(vid):
            im = Image.fromarray(im).resize((int(r_w), int(r_h)))
            images.append(im)
            timestamps.append(i)
    
    factor = 1
    
    poses[:2, 4, :] = np.array(sh[:2]).reshape([2, 1])
    poses[2, 4, :] = poses[2, 4, :] * 1./factor

    poses = poses.transpose([2,0,1])
    bds = bds.transpose([1,0])
    
    return poses, bds, images

In [None]:
basedir
factor=1
width=None
height=None
split='train'

In [22]:
np.array([-1.2895256280899048, -0.38866135478019714, -0.2731894850730896, 1.326205849647522, 0.3273159861564636, 0.16015827655792236]) * 2

array([-2.57905126, -0.77732271, -0.54637897,  2.6524117 ,  0.65463197,
        0.32031655])

## load json file

In [3]:
def _load_data_from_json(basedir, factor=1, width=None, height=None, split='train'):
    
    poses_arr = np.load(os.path.join(basedir, 'poses_bounds.npy'))
    poses = poses_arr[:, :-2].reshape([-1, 3, 5]).transpose([1,2,0])
    bds = poses_arr[:, -2:].transpose([1,0])
    
    json_file = os.path.join(basedir, f'images_x{factor}_list.json')
    with open(json_file) as jf:
        json_data = json.load(jf)
    
    r_w = json_data['videos'][0]['images'][0]['weight']
    r_h = json_data['videos'][0]['images'][0]['height']

    video_list = json_data['videos']
    scene = json_data['scene']
    
    poses[:2, 4, :] = np.array([r_h, r_w]).reshape([2, 1])

    poses = poses.transpose([2,0,1])
    bds = bds.transpose([1,0])

    images = []
    timestamps = []
    poses_list = []
    bds_list = []
    print("lodading video:")
    with tqdm(position=0) as progress:
        for i, video in enumerate(video_list):
            v_name = video['video_name']

            if 'cam00' in v_name and split == 'train':
                continue

            if 'cam00' not in v_name and split == 'test':
                continue

            pose = poses[i]
            bd = bds[i]

            vids = video['images']
            sizeofimage = len(vids)-1 # 0~n-1
            progress.set_description_str(f'{scene}-{v_name}')
            progress.reset(total=len(vids))
            for im in vids:
                progress.update()
                img = Image.open(im['path'])
                idx = im['idx']
                images.append(np.array(img))
                timestamps.append(idx/sizeofimage)
                poses_list.append(pose)
                bds_list.append(bd)
            progress.refresh()
        
    return images, poses_list, timestamps, bds_list

In [4]:
images, poses, timestamps, bds = _load_data_from_json(data_path, factor=4, split='train')

lodading video:


coffee_martini-cam20: 100%|██████████| 300/300 [00:02<00:00, 128.11it/s]


In [1]:
from examples.datasets.dnerf_3d_video import SubjectLoader
import torch

In [2]:
data_root_fp = '/home/loyot/workspace/Datasets/NeRF/3d_vedio_datasets'
train_dataset_kwargs = {"color_bkgd_aug": "random", "factor": 4}
train_dataset = SubjectLoader(
    subject_id='coffee_martini',
    root_fp=data_root_fp,
    split='train',
    num_rays=8192,
    **train_dataset_kwargs,
)

lodading video:


coffee_martini-cam20: 100%|██████████| 300/300 [00:02<00:00, 129.27it/s]


In [3]:
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    num_workers=16,
    persistent_workers=True,
    batch_size=None,
    pin_memory=True
)

In [4]:
len(train_dataset.images)

5100

In [5]:
data = next(iter(train_dataloader))

In [8]:
data.keys()

dict_keys(['pixels', 'rays', 'color_bkgd', 'timestamps', 'idx'])

In [None]:
data['idx'].min()

tensor(0)

## Covert all video to images using ffmpeg

In [22]:
data_path_root = '/home/loyot/workspace/Datasets/NeRF/3d_vedio_datasets/'
scenes = ['coffee_martini', 'cook_spinach', 'cut_roasted_beef', 'flame_salmon_1', 'flame_steak', 'sear_steak']
ori_res = (2028, 2704)
dst_res = (int(2704/2), int(2028/2))

def exc_fn(video_list):
    for video_path in video_list:
        print("start processing video:", video_path)
        out, _ = (
            ffmpeg
            .input(video_path)
            .output('pipe:', format='rawvideo', pix_fmt='rgb24', loglevel="quiet")
            .global_args('-hide_banner')
            .run(capture_stdout=True)
        )

        video = (
            np
            .frombuffer(out, np.uint8)
            .reshape([-1, 2028, 2704, 3])
        )

        basename = os.path.basename(video_path).split('.')[0]
        root = os.path.join(data_path, f"images/{basename}")
        os.makedirs(root, exist_ok=True)
        print("start saving images")
        for idx in tqdm.tqdm(range(video.shape[0])):
            img0 = Image.fromarray(video[idx]).resize(dst_res)
            img0.save(os.path.join(data_path, root, f'{idx}.png'))

        del out
        del video

p_list = []
for scene in scenes:
    data_path = os.path.join(data_path_root, scene)
    video_list = glob.glob(os.path.join(data_path, '*.mp4'))
    # exc_fn(video_list)
    p = Process(target=exc_fn, args=(video_list,))
    p_list.append(p)
    p.start()

for p in p_list:
    p.join()

## HyberNeRF

In [20]:
from examples.datasets.hypernerf import Load_hyper_data
import numpy as np

In [57]:
root = "/home/loyot/workspace/Datasets/NeRF/HyberNeRF/vrig_broom/"

In [24]:
import glob

In [26]:
data_list = glob.glob("/home/loyot/workspace/Datasets/NeRF/HyberNeRF/*")

In [27]:
data_list = [da.split('/')[-1] for da in data_list]

In [30]:
data_list

['interp_aleks-teapot',
 'interp_chickchicken',
 'interp_cut-lemon',
 'interp_hand',
 'interp_slice-banana',
 'interp_torchocolate',
 'misc_americano',
 'misc_cross-hands',
 'misc_espresso',
 'misc_keyboard',
 'misc_oven-mitts',
 'misc_split-cookie',
 'misc_tamping',
 'vrig_3dprinter',
 'vrig_broom',
 'vrig_chicken',
 'vrig_peel-banana',
 'zip_files']

In [59]:
hyper_data = Load_hyper_data(datadir=root+'broom', add_cam=True)

self.i_train [0, 3, 4, 7, 8, 11, 12, 15, 16, 19, 20, 23, 24, 27, 28, 31, 32, 35, 36, 39, 40, 43, 44, 47, 48, 51, 52, 55, 56, 59, 60, 63, 64, 67, 68, 71, 72, 75, 76, 79, 80, 83, 84, 87, 88, 91, 92, 95, 96, 99, 100, 103, 104, 107, 108, 111, 112, 115, 116, 119, 120, 123, 124, 127, 128, 131, 132, 135, 136, 139, 140, 143, 144, 147, 148, 151, 152, 155, 156, 159, 160, 163, 164, 167, 168, 171, 172, 175, 176, 179, 180, 183, 184, 187, 188, 191, 192, 195, 196, 199, 200, 203, 204, 207, 208, 211, 212, 215, 216, 219, 220, 223, 224, 227, 228, 231, 232, 235, 236, 239, 240, 243, 244, 247, 248, 251, 252, 255, 256, 259, 260, 263, 264, 267, 268, 271, 272, 275, 276, 279, 280, 283, 284, 287, 288, 291, 292, 295, 296, 299, 300, 303, 304, 307, 308, 311, 312, 315, 316, 319, 320, 323, 324, 327, 328, 331, 332, 335, 336, 339, 340, 343, 344, 347, 348, 351, 352, 355, 356, 359, 360, 363, 364, 367, 368, 371, 372, 375, 376, 379, 380, 383, 384, 387, 388, 391, 392]
self.i_test [1, 2, 5, 6, 9, 10, 13, 14, 17, 18, 21, 22, 

In [11]:
np.concatenate([hyper_data.all_cam_params[0].orientation, hyper_data.all_cam_params[0].position[:, None]], axis=-1)

array([[ 0.9971445 , -0.07056575,  0.02689415],
       [ 0.07406207,  0.9833857 , -0.16573292],
       [-0.01475226,  0.16725151,  0.9858039 ]], dtype=float32)

In [12]:
hyper_data.all_cam_params[0].position

array([ 0.00865268, -0.00921293, -0.70470877])

In [23]:
np.concatenate([hyper_data.all_cam_params[0].orientation, hyper_data.all_cam_params[0].position[:, None]], axis=-1)

array([[ 0.99714452, -0.07056575,  0.02689415,  0.00865268],
       [ 0.07406207,  0.98338568, -0.16573292, -0.00921293],
       [-0.01475226,  0.16725151,  0.9858039 , -0.70470877]])

In [19]:
hyper_data.all_cam_params[hyper_data.i_test]

TypeError: only integer scalar arrays can be converted to a scalar index

In [None]:
for i in hyper_data.i_test:
    

In [74]:
hyper_data.all_cam_params[5].tangential_distortion

array([-0.00263565, -0.00133319], dtype=float32)

In [84]:
hyper_data.all_cam_params[5].radial_distortion

array([ 0.07964308, -0.15445076,  0.        ], dtype=float32)

In [103]:
hyper_data.all_cam_params[5].principal_point_x

268.0

In [107]:
np.concatenate([hyper_data.all_cam_params[5].radial_distortion, hyper_data.all_cam_params[5].tangential_distortion]).shape

(5,)

In [119]:
a = torch.eye(3)[None, ...].repeat(10, 1, 1)

In [125]:
torch.randint(0, 1, (10,))

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [126]:
a = torch.eye(3)

In [129]:
torch.stack([a, a]).shape

torch.Size([2, 3, 3])

In [130]:
16384 * 2

32768