In [19]:
import numpy as np
import imageio
import os
import glob
from tqdm import tqdm
import json
import ffmpeg
from PIL import Image
%matplotlib inline
import matplotlib.pyplot as plt
from multiprocessing import Process
import torch


In [20]:
# Scene directory passed to the loader cells below (machine-specific absolute path).
data_path = '/home/loyot/workspace/Datasets/NeRF/3d_vedio_datasets/coffee_martini'

## load data from video

In [21]:
def _load_data(basedir, factor=None, width=None, height=None, split='train'):
    """Load poses, bounds and resized frames straight from the scene's mp4 files.

    Reads ``poses_bounds.npy`` from ``basedir`` and decodes every ``*.mp4`` in
    that directory with imageio's ffmpeg reader, resizing each frame by
    ``1 / factor``.

    Args:
        basedir: Scene directory containing ``poses_bounds.npy`` and the videos
            (``cam00.mp4`` must exist; it is used to probe the frame size).
        factor: Downscale factor applied to the frames. ``None`` is treated as
            ``1`` (no downscaling) instead of failing on the division.
        width: Unused.
        height: Unused.
        split: ``'train'`` skips ``cam00.mp4``; ``'test'`` keeps only
            ``cam00.mp4``; any other value keeps every video.

    Returns:
        ``(poses, bds, images)`` where ``poses`` has shape ``(N, 3, 5)``,
        ``bds`` has shape ``(N, 2)`` (``N`` = rows of ``poses_bounds.npy``) and
        ``images`` is a flat list of resized ``PIL.Image`` frames from the
        selected videos, in sorted video-path order.
    """
    if factor is None:
        factor = 1

    poses_arr = np.load(os.path.join(basedir, 'poses_bounds.npy'))
    # Each row is 15 pose values followed by 2 bounds values.
    poses = poses_arr[:, :-2].reshape([-1, 3, 5]).transpose([1,2,0])
    bds = poses_arr[:, -2:].transpose([1,0])

    # Probe the native frame size from the first frame of cam00, then release
    # the reader so the ffmpeg subprocess does not linger.
    vid = imageio.get_reader(basedir +'/cam00.mp4',  'ffmpeg')
    try:
        sh = vid.get_data(0).shape
    finally:
        vid.close()
    r_w, r_h = sh[1] / factor, sh[0] / factor
    dst_size = (int(r_w), int(r_h))  # PIL expects (width, height)
    print("poses size:", poses.shape[-1])
    print("original image shape:", (sh[0], sh[1]))
    print("dst image shape:", (r_w, r_h))

    images = []
    # Sorted so the frame order does not depend on filesystem enumeration order.
    video_list = sorted(glob.glob(os.path.join(basedir, '*.mp4')))
    print("lodading video:")
    # `tqdm` is the class imported via `from tqdm import tqdm`.
    for video in tqdm(video_list):
        is_cam00 = 'cam00.mp4' in video
        if is_cam00 and split == 'train':
            continue
        if not is_cam00 and split == 'test':
            continue

        vid = imageio.get_reader(video,  'ffmpeg')
        try:
            for im in vid:
                images.append(Image.fromarray(im).resize(dst_size))
        finally:
            vid.close()

    # Column 4 rows 0-1 receive the ORIGINAL (height, width); row 2 is left
    # untouched.
    # NOTE(review): the returned frames are resized by `factor`, so these
    # intrinsics describe the un-resized video — confirm that callers rescale.
    poses[:2, 4, :] = np.array(sh[:2]).reshape([2, 1])

    poses = poses.transpose([2,0,1])
    bds = bds.transpose([1,0])

    return poses, bds, images

In [None]:
# Scratch values mirroring the loader arguments so a loader body can be
# stepped through by hand. `basedir` is bound to the scene directory here;
# evaluating the bare, never-assigned name would raise NameError on a fresh kernel.
basedir = data_path
factor = 1
width = None
height = None
split = 'train'

In [22]:
# Double every component of a fixed six-value vector.
# NOTE(review): looks like a bounding box (min xyz, max xyz) — confirm where these numbers come from.
np.array([-1.2895256280899048, -0.38866135478019714, -0.2731894850730896, 1.326205849647522, 0.3273159861564636, 0.16015827655792236]) * 2

array([-2.57905126, -0.77732271, -0.54637897,  2.6524117 ,  0.65463197,
        0.32031655])

## load json file

In [3]:
def _load_data_from_json(basedir, factor=1, width=None, height=None, split='train'):
    """Load frames, per-frame poses, timestamps and bounds via a JSON image index.

    Reads ``poses_bounds.npy`` and ``images_x{factor}_list.json`` from
    ``basedir``, then opens every image listed for the selected cameras.

    Args:
        basedir: Scene directory holding both files.
        factor: Selects which index file is read
            (``images_x{factor}_list.json``).
        width: Unused.
        height: Unused.
        split: ``'train'`` skips videos whose name contains ``cam00``;
            ``'test'`` keeps only those; any other value keeps every video.

    Returns:
        ``(images, poses_list, timestamps, bds_list)`` — four lists with one
        entry per loaded frame: the image as a numpy array, the ``(3, 5)`` pose
        and the 2-value bounds of the frame's video, and ``idx / (n - 1)``
        where ``n`` is the number of images listed for that video.
    """
    poses_arr = np.load(os.path.join(basedir, 'poses_bounds.npy'))
    # Each row is 15 pose values followed by 2 bounds values.
    poses = poses_arr[:, :-2].reshape([-1, 3, 5]).transpose([1,2,0])
    bds = poses_arr[:, -2:].transpose([1,0])

    json_file = os.path.join(basedir, f'images_x{factor}_list.json')
    with open(json_file) as jf:
        json_data = json.load(jf)

    # The image width is read from the key 'weight' (sic); it must stay in
    # sync with whatever wrote the index file.
    first_image = json_data['videos'][0]['images'][0]
    r_w = first_image['weight']
    r_h = first_image['height']

    video_list = json_data['videos']
    scene = json_data['scene']

    # Column 4 rows 0-1 receive the (height, width) of the indexed images.
    poses[:2, 4, :] = np.array([r_h, r_w]).reshape([2, 1])

    poses = poses.transpose([2,0,1])
    bds = bds.transpose([1,0])

    images = []
    timestamps = []
    poses_list = []
    bds_list = []
    print("lodading video:")
    with tqdm(position=0) as progress:
        for i, video in enumerate(video_list):
            v_name = video['video_name']

            if 'cam00' in v_name and split == 'train':
                continue

            if 'cam00' not in v_name and split == 'test':
                continue

            # NOTE(review): assumes the JSON video order matches the row order
            # of poses_bounds.npy — confirm against the index generator.
            pose = poses[i]
            bd = bds[i]

            vids = video['images']
            # Normalise idx by n - 1; clamp to 1 so a video with a single
            # image yields timestamp 0.0 instead of dividing by zero.
            last_index = max(len(vids) - 1, 1)
            progress.set_description_str(f'{scene}-{v_name}')
            progress.reset(total=len(vids))
            for im in vids:
                progress.update()
                # Materialise the pixels inside the context manager so the file
                # handle is closed right away rather than left to the GC.
                with Image.open(im['path']) as img:
                    images.append(np.array(img))
                timestamps.append(im['idx'] / last_index)
                poses_list.append(pose)
                bds_list.append(bd)
            progress.refresh()

    return images, poses_list, timestamps, bds_list

In [4]:
# Load the training split (every camera except cam00) from the x4 image index.
images, poses, timestamps, bds = _load_data_from_json(
    basedir=data_path,
    factor=4,
    split='train',
)

lodading video:


coffee_martini-cam20: 100%|██████████| 300/300 [00:02<00:00, 128.11it/s]


In [1]:
from examples.datasets.dnerf_3d_video import SubjectLoader
import torch

In [2]:
# Folder that contains one sub-directory per scene.
data_root_fp = '/home/loyot/workspace/Datasets/NeRF/3d_vedio_datasets'

# Extra options forwarded verbatim to the dataset constructor.
train_dataset_kwargs = dict(color_bkgd_aug="random", factor=4)

train_dataset = SubjectLoader(
    root_fp=data_root_fp,
    subject_id='coffee_martini',
    split='train',
    num_rays=8192,
    **train_dataset_kwargs,
)

lodading video:


coffee_martini-cam20: 100%|██████████| 300/300 [00:02<00:00, 129.27it/s]


In [3]:
# Wrap the dataset in a loader backed by 16 persistent worker processes.
loader_options = dict(
    batch_size=None,
    num_workers=16,
    persistent_workers=True,
    pin_memory=True,
)
train_dataloader = torch.utils.data.DataLoader(train_dataset, **loader_options)
del loader_options

In [4]:
# Number of entries held in the dataset's `images` attribute.
len(train_dataset.images)

5100

In [5]:
# Pull a single item from the loader to inspect what the dataset yields.
data = next(iter(train_dataloader))

In [8]:
# List the fields present in the sampled item.
data.keys()

dict_keys(['pixels', 'rays', 'color_bkgd', 'timestamps', 'idx'])

In [None]:
# Smallest value in the 'idx' field of the sampled item.
data['idx'].min()

tensor(0)

## Convert all videos to images using ffmpeg

In [2]:
# Folder that contains one sub-directory per scene.
data_path_root = '/home/loyot/workspace/Datasets/NeRF/3d_vedio_datasets/'

# Scenes whose videos get converted to image folders.
scenes = [
    'coffee_martini',
    'cook_spinach',
    'cut_roasted_beef',
    'flame_salmon_1',
    'flame_steak',
    'sear_steak',
]

# Native frame size as (height, width).
ori_res = (2028, 2704)

# Target size as (width, height) — the order PIL's resize takes — at half resolution.
dst_res = (ori_res[1] // 2, ori_res[0] // 2)

def exc_fn(video_list):
    """Decode each video with ffmpeg and save every frame as a resized PNG.

    For a video ``<dir>/<name>.mp4`` the frames are written to
    ``<dir>/images/<name>/<frame index>.png``, resized to the module-level
    ``dst_res``. The output directory is derived from the video's own path, so
    the function does not depend on which scene a global happens to point at
    when it runs (it is launched once per scene in separate processes).

    Args:
        video_list: Iterable of paths to video files whose frames have the
            size given by the module-level ``ori_res`` (height, width).

    Note:
        Each video is decoded into memory in full before any frame is saved.
    """
    src_h, src_w = ori_res
    for video_path in video_list:
        print("start processing video:", video_path)
        # Ask ffmpeg for raw RGB24 bytes on stdout and capture them whole.
        out, _ = (
            ffmpeg
            .input(video_path)
            .output('pipe:', format='rawvideo', pix_fmt='rgb24', loglevel="quiet")
            .global_args('-hide_banner')
            .run(capture_stdout=True)
        )

        # View the byte buffer as (frames, height, width, channels).
        video = np.frombuffer(out, np.uint8).reshape([-1, src_h, src_w, 3])

        basename = os.path.splitext(os.path.basename(video_path))[0]
        root = os.path.join(os.path.dirname(video_path), "images", basename)
        os.makedirs(root, exist_ok=True)
        print("start saving images")
        # `tqdm` is the class imported via `from tqdm import tqdm`.
        for idx in tqdm(range(video.shape[0])):
            frame = Image.fromarray(video[idx]).resize(dst_res)
            frame.save(os.path.join(root, f'{idx}.png'))

        # Drop the references to the raw buffer before decoding the next video.
        del out
        del video

# Launch one worker process per scene, then block until all of them finish.
# (For a sequential debug run, call exc_fn(video_list) directly instead.)
p_list = []
for scene in scenes:
    # Assigned before the process starts: exc_fn may read this module-level name.
    data_path = os.path.join(data_path_root, scene)
    mp4_pattern = os.path.join(data_path, '*.mp4')
    video_list = glob.glob(mp4_pattern)
    p = Process(target=exc_fn, args=(video_list,))
    p.start()
    p_list.append(p)
del mp4_pattern

for p in p_list:
    p.join()

start processing video: /home/loyot/workspace/Datasets/NeRF/3d_vedio_datasets/coffee_martini/cam14.mp4
start saving images


100%|██████████| 300/300 [01:31<00:00,  3.29it/s]


start processing video: /home/loyot/workspace/Datasets/NeRF/3d_vedio_datasets/coffee_martini/cam00.mp4
start saving images


100%|██████████| 300/300 [01:30<00:00,  3.30it/s]


start processing video: /home/loyot/workspace/Datasets/NeRF/3d_vedio_datasets/coffee_martini/cam08.mp4
start saving images


100%|██████████| 300/300 [01:25<00:00,  3.50it/s]


start processing video: /home/loyot/workspace/Datasets/NeRF/3d_vedio_datasets/coffee_martini/cam11.mp4
start saving images


100%|██████████| 300/300 [01:28<00:00,  3.39it/s]


start processing video: /home/loyot/workspace/Datasets/NeRF/3d_vedio_datasets/coffee_martini/cam07.mp4
start saving images


100%|██████████| 300/300 [01:29<00:00,  3.36it/s]


start processing video: /home/loyot/workspace/Datasets/NeRF/3d_vedio_datasets/coffee_martini/cam18.mp4
start saving images


  7%|▋         | 22/300 [00:06<01:22,  3.37it/s]