In [1]:
import sys
# Add the directory two levels above the notebook to the import search path;
# presumably this is the repository root that holds the `lib` package imported
# by the following cell (verify against the actual checkout layout).
sys.path.append('../../')

In [2]:
import numpy as np
from lib.utils.builder import build
from lib.utils.utils import cfg_from_file
from lib.data.datasets.nuscene_dataset import cam_relative_pose_nusc

import os

# Root directory under which the split files are looked up
# (<BASEPATH>/meta_data/nusc_trainsub/...). Set the MONODEPTH_BASEPATH
# environment variable to override it instead of editing this cell; the
# original hard-coded location is kept as the default.
BASEPATH = os.environ.get("MONODEPTH_BASEPATH", "/home/yxliu/multi_cam/monodepth")

# Directory handed to the dataset configs as `nuscenes_dir`. Set the
# NUSCENES_DIR environment variable to override it.
NUSCPATH = os.environ.get("NUSCENES_DIR", "/data/nuscene")

In [3]:
from easydict import EasyDict as edict
import os
# Dataset configuration for the training split, restricted to the front camera.
# The mapping is later expanded into keyword arguments for `build`.
train_dataset = edict(
    name="lib.data.datasets.nuscene_dataset.NusceneDepthMonoDataset",
    frame_idxs=[0, 1, -1],
    is_motion_mask=False,
    is_precompute_flow=False,
    is_filter_static=True,
    nuscenes_dir=NUSCPATH,
    split_file=os.path.join(BASEPATH, 'meta_data', 'nusc_trainsub', 'nusc_train.txt'),
    channels=['CAM_FRONT'],
    # Augmentation steps, listed in the order given to the Sequential builder.
    augmentation=edict(
        name='lib.utils.builder.Sequential',
        cfg_list=[
            edict(name='lib.data.augmentations.augmentations.ConvertToFloat'),
            edict(
                name='lib.data.augmentations.augmentations.Resize',
                size=(288, 384),
                preserve_aspect_ratio=True,
                force_pad=True,
            ),
            edict(
                name='lib.data.augmentations.augmentations.Normalize',
                mean=np.array([0.485, 0.456, 0.406]),
                stds=np.array([0.229, 0.224, 0.225]),
            ),
            edict(name='lib.data.augmentations.augmentations.ConvertToTensor'),
        ],
        image_keys=[('image', 0)],
        calib_keys=['P2'],
    ),
)

In [4]:
# Display the string form of the config; the same string is stored in the export.
str(train_dataset)

"{'name': 'lib.data.datasets.nuscene_dataset.NusceneDepthMonoDataset', 'frame_idxs': [0, 1, -1], 'is_motion_mask': False, 'is_precompute_flow': False, 'is_filter_static': True, 'nuscenes_dir': '/data/nuscene', 'split_file': '/home/yxliu/multi_cam/monodepth/meta_data/nusc_trainsub/nusc_train.txt', 'channels': ['CAM_FRONT'], 'augmentation': {'name': 'lib.utils.builder.Sequential', 'cfg_list': [{'name': 'lib.data.augmentations.augmentations.ConvertToFloat'}, {'name': 'lib.data.augmentations.augmentations.Resize', 'size': [288, 384], 'preserve_aspect_ratio': True, 'force_pad': True}, {'name': 'lib.data.augmentations.augmentations.Normalize', 'mean': array([0.485, 0.456, 0.406]), 'stds': array([0.229, 0.224, 0.225])}, {'name': 'lib.data.augmentations.augmentations.ConvertToTensor'}], 'image_keys': [('image', 0)], 'calib_keys': ['P2']}}"

In [5]:
# Export container: the stringified training config plus an initially empty
# list that the export loop fills with one record per kept sample.
output_dict = {
    'cfg': str(train_dataset),
    'samples': [],
}


In [6]:
# Instantiate the dataset from the training config by expanding it into keyword
# arguments for `build`. The output recorded under this cell is a
# FileNotFoundError for the `split_file` path, so that file must exist under
# BASEPATH before this cell can succeed.
dataset = build(**train_dataset)

FileNotFoundError: [Errno 2] No such file or directory: '/home/yxliu/multi_cam/monodepth/meta_data/nusc_trainsub/nusc_train.txt'

In [7]:
import tqdm

# Visit every (token group, camera) pair of the dataset and append one record
# to output_dict['samples'] for each pair that passes the translation filter.
for idx in tqdm.tqdm(range(len(dataset)), dynamic_ncols=True):
    # Split the flat index into a token-group position and a camera position.
    token_index, camera_type_index = divmod(idx, len(dataset.cameras))
    camera_type = dataset.cameras[camera_type_index]

    # Resolve the records of every frame in the group for this camera.
    sample_tokens = dataset.token_list[token_index]
    samples = [dataset.nusc_get_sample(token) for token in sample_tokens]
    camera_tokens = [sample['data'][camera_type] for sample in samples]
    camera_datas = [dataset.nusc_get_sample_data(token) for token in camera_tokens]
    sensor_tokens = [camera_data['calibrated_sensor_token'] for camera_data in camera_datas]
    cs_records = [dataset.nusc_get_sensor(token) for token in sensor_tokens]
    ego_tokens = [camera_data['ego_pose_token'] for camera_data in camera_datas]
    ego_records = [dataset.nusc_get_ego_pose(token) for token in ego_tokens]

    image_dirs = [
        os.path.join(dataset.nuscenes_dir, camera_data['filename'])
        for camera_data in camera_datas
    ]
    P2 = dataset.get_intrinsic(cs_records[0])
    extrinsics = [dataset.get_extrinsic(record) for record in cs_records]  # one entry per frame
    poses = [dataset.get_ego_pose(record) for record in ego_records]       # one entry per frame

    # Pose of frame index 1 and frame index 2 relative to frame index 0, both
    # computed with the inverse extrinsic of frame index 0.
    relative_pose01, relative_pose02 = (
        cam_relative_pose_nusc(
            poses[0], poses[other], np.linalg.inv(extrinsics[0])
        ).astype(np.float32)
        for other in (1, 2)
    )
    translation01, translation02 = (
        np.linalg.norm(pose[0:3, 3]) for pose in (relative_pose01, relative_pose02)
    )

    # Reject the triplet when either translation is below the dataset threshold
    # or above 3 (the flag name says "static", but it also covers large jumps).
    is_static = any(
        translation < dataset.filter_threshold or translation > 3
        for translation in (translation01, translation02)
    )
    if is_static:
        continue

    # The keys assume the group order is [frame 0, frame +1, frame -1], i.e. the
    # order of `frame_idxs` in the config -- TODO confirm against the dataset.
    output_dict['samples'].append({
        'frame0': image_dirs[0],
        'frame1': image_dirs[1],
        'frame-1': image_dirs[2],
        'pose01': relative_pose01.reshape(-1).tolist(),
        'pose0-1': relative_pose02.reshape(-1).tolist(),
        'P2': P2.reshape(-1).tolist(),
        'camera_type_indexes': camera_type_index,
        'camera_type': camera_type,
    })
print(len(output_dict['samples']))

100%|██████████| 18703/18703 [00:02<00:00, 6598.65it/s]

7401





In [8]:
import json

# Write the collected records to disk. The context manager closes the file as
# soon as the dump finishes, so the data is flushed and no handle is leaked
# (the bare `open(...)` call previously left the handle open).
with open('json_nusc_front_train.json', 'w') as json_file:
    json.dump(output_dict, json_file)

In [9]:
# Dataset configuration for the validation split, covering all six cameras.
# The mapping is later expanded into keyword arguments for `build`.
val_dataset = edict(
    name="lib.data.datasets.nuscene_dataset.NusceneDepthMonoDataset",
    frame_idxs=[0, 1, -1],
    is_motion_mask=False,
    is_precompute_flow=False,
    is_filter_static=True,
    nuscenes_dir=NUSCPATH,
    split_file=os.path.join(BASEPATH, 'meta_data', 'nusc_trainsub', 'nusc_val.txt'),
    channels=[
        'CAM_BACK',
        'CAM_FRONT',
        'CAM_FRONT_RIGHT',
        'CAM_BACK_RIGHT',
        'CAM_BACK_LEFT',
        'CAM_FRONT_LEFT',
    ],
    # Augmentation steps, listed in the order given to the Sequential builder.
    augmentation=edict(
        name='lib.utils.builder.Sequential',
        cfg_list=[
            edict(name='lib.data.augmentations.augmentations.ConvertToFloat'),
            edict(
                name='lib.data.augmentations.augmentations.Resize',
                size=(288, 384),
                preserve_aspect_ratio=True,
                force_pad=True,
            ),
            edict(
                name='lib.data.augmentations.augmentations.Normalize',
                mean=np.array([0.485, 0.456, 0.406]),
                stds=np.array([0.229, 0.224, 0.225]),
            ),
            edict(name='lib.data.augmentations.augmentations.ConvertToTensor'),
        ],
        image_keys=[('image', 0)],
        calib_keys=['P2'],
    ),
)

In [10]:
# Rebind the global `dataset` to an instance built from the validation config;
# the export loop in the following cell reads this global.
dataset = build(**val_dataset)

Found 850 in the v1.0-trainval


In [11]:
import json
import tqdm

# Start a fresh export: the stringified validation config plus one record per
# (token group, camera) pair. No translation filter is applied in this cell.
output_dict = {'cfg': str(val_dataset), 'samples': []}

for i in tqdm.tqdm(range(len(dataset)), dynamic_ncols=True):
    # Split the flat index into a token-group position and a camera position.
    token_index       = i // len(dataset.cameras)
    camera_type_index = i % len(dataset.cameras)
    camera_type       = dataset.cameras[camera_type_index]

    # Resolve the records of every frame in the group for this camera.
    sample_tokens = dataset.token_list[token_index]
    samples       = [dataset.nusc_get_sample(token) for token in sample_tokens]
    camera_datas  = [dataset.nusc_get_sample_data(sample['data'][camera_type]) for sample in samples]
    cs_records    = [dataset.nusc_get_sensor(camera_data['calibrated_sensor_token']) for camera_data in camera_datas]
    ego_records   = [dataset.nusc_get_ego_pose(camera_data['ego_pose_token']) for camera_data in camera_datas]

    image_dirs = [os.path.join(dataset.nuscenes_dir, camera_data['filename']) for camera_data in camera_datas]
    P2         = dataset.get_intrinsic(cs_records[0])
    extrinsics = [dataset.get_extrinsic(record) for record in cs_records]   # one entry per frame
    poses      = [dataset.get_ego_pose(record) for record in ego_records]   # one entry per frame

    # Pose of frame index 1 and frame index 2 relative to frame index 0, both
    # computed with the inverse extrinsic of frame index 0.
    relative_pose01 = cam_relative_pose_nusc(
        poses[0], poses[1], np.linalg.inv(extrinsics[0])
    ).astype(np.float32)
    relative_pose02 = cam_relative_pose_nusc(
        poses[0], poses[2], np.linalg.inv(extrinsics[0])
    ).astype(np.float32)

    # The keys assume the group order is [frame 0, frame +1, frame -1], i.e. the
    # order of `frame_idxs` in the config -- TODO confirm against the dataset.
    output_dict['samples'].append({
        'frame0': image_dirs[0],
        'frame1': image_dirs[1],
        'frame-1': image_dirs[2],
        'pose01': relative_pose01.reshape(-1).tolist(),
        'pose0-1': relative_pose02.reshape(-1).tolist(),
        'P2': P2.reshape(-1).tolist(),
        'camera_type_indexes': camera_type_index,
        'camera_type': camera_type,
    })
print(len(output_dict['samples']))

# Use a context manager so the file is flushed and closed right after the dump
# (the bare `open(...)` call previously left the handle open).
with open('json_nusc_all_val.json', 'w') as json_file:
    json.dump(output_dict, json_file)

100%|██████████| 25362/25362 [00:03<00:00, 6834.86it/s]


25362


In [12]:
# Dataset configuration for the sweep-based training set, using the four side
# cameras. The mapping is later expanded into keyword arguments for `build`.
train_sweep_dataset = edict(
    name="lib.data.datasets.nuscene_dataset.NusceneSweepDepthMonoDataset",
    frame_idxs=[0, 1, -1],
    is_motion_mask=False,
    is_precompute_flow=False,
    is_filter_static=True,
    nuscenes_dir=NUSCPATH,
    split_file=os.path.join(BASEPATH, 'meta_data', 'nusc_trainsub', 'nusc_train.txt'),
    channels=[
        'CAM_FRONT_RIGHT',
        'CAM_BACK_RIGHT',
        'CAM_BACK_LEFT',
        'CAM_FRONT_LEFT',
    ],
    # Augmentation steps, listed in the order given to the Sequential builder.
    augmentation=edict(
        name='lib.utils.builder.Sequential',
        cfg_list=[
            edict(name='lib.data.augmentations.augmentations.ConvertToFloat'),
            edict(
                name='lib.data.augmentations.augmentations.Resize',
                size=(288, 384),
                preserve_aspect_ratio=True,
                force_pad=True,
            ),
            edict(
                name='lib.data.augmentations.augmentations.Normalize',
                mean=np.array([0.485, 0.456, 0.406]),
                stds=np.array([0.229, 0.224, 0.225]),
            ),
            edict(name='lib.data.augmentations.augmentations.ConvertToTensor'),
        ],
        image_keys=[('image', 0)],
        calib_keys=['P2'],
    ),
)

In [13]:
# Rebind the global `dataset` to an instance built from the sweep training
# config; the export loop in the following cell reads this global.
dataset = build(**train_sweep_dataset)

Found 850 in the v1.0-trainval


In [14]:
import json
import tqdm

# Start a fresh export: the stringified sweep config plus one record per
# (token group, camera) pair that passes the translation filter.
output_dict = {'cfg': str(train_sweep_dataset), 'samples': []}

for i in tqdm.tqdm(range(len(dataset)), dynamic_ncols=True):
    # Split the flat index into a token-group position and a camera position.
    token_index       = i // len(dataset.cameras)
    camera_type_index = i % len(dataset.cameras)
    camera_type       = dataset.cameras[camera_type_index]

    # Only the first token of the group is used; the neighbouring frames are
    # reached by following the camera sample-data chain from it.
    sample_tokens = dataset.token_list[token_index]
    main_token    = sample_tokens[0]  # center sample data
    main_sample   = dataset.nusc_get_sample(main_token)
    main_camera_instance = dataset.nusc_get_sample_data(main_sample['data'][camera_type])
    camera_datas = [main_camera_instance]

    for frame_id in dataset.frame_ids[1:]:
        # Positive offsets follow 'next' links, the others follow 'prev' links,
        # one hop per unit of |frame_id|.
        # NOTE(review): there is no guard for a missing neighbour here;
        # presumably token_list only holds entries whose neighbours exist.
        next_key = 'next' if frame_id > 0 else 'prev'
        tmp_camera_instance = main_camera_instance
        for _ in range(abs(frame_id)):
            tmp_camera_instance = dataset.nusc_get_sample_data(tmp_camera_instance[next_key])
        camera_datas.append(tmp_camera_instance)

    cs_records  = [dataset.nusc_get_sensor(camera_data['calibrated_sensor_token']) for camera_data in camera_datas]
    ego_records = [dataset.nusc_get_ego_pose(camera_data['ego_pose_token']) for camera_data in camera_datas]

    image_dirs = [os.path.join(dataset.nuscenes_dir, camera_data['filename']) for camera_data in camera_datas]
    P2         = dataset.get_intrinsic(cs_records[0])
    extrinsics = [dataset.get_extrinsic(record) for record in cs_records]   # one entry per frame
    poses      = [dataset.get_ego_pose(record) for record in ego_records]   # one entry per frame

    # Pose of frame index 1 and frame index 2 relative to frame index 0, both
    # computed with the inverse extrinsic of frame index 0.
    relative_pose01 = cam_relative_pose_nusc(
        poses[0], poses[1], np.linalg.inv(extrinsics[0])
    ).astype(np.float32)
    relative_pose02 = cam_relative_pose_nusc(
        poses[0], poses[2], np.linalg.inv(extrinsics[0])
    ).astype(np.float32)
    translation01 = np.linalg.norm(relative_pose01[0:3, 3])
    translation02 = np.linalg.norm(relative_pose02[0:3, 3])

    # Reject the triplet when either translation is below the dataset threshold
    # or above 3 (the flag name says "static", but it also covers large jumps).
    is_static = False
    if translation01 < dataset.filter_threshold or translation01 > 3:
        is_static = True
    if translation02 < dataset.filter_threshold or translation02 > 3:
        is_static = True

    if not is_static:
        # The keys assume dataset.frame_ids is ordered [0, +1, -1], matching
        # `frame_idxs` in the config -- TODO confirm against the dataset.
        output_dict['samples'].append({
            'frame0': image_dirs[0],
            'frame1': image_dirs[1],
            'frame-1': image_dirs[2],
            'pose01': relative_pose01.reshape(-1).tolist(),
            'pose0-1': relative_pose02.reshape(-1).tolist(),
            'P2': P2.reshape(-1).tolist(),
            'camera_type_indexes': camera_type_index,
            'camera_type': camera_type,
        })
print(len(output_dict['samples']))

# Use a context manager so the file is flushed and closed right after the dump
# (the bare `open(...)` call previously left the handle open).
with open('json_nusc_sweep_train.json', 'w') as json_file:
    json.dump(output_dict, json_file)

100%|██████████| 74812/74812 [00:13<00:00, 5634.78it/s]


60411
