In [1]:
%matplotlib inline
from nuscenes.nuscenes import NuScenes
import numpy as np
import os
import shutil
import json

In [2]:
# --- Export configuration -------------------------------------------------
MAX_SCENES = 2            # number of scenes (from the start of nusc.scene) to export
DATA_VER = 'v1.0-mini'    # nuScenes split/version to load
# Dataset location. Defaults to the original path; set NUSCENES_DATA_ROOT to
# run the notebook on another machine without editing this cell.
DATA_ROOT = os.environ.get('NUSCENES_DATA_ROOT', '/users/bangya/projects/vlm/nuscenes-data')
OUTPUT_ROOT = './structured-data'
SENSOR = 'CAM_FRONT'      # camera channel whose images and boxes are exported
POSTFIX = 'jpg'           # file extension used for the exported images

# Output layout written by the export loop:
#   structured-data/
#     <scene_token>/
#       <sample timestamp>/        (directories are keyed by frame['timestamp'])
#         <SENSOR>_raw.jpg         copy of the original camera image
#         <SENSOR>_box.jpg         rendering produced by render_sample_data
#         <SENSOR>_meta.json       calibration + per-annotation metadata

os.makedirs(OUTPUT_ROOT, exist_ok=True)
nusc = NuScenes(version=DATA_VER, dataroot=DATA_ROOT, verbose=True)

Loading NuScenes tables for version v1.0-mini...
23 category,
8 attribute,
4 visibility,
911 instance,
12 sensor,
120 calibrated_sensor,
31206 ego_pose,
8 log,
10 scene,
404 sample,
31206 sample_data,
18538 sample_annotation,
4 map,
Done loading in 0.236 seconds.
Reverse indexing ...
Done reverse indexing in 0.0 seconds.


In [3]:
def corners_8_to_2(corners_8, W, H):
    """Reduce a set of projected corner points to a clipped 2D extent.

    Args:
        corners_8: array indexed as ``[row, column]``; row 0 is read as the
            x coordinates and row 1 as the y coordinates of the points.
            Any additional rows are ignored.
        W: upper clipping bound for x (lower bound is 0).
        H: upper clipping bound for y (lower bound is 0).

    Returns:
        ``np.ndarray`` of shape (2, 2) laid out as
        ``[[min_x, max_x], [min_y, max_y]]``, each value clipped to
        ``[0, W]`` for x and ``[0, H]`` for y.
    """
    xs = corners_8[0, :]
    ys = corners_8[1, :]

    # Take the extremes first, then clamp each one to the given bounds.
    x_extent = [np.clip(reduce(xs), 0, W) for reduce in (np.min, np.max)]
    y_extent = [np.clip(reduce(ys), 0, H) for reduce in (np.min, np.max)]

    return np.array([x_extent, y_extent])

In [None]:

# Export the first MAX_SCENES scenes. For every sample (frame) of a scene this
# writes the raw SENSOR image, a rendering with annotation boxes, and a JSON
# file holding camera calibration plus per-annotation metadata.
for sc in nusc.scene[:MAX_SCENES]:
    print(f" > Processing scene {sc['token']} ...")
    sc_dir = os.path.join(OUTPUT_ROOT, sc['token'])
    os.makedirs(sc_dir, exist_ok=True)

    # Samples are chained through their 'next' token; an empty string marks
    # the last sample of the scene.
    first_frame = nusc.get('sample', sc['first_sample_token'])
    all_frames = [first_frame]
    while all_frames[-1]['next'] != '':
        all_frames.append(nusc.get('sample', all_frames[-1]['next']))
    print(f" - Scene {sc['token']} has {len(all_frames)} frames.")

    for frame in all_frames:
        # Per-frame output directories are named after the sample timestamp.
        frame_dir = os.path.join(sc_dir, str(frame['timestamp']))
        os.makedirs(frame_dir, exist_ok=True)

        # Resolve the camera records once instead of repeating the lookups
        # for every metadata field.
        sd_token = frame['data'][SENSOR]
        sd_record = nusc.get('sample_data', sd_token)
        calib = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])

        # Copy RGB image
        raw_path = nusc.get_sample_data_path(sd_token)
        rgb_path = os.path.join(frame_dir, f'{SENSOR}_raw.{POSTFIX}')
        shutil.copy(raw_path, rgb_path)

        # dump RGB image with boxes
        rgb_box_path = os.path.join(frame_dir, f'{SENSOR}_box.{POSTFIX}')
        box_per_annos = nusc.render_sample_data(sd_token, out_path=rgb_box_path, verbose=False)
        # The code below needs a mapping of annotation token -> box details.
        # Fail with a clear message if the installed devkit returns nothing.
        if box_per_annos is None:
            raise RuntimeError(
                "nusc.render_sample_data() returned None; this cell requires a "
                "devkit build whose render_sample_data returns a mapping of "
                "annotation token -> box details (including 'corners')."
            )

        # dump meta data
        anno_path = os.path.join(frame_dir, f'{SENSOR}_meta.json')
        meta = {
            'scene_token': sc['token'],
            'sample_token': frame['token'],
            'sample_data_token': sd_token,
            'timestamp': frame['timestamp'],
            'image_path': rgb_path,
            'image_box_path': rgb_box_path,
            'image_width': sd_record['width'],
            'image_height': sd_record['height'],
            'cam_t': calib['translation'],
            'cam_r': calib['rotation'],
            'intrinsic': calib['camera_intrinsic'],
        }

        annos = []
        for anno_token, details in box_per_annos.items():
            # Entries without details carry no box to export.
            if details is None:
                continue
            anno = nusc.get('sample_annotation', anno_token)
            visibility = nusc.get('visibility', anno['visibility_token'])['level']
            # 2x2 extent [[min_x, max_x], [min_y, max_y]] clipped to the image.
            diag = corners_8_to_2(details['corners'], meta['image_width'], meta['image_height'])

            annos.append({
                'anno_token': anno_token,
                'category_name': anno['category_name'],
                'box_t': anno['translation'],
                'box_r': anno['rotation'],
                'box_size': anno['size'],
                'visibility': visibility,
                '2d_crop': {
                    'diag': diag.tolist(),
                    # width * height of the clipped extent
                    'area': float((diag[0, 1] - diag[0, 0]) * (diag[1, 1] - diag[1, 0])),
                },
            })
        # Added last so 'annos' stays the final key in the JSON file.
        meta['annos'] = annos

        with open(anno_path, 'w') as f:
            json.dump(meta, f, indent=2)


 > Processing scene cc8c0bf57f984915a77078b10eb33198 ...
 - Scene cc8c0bf57f984915a77078b10eb33198 has 39 frames.
 > Processing scene fcbccedd61424f1b85dcbf8f897f9754 ...
 - Scene fcbccedd61424f1b85dcbf8f897f9754 has 40 frames.
