In [None]:
%matplotlib inline
from nuscenes.nuscenes import NuScenes
import numpy as np
import os
import shutil
import json
from PIL import Image


In [None]:
# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------

# Intended cap on how many scenes get processed; a negative value presumably
# means "all scenes" -- verify against the code that reads it.
MAX_SCENES = -1

# nuScenes split name handed to the devkit loader below.
DATA_VER = 'v1.0-trainval'

# Dataset location. Set the NUSCENES_DATA_ROOT environment variable to run the
# notebook on another machine; the fallback is the path originally used here.
DATA_ROOT = os.environ.get('NUSCENES_DATA_ROOT', '/users/bangya/projects/vlm/nuscenes-data')

# Output directories (relative to the notebook's working directory).
OUTPUT_ROOT = './structured-data'
CROPS_ROOT = './instance_crops'

# Key into each sample's 'data' dict selecting which sensor to export, and the
# file extension used for every image written by this notebook.
SENSOR = 'CAM_FRONT'
POSTFIX = 'jpg'

# Layout written under OUTPUT_ROOT (frame directories are named after the
# sample timestamp, not the sample token):
#
#   structured-data
#    - <scene_token>
#       - <sample_timestamp>
#         <SENSOR>_raw.jpg
#         <SENSOR>_box.jpg
#         <SENSOR>_meta.json
#
# Layout written under CROPS_ROOT:
#
#   instance_crops
#    - <scene_token>
#       <instance_token>.jpg

os.makedirs(OUTPUT_ROOT, exist_ok=True)
nusc = NuScenes(version=DATA_VER, dataroot=DATA_ROOT, verbose=True)

In [None]:
def corners_8_to_2(corners_8, W, H):
    """Collapse a set of projected box corners into a clipped 2D extent.

    Args:
        corners_8: 2D numpy array indexed as ``[row, column]``. Row 0 is read
            as x coordinates and row 1 as y coordinates; any further rows are
            ignored. (Presumably one column per box corner -- the function
            itself does not check the column count.)
        W: Upper clipping bound for x (the caller passes the image width).
        H: Upper clipping bound for y (the caller passes the image height).

    Returns:
        A 2x2 numpy array ``[[x_min, x_max], [y_min, y_max]]`` where every
        value has been clipped to ``[0, W]`` (x) or ``[0, H]`` (y).
    """
    xs, ys = corners_8[0, :], corners_8[1, :]

    # Take the extremes along each axis, then clamp each one to the image.
    x_lo, x_hi = (np.clip(edge, 0, W) for edge in (np.min(xs), np.max(xs)))
    y_lo, y_hi = (np.clip(edge, 0, H) for edge in (np.min(ys), np.max(ys)))

    return np.array([[x_lo, x_hi], [y_lo, y_hi]])

In [None]:

# Export, for every scene: the raw SENSOR image, a rendering with boxes and a
# JSON metadata file per frame, plus one cropped image per object instance.

# Thresholds an annotation must pass before it may become an instance's saved
# crop. They never change, so they are defined once rather than per annotation.
MIN_AREA = 10000  # minimum area of the clipped 2D box (image-coordinate units squared)
MIN_VIS = 100     # minimum value of the number after the last '-' in the visibility level

# Honour MAX_SCENES from the config cell: a negative value keeps every scene.
scenes = nusc.scene if MAX_SCENES < 0 else nusc.scene[:MAX_SCENES]

for sc in scenes:
    # instance_token -> [image_path, 2d_crop_diag, 2d_crop_area] for the
    # largest qualifying box seen so far for that instance in this scene.
    inst_crops = {}

    print(f" > Processing scene {sc['token']} ...")
    sc_dir = os.path.join(OUTPUT_ROOT, sc['token'])
    # An existing scene directory is reused; files inside it are overwritten.
    os.makedirs(sc_dir, exist_ok=True)

    # Follow the 'next' links from the first sample until the empty token.
    all_frames = [nusc.get('sample', sc['first_sample_token'])]
    while all_frames[-1]['next'] != '':
        all_frames.append(nusc.get('sample', all_frames[-1]['next']))
    print(f" - Scene {sc['token']} has {len(all_frames)} frames.")

    for frame in all_frames:
        # One directory per frame, named after the sample timestamp.
        frame_dir = os.path.join(sc_dir, str(frame['timestamp']))
        os.makedirs(frame_dir, exist_ok=True)

        # Look the sensor records up once per frame instead of once per field.
        sd_token = frame['data'][SENSOR]
        sample_data = nusc.get('sample_data', sd_token)
        ego_pose = nusc.get('ego_pose', sample_data['ego_pose_token'])
        calib = nusc.get('calibrated_sensor', sample_data['calibrated_sensor_token'])
        img_w, img_h = sample_data['width'], sample_data['height']

        # Copy RGB image
        raw_path = nusc.get_sample_data_path(sd_token)
        rgb_path = os.path.join(frame_dir, f'{SENSOR}_raw.{POSTFIX}')
        shutil.copy(raw_path, rgb_path)

        # Dump RGB image with boxes.
        # NOTE(review): this relies on render_sample_data returning a mapping
        # annotation_token -> details (None, or a dict with a 'corners' entry).
        # Verify that the installed devkit provides that return value.
        # NOTE(review): if render_sample_data leaves its matplotlib figure
        # open, figures accumulate across frames -- confirm and close them.
        rgb_box_path = os.path.join(frame_dir, f'{SENSOR}_box.{POSTFIX}')
        box_per_annos = nusc.render_sample_data(sd_token, out_path=rgb_box_path, verbose=False)

        # Metadata written next to the images. 'cam_t' / 'cam_r' hold the
        # translation / rotation of the ego_pose record referenced by the
        # sample_data; the key names are kept for schema compatibility.
        anno_path = os.path.join(frame_dir, f'{SENSOR}_meta.json')
        annos = []
        meta = {
            'scene_token': sc['token'],
            'sample_token': frame['token'],
            'sample_data_token': sd_token,
            'timestamp': frame['timestamp'],
            'image_path': rgb_path,
            'image_box_path': rgb_box_path,
            'image_width': img_w,
            'image_height': img_h,
            'cam_t': ego_pose['translation'],
            'cam_r': ego_pose['rotation'],
            'intrinsic': calib['camera_intrinsic'],
            'annos': annos,
        }

        for anno_token, details in box_per_annos.items():
            if details is None:
                continue

            anno = nusc.get('sample_annotation', anno_token)
            instance_token = anno['instance_token']
            attrs = [nusc.get('attribute', token)['name'] for token in anno['attribute_tokens']]
            visibility = nusc.get('visibility', anno['visibility_token'])['level']

            # Clipped 2D extent [[x_min, x_max], [y_min, y_max]] and its area.
            diag = corners_8_to_2(details['corners'], img_w, img_h)
            area = float((diag[0, 1] - diag[0, 0]) * (diag[1, 1] - diag[1, 0]))

            annos.append({
                'anno_token': anno_token,
                'instance_token': instance_token,
                'category_name': anno['category_name'],
                'box_t': anno['translation'],
                'box_r': anno['rotation'],
                'box_size': anno['size'],
                'visibility': visibility,
                'attribute': attrs,
                '2d_crop': {
                    'diag': diag.tolist(),
                    'area': area,
                },
            })

            # Track the best crop per instance: the largest box that is both
            # big enough and visible enough.
            vis_val = int(visibility.split('-')[-1])
            best = inst_crops.get(instance_token)
            is_larger = best is None or area > best[2]
            if is_larger and area > MIN_AREA and vis_val >= MIN_VIS:
                inst_crops[instance_token] = [rgb_path, diag, area]

        with open(anno_path, 'w') as f:
            json.dump(meta, f, indent=2)

    # Dump instance crops: one image per instance, cut from the frame in which
    # its qualifying box was largest.
    crop_sc_dir = os.path.join(CROPS_ROOT, sc['token'])
    os.makedirs(crop_sc_dir, exist_ok=True)
    for inst_token, (img_path, diag, area) in inst_crops.items():
        img_save_path = os.path.join(crop_sc_dir, f'{inst_token}.{POSTFIX}')

        # The context manager guarantees the source image file is closed.
        with Image.open(img_path) as img:
            # PIL box order is (left, upper, right, lower).
            box = (diag[0, 0], diag[1, 0], diag[0, 1], diag[1, 1])
            img.crop(box).save(img_save_path)
