In [1]:
from nuscenes import NuScenes
import open3d as o3d
import numpy as np

In [2]:
dataroot = 'core/data/datasets/nuscenes'
split = 'mini' #mini or trainval
nusc = NuScenes(version='v1.0-'+split, dataroot=dataroot, verbose=True)

Loading NuScenes tables for version v1.0-mini...
Loading nuScenes-lidarseg...
32 category,
8 attribute,
4 visibility,
911 instance,
12 sensor,
120 calibrated_sensor,
31206 ego_pose,
8 log,
10 scene,
404 sample,
31206 sample_data,
18538 sample_annotation,
4 map,
404 lidarseg,
Done loading in 0.548 seconds.
Reverse indexing ...
Done reverse indexing in 0.1 seconds.


# Point Cloud and Semantic Label Extraction

In [3]:
print(len(nusc.sample))

404


In [4]:
print(len(nusc.lidarseg))

404


In [5]:
sample = nusc.sample[0]
sample_data_token = sample['data']['LIDAR_TOP']
print(sample_data_token)

9d9bf11fb0e144c8b446d54a8a00184f


In [6]:
for i,l in enumerate(nusc.lidarseg):
    if l['sample_data_token'] == sample_data_token:
        print(i)
        break

204


In [7]:
labels = np.fromfile(dataroot+'/'+l['filename'], dtype=np.uint8)
print(labels.shape)

(34688,)


In [8]:
data_path = nusc.get_sample_data_path(sample_data_token)
pc = np.fromfile(data_path, dtype=np.float32).reshape((-1,5))[:,:4]

print(pc.shape)

(173440,)


Start with a sample and then need to find associated lidarseg annotations

In [None]:
nusc.get('lidarseg', sample_data_token)

# Dataset

In [3]:
from torchsparse import SparseTensor
from torchsparse.utils import sparse_collate_fn, sparse_quantize

In [11]:
size = len(nusc.sample)

pc_data_files = [None]*size
pc_label_files = [None]*size

for idx, sample in enumerate(nusc.sample):
    sample_data_token = sample['data']['LIDAR_TOP']

    data_path = nusc.get_sample_data_path(sample_data_token)
    label_file = nusc.get('lidarseg', sample_data_token)['filename']

    pc_data_files[idx] = data_path
    pc_label_files[idx] = dataroot + '/' + label_file

In [13]:
np.fromfile(pc_data_files[0], dtype=np.float32).reshape((-1,5))[:,:4].shape

(34688, 4)

In [14]:
np.fromfile(pc_label_files[0], dtype=np.uint8).shape

(34688,)

In [30]:
idx = 1

#Get the point cloud data
lidar_pc = np.fromfile(pc_data_files[idx], dtype=np.float32)
lidar_pc = lidar_pc.reshape((-1,5))[:,:4]
print(lidar_pc.shape)

#Get the segmentation annotations
labels_ = np.fromfile(pc_label_files[idx], dtype=np.uint8)
print(labels_.shape)
print(max(labels_))

(34720, 4)
(34720,)
31


In [32]:
voxel_size = 0.05

pc_ = np.round(lidar_pc / voxel_size)
pc_ -= pc_.min(0, keepdims=1)
print(pc_[0])
print(pc_.shape)

[1312. 2008.   61.   60.]
(34720, 4)


In [33]:
feat_ = lidar_pc
inds, labels, inverse_map = sparse_quantize(pc_, feat_, labels_,
                                                    return_index=True, return_invs=True)

In [34]:
print(inds.shape)
print(labels.shape)
print(inverse_map.shape)

(26409,)
(26409,)
(34720,)


In [35]:
pc = pc_[inds] #Just get the unique voxel coordinates (no duplicates)
feat = feat_[inds]
labels = labels_[inds]

In [36]:
print(pc.shape)
print(feat.shape)
print(labels.shape)

(26409, 4)
(26409, 4)
(26409,)


# Categories

In [59]:
d = {}
for cat in nusc.category:
    d[cat['index']] = cat['name']
    
print(d)

{0: 'noise', 1: 'animal', 2: 'human.pedestrian.adult', 3: 'human.pedestrian.child', 4: 'human.pedestrian.construction_worker', 5: 'human.pedestrian.personal_mobility', 6: 'human.pedestrian.police_officer', 7: 'human.pedestrian.stroller', 8: 'human.pedestrian.wheelchair', 9: 'movable_object.barrier', 10: 'movable_object.debris', 11: 'movable_object.pushable_pullable', 12: 'movable_object.trafficcone', 13: 'static_object.bicycle_rack', 14: 'vehicle.bicycle', 15: 'vehicle.bus.bendy', 16: 'vehicle.bus.rigid', 17: 'vehicle.car', 18: 'vehicle.construction', 19: 'vehicle.emergency.ambulance', 20: 'vehicle.emergency.police', 21: 'vehicle.motorcycle', 22: 'vehicle.trailer', 23: 'vehicle.truck', 24: 'flat.driveable_surface', 25: 'flat.other', 26: 'flat.sidewalk', 27: 'flat.terrain', 28: 'static.manmade', 29: 'static.other', 30: 'static.vegetation', 31: 'vehicle.ego'}


# Time

In [5]:
s1 = nusc.sample[0]
print(s1['prev'])
print(s1['next'])


39586f9d59004284a7114a68825e8eec


In [16]:
for i in range(len(nusc.sample)-1):
    print(nusc.sample[i]['next'] == nusc.sample[i+1]['token'])

True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
False
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
False
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
False
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
False
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
T

# Train vs Val

In [23]:
import nuscenes as nu
from nuscenes.utils import splits

In [42]:
names = sorted(splits.mini_train)
print(names)

['scene-0061', 'scene-0553', 'scene-0655', 'scene-0757', 'scene-0796', 'scene-1077', 'scene-1094', 'scene-1100']


In [44]:
start = 0
data = []
label = []
for scene_name in names:
    for i,scene in enumerate(nusc.scene[start:], start=start):
        if scene_name == scene['name']:
            token = scene['first_sample_token']
            while token != '':
                sample = nusc.get('sample', token)

                sample_data_token = sample['data']['LIDAR_TOP']

                data_path = nusc.get_sample_data_path(sample_data_token)
                label_file = nusc.get('lidarseg', sample_data_token)['filename']

                data.append(data_path)
                label.append(dataroot + '/' + label_file)

                token = sample['next']

            start = i
            break

In [43]:
print(len(data))
print(len(label))

323
323
