## UNION: Unsupervised 3D Object Detection using Appearance-based Pseudo-Classes

![](./figures/figure1-plots/figure1.png)

In [None]:
# Folder in which UNION stores its (intermediate) results.
intermediate_results_root = 'PUT_YOUR_DIRECTORY_HERE'
# Directory of the nuScenes dataset.
data_root = 'PUT_YOUR_DIRECTORY_HERE'


# Fail early while the placeholders above have not been replaced.
# The message is passed as a plain string: print(...) returns None, which
# would leave the AssertionError without any message.
assert intermediate_results_root != 'PUT_YOUR_DIRECTORY_HERE', 'Folder for storing UNION results. Change to directory in your file system!'
assert data_root != 'PUT_YOUR_DIRECTORY_HERE', 'Directory to nuScenes dataset. Change to directory in your file system!'

## Settings for notebook

In [None]:
# Dataset selection: True switches to the nuScenes mini split.
USE_MINI_SPLIT = False

# Show the manual annotations (visualization only).
VISUALIZE_GROUNDTRUTH = True

# Which pipeline components are (re)computed when their cell is run.
COMPUTE_GROUNDREMOVAL = True
COMPUTE_SPATIALCLUSTERING = True
COMPUTE_SCENEFLOW = True
COMPUTE_APPEARANCEEMBEDDING = True
COMPUTE_APPEARANCECLUSTERING = True

# Which component results are visualized when their cell is run.
VISUALIZE_GROUNDREMOVAL = True
VISUALIZE_SPATIALCLUSTERING = True
VISUALIZE_SCENEFLOW = True
VISUALIZE_APPEARANCEEMBEDDING = True
VISUALIZE_APPEARANCECLUSTERING = True

# Scene window handed to every component as first_scene / num_of_scenes.
first_scene = 0
num_of_scenes = 850

## Create folder for results


Create folder for intermediate results.
You need less than 1 TB of disk space to store everything.

In [None]:
import os



# Create the results root. exist_ok=True keeps the cell re-runnable without a
# separate existence check, and makedirs also creates missing parent folders.
os.makedirs(intermediate_results_root, exist_ok=True)
print(intermediate_results_root)

## Detection and mobile classes


The nuScenes dataset has 10 detection classes:


1. Barrier (static)
2. Bicycle (static & dynamic)
3. Bus (static & dynamic)
4. Car (static & dynamic)
5. Construction vehicle (static & dynamic)
6. Motorcycle (static & dynamic)
7. Pedestrian (static & dynamic)
8. Traffic cone (static)
9. Trailer (static & dynamic)
10. Truck (static & dynamic)


The classes `barrier` and `traffic cone` do not have the potential to move.

In [None]:
# Maps nuScenes annotation category names to the detection class they count as.
detection_classes = {
    'movable_object.barrier': 'barrier',
    'vehicle.bicycle': 'bicycle',
    'vehicle.bus.bendy': 'bus',
    'vehicle.bus.rigid': 'bus',
    'vehicle.car': 'car',
    'vehicle.construction': 'construction_vehicle',
    'vehicle.motorcycle': 'motorcycle',
    'human.pedestrian.adult': 'pedestrian',
    'human.pedestrian.child': 'pedestrian',
    'human.pedestrian.construction_worker': 'pedestrian',
    'human.pedestrian.police_officer': 'pedestrian',
    'movable_object.trafficcone': 'traffic_cone',
    'vehicle.trailer': 'trailer',
    'vehicle.truck': 'truck',
}

# Mobile classes: every detection class except barrier and traffic cone.
# Built as a separate dict so that detection_classes stays untouched.
mobile_classes = {
    category: detection_class
    for category, detection_class in detection_classes.items()
    if category not in ('movable_object.barrier', 'movable_object.trafficcone')
}

## Create nuScenes object and fill scenes list

- `sample_record = nusc.get('sample', sample_token)`
- `sensor_data_record = nusc.get('sample_data', sample_sensor_token)`
- `sensor_egopose_record = nusc.get('ego_pose', sensor_egopose_token)` 
- `sensor_pose_record = nusc.get('calibrated_sensor', sensor_pose_token)`
- `annot_record = nusc.get('sample_annotation', annot)`

In [None]:
import numpy as np
from nuscenes.nuscenes import NuScenes
from nuscenes.utils.splits import train, val
from utils.utils_functions import get_scene_information



# Pick the nuScenes release: the mini split when USE_MINI_SPLIT is set,
# the trainval release otherwise.
nuscenes_version = 'v1.0-mini' if USE_MINI_SPLIT else 'v1.0-trainval'
nusc = NuScenes(
    version=nuscenes_version,
    dataroot=data_root,
    verbose=False,
)


# Scene information; this object is passed to the components and plots below.
scenes = get_scene_information(nusc)

## Visualize manual annotations (THESE ARE NOT USED DURING TRAINING)


Visualize manual annotations.
Annotations are only shown if (1) class of the instance is mobile (see above), (2) distance is within 50 meters horizontally from LiDAR frame, and (3) annotation has at least 1 LiDAR point.
Points are shown up to 55 meters from LiDAR.
Note: Manual annotations are NOT used for computing pseudo-labels with UNION.

In [None]:
if VISUALIZE_GROUNDTRUTH:
    import ipywidgets
    from utils.utils_visualization import plot_manual_annotations

    # Dropdowns for the scene and for the sample within that scene. The sample
    # dropdown starts with the sample range of scene 0.
    scene_widget1 = ipywidgets.Dropdown(options=range(len(scenes)))
    sample_widget1 = ipywidgets.Dropdown(
        options=range(len(scenes[0]['sample_tokens'])),
    )

    def update_sample_indices_range1(*_):
        """Refresh the sample dropdown to the range of the selected scene."""
        selected_scene = scenes[scene_widget1.value]
        sample_widget1.options = range(len(selected_scene['sample_tokens']))

    scene_widget1.observe(update_sample_indices_range1, 'value')

    # Redraw the manual annotations whenever one of the dropdowns changes.
    ipywidgets.interact(
        lambda scene_idx, sample_idx: plot_manual_annotations(
            nusc=nusc,
            scene_idx=scene_idx,
            sample_idx=sample_idx,
            scenes=scenes,
            mobile_classes=mobile_classes,
            annot_range_thres=50.0,
            num_lidar_points_thres=1,
        ),
        scene_idx=scene_widget1,
        sample_idx=sample_widget1,
    )

## Component: ground point removal

Make a folder for ground removal results (boolean arrays) and one for ground removal results (Ts_coneplane_lidar).

In [None]:
# First ground removal results folder (presumably the boolean arrays; confirm
# against component_groundremoval).
# exist_ok=True keeps the cell re-runnable without a separate existence check;
# makedirs also creates missing parent folders.
intermediate_results_groundremoval_dir1 = os.path.join(intermediate_results_root, 'component_groundremoval1')
os.makedirs(intermediate_results_groundremoval_dir1, exist_ok=True)
print(intermediate_results_groundremoval_dir1)


# Second ground removal results folder (presumably Ts_coneplane_lidar; confirm
# against component_groundremoval).
intermediate_results_groundremoval_dir2 = os.path.join(intermediate_results_root, 'component_groundremoval2')
os.makedirs(intermediate_results_groundremoval_dir2, exist_ok=True)
print(intermediate_results_groundremoval_dir2)

Remove ground points for all sweeps.

In [None]:
# Hyperparameters for the ground point removal component.
# Each key carries a 'StepN__' prefix; the trailing comment gives the unit.
groundremoval_hyperparameters = {
    # Step 0
    'Step0__M': 5,   # Unit: 1.
    # Step 1
    'Step1__xyradius_threshold': 40.00,   # Unit: meters.
    'Step1__zmin_threshold': -1.84023 - 1.00,   # Unit: meters.
    'Step1__zmax_threshold': -1.84023 + 1.00,   # Unit: meters.
    # Step 2
    'Step2__min_sample_points': 250,   # Unit: 1.
    'Step2__residual_threshold': 0.10,   # Unit: meters.
    'Step2__max_trials': 20,   # Unit: 1.
    # Step 3
    'Step3__dmax_thres': 0.30,   # Unit: meters.
    'Step3__num_cones': 8,   # Unit: 1.
    'Step3__min_number_cone_points': 500,   # Unit: 1.
    'Step3__min_sample_points': 250,   # Unit: 1.
    'Step3__residual_threshold': 0.05,   # Unit: meters.
    'Step3__max_trials': 20,   # Unit: 1.
}


if COMPUTE_GROUNDREMOVAL:
    from components.component_groundremoval import main__ground_point_removal

    # With the mini split, 10 scenes are requested instead of num_of_scenes.
    main__ground_point_removal(
        nusc=nusc,
        scenes=scenes,
        hyperparameters=groundremoval_hyperparameters,
        intermediate_results_groundremoval_dir1=intermediate_results_groundremoval_dir1,
        intermediate_results_groundremoval_dir2=intermediate_results_groundremoval_dir2,
        first_scene=first_scene,
        num_of_scenes=10 if USE_MINI_SPLIT else num_of_scenes,
    )

Visualize the ground removal results.
Points are shown up to 50 meters from LiDAR.

In [None]:
if VISUALIZE_GROUNDREMOVAL:
    import ipywidgets
    from utils.utils_visualization import plot_ground_segmented_sweep

    # Dropdowns for the scene and for the LiDAR sweep within that scene. The
    # sweep dropdown starts with the sweep range of scene 0.
    scene_widget2 = ipywidgets.Dropdown(options=range(len(scenes)))
    sweep_widget2 = ipywidgets.Dropdown(
        options=range(len(scenes[0]['sweep_lidar_tokens'])),
    )

    def update_sweep_indices_range2(*_):
        """Refresh the sweep dropdown to the range of the selected scene."""
        selected_scene = scenes[scene_widget2.value]
        sweep_widget2.options = range(len(selected_scene['sweep_lidar_tokens']))

    scene_widget2.observe(update_sweep_indices_range2, 'value')

    # Redraw the ground-segmented sweep whenever one of the dropdowns changes.
    ipywidgets.interact(
        lambda scene_idx, sweep_idx: plot_ground_segmented_sweep(
            nusc=nusc,
            scenes=scenes,
            intermediate_results_groundremoval_dir1=intermediate_results_groundremoval_dir1,
            scene_idx=scene_idx,
            sweep_idx=sweep_idx,
        ),
        scene_idx=scene_widget2,
        sweep_idx=sweep_widget2,
    )

## Component: spatial clustering

Make a folder for spatial clustering results (cluster_dict).

In [None]:
# Folder for the spatial clustering results.
# exist_ok=True keeps the cell re-runnable without a separate existence check;
# makedirs also creates missing parent folders.
intermediate_results_spatialclustering_dir = os.path.join(intermediate_results_root, 'component_spatialclustering')
os.makedirs(intermediate_results_spatialclustering_dir, exist_ok=True)
print(intermediate_results_spatialclustering_dir)

Cluster the points for all samples.

In [None]:
# Hyperparameters for the spatial clustering component.
# Each key carries a 'StepN__' prefix; the trailing comment gives the unit.
spatialclustering_hyperparameters = {
    # Step 0
    'Step0__M': 7,   # Unit: 1.
    # Step 1
    'Step1__sky_threshold': 4.00,   # Unit: meters.
    'Step1__range_threshold': 55.00,   # Unit: meters.
    'Step1__x_range_threshold': None,   # Unit: meters.
    'Step1__y_range_threshold': None,   # Unit: meters.
    # Step 2
    'Step2__clustersize_threshold': 16,   # Unit: 1.
    'Step2__cluster_selection_epsilon': 0.50,   # Unit: meters.
    # Step 3: the number of cones is shared with the ground removal settings.
    'Step3__num_cones': groundremoval_hyperparameters['Step3__num_cones'],   # Unit: 1.
    # Step 4
    'Step4__length_max_threshold': 20.00,   # Unit: meters.
    'Step4__width_max_threshold': 6.00,   # Unit: meters.
    'Step4__height_min_threshold': 0.25,   # Unit: meters.
    'Step4__height_above_ground_max_threshold': 0.75,   # Unit: meters.
    'Step4__length_width_max_ratio_threshold': 8,   # Unit: 1.
    'Step4__area_min_threshold': 0.25,   # Unit: square meters.
}


if COMPUTE_SPATIALCLUSTERING:
    from components.component_spatialclustering import main__spatial_clustering

    # With the mini split, 10 scenes are requested instead of num_of_scenes.
    main__spatial_clustering(
        nusc=nusc,
        scenes=scenes,
        hyperparameters=spatialclustering_hyperparameters,
        intermediate_results_groundremoval_dir1=intermediate_results_groundremoval_dir1,
        intermediate_results_groundremoval_dir2=intermediate_results_groundremoval_dir2,
        intermediate_results_spatialclustering_dir=intermediate_results_spatialclustering_dir,
        first_scene=first_scene,
        num_of_scenes=10 if USE_MINI_SPLIT else num_of_scenes,
    )

Visualize the spatial clustering results, i.e. fitted spatial clusters and fitted bounding boxes.

In [None]:
if VISUALIZE_SPATIALCLUSTERING:
    import ipywidgets
    from utils.utils_visualization import plot_spatial_clusters

    # Dropdowns for the scene, the sample within that scene, and the boolean
    # plot options. The sample dropdown starts with the range of scene 0.
    scene_widget3 = ipywidgets.Dropdown(options=range(len(scenes)))
    sample_widget3 = ipywidgets.Dropdown(
        options=range(len(scenes[0]['sample_tokens'])),
    )
    dense_widget3 = ipywidgets.Dropdown(options=[False, True])
    bbox_widget3 = ipywidgets.Dropdown(options=[False, True])
    # NOTE(review): map_widget3 is created but not passed to interact below.
    map_widget3 = ipywidgets.Dropdown(options=[False, True])

    def update_sample_indices_range3(*_):
        """Refresh the sample dropdown to the range of the selected scene."""
        selected_scene = scenes[scene_widget3.value]
        sample_widget3.options = range(len(selected_scene['sample_tokens']))

    scene_widget3.observe(update_sample_indices_range3, 'value')

    # Redraw the spatial clusters whenever one of the dropdowns changes.
    ipywidgets.interact(
        lambda scene_idx, sample_idx, dense, bbox: plot_spatial_clusters(
            nusc=nusc,
            scenes=scenes,
            M=spatialclustering_hyperparameters['Step0__M'],
            intermediate_results_spatialclustering_dir=intermediate_results_spatialclustering_dir,
            scene_idx=scene_idx,
            sample_idx=sample_idx,
            dense=dense,
            bbox=bbox,
        ),
        scene_idx=scene_widget3,
        sample_idx=sample_widget3,
        dense=dense_widget3,
        bbox=bbox_widget3,
    )

## Component: scene flow estimation

Make a folder for scene flow estimation results.

In [None]:
# Folder for the scene flow estimation results.
# exist_ok=True keeps the cell re-runnable without a separate existence check;
# makedirs also creates missing parent folders.
intermediate_results_sceneflow_dir = os.path.join(intermediate_results_root, 'component_sceneflow')
os.makedirs(intermediate_results_sceneflow_dir, exist_ok=True)
print(intermediate_results_sceneflow_dir)

Compute velocity for all clusters.

In [None]:
# Hyperparameters for the scene flow estimation component.
# Each key carries a 'StepN__' prefix; the trailing comment gives the unit.
sceneflow_hyperparameters = {
    # Step 0: M is shared with the spatial clustering settings.
    'Step0__M': spatialclustering_hyperparameters['Step0__M'],   # Unit: 1.
    'Step0__T': 5,   # Unit: 1.
    # Step 1
    'Step1__bottom_drop_thres': 0.25,   # Unit: meters.
    'Step1__top_drop_thres': 2.50,   # Unit: meters.
    'Step1__min_points_per_pc_thres': 16,   # Unit: 1.
    'Step1__search_size': 4.00,   # Unit: meters.
    'Step1__search_step': 0.10,   # Unit: meters.
    'Step1__max_icp_iterations': 10,   # Unit: 1.
    'Step1__max_dist_inlier_thres': 0.30,   # Unit: meters.
    'Step1__max_pc_size': 800,   # Unit: 1.
    # Step 2
    'Step2__max_dist_inlier_thres': 0.30,   # Unit: meters.
    # Step 3
    'Step3__search_size': 20.00,   # Unit: meters.
    'Step3__search_step': 0.10,   # Unit: meters.
    'Step3__max_icp_iterations': 10,   # Unit: 1.
    'Step3__max_dist_inlier_thres': 0.30,   # Unit: meters.
    # Step 4: the number of cones is shared with the ground removal settings.
    'Step4__num_cones': groundremoval_hyperparameters['Step3__num_cones'],   # Unit: 1.
    'Step4__lidar_frequency': 20,   # Unit: Hertz.
}


if COMPUTE_SCENEFLOW:
    from components.component_sceneflow import main__scene_flow

    # With the mini split, 10 scenes are requested instead of num_of_scenes.
    main__scene_flow(
        nusc=nusc,
        scenes=scenes,
        hyperparameters=sceneflow_hyperparameters,
        intermediate_results_groundremoval_dir2=intermediate_results_groundremoval_dir2,
        intermediate_results_spatialclustering_dir=intermediate_results_spatialclustering_dir,
        intermediate_results_sceneflow_dir=intermediate_results_sceneflow_dir,
        first_scene=first_scene,
        num_of_scenes=10 if USE_MINI_SPLIT else num_of_scenes,
    )

Visualize the motion status, i.e. dynamic object proposals in red.
Points are shown up to 50 meters from LiDAR.

In [None]:
if VISUALIZE_SCENEFLOW:
    import ipywidgets
    from utils.utils_visualization import plot_motion_status

    # Dropdowns for the scene, the sample within that scene, and the boolean
    # plot options. The sample dropdown starts with the range of scene 0.
    scene_widget4 = ipywidgets.Dropdown(options=range(len(scenes)))
    sample_widget4 = ipywidgets.Dropdown(
        options=range(len(scenes[0]['sample_tokens'])),
    )
    dense_widget4 = ipywidgets.Dropdown(options=[False, True])
    bbox_widget4 = ipywidgets.Dropdown(options=[False, True])
    label_widget4 = ipywidgets.Dropdown(options=[False, True])

    def update_sample_indices_range4(*_):
        """Refresh the sample dropdown to the range of the selected scene."""
        selected_scene = scenes[scene_widget4.value]
        sample_widget4.options = range(len(selected_scene['sample_tokens']))

    scene_widget4.observe(update_sample_indices_range4, 'value')

    # Redraw the motion status whenever one of the dropdowns changes.
    ipywidgets.interact(
        lambda scene_idx, sample_idx, dense, bbox, label: plot_motion_status(
            nusc=nusc,
            scenes=scenes,
            M=spatialclustering_hyperparameters['Step0__M'],
            intermediate_results_sceneflow_dir=intermediate_results_sceneflow_dir,
            scene_idx=scene_idx,
            sample_idx=sample_idx,
            dense=dense,
            bbox=bbox,
            label=label,
        ),
        scene_idx=scene_widget4,
        sample_idx=sample_widget4,
        dense=dense_widget4,
        bbox=bbox_widget4,
        label=label_widget4,
    )

## Component: appearance embedding (including unsupervised encoding)

Make a folder for feature map results.

In [None]:
# Folder for the appearance embedding (feature map) results.
# exist_ok=True keeps the cell re-runnable without a separate existence check;
# makedirs also creates missing parent folders.
intermediate_results_appearanceembedding_dir = os.path.join(intermediate_results_root, 'component_appearanceembedding_dinov2-vitl14-reg')
os.makedirs(intermediate_results_appearanceembedding_dir, exist_ok=True)
print(intermediate_results_appearanceembedding_dir)

Compute appearance embedding for all clusters.

In [None]:
# Hyperparameters for the appearance embedding component.
appearanceembedding_hyperparameters = {
    'Step0__stride': 14,   # Unit: 1.
}


if COMPUTE_APPEARANCEEMBEDDING:
    from components.component_appearanceembedding import main__appearance_embedding

    # With the mini split, 10 scenes are requested instead of num_of_scenes.
    main__appearance_embedding(
        nusc=nusc,
        scenes=scenes,
        hyperparameters=appearanceembedding_hyperparameters,
        intermediate_results_spatialclustering_dir=intermediate_results_spatialclustering_dir,
        intermediate_results_appearanceembedding_dir=intermediate_results_appearanceembedding_dir,
        first_scene=first_scene,
        num_of_scenes=10 if USE_MINI_SPLIT else num_of_scenes,
    )

Visualize the appearance embedding results by coloring the clusters reddish based on cosine similarity with a reference cluster (indicated in blue).
Points are shown up to 50 meters from LiDAR.

In [None]:
if VISUALIZE_APPEARANCEEMBEDDING:
    import ipywidgets
    from utils.utils_visualization import plot_appearance_similarities

    # Dropdowns for the scene, the sample within that scene, the reference
    # cluster index (0..99), and the boolean plot options. The sample dropdown
    # starts with the range of scene 0.
    scene_widget5 = ipywidgets.Dropdown(options=range(len(scenes)))
    sample_widget5 = ipywidgets.Dropdown(
        options=range(len(scenes[0]['sample_tokens'])),
    )
    dense_widget5 = ipywidgets.Dropdown(options=[False, True])
    bbox_widget5 = ipywidgets.Dropdown(options=[False, True])
    cluster_widget5 = ipywidgets.Dropdown(options=range(100))
    # Unlike the other boolean dropdowns, this one lists True first.
    label_widget5 = ipywidgets.Dropdown(options=[True, False])

    def update_sample_indices_range5(*_):
        """Refresh the sample dropdown to the range of the selected scene."""
        selected_scene = scenes[scene_widget5.value]
        sample_widget5.options = range(len(selected_scene['sample_tokens']))

    scene_widget5.observe(update_sample_indices_range5, 'value')

    # Redraw the appearance similarities whenever one of the dropdowns changes.
    ipywidgets.interact(
        lambda scene_idx, sample_idx, dense, bbox, cluster_idx, label: plot_appearance_similarities(
            nusc=nusc,
            scenes=scenes,
            M=spatialclustering_hyperparameters['Step0__M'],
            intermediate_results_spatialclustering_dir=intermediate_results_spatialclustering_dir,
            intermediate_results_appearanceembedding_dir=intermediate_results_appearanceembedding_dir,
            scene_idx=scene_idx,
            sample_idx=sample_idx,
            dense=dense,
            bbox=bbox,
            cluster_idx=cluster_idx,
            label=label,
        ),
        scene_idx=scene_widget5,
        sample_idx=sample_widget5,
        dense=dense_widget5,
        bbox=bbox_widget5,
        cluster_idx=cluster_widget5,
        label=label_widget5,
    )

## Component: appearance clustering

Make a folder for appearance clustering results.

In [None]:
# Folder for the appearance clustering results.
# exist_ok=True keeps the cell re-runnable without a separate existence check;
# makedirs also creates missing parent folders.
intermediate_results_appearanceclustering_dir = os.path.join(intermediate_results_root, 'component_appearanceclustering_dinov2-vitl14-reg')
os.makedirs(intermediate_results_appearanceclustering_dir, exist_ok=True)
print(intermediate_results_appearanceclustering_dir)

Cluster appearance embeddings and obtain mobile objects.

In [None]:
# Hyperparameters for the appearance clustering component.
# Each key carries a 'StepN__' prefix; the trailing comment gives the unit.
appearanceclustering_hyperparameters = {
    # Step 0
    'Step0__feature_dim': 1024,   # Unit: 1.
    # Step 1
    'Step1__K__class_agnostic': 20,   # Unit: 1.
    'Step1__max_iterations': 5000,   # Unit: 1.
    'Step1__num_init': 10,   # Unit: 1.
    # Step 2
    'Step2__velocity_thres': 0.50,   # Unit: m/s.
    'Step2__moving_fraction_thres': 0.050,   # Unit: 1.
    # Step 4
    'Step4__K__multi_class_list': [5, 10, 15, 20],   # Unit: 1.
    'Step4__max_iterations': 5000,   # Unit: 1.
    'Step4__num_init': 10,   # Unit: 1.
}


if COMPUTE_APPEARANCECLUSTERING:
    from components.component_appearanceclustering import main__appearance_clustering

    # With the mini split, 10 scenes are requested instead of num_of_scenes.
    main__appearance_clustering(
        nusc=nusc,
        scenes=scenes,
        hyperparameters=appearanceclustering_hyperparameters,
        intermediate_results_sceneflow_dir=intermediate_results_sceneflow_dir,
        intermediate_results_appearanceembedding_dir=intermediate_results_appearanceembedding_dir,
        intermediate_results_appearanceclustering_dir=intermediate_results_appearanceclustering_dir,
        first_scene=first_scene,
        num_of_scenes=10 if USE_MINI_SPLIT else num_of_scenes,
    )

Visualize velocity fractions per appearance cluster.
Velocity fractions are sorted based on value.
Non-mobile and mobile clusters are indicated in blue and orange, respectively.
This is Figure 4 in the paper.

In [None]:
if VISUALIZE_APPEARANCECLUSTERING:
    from utils.utils_visualization import plot_velocity_fractions

    # Plot the velocity fractions from the stored appearance clustering results.
    plot_velocity_fractions(
        hyperparameters=appearanceclustering_hyperparameters,
        intermediate_results_appearanceclustering_dir=intermediate_results_appearanceclustering_dir,
    )

Visualize mobile objects. Points are shown up to 50 meters from LiDAR.

In [None]:
if VISUALIZE_APPEARANCECLUSTERING:
    import ipywidgets
    from utils.utils_visualization import plot_mobile_objects

    # Dropdowns for the scene, the sample within that scene, and the boolean
    # plot options. The sample dropdown starts with the range of scene 0.
    scene_widget6 = ipywidgets.Dropdown(options=range(len(scenes)))
    sample_widget6 = ipywidgets.Dropdown(
        options=range(len(scenes[0]['sample_tokens'])),
    )
    dense_widget6 = ipywidgets.Dropdown(options=[False, True])
    bbox_widget6 = ipywidgets.Dropdown(options=[False, True])
    label_widget6 = ipywidgets.Dropdown(options=[False, True])

    def update_sample_indices_range6(*_):
        """Refresh the sample dropdown to the range of the selected scene."""
        selected_scene = scenes[scene_widget6.value]
        sample_widget6.options = range(len(selected_scene['sample_tokens']))

    scene_widget6.observe(update_sample_indices_range6, 'value')

    # Redraw the mobile objects whenever one of the dropdowns changes.
    ipywidgets.interact(
        lambda scene_idx, sample_idx, dense, bbox, label: plot_mobile_objects(
            nusc=nusc,
            scenes=scenes,
            M=spatialclustering_hyperparameters['Step0__M'],
            hyperparameters=appearanceclustering_hyperparameters,
            intermediate_results_sceneflow_dir=intermediate_results_sceneflow_dir,
            intermediate_results_appearanceclustering_dir=intermediate_results_appearanceclustering_dir,
            scene_idx=scene_idx,
            sample_idx=sample_idx,
            dense=dense,
            bbox=bbox,
            label=label,
        ),
        scene_idx=scene_widget6,
        sample_idx=sample_widget6,
        dense=dense_widget6,
        bbox=bbox_widget6,
        label=label_widget6,
    )

Visualize (a) HDBSCAN, (b) Scene Flow, (c) UNION, and (d) Ground Truth bounding boxes for scene-1100.
This is Figure 3 in the paper.

In [None]:
if VISUALIZE_APPEARANCECLUSTERING:
    from utils.utils_visualization import plot_qualitative_example

    # Plot the qualitative example from the stored spatial clustering, scene
    # flow and appearance clustering results.
    plot_qualitative_example(
        nusc=nusc,
        scenes=scenes,
        hyperparameters=appearanceclustering_hyperparameters,
        intermediate_results_spatialclustering_dir=intermediate_results_spatialclustering_dir,
        intermediate_results_sceneflow_dir=intermediate_results_sceneflow_dir,
        intermediate_results_appearanceclustering_dir=intermediate_results_appearanceclustering_dir,
        USE_MINI_SPLIT=USE_MINI_SPLIT,
        mobile_classes=mobile_classes,
        load_reference=True,
    )