## UNION: Unsupervised 3D Object Detection using Appearance-based Pseudo-Classes

In [None]:
# User configuration: both placeholders must be replaced before the notebook is run.
intermediate_results_root = 'PUT_YOUR_DIRECTORY_HERE'   # Folder for storing UNION results.
data_root                 = 'PUT_YOUR_DIRECTORY_HERE'   # Directory of the nuScenes dataset.


# Fail fast while a placeholder is still in place. The message has to be a plain string:
# wrapping it in print() evaluates to None and leaves the AssertionError without any text.
assert intermediate_results_root!='PUT_YOUR_DIRECTORY_HERE', 'Folder for storing UNION results. Change to directory in your file system!'
assert data_root!='PUT_YOUR_DIRECTORY_HERE', 'Directory to nuScenes dataset. Change to directory in your file system!'

In [None]:
import os



# Folder (relative to the working directory) that receives the nuScenes evaluation output.
evaluation_dir = 'evaluation-results'
# exist_ok=True keeps the cell re-runnable and avoids the check-then-create race of exists()+mkdir().
os.makedirs(evaluation_dir, exist_ok=True)

## Create nuScenes object

Frequently used devkit lookups (`nusc.get(table_name, token)`), listed here for reference:

- `sample_record = nusc.get('sample', sample_token)`
- `sensor_data_record = nusc.get('sample_data', sample_sensor_token)`
- `sensor_egopose_record = nusc.get('ego_pose', sensor_egopose_token)`
- `sensor_pose_record = nusc.get('calibrated_sensor', sensor_pose_token)`
- `annot_record = nusc.get('sample_annotation', annot)`

In [None]:
import numpy as np
from nuscenes.nuscenes import NuScenes
from nuscenes.utils.splits import train, val
from utils.utils_functions import get_scene_information



# Version string handed to the nuScenes devkit.
nuscenes_version = 'v1.0-trainval'

# Dataset handle; the evaluation cells pass it to DetectionEval and use it for record lookups.
nusc = NuScenes(
    version=nuscenes_version,
    dataroot=data_root,
    verbose=False,
)

# Scene information from the project helper (indexed elsewhere as scenes[scene_idx]['sample_tokens']).
scenes = get_scene_information(nusc)

## Evaluate class-agnostic

In [None]:
# Class-agnostic training runs to evaluate; the names differ only in the source of the training labels.
experiment_names_ca = [
    f'CenterPoint-Pillar0200__second-secfpn-8xb4-cyclic-20e-nus-3d__Class-Agnostic-Training__Labels-{label_source}__UNION-file'
    for label_source in (
        'GT',           # Class-agnostic train - Ground truth.
        'HDBSCAN',      # Class-agnostic train - HDBSCAN.
        'Scene-Flow',   # Class-agnostic train - Scene flow.
        'UNION',        # Class-agnostic train - UNION.
    )
]

In [None]:
from nuscenes.eval.detection.evaluate import DetectionEval
from nuscenes.eval.common.config import config_factory



# Ground-truth classes that are collapsed onto 'car' for the class-agnostic evaluation.
object_classes = {'bicycle', 'bus', 'car', 'construction_vehicle', 'motorcycle', 'pedestrian', 'trailer', 'truck'}

for exp in experiment_names_ca:
    # Create evaluate instance.
    results_dir = os.path.join('mmdetection3d', 'work_dirs', exp, 'pred_instances_3d', 'results_nusc.json')
    nusc_eval = DetectionEval(
        nusc,
        config=config_factory('detection_cvpr_2019'),
        result_path=results_dir,
        eval_set='val',
        output_dir=os.path.join(evaluation_dir, exp),
        verbose=1,
    )

    # Make it class-agnostic (everything becomes car). Only the ground-truth boxes are relabelled here.
    for gt_sample_boxes in nusc_eval.gt_boxes.boxes.values():
        for box in gt_sample_boxes:
            if box.detection_name in object_classes:
                box.detection_name = 'car'

    # Evaluate.
    nusc_eval.main(plot_examples=10, render_curves=1)

In [None]:
import json



# Keys of the true-positive error entries read from each metrics summary.
tp_error_keys = ('trans_err', 'scale_err', 'orient_err', 'vel_err')

for exp in experiment_names_ca:
    # Load results.
    summary_path = os.path.join(evaluation_dir, exp, 'metrics_summary.json')
    with open(summary_path, 'r') as file:
        data = json.load(file)

    # Get results. Only the 'car' entries are read.
    ap = data['mean_dist_aps']['car']
    car_errors = data['label_tp_errors']['car']
    ate, ase, aoe, ave = (car_errors[key] for key in tp_error_keys)
    aae = 1.0   # Fixed at 1.0 rather than read from the summary, so its NDS term is zero.

    # NDS: 5x AP plus the five (1 - error) terms clipped at zero, divided by 10.
    nds = (5*ap + max(1-ate, 0) + max(1-ase, 0) + max(1-aoe, 0) + max(1-ave, 0) + max(1-aae, 0)) / 10

    # Print results.
    print(exp)
    print(f'AP  = {np.round(100*ap,1)}')
    print(f'NDS = {np.round(100*nds,1)}')
    print(f'ATE = {np.round(ate,3)}')
    print(f'ASE = {np.round(ase,3)}')
    print(f'AOE = {np.round(aoe,3)}')
    print(f'AVE = {np.round(ave,3)}')
    print(f'AAE = {np.round(aae,3)}')
    print()

## Evaluate multi-class

In [None]:
# Class-agnostic runs whose detections get a class from the size prior; only the label source differs.
experiment_names_sp = [
    f'CenterPoint-Pillar0200__second-secfpn-8xb4-cyclic-20e-nus-3d__Class-Agnostic-Training__Labels-{label_source}__UNION-file'
    for label_source in (
        'HDBSCAN',   # Class-agnostic train - HDBSCAN.
        'UNION',     # Class-agnostic train - UNION.
    )
]

In [None]:
import copy
import pickle



# Get standard train.
train_file_dir = os.path.join(data_root, 'nuscenes_infos_train.pkl')
with open(train_file_dir, 'rb') as f:
    # NOTE(review): pickle.load can execute code embedded in the file; only load info files you generated yourself.
    data_train = pickle.load(f)
data_array__train = np.array(data_train['data_list']).copy()


# Fill shape dict (all shapes in train).
shape_dict = {idx:[] for idx in range(8)}   # 8 detection classes.
for sample_info in data_array__train:
    for instance in sample_info['instances']:
        label, bbox = instance['bbox_label'], instance['bbox_3d']
        dist = np.linalg.norm(bbox[:2], ord=2)   # Planar distance from the first two box coordinates.

        # Labels 0-4 are kept up to 50 and labels 5-7 up to 40; every other label is skipped.
        within_range = (0<=label<=4 and dist<=50) or (5<=label<=7 and dist<=40)
        if within_range:
            # Store (longer side, shorter side, footprint area) taken from bbox entries 3 and 4.
            l, w = max(bbox[3], bbox[4]), min(bbox[3], bbox[4])
            shape_dict[label].append((l, w, l*w))


# Get median values: per class, the box in the middle of the ordering by footprint area.
median_shape_dict = {}
class_names = list(data_train['metainfo']['categories'].keys())
for label, name in zip(shape_dict, class_names):
    dims        = np.array(shape_dict[label])
    area_order  = np.argsort(dims[:,2])
    median_dims = dims[area_order[len(dims)//2]]
    median_shape_dict[name] = median_dims.tolist()
print(median_shape_dict)

In [None]:
from nuscenes.eval.detection.evaluate import DetectionEval
from nuscenes.eval.common.config import config_factory



# Detection classes merged into each of the three evaluated classes (car, bicycle, pedestrian).
merged_class = {'car': 'car', 'truck': 'car', 'trailer': 'car', 'bus': 'car', 'construction_vehicle': 'car',
                'bicycle': 'bicycle', 'motorcycle': 'bicycle',
                'pedestrian': 'pedestrian'}

for exp in experiment_names_sp:
    # Create evaluate instance. Predictions are read from the class-agnostic run; output goes to a renamed folder.
    results_dir = os.path.join('mmdetection3d', 'work_dirs', exp, 'pred_instances_3d', 'results_nusc.json')
    name_parts  = exp.split('Class-Agnostic')
    new_exp     = name_parts[0] + 'Multi-Class-003' + name_parts[1].split('__UNION-file')[0] + '-plus-SP' + '__UNION-file'
    nusc_eval   = DetectionEval(
        nusc,
        config=config_factory('detection_cvpr_2019'),
        result_path=results_dir,
        eval_set='val',
        output_dir=os.path.join(evaluation_dir, new_exp),
        verbose=1,
    )

    # Make it multi-class with 3 classes (car, bicycle, pedestrian).
    for gt_sample_boxes in nusc_eval.gt_boxes.boxes.values():
        for box in gt_sample_boxes:
            if box.detection_name in merged_class:
                box.detection_name = merged_class[box.detection_name]

    # Assign each class-agnostic box a real class based on bounding box size.
    prototype_dims   = np.array(list(median_shape_dict.values()))[:,0:2]
    prototype_names  = list(median_shape_dict.keys())
    proto_l, proto_w = prototype_dims[:,0], prototype_dims[:,1]
    for pred_sample_boxes in nusc_eval.pred_boxes.boxes.values():
        for box in pred_sample_boxes:
            dims = box.size
            l, w = max(dims[0], dims[1]), min(dims[0], dims[1])

            # Boolean masks select, per prototype, the smaller (short_*) or larger (long_*) of the two extents.
            short_l = l*(proto_l>=l) + proto_l*(proto_l<l)
            short_w = w*(proto_w>=w) + proto_w*(proto_w<w)
            long_l  = proto_l*(proto_l>=l) + l*(proto_l<l)
            long_w  = proto_w*(proto_w>=w) + w*(proto_w<w)

            # NOTE(review): 'unions' is the product of the larger length and larger width, not area_a + area_b - intersection; confirm this is intended.
            intersects = short_l*short_w
            unions     = long_l*long_w
            ious       = intersects/unions

            idx            = np.argmax(ious)
            assigned_class = prototype_names[idx]

            if assigned_class in merged_class:
                box.detection_name = merged_class[assigned_class]

    # Evaluate.
    nusc_eval.main(plot_examples=10, render_curves=1)

In [None]:
import json



# Keys of the true-positive error entries read from each metrics summary.
tp_error_keys = ('trans_err', 'scale_err', 'orient_err', 'vel_err')

for exp in experiment_names_sp:
    # Load results from the renamed output folder of the size-prior evaluation.
    name_parts = exp.split('Class-Agnostic')
    new_exp    = name_parts[0] + 'Multi-Class-003' + name_parts[1].split('__UNION-file')[0] + '-plus-SP' + '__UNION-file'
    summary_path = os.path.join(evaluation_dir, new_exp, 'metrics_summary.json')
    with open(summary_path, 'r') as file:
        data = json.load(file)

    # Get results per class. The attribute error is fixed at 1.0 rather than read from the summary.
    class_aps, class_errors = data['mean_dist_aps'], data['label_tp_errors']

    ap_car = class_aps['car']
    ate_car, ase_car, aoe_car, ave_car = (class_errors['car'][key] for key in tp_error_keys)
    aae_car = 1.0

    ap_pedestrian = class_aps['pedestrian']
    ate_pedestrian, ase_pedestrian, aoe_pedestrian, ave_pedestrian = (class_errors['pedestrian'][key] for key in tp_error_keys)
    aae_pedestrian = 1.0

    ap_bicycle = class_aps['bicycle']
    ate_bicycle, ase_bicycle, aoe_bicycle, ave_bicycle = (class_errors['bicycle'][key] for key in tp_error_keys)
    aae_bicycle = 1.0

    # Average over the three classes.
    ap  = (ap_car+ap_pedestrian+ap_bicycle)/3
    ate = (ate_car+ate_pedestrian+ate_bicycle)/3
    ase = (ase_car+ase_pedestrian+ase_bicycle)/3
    aoe = (aoe_car+aoe_pedestrian+aoe_bicycle)/3
    ave = (ave_car+ave_pedestrian+ave_bicycle)/3
    aae = 1.0

    # NDS: 5x mAP plus the five (1 - error) terms clipped at zero, divided by 10.
    nds = (5*ap + max(1-ate, 0) + max(1-ase, 0) + max(1-aoe, 0) + max(1-ave, 0) + max(1-aae, 0)) / 10

    # Print results.
    print(exp)
    print(f'mAP    = {np.round(100*ap,1)}')
    print(f'NDS    = {np.round(100*nds,1)}')
    print(f'AP-car = {np.round(100*ap_car,1)}')
    print(f'AP-ped = {np.round(100*ap_pedestrian,1)}')
    print(f'AP-cyc = {np.round(100*ap_bicycle,1)}')
    print()

In [None]:
# Multi-class (3 classes) training runs to evaluate; only the label source differs between them.
experiment_names_mc = [
    f'CenterPoint-Pillar0200__second-secfpn-8xb4-cyclic-20e-nus-3d__Multi-Class-003-Training__Labels-{label_source}__UNION-file'
    for label_source in (
        'GT',            # Multi-class-003 train - Ground truth.
        'UNION-005pc',   # Multi-class-003 train - UNION-Xpc.
        'UNION-010pc',   # Multi-class-003 train - UNION-Xpc.
        'UNION-015pc',   # Multi-class-003 train - UNION-Xpc.
        'UNION-020pc',   # Multi-class-003 train - UNION-Xpc.
    )
]

In [None]:
import cv2
import matplotlib.pyplot as plt
import torch
from components.component_appearanceembedding import get_transform



# Hyperparameters of the appearance embedding.
appearanceembedding_hyperparameters = {
    'Step0__stride': 14,   # Unit: 1.
}


# Hyperparameters of the appearance clustering.
appearanceclustering_hyperparameters = {
    'Step0__feature_dim': 1024,                     # Unit: 1.
    'Step1__K__class_agnostic': 20,                 # Unit: 1.
    'Step1__max_iterations': 5000,                  # Unit: 1.
    'Step1__num_init': 10,                          # Unit: 1.
    'Step2__velocity_thres': 0.50,                  # Unit: m/s.
    'Step2__moving_fraction_thres': 0.050,          # Unit: 1.
    'Step4__K__multi_class_list': [5, 10, 15, 20],  # Unit: 1.
    'Step4__max_iterations': 5000,                  # Unit: 1.
    'Step4__num_init': 10,                          # Unit: 1.
}


# One hand-picked image region per real class. h1/h2/w1/w2 are multiplied by the stride to
# crop pixels and used unscaled to index the patch-token grid; 'embedding' is filled in later.
example_fields = ('scene_idx', 'sample_idx', 'sensor', 'scene_name', 'h1', 'h2', 'w1', 'w2')
example_rows = {
    'vehicle':    (509, 10, 'CAM_FRONT_LEFT', 'scene-0655', 31, 51, 28,  74),
    'pedestrian': ( 58, 17,  'CAM_BACK_LEFT', 'scene-0061', 25, 47, 98, 107),
    'cyclist':    (437,  0,      'CAM_FRONT', 'scene-0553', 30, 42, 92, 102),
}
example_img_dict = {
    class_name: {**dict(zip(example_fields, row)), 'embedding': None}
    for class_name, row in example_rows.items()
}


# Show the image crop that serves as the appearance example of each class.
stride = appearanceembedding_hyperparameters['Step0__stride']
for class_name, example in example_img_dict.items():
    print(f'Example for {class_name}:')

    sample_token  = scenes[example['scene_idx']]['sample_tokens'][example['sample_idx']]
    sample_record = nusc.get('sample', sample_token)

    cam_record = nusc.get('sample_data', sample_record['data'][example['sensor']])
    img_path   = os.path.join(nusc.dataroot, cam_record['filename'])
    img_bgr    = cv2.imread(img_path)
    if img_bgr is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise OSError(f'Camera image is missing or unreadable: {img_path}')
    img_cam = img_bgr[...,::-1]   # Reverse the channel axis (OpenCV loads BGR, matplotlib expects RGB).

    # h1/h2/w1/w2 are scaled by the stride to obtain the pixel bounds of the crop.
    rows = slice(stride*example['h1'], stride*example['h2'])
    cols = slice(stride*example['w1'], stride*example['w2'])
    plt.imshow(img_cam[rows, cols, :])
    plt.axis('off')
    plt.show()
    
    
# Define names, get model, and get transform.
stride       = appearanceembedding_hyperparameters['Step0__stride']
sensor_names = ['CAM_FRONT','CAM_FRONT_RIGHT','CAM_BACK_RIGHT','CAM_BACK','CAM_BACK_LEFT','CAM_FRONT_LEFT']

# Run on the GPU when torch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Fetch the 'dinov2_vitl14_reg' entry point through torch.hub and switch it to evaluation mode.
model = torch.hub.load('facebookresearch/dinov2', 'dinov2_vitl14_reg')
model = model.to(device)
model.eval()

# Image preprocessing supplied by the project's appearance-embedding component.
transform = get_transform()


# Compute embeddings: one mean patch embedding per example region.
for class_name, example in example_img_dict.items():
    sample_token  = scenes[example['scene_idx']]['sample_tokens'][example['sample_idx']]
    sample_record = nusc.get('sample', sample_token)

    cam_record = nusc.get('sample_data', sample_record['data'][example['sensor']])
    img_path   = os.path.join(nusc.dataroot, cam_record['filename'])
    img_bgr    = cv2.imread(img_path)
    if img_bgr is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise OSError(f'Camera image is missing or unreadable: {img_path}')
    img_cam = img_bgr[...,::-1]   # Reverse the channel axis (OpenCV loads BGR).

    imgs_cams_tensor = transform(img_cam).unsqueeze(dim=0).to(device)

    # Trim height and width down to whole multiples of the stride before the forward pass.
    h, w  = stride*(imgs_cams_tensor.shape[2]//stride), stride*(imgs_cams_tensor.shape[3]//stride)
    with torch.no_grad():
        features_cams = model.forward_features(imgs_cams_tensor[...,:h,:w])['x_norm_patchtokens']
        features_cams = features_cams.reshape(1,h//stride,w//stride,-1).cpu()

    # Average the patch embeddings inside the example region. The feature width is taken from
    # the tensor itself, so a backbone with a different embedding size is not silently mis-shaped.
    region_features = features_cams[0,example['h1']:example['h2'],example['w1']:example['w2'],:]
    example['embedding'] = region_features.reshape(-1,region_features.shape[-1]).mean(dim=0)
    
    
# Assign pseudo-classes to real classes.
intermediate_results_appearanceclustering_dir = os.path.join(intermediate_results_root, 'component_appearanceclustering_dinov2-vitl14-reg')

moving_fraction_thres = appearanceclustering_hyperparameters['Step2__moving_fraction_thres']

# The stacking order fixes the row index of each prototype; class_mapping translates that index.
vehicle_prototype    = example_img_dict['vehicle']['embedding']
pedestrian_prototype = example_img_dict['pedestrian']['embedding']
cyclist_prototype    = example_img_dict['cyclist']['embedding']
prototypes           = torch.stack((vehicle_prototype, pedestrian_prototype, cyclist_prototype))
class_mapping        = {0: 'car', 1: 'pedestrian', 2: 'bicycle'}

assign_dict = {}
for K__multi_class_Xpc in appearanceclustering_hyperparameters['Step4__K__multi_class_list']:
    # Load the stored K-means centers for this K.
    filename     = f'K-means-centers__K-multi-class{str(K__multi_class_Xpc).zfill(3)}_moving-fraction-thres0{str(int(10000*moving_fraction_thres)).zfill(4)}__multi-class.npy'
    centers_path = os.path.join(intermediate_results_appearanceclustering_dir, filename)
    centers      = torch.from_numpy(np.load(centers_path))

    # Every center is compared with every prototype; the most similar prototype decides its class.
    cos_sim     = torch.nn.functional.cosine_similarity(centers.unsqueeze(1), prototypes.unsqueeze(0), dim=2)
    max_indices = cos_sim.argmax(dim=1)
    nearest     = max_indices.tolist()

    assign_dict[K__multi_class_Xpc] = {
        f'pseudoclass{i:03d}': class_mapping[nearest[i]]
        for i in range(K__multi_class_Xpc)
    }

In [None]:
import json
from nuscenes.eval.detection.evaluate import DetectionEval
from nuscenes.eval.common.config import config_factory



# Attribute written onto a mapped detection, keyed by the real class it was mapped to.
attribute_of_class = {'car': 'vehicle.moving', 'bicycle': 'cycle.with_rider', 'pedestrian': 'pedestrian.moving'}

# Detection classes merged into each of the three evaluated classes (car, bicycle, pedestrian).
merged_class = {'car': 'car', 'truck': 'car', 'trailer': 'car', 'bus': 'car', 'construction_vehicle': 'car',
                'bicycle': 'bicycle', 'motorcycle': 'bicycle',
                'pedestrian': 'pedestrian'}

for exp in experiment_names_mc:
    pred_dir = os.path.join('mmdetection3d', 'work_dirs', exp, 'pred_instances_3d')

    if 'Labels-GT' in exp:
        # Ground-truth-trained runs already predict real classes; evaluate their results file directly.
        results_dir = os.path.join(pred_dir, 'results_nusc.json')
    else:
        # Map pseudo-classes to real classes.
        pseudo_results_dir = os.path.join(pred_dir, 'results_nusc.json')
        with open(pseudo_results_dir) as json_file:
            json_data = json.load(json_file)

        # Number of pseudo-classes, parsed from the experiment name (e.g. 'Labels-UNION-005pc' -> 5).
        K2 = int(exp.split('Labels-UNION-')[1].split('pc')[0])
        for sample_dict in json_data['results'].values():
            for sample in sample_dict:
                real_class = assign_dict[K2][sample['detection_name']]
                if real_class in attribute_of_class:
                    sample['detection_name'] = real_class
                    sample['attribute_name'] = attribute_of_class[real_class]

        # The mapped detections are written next to the original file, which stays untouched.
        results_dir = os.path.join(pred_dir, 'results_nusc__mapped.json')
        with open(results_dir, 'w') as f:
            json.dump(json_data, f)

    # Create evaluate instance.
    nusc_eval = DetectionEval(
        nusc,
        config=config_factory('detection_cvpr_2019'),
        result_path=results_dir,
        eval_set='val',
        output_dir=os.path.join(evaluation_dir, exp),
        verbose=1,
    )

    # Make it multi-class with 3 classes (car, bicycle, pedestrian).
    for gt_sample_boxes in nusc_eval.gt_boxes.boxes.values():
        for box in gt_sample_boxes:
            if box.detection_name in merged_class:
                box.detection_name = merged_class[box.detection_name]

    # Evaluate.
    nusc_eval.main(plot_examples=10, render_curves=1)

In [None]:
import json



# Keys of the true-positive error entries read from each metrics summary.
tp_error_keys = ('trans_err', 'scale_err', 'orient_err', 'vel_err')

for exp in experiment_names_mc:
    # Load results.
    summary_path = os.path.join(evaluation_dir, exp, 'metrics_summary.json')
    with open(summary_path, 'r') as file:
        data = json.load(file)

    # Get results per class. The attribute error is fixed at 1.0 rather than read from the summary.
    class_aps, class_errors = data['mean_dist_aps'], data['label_tp_errors']

    ap_car = class_aps['car']
    ate_car, ase_car, aoe_car, ave_car = (class_errors['car'][key] for key in tp_error_keys)
    aae_car = 1.0

    ap_pedestrian = class_aps['pedestrian']
    ate_pedestrian, ase_pedestrian, aoe_pedestrian, ave_pedestrian = (class_errors['pedestrian'][key] for key in tp_error_keys)
    aae_pedestrian = 1.0

    ap_bicycle = class_aps['bicycle']
    ate_bicycle, ase_bicycle, aoe_bicycle, ave_bicycle = (class_errors['bicycle'][key] for key in tp_error_keys)
    aae_bicycle = 1.0

    # Average over the three classes.
    ap  = (ap_car+ap_pedestrian+ap_bicycle)/3
    ate = (ate_car+ate_pedestrian+ate_bicycle)/3
    ase = (ase_car+ase_pedestrian+ase_bicycle)/3
    aoe = (aoe_car+aoe_pedestrian+aoe_bicycle)/3
    ave = (ave_car+ave_pedestrian+ave_bicycle)/3
    aae = 1.0

    # NDS: 5x mAP plus the five (1 - error) terms clipped at zero, divided by 10.
    nds = (5*ap + max(1-ate, 0) + max(1-ase, 0) + max(1-aoe, 0) + max(1-ave, 0) + max(1-aae, 0)) / 10

    # Print results.
    print(exp)
    print(f'mAP    = {np.round(100*ap,1)}')
    print(f'NDS    = {np.round(100*nds,1)}')
    print(f'AP-car = {np.round(100*ap_car,1)}')
    print(f'AP-ped = {np.round(100*ap_pedestrian,1)}')
    print(f'AP-cyc = {np.round(100*ap_bicycle,1)}')
    print()