In [1]:
%load_ext autoreload
%autoreload 2

# SequencePointcloudRegistrationPipeline

A module that implements an algorithm for optimizing the position and orientation of a vehicle in space based on a sequence of multimodal data using neural network methods.

In [2]:
import sys
from copy import copy
from pathlib import Path
from time import time

import faiss

from tqdm import tqdm

from hydra.utils import instantiate
import numpy as np
from omegaconf import OmegaConf
from scipy.spatial.transform import Rotation
import torch
from torch.utils.data import DataLoader
import pandas as pd


try:
    from geotransformer.utils.registration import compute_registration_error
    from geotransformer.utils.pointcloud import get_transform_from_rotation_translation
except ImportError:
    print("WARNING: geotransformer not installed, registration error will not be computed")

from opr.datasets import NCLTDataset
from opr.pipelines.place_recognition import PlaceRecognitionPipeline
from opr.pipelines.registration.pointcloud import SequencePointcloudRegistrationPipeline

In [3]:
def pose_to_matrix(pose):
    """From the 6D poses in the [tx ty tz qx qy qz qw] format to 4x4 pose matrices."""
    position = pose[:3]
    orientation_quat = pose[3:]
    rotation = Rotation.from_quat(orientation_quat)
    pose_matrix = np.eye(4)
    pose_matrix[:3,:3] = rotation.as_matrix()
    pose_matrix[:3,3] = position
    return pose_matrix


def compute_error(estimated_pose, gt_pose):
    """For the 6D poses in the [tx ty tz qx qy qz qw] format."""
    estimated_pose = pose_to_matrix(estimated_pose)
    gt_pose = pose_to_matrix(gt_pose)
    error_pose = np.linalg.inv(estimated_pose) @ gt_pose
    dist_error = np.sum(error_pose[:3, 3]**2) ** 0.5
    r = Rotation.from_matrix(error_pose[:3, :3])
    rotvec = r.as_rotvec()
    angle_error = (np.sum(rotvec**2)**0.5) * 180 / np.pi
    angle_error = abs(90 - abs(angle_error-90))
    return dist_error, angle_error

def compute_translation_error(gt_pose, pred_pose):
    """For the 4x4 pose matrices."""
    gt_trans = gt_pose[:3, 3]
    pred_trans = pred_pose[:3, 3]
    error = np.linalg.norm(gt_trans - pred_trans)
    return error

def compute_rotation_error(gt_pose, pred_pose):
    """For the 4x4 pose matrices."""
    gt_rot = Rotation.from_matrix(gt_pose[:3, :3])
    pred_rot = Rotation.from_matrix(pred_pose[:3, :3])
    error = Rotation.inv(gt_rot) * pred_rot
    error = error.as_euler('xyz', degrees=True)
    error = np.linalg.norm(error)
    return error

def compute_absolute_pose_error(gt_pose, pred_pose):
    """For the 4x4 pose matrices."""
    rotation_error = compute_rotation_error(gt_pose, pred_pose)
    translation_error = compute_translation_error(gt_pose, pred_pose)
    return rotation_error, translation_error

You can **download the dataset**:

- Kaggle:
  - [NCLT_OpenPlaceRecognition](https://www.kaggle.com/datasets/creatorofuniverses/nclt-iprofi-hack-23)
- Hugging Face:
  - [NCLT_OpenPlaceRecognition](https://huggingface.co/datasets/OPR-Project/NCLT_OpenPlaceRecognition)

To **download the model weights**, run the following command:

```bash
# place recognition weights
wget -O ../../weights/place_recognition/multi-image_lidar_late-fusion_nclt.pth https://huggingface.co/OPR-Project/PlaceRecognition-NCLT/resolve/main/multi-image_lidar_late-fusion_nclt.pth

# registration weights
wget -O ../../weights/registration/geotransformer_kitti.pth https://huggingface.co/OPR-Project/Registration-KITTI/resolve/main/geotransformer_kitti.pth
```


In [4]:
# place recognition weights
!wget -O ../../weights/place_recognition/multi-image_lidar_late-fusion_nclt.pth https://huggingface.co/OPR-Project/PlaceRecognition-NCLT/resolve/main/multi-image_lidar_late-fusion_nclt.pth

# registration weights
!wget -O ../../weights/registration/geotransformer_kitti.pth https://huggingface.co/OPR-Project/Registration-KITTI/resolve/main/geotransformer_kitti.pth

--2025-04-18 12:35:02--  https://huggingface.co/OPR-Project/PlaceRecognition-NCLT/resolve/main/multi-image_lidar_late-fusion_nclt.pth
Resolving huggingface.co (huggingface.co)... 18.239.50.49, 18.239.50.103, 18.239.50.16, ...
Connecting to huggingface.co (huggingface.co)|18.239.50.49|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cdn-lfs-us-1.hf.co/repos/e8/30/e8306844a097b119f688c0cfcf564a9f584f52c28b0d3c5b11e560cb0c3e7eeb/db4f7efcbaf0acd445381074b1a78fba7c9e0972e7996994ed9db95fd4e0a243?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27multi-image_lidar_late-fusion_nclt.pth%3B+filename%3D%22multi-image_lidar_late-fusion_nclt.pth%22%3B&Expires=1744983302&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTc0NDk4MzMwMn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmhmLmNvL3JlcG9zL2U4LzMwL2U4MzA2ODQ0YTA5N2IxMTlmNjg4YzBjZmNmNTY0YTlmNTg0ZjUyYzI4YjBkM2M1YjExZTU2MGNiMGMzZTdlZWIvZGI0ZjdlZmNiYWYwYWNkNDQ1Mz

In [5]:
DATASET_ROOT = "/home/docker_opr/Datasets/OpenPlaceRecognition/NCLT_preprocessed"  # change to your dataset path

SENSOR_SUITE = ["image_Cam5", "image_Cam2", "mask_Cam5", "mask_Cam2", "pointcloud_lidar"]

BATCH_SIZE = 32
NUM_WORKERS = 4
DEVICE = "cuda"

REGISTRATION_MODEL_CONFIG_PATH = "../../configs/model/registration/geotransformer_kitti.yaml"
REGISTRATION_WEIGHTS_PATH = "../../weights/registration/geotransformer_kitti.pth"

MODEL_CONFIG_PATH = "../../configs/model/place_recognition/multi-image_lidar_late-fusion.yaml"
WEIGHTS_PATH = "../../weights/place_recognition/multi-image_lidar_late-fusion_nclt.pth"

In [6]:
TRACK_LIST = sorted([str(subdir.name) for subdir in Path(DATASET_ROOT).iterdir() if subdir.is_dir()])
print(f"Found {len(TRACK_LIST)} tracks")
print(TRACK_LIST)

print("WARNING: track list limited")
TRACK_LIST = TRACK_LIST[:2]
print(TRACK_LIST)


Found 10 tracks
['2012-01-08', '2012-01-22', '2012-02-12', '2012-02-18', '2012-03-31', '2012-05-26', '2012-08-04', '2012-10-28', '2012-11-04', '2012-12-01']
['2012-01-08', '2012-01-22']


## Init model

In [7]:
model_config = OmegaConf.load(MODEL_CONFIG_PATH)
model = instantiate(model_config)
model.load_state_dict(torch.load(WEIGHTS_PATH))
model = model.to(DEVICE)
model.eval();



## Calculate descriptors for databases

In [8]:
dataset = NCLTDataset(
    dataset_root=DATASET_ROOT,
    subset="test",
    data_to_load=SENSOR_SUITE,
    pointcloud_quantization_size=0.5,
    max_point_distance=None,
    exclude_dynamic=True,
    dynamic_labels=[19] #Person
)

dataloader = DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    collate_fn=dataset.collate_fn,
)


In [9]:
descriptors = []
with torch.no_grad():
    for batch in tqdm(dataloader):
        batch = {k: v.to(DEVICE) for k, v in batch.items()}
        final_descriptor = model(batch)["final_descriptor"]
        descriptors.append(final_descriptor.detach().cpu().numpy())

descriptors = np.concatenate(descriptors, axis=0)

  0%|          | 0/86 [00:00<?, ?it/s]

100%|██████████| 86/86 [01:02<00:00,  1.37it/s]


### Saving database indexes

In [10]:
dataset_df = dataset.dataset_df

for track, indices in dataset_df.groupby("track").groups.items():
    track_descriptors = descriptors[indices]
    track_index = faiss.IndexFlatL2(track_descriptors.shape[1])
    track_index.add(track_descriptors)
    faiss.write_index(track_index, f"{DATASET_ROOT}/{track}/index.faiss")
    print(f"Saved index {DATASET_ROOT}/{track}/index.faiss")


Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/NCLT_preprocessed/2012-01-08/index.faiss
Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/NCLT_preprocessed/2012-01-22/index.faiss
Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/NCLT_preprocessed/2012-02-12/index.faiss
Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/NCLT_preprocessed/2012-02-18/index.faiss
Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/NCLT_preprocessed/2012-03-31/index.faiss
Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/NCLT_preprocessed/2012-05-26/index.faiss
Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/NCLT_preprocessed/2012-08-04/index.faiss
Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/NCLT_preprocessed/2012-10-28/index.faiss
Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/NCLT_preprocessed/2012-11-04/index.faiss
Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/NCLT_preprocessed/2012-12-01/index.faiss


# Init GeoTransformer

In [11]:
geotransformer = instantiate(OmegaConf.load(REGISTRATION_MODEL_CONFIG_PATH))

registration_pipe = SequencePointcloudRegistrationPipeline(
    model=geotransformer,
    model_weights_path=REGISTRATION_WEIGHTS_PATH,
    device="cuda",  # the GeoTransformer currently only supports CUDA
    voxel_downsample_size=0.5,  # recommended for geotransformer_kitti configuration
)

# Test

In [12]:
from scipy.spatial.transform import Rotation as R

In [13]:
PR_THRESHOLD = 25.0

test_csv = pd.read_csv(Path(DATASET_ROOT) / "test.csv", index_col=0)

all_recalls = []
all_mean_dist_errors = []
all_mean_angle_errors = []
all_median_dist_errors = []
all_median_angle_errors = []
all_times = []

for db_track in TRACK_LIST:
    pipe = PlaceRecognitionPipeline(
        database_dir=Path(DATASET_ROOT) / db_track,
        model=model,
        model_weights_path=WEIGHTS_PATH,
        device=DEVICE,
    )
    for query_track in TRACK_LIST:
        if db_track == query_track:
            continue
        query_dataset = copy(dataset)
        query_dataset.dataset_df = query_dataset.dataset_df[query_dataset.dataset_df["track"] == query_track]
        db_dataset = copy(dataset)
        db_dataset.dataset_df = db_dataset.dataset_df[db_dataset.dataset_df["track"] == db_track]
        query_df = pd.read_csv(Path(DATASET_ROOT) / query_track / "track.csv", index_col=0)

        # filter out only test subset
        query_df = query_df[query_df['image'].isin(query_dataset.dataset_df['image'])].reset_index(drop=True)
        # and do not forget to change the database_df in the pipeline
        pipe.database_df = pipe.database_df[pipe.database_df['image'].isin(test_csv['image'])].reset_index(drop=True)

        pr_matches = []
        dist_errors = []
        angle_errors = []
        times = []

        true_pairs = []
        false_pairs = []

        # STAGE 1 - place recognition
        db_matches = {}
        for q_i in tqdm(range(1, len(query_dataset)), total=len(query_dataset)-1):
            query = query_dataset[q_i]
            query["pose"] = query_df.iloc[q_i][["tx", "ty", "tz", "qx", "qy", "qz", "qw"]].to_numpy()
            output = pipe.infer(query)
            db_matches[q_i] = output["idx"]
        torch.cuda.empty_cache()
        # STAGE 2 - registration
        for q_i in tqdm(range(1, len(query_dataset)), total=len(query_dataset)-1):
            query = query_dataset[q_i]
            query_seq = [query_dataset[q_i]["pointcloud_lidar_coords"]]
            output_idx = db_matches[q_i]
            db_match = db_dataset[output_idx]
            torch.cuda.empty_cache()
            db_match["pose"] = pipe.database_df.iloc[output_idx][["tx", "ty", "tz", "qx", "qy", "qz", "qw"]].to_numpy()
            db_pose = pose_to_matrix(db_match["pose"])
            db_pc = db_match["pointcloud_lidar_coords"]
            t = time()
            estimated_transformation = registration_pipe.infer(query_seq, db_pc)
            times.append(time() - t)
            optimized_pose = db_pose @ estimated_transformation
            torch.cuda.empty_cache()
            query["pose"] = query_df.iloc[q_i][["tx", "ty", "tz", "qx", "qy", "qz", "qw"]].to_numpy()
            angle_error, dist_error = compute_absolute_pose_error(optimized_pose, pose_to_matrix(query["pose"]))

            pr_matches.append(dist_error < PR_THRESHOLD)
            dist_errors.append(dist_error)
            angle_errors.append(angle_error)

        all_recalls.append(np.mean(pr_matches))
        all_mean_dist_errors.append(np.mean(dist_errors))
        all_mean_angle_errors.append(np.mean(angle_errors))
        all_median_dist_errors.append(np.median(dist_errors))
        all_median_angle_errors.append(np.median(angle_errors))
        all_times.extend(times[1:]) # drop the first iteration cause it is always slower

100%|██████████| 274/274 [00:26<00:00, 10.16it/s]
100%|██████████| 274/274 [04:50<00:00,  1.06s/it]
100%|██████████| 330/330 [00:34<00:00,  9.51it/s]
100%|██████████| 330/330 [05:31<00:00,  1.00s/it]


In [14]:
results_str = f"""Average Recall@1: {np.mean(all_recalls)*100:.2f}
Average mean dist error: {np.mean(all_mean_dist_errors):.2f}
Average mean angle error: {np.mean(all_mean_angle_errors):.2f}
Average median dist error: {np.mean(all_median_dist_errors):.2f}
Average median angle error: {np.mean(all_median_angle_errors):.2f}
"""

In [15]:
print(results_str)

Average Recall@1: 92.32
Average mean dist error: 9.03
Average mean angle error: 14.71
Average median dist error: 0.23
Average median angle error: 2.46

