In [1]:
import os
import copy
import random
from time import time

import cv2
import faiss
import torch
import numpy as np
from tqdm import tqdm
from pathlib import Path
from omegaconf import OmegaConf
from hydra.utils import instantiate
from torch.utils.data import DataLoader
from scipy.spatial.transform import Rotation
from geotransformer.utils.pointcloud import get_transform_from_rotation_translation

from opr.datasets.itlp import ITLPCampus
from opr.pipelines.localization import ArucoLocalizationPipeline
from opr.pipelines.place_recognition import PlaceRecognitionPipeline
from opr.pipelines.registration import PointcloudRegistrationPipeline

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [2]:
import warnings
warnings.filterwarnings("ignore")

You can download the ITLP Campus Outdoor dataset from either of the following sources:

- Kaggle:
  - [ITLP Campus Outdoor](https://www.kaggle.com/datasets/alexandermelekhin/itlp-campus-outdoor)
- Hugging Face:
  - [ITLP Campus Outdoor](https://huggingface.co/datasets/OPR-Project/ITLP-Campus-Outdoor)


In [None]:
# Dataset location; every sub-directory of DATASET_ROOT is treated as one track.
DATASET_ROOT = "/home/docker_opr/Datasets/OpenPlaceRecognition/itlp_campus_outdoor"
# Sensors requested from the dataset.
SENSOR_SUITE = ["front_cam", "back_cam", "lidar"]
# Track directory names in sorted order, with the first five skipped; the rest are the test tracks.
TRACK_LIST = sorted(
    entry.name for entry in Path(DATASET_ROOT).iterdir() if entry.is_dir()
)[5:]

print("Test track list:", TRACK_LIST, sep="\n")

# DataLoader settings and the device the models are moved to.
BATCH_SIZE = 4
NUM_WORKERS = 4
DEVICE = "cuda:0"

Test track list:
['05_2023-08-15-day', '06_2023-08-18-night', '07_2023-10-04-day', '08_2023-10-11-night']


In [4]:
def set_seed(seed: int = 18) -> None:
    """Seed the random number generators used in this notebook.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and CUDA), sets the
    cuDNN ``deterministic``/``benchmark`` flags, stores the seed in the
    ``PYTHONHASHSEED`` environment variable and prints a confirmation line.

    Args:
        seed: Seed value applied to every generator.
    """
    # Record the hash seed in the environment.
    os.environ["PYTHONHASHSEED"] = str(seed)

    random.seed(seed)
    np.random.seed(seed)
    for seed_fn in (torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)

    # The cuDNN backend needs both flags set: no auto-tuning, deterministic kernels only.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    print(f"Random seed set as {seed}")

set_seed()

Random seed set as 18


In [5]:
def pose_to_matrix(pose):
    """Convert a pose in the [tx ty tz qx qy qz qw] format into a 4x4 pose matrix.

    Args:
        pose: Sequence of seven values; the first three are the translation and
            the remaining four are a quaternion in (x, y, z, w) order.

    Returns:
        A 4x4 NumPy array whose upper-left 3x3 block is the rotation matrix and
        whose last column holds the translation.
    """
    translation, quaternion = pose[:3], pose[3:]
    rotation_matrix = Rotation.from_quat(quaternion).as_matrix()

    matrix = np.eye(4)
    matrix[:3, 3] = translation
    matrix[:3, :3] = rotation_matrix
    return matrix

def compute_error(estimated_pose, gt_pose):
    """Compute rotation and translation errors between two poses.

    Both poses are given in the [tx ty tz qx qy qz qw] format.

    Args:
        estimated_pose: Pose produced by the pipeline.
        gt_pose: Ground-truth pose.

    Returns:
        Tuple ``(angle_error, dist_error)``: the folded rotation angle in degrees
        and the Euclidean norm of the relative translation.
    """
    estimated_matrix = pose_to_matrix(estimated_pose)
    gt_matrix = pose_to_matrix(gt_pose)
    # Relative transform that maps the estimated pose onto the ground truth.
    relative = np.linalg.inv(estimated_matrix) @ gt_matrix

    translation = relative[:3, 3]
    dist_error = np.sum(translation**2) ** 0.5

    rotvec = Rotation.from_matrix(relative[:3, :3]).as_rotvec()
    angle_deg = (np.sum(rotvec**2) ** 0.5) * 180 / np.pi
    # Fold the angle around 90 degrees: an angle a in [0, 180] becomes min(a, 180 - a),
    # so a 180-degree flip is reported as 0.
    angle_error = abs(90 - abs(angle_deg - 90))
    return angle_error, dist_error

def inference():
    """Evaluate ArUco-assisted localization for every ordered (database, query) track pair.

    The function takes no arguments and relies on notebook globals:
    ``TRACK_LIST``, ``DATASET_ROOT``, ``DEVICE``, ``pr_model``, ``PR_WEIGHTS_PATH``,
    ``reg_model``, ``REGISTRATION_WEIGHTS_PATH``, ``aruco_metadata``,
    ``camera_metadata``, ``test_query_dataset``, ``test_db_dataset`` and
    ``RECALL_THRESHOLD``. Per-pair metrics are appended to the global ``all_*_aruco``
    lists, so those lists must be reset before each evaluation run.

    For each pair with different database and query tracks, every query sample is
    localized, timed, and compared against its ground-truth pose from the query
    dataframe. A query counts as correct when its translation error is below
    ``RECALL_THRESHOLD``.

    Returns:
        A multi-line summary string with the averages over all accumulated pairs.
        If no pair was evaluated the averages are formatted as ``nan``.
    """
    for db_track in TRACK_LIST:
        pr_pipe = PlaceRecognitionPipeline(
            database_dir=Path(DATASET_ROOT) / db_track,
            model=pr_model,
            model_weights_path=PR_WEIGHTS_PATH,
            device=DEVICE,
        )
        for query_track in TRACK_LIST:
            # A track is never queried against itself.
            if db_track == query_track:
                continue

            reg_pipe = PointcloudRegistrationPipeline(
                model=reg_model,
                model_weights_path=REGISTRATION_WEIGHTS_PATH,
                device=DEVICE,
                voxel_downsample_size=0.3,
                num_points_downsample=8192,
            )
            loc_pipe = ArucoLocalizationPipeline(
                place_recognition_pipeline=pr_pipe,
                registration_pipeline=reg_pipe,
                precomputed_reg_feats=True,
                pointclouds_subdir="lidar",
                aruco_metadata=aruco_metadata,
                camera_metadata=camera_metadata,
                fastest=True,
                use_first_marker=True
            )

            # Work on copies so the shared datasets are not modified by the per-track filtering.
            query_dataset = copy.deepcopy(test_query_dataset)
            query_dataset.dataset_df = query_dataset.dataset_df[query_dataset.dataset_df["track"] == query_track].reset_index(drop=True)
            query_df = query_dataset.dataset_df
            # Specific for ArUco: replace the dataset's image transform with the identity.
            query_dataset.image_transform = lambda x: x

            db_dataset = copy.deepcopy(test_db_dataset)
            db_dataset.dataset_df = db_dataset.dataset_df[db_dataset.dataset_df["track"] == db_track].reset_index(drop=True)
            db_df = db_dataset.dataset_df
            # Specific for ArUco: replace the dataset's image transform with the identity.
            db_dataset.image_transform = lambda x: x
            warmup_sample = db_dataset[0]

            # Point both pipelines at the dataframe of the current database track.
            loc_pipe.pr_pipe.database_df = db_df
            loc_pipe.database_df = db_df

            reg_matches_aruco = []
            reg_rotation_errors_aruco = []
            reg_translation_errors_aruco = []
            times_aruco = []

            # Fake launch so the slow first call of the torch model is not timed.
            _ = loc_pipe.loc_part(warmup_sample)

            for q_i, query in tqdm(enumerate(query_dataset)):
                query_pose = query_df.iloc[q_i][["tx", "ty", "tz", "qx", "qy", "qz", "qw"]].to_numpy()
                start = time()
                output = loc_pipe.infer(query)
                # Wait for queued GPU work so the measured time covers the whole inference.
                torch.cuda.current_stream().synchronize()
                step_time = time() - start
                times_aruco.append(step_time)

                # Prefer the ArUco-based pose; fall back to the place-recognition pose when it is missing.
                estimated_pose = output["pose_by_aruco"] if output["pose_by_aruco"] is not None else output["pose_by_place_recognition"]

                reg_rotation_error_aruco, reg_translation_error_aruco = compute_error(estimated_pose, query_pose)
                reg_correct_aruco = reg_translation_error_aruco < RECALL_THRESHOLD
                reg_matches_aruco.append(reg_correct_aruco)
                reg_rotation_errors_aruco.append(reg_rotation_error_aruco)
                reg_translation_errors_aruco.append(reg_translation_error_aruco)

            # Per-pair statistics; a pair without queries contributes NaN, which the nan-aware
            # averages in the summary skip.
            all_reg_recalls_aruco.append(np.nanmean(reg_matches_aruco))
            all_mean_reg_rotation_errors_aruco.append(np.nanmean(reg_rotation_errors_aruco))
            all_mean_reg_translation_errors_aruco.append(np.nanmean(reg_translation_errors_aruco))
            all_median_reg_rotation_errors_aruco.append(np.nanmedian(reg_rotation_errors_aruco))
            all_median_reg_translation_errors_aruco.append(np.nanmedian(reg_translation_errors_aruco))
            all_times_aruco.extend(times_aruco)

    # Build the summary once, after all pairs are processed. Doing it outside the loop also keeps
    # the name defined when TRACK_LIST is empty. The text layout is kept exactly as before.
    results_str_aruco = f"""Average REG Recall@1:  {np.nanmean(all_reg_recalls_aruco)*100:.2f}
        Average Mean RRE REG:  {np.nanmean(all_mean_reg_rotation_errors_aruco):.2f}
        Average Mean RTE REG:  {np.nanmean(all_mean_reg_translation_errors_aruco):.2f}
        Average Median RRE REG:  {np.nanmean(all_median_reg_rotation_errors_aruco):.2f}
        Average Median RTE REG:  {np.nanmean(all_median_reg_translation_errors_aruco):.2f}
        Mean inference time:     {np.nanmean(all_times_aruco)*1000:.2f} ms
        """
    return results_str_aruco

In [6]:
# Query frames are read from "aruco_test.csv", database frames from "test.csv".
test_query_dataset = ITLPCampus(dataset_root=DATASET_ROOT, subset="test", csv_file="aruco_test.csv", sensors=SENSOR_SUITE)
test_db_dataset = ITLPCampus(dataset_root=DATASET_ROOT, subset="test", csv_file="test.csv", sensors=SENSOR_SUITE)

# Keep only database frames from the test tracks. reset_index() renumbers the rows 0..N-1
# and keeps the previous labels in an "index" column.
test_db_dataset.dataset_df = (
    test_db_dataset.dataset_df
    .loc[test_db_dataset.dataset_df["track"].isin(TRACK_LIST)]
    .reset_index()
)

# Unshuffled loader over the database frames, so batches follow the dataframe row order.
dataloader = DataLoader(
    dataset=test_db_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    collate_fn=test_db_dataset.collate_fn,
)

In [7]:
# Calibration of the two cameras, passed to ArucoLocalizationPipeline.
camera_metadata = {
    # 3x3 intrinsic matrix of the front camera.
    "front_cam_intrinsics": [[683.6199340820312, 0.0, 615.1160278320312],
                             [0.0, 683.6199340820312, 345.32354736328125],
                             [0.0, 0.0, 1.0]],
    # All distortion coefficients are zero.
    "front_cam_distortion": [0.0, 0.0, 0.0, 0.0, 0.0],
    # Seven-value camera-to-baselink transform; presumably [tx ty tz qx qy qz qw] - TODO confirm.
    "front_cam2baselink": [-0.2388, 0.06, 0.75, -0.5, 0.49999999999755174, -0.5, 0.5000000000024483],
    # 3x3 intrinsic matrix of the back camera.
    "back_cam_intrinsics": [[910.4178466796875, 0.0, 648.44140625],
                            [0.0, 910.4166870117188, 354.0118408203125],
                            [0.0, 0.0, 1.0]],
    # All distortion coefficients are zero.
    "back_cam_distortion": [0.0, 0.0, 0.0, 0.0, 0.0],
    # Seven-value camera-to-baselink transform; presumably [tx ty tz qx qy qz qw] - TODO confirm.
    "back_cam2baselink": [-0.3700594606670597, -0.006647301538708517, 0.7427924789987381, -0.4981412857230513, -0.4907829006275322, 0.5090864815669471, 0.5018149813673275]
}

# Description of the ArUco markers, passed to ArucoLocalizationPipeline.
aruco_metadata = {
    # OpenCV dictionary the markers belong to.
    "aruco_type": cv2.aruco.DICT_4X4_250,
    # Marker size; presumably the side length in metres - TODO confirm.
    "aruco_size": 0.2,
    # Ground-truth pose per marker id; seven values each, presumably [tx ty tz qx qy qz qw] - TODO confirm.
    "aruco_gt_pose_by_id": {
        0: [-23.76325316, 16.94296093, 1.51796168, 0.25454437, 0.65070725, 0.6526984, 0.29286864],
        2: [-8.81475372, -12.47510287, 1.75787052, 0.61022095, -0.21494468, -0.21004688, 0.73301397],
    }
}

In [8]:
# Registration model: built from its config file, then loaded with the pretrained weights.
REGISTRATION_MODEL_CONFIG_PATH = "../../configs/model/registration/hregnet_light_feats.yaml"
REGISTRATION_WEIGHTS_PATH = "../../weights/registration/hregnet_light_feats_nuscenes.pth"
reg_model_config = OmegaConf.load(REGISTRATION_MODEL_CONFIG_PATH)
reg_model = instantiate(reg_model_config)
# Load the checkpoint onto the CPU first: without map_location, torch.load restores tensors
# onto the device they were saved from, which fails when that device does not exist here.
# NOTE: torch.load unpickles the file - only load checkpoints from a trusted source.
reg_model.load_state_dict(torch.load(REGISTRATION_WEIGHTS_PATH, map_location="cpu"))
reg_model = reg_model.to(DEVICE)
reg_model.eval();  # trailing ';' suppresses the model repr in the cell output

In [9]:
# NCLT: evaluate the place-recognition model with the "nclt" weights.

# Reset the accumulators that inference() appends to, so this run starts from scratch.
all_reg_recalls_aruco = []
all_mean_reg_rotation_errors_aruco = []
all_mean_reg_translation_errors_aruco = []
all_median_reg_rotation_errors_aruco = []
all_median_reg_translation_errors_aruco = []
all_times_aruco = []
# A query counts as correct when its translation error is below this value
# (presumably metres - TODO confirm).
RECALL_THRESHOLD = 25.0

PR_MODEL_CONFIG_PATH = "../../configs/model/place_recognition/multi-image_lidar_late-fusion.yaml"
PR_WEIGHTS_PATH = "../../weights/place_recognition/multi-image_lidar_late-fusion_nclt.pth"
pr_model_config = OmegaConf.load(PR_MODEL_CONFIG_PATH)
pr_model = instantiate(pr_model_config)
# Load the checkpoint onto the CPU first: without map_location, torch.load restores tensors
# onto the device they were saved from, which fails when that device does not exist here.
# NOTE: torch.load unpickles the file - only load checkpoints from a trusted source.
pr_model.load_state_dict(torch.load(PR_WEIGHTS_PATH, map_location="cpu"))
pr_model = pr_model.to(DEVICE)
pr_model.eval();  # trailing ';' suppresses the model repr in the cell output

In [10]:
# Compute one descriptor per database frame with the currently loaded PR model.
descriptors = []
with torch.no_grad():
    for batch in tqdm(dataloader):
        batch = {k: v.to(DEVICE) for k, v in batch.items()}
        final_descriptor = pr_model(batch)["final_descriptor"]
        descriptors.append(final_descriptor.detach().cpu().numpy())
descriptors = np.concatenate(descriptors, axis=0)

dataset_df = test_db_dataset.dataset_df
# GroupBy.indices yields positional row numbers, which is what indexing the descriptor array
# needs. GroupBy.groups yields index labels, which only match while the dataframe index is
# exactly 0..N-1.
for track, indices in dataset_df.groupby("track").indices.items():
    track_descriptors = descriptors[indices]
    # Flat L2 index over this track's descriptors, written into the track's directory.
    track_index = faiss.IndexFlatL2(track_descriptors.shape[1])
    track_index.add(track_descriptors)
    faiss.write_index(track_index, f"{DATASET_ROOT}/{track}/index.faiss")
    print(f"Saved index {DATASET_ROOT}/{track}/index.faiss")

100%|██████████| 153/153 [00:11<00:00, 13.29it/s]

Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/itlp_campus_outdoor_part2/05_2023-08-15-day/index.faiss
Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/itlp_campus_outdoor_part2/06_2023-08-18-night/index.faiss
Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/itlp_campus_outdoor_part2/07_2023-10-04-day/index.faiss
Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/itlp_campus_outdoor_part2/08_2023-10-11-night/index.faiss





In [11]:

# Run the evaluation with the PR model, index files and accumulators set up in the preceding cells.
print(inference())

1it [00:00,  8.05it/s]

Detect Aruco with id [2] on image_front_cam
Utilize Aruco with id [2] on image_front_cam for pose estimation due min distanse


2it [00:00,  4.95it/s]
0it [00:00, ?it/s]
3it [00:00,  5.68it/s]

Detect Aruco with id [2] on image_back_cam
Utilize Aruco with id [2] on image_back_cam for pose estimation due min distanse



2it [00:00,  8.62it/s]


Detect Aruco with id [2] on image_back_cam
Utilize Aruco with id [2] on image_back_cam for pose estimation due min distanse


0it [00:00, ?it/s]
3it [00:00,  6.67it/s]

Detect Aruco with id [2] on image_back_cam
Utilize Aruco with id [2] on image_back_cam for pose estimation due min distanse



2it [00:00,  8.06it/s]


Detect Aruco with id [2] on image_back_cam
Utilize Aruco with id [2] on image_back_cam for pose estimation due min distanse


2it [00:00,  7.99it/s]

Detect Aruco with id [2] on image_front_cam
Utilize Aruco with id [2] on image_front_cam for pose estimation due min distanse



3it [00:00,  6.74it/s]

Detect Aruco with id [2] on image_back_cam
Utilize Aruco with id [2] on image_back_cam for pose estimation due min distanse



2it [00:00,  7.63it/s]


Detect Aruco with id [2] on image_back_cam
Utilize Aruco with id [2] on image_back_cam for pose estimation due min distanse


2it [00:00,  7.99it/s]

Detect Aruco with id [2] on image_front_cam
Utilize Aruco with id [2] on image_front_cam for pose estimation due min distanse



0it [00:00, ?it/s]

Average REG Recall@1:  100.00
        Average Mean RRE REG:  16.32
        Average Mean RTE REG:  4.81
        Average Median RRE REG:  15.89
        Average Median RTE REG:  4.73
        Mean inference time:     95.60 ms
        





In [12]:
# MIPT finetune: evaluate the place-recognition model with the "itlp-finetune" weights.

# Reset the accumulators that inference() appends to, so results of an earlier run are not mixed in.
all_reg_recalls_aruco = []
all_mean_reg_rotation_errors_aruco = []
all_mean_reg_translation_errors_aruco = []
all_median_reg_rotation_errors_aruco = []
all_median_reg_translation_errors_aruco = []
all_times_aruco = []
# A query counts as correct when its translation error is below this value
# (presumably metres - TODO confirm).
RECALL_THRESHOLD = 25.0

PR_MODEL_CONFIG_PATH = "../../configs/model/place_recognition/multi-image_lidar_late-fusion.yaml"
PR_WEIGHTS_PATH = "../../weights/place_recognition/multi-image_lidar_late-fusion_itlp-finetune.pth"
pr_model_config = OmegaConf.load(PR_MODEL_CONFIG_PATH)
pr_model = instantiate(pr_model_config)
# Load the checkpoint onto the CPU first: without map_location, torch.load restores tensors
# onto the device they were saved from, which fails when that device does not exist here.
# NOTE: torch.load unpickles the file - only load checkpoints from a trusted source.
pr_model.load_state_dict(torch.load(PR_WEIGHTS_PATH, map_location="cpu"))
pr_model = pr_model.to(DEVICE)
pr_model.eval();  # trailing ';' suppresses the model repr in the cell output

In [13]:
# Compute one descriptor per database frame with the currently loaded PR model.
descriptors = []
with torch.no_grad():
    for batch in tqdm(dataloader):
        batch = {k: v.to(DEVICE) for k, v in batch.items()}
        final_descriptor = pr_model(batch)["final_descriptor"]
        descriptors.append(final_descriptor.detach().cpu().numpy())
descriptors = np.concatenate(descriptors, axis=0)

dataset_df = test_db_dataset.dataset_df
# GroupBy.indices yields positional row numbers, which is what indexing the descriptor array
# needs. GroupBy.groups yields index labels, which only match while the dataframe index is
# exactly 0..N-1.
for track, indices in dataset_df.groupby("track").indices.items():
    track_descriptors = descriptors[indices]
    # Flat L2 index over this track's descriptors; overwrites any index.faiss already in the
    # track's directory.
    track_index = faiss.IndexFlatL2(track_descriptors.shape[1])
    track_index.add(track_descriptors)
    faiss.write_index(track_index, f"{DATASET_ROOT}/{track}/index.faiss")
    print(f"Saved index {DATASET_ROOT}/{track}/index.faiss")

100%|██████████| 153/153 [00:08<00:00, 18.39it/s]

Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/itlp_campus_outdoor_part2/05_2023-08-15-day/index.faiss
Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/itlp_campus_outdoor_part2/06_2023-08-18-night/index.faiss
Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/itlp_campus_outdoor_part2/07_2023-10-04-day/index.faiss
Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/itlp_campus_outdoor_part2/08_2023-10-11-night/index.faiss





In [14]:
# Run the evaluation with the PR model, index files and accumulators set up in the preceding cells.
print(inference())

0it [00:00, ?it/s]

Detect Aruco with id [2] on image_front_cam
Utilize Aruco with id [2] on image_front_cam for pose estimation due min distanse


2it [00:00,  6.42it/s]
0it [00:00, ?it/s]
3it [00:00,  6.36it/s]

Detect Aruco with id [2] on image_back_cam
Utilize Aruco with id [2] on image_back_cam for pose estimation due min distanse



2it [00:00,  6.92it/s]


Detect Aruco with id [2] on image_back_cam
Utilize Aruco with id [2] on image_back_cam for pose estimation due min distanse


0it [00:00, ?it/s]
3it [00:00,  6.89it/s]

Detect Aruco with id [2] on image_back_cam
Utilize Aruco with id [2] on image_back_cam for pose estimation due min distanse



2it [00:00,  9.26it/s]


Detect Aruco with id [2] on image_back_cam
Utilize Aruco with id [2] on image_back_cam for pose estimation due min distanse


2it [00:00,  7.82it/s]

Detect Aruco with id [2] on image_front_cam
Utilize Aruco with id [2] on image_front_cam for pose estimation due min distanse



3it [00:00,  6.14it/s]

Detect Aruco with id [2] on image_back_cam
Utilize Aruco with id [2] on image_back_cam for pose estimation due min distanse



2it [00:00,  9.06it/s]


Detect Aruco with id [2] on image_back_cam
Utilize Aruco with id [2] on image_back_cam for pose estimation due min distanse


2it [00:00,  8.47it/s]

Detect Aruco with id [2] on image_front_cam
Utilize Aruco with id [2] on image_front_cam for pose estimation due min distanse



0it [00:00, ?it/s]

Average REG Recall@1:  100.00
        Average Mean RRE REG:  14.99
        Average Mean RTE REG:  3.77
        Average Median RRE REG:  14.74
        Average Median RTE REG:  3.79
        Mean inference time:     90.14 ms
        



