In [1]:
import os
import copy
import random
from time import time

import cv2
import faiss
import torch
import numpy as np
from tqdm import tqdm
from pathlib import Path
from omegaconf import OmegaConf
from hydra.utils import instantiate
from torch.utils.data import DataLoader
from scipy.spatial.transform import Rotation
from geotransformer.utils.pointcloud import get_transform_from_rotation_translation

from opr.datasets.itlp import ITLPCampus
from opr.pipelines.localization import LocalizationPipeline
from opr.pipelines.place_recognition import PlaceRecognitionPipeline
from opr.pipelines.registration import PointcloudRegistrationPipeline

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Dataset location and the sensors that are loaded for every sample.
DATASET_ROOT = "/home/docker_opr/Datasets/OpenPlaceRecognition/itlp_campus_outdoor_part2"
SENSOR_SUITE = ["front_cam", "back_cam", "lidar"]

# Track names are the sub-directory names of DATASET_ROOT in sorted order;
# the first five entries are skipped and the rest are used as test tracks.
TRACK_LIST = sorted(entry.name for entry in Path(DATASET_ROOT).iterdir() if entry.is_dir())[5:]

print("Test track list:", TRACK_LIST, sep="\n")

# Data loading / inference settings.
BATCH_SIZE, NUM_WORKERS = 16, 4
DEVICE = "cuda:0"

Test track list:
['05_2023-08-15-day', '06_2023-08-18-night', '07_2023-10-04-day', '08_2023-10-11-night']


In [4]:
def set_seed(seed: int = 18) -> None:
    """Seed the Python, NumPy and PyTorch RNGs and make cuDNN deterministic.

    Args:
        seed: Value handed to every seeding call and exported as the
            ``PYTHONHASHSEED`` environment variable.
    """
    for seed_fn in (np.random.seed, random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)

    # The cuDNN backend needs these two switches in addition to the seeds.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # NOTE(review): the interpreter normally reads PYTHONHASHSEED at start-up, so
    # setting it here presumably only affects child processes - confirm.
    os.environ["PYTHONHASHSEED"] = str(seed)
    print(f"Random seed set as {seed}")


set_seed()

Random seed set as 18


In [5]:
def pose_to_matrix(pose):
    """Convert a pose given as [tx ty tz qx qy qz qw] into a 4x4 homogeneous matrix.

    Args:
        pose: Sequence whose first three entries are the translation and whose
            remaining entries are the orientation quaternion passed to
            ``Rotation.from_quat``.

    Returns:
        np.ndarray: 4x4 matrix with the rotation in the upper-left 3x3 block and
        the translation in the last column.
    """
    translation, quaternion = pose[:3], pose[3:]
    matrix = np.eye(4)
    matrix[:3, 3] = translation
    matrix[:3, :3] = Rotation.from_quat(quaternion).as_matrix()
    return matrix

def compute_error(estimated_pose, gt_pose):
    """Rotation and translation error between two [tx ty tz qx qy qz qw] poses.

    Args:
        estimated_pose: Estimated pose in the 7-element format above.
        gt_pose: Reference pose in the same format.

    Returns:
        tuple: ``(angle_error, dist_error)`` - the folded rotation error in
        degrees and the Euclidean norm of the relative translation.
    """
    est_matrix = pose_to_matrix(estimated_pose)
    gt_matrix = pose_to_matrix(gt_pose)

    # Relative transform between the estimate and the reference.
    delta = np.linalg.inv(est_matrix) @ gt_matrix

    offset = delta[:3, 3]
    dist_error = np.sum(offset**2) ** 0.5

    # Rotation-vector norm is the rotation angle in radians; convert to degrees.
    rotvec = Rotation.from_matrix(delta[:3, :3]).as_rotvec()
    angle_deg = (np.sum(rotvec**2) ** 0.5) * 180 / np.pi

    # Fold the angle around 90 degrees, so the reported error lies in [0, 90].
    # NOTE(review): this makes a 180-degree error count as 0 - confirm it is intended.
    angle_error = abs(90 - abs(angle_deg - 90))
    return angle_error, dist_error

def inference():
    """Run the localization benchmark over every ordered (database, query) track pair.

    For each database track in ``TRACK_LIST`` a ``PlaceRecognitionPipeline`` is
    built on that track's directory, and every other track is used as the query
    set. Each query sample is passed to ``LocalizationPipeline.infer`` and the
    returned ``"estimated_pose"`` is compared with the query's own
    ``tx ty tz qx qy qz qw`` columns through ``compute_error``.

    The function relies on notebook-level globals: ``TRACK_LIST``,
    ``DATASET_ROOT``, ``DEVICE``, ``dataset``, ``pr_model``, ``PR_WEIGHTS_PATH``,
    ``reg_model``, ``REGISTRATION_WEIGHTS_PATH`` and ``RECALL_THRESHOLD``.
    Per-pair statistics are appended to the global ``all_*`` lists, so those
    lists must be reset by the caller before each run.

    Returns:
        str: Multi-line summary with the averaged recall, mean/median rotation
        and translation errors, and the mean inference time in milliseconds.
        With an empty ``TRACK_LIST`` the summary is built from the (empty)
        accumulators instead of failing on an unassigned variable.
    """
    pose_columns = ["tx", "ty", "tz", "qx", "qy", "qz", "qw"]

    for db_track in TRACK_LIST:
        pr_pipe = PlaceRecognitionPipeline(
            database_dir=Path(DATASET_ROOT) / db_track,
            model=pr_model,
            model_weights_path=PR_WEIGHTS_PATH,
            device=DEVICE,
        )
        for query_track in TRACK_LIST:
            if db_track == query_track:
                continue

            reg_pipe = PointcloudRegistrationPipeline(
                model=reg_model,
                model_weights_path=REGISTRATION_WEIGHTS_PATH,
                device=DEVICE,
                voxel_downsample_size=0.3,
                num_points_downsample=8192,
            )
            loc_pipe = LocalizationPipeline(
                place_recognition_pipeline=pr_pipe,
                registration_pipeline=reg_pipe,
                precomputed_reg_feats=True,
                pointclouds_subdir="lidar"
            )

            # The query dataset is iterated below, so it needs its own copy whose
            # dataframe is restricted to the query track.
            query_dataset = copy.deepcopy(dataset)
            query_dataset.dataset_df = query_dataset.dataset_df[query_dataset.dataset_df["track"] == query_track].reset_index(drop=True)
            query_df = query_dataset.dataset_df

            # Only the database dataframe is needed; boolean filtering already
            # yields a new frame, so no deep copy of the whole dataset is made.
            db_df = dataset.dataset_df[dataset.dataset_df["track"] == db_track].reset_index(drop=True)

            # Point both pipelines at the database rows of the current track.
            loc_pipe.pr_pipe.database_df = db_df
            loc_pipe.database_df = db_df

            reg_matches = []
            reg_rotation_errors = []
            reg_translation_errors = []
            times = []

            for q_i, query in tqdm(enumerate(query_dataset)):
                query_pose = query_df.iloc[q_i][pose_columns].to_numpy()
                start = time()
                estimated_pose = loc_pipe.infer(query)["estimated_pose"]
                # Wait for queued GPU work so the measured time covers the whole step.
                torch.cuda.current_stream().synchronize()
                step_time = time() - start
                times.append(step_time)

                reg_rotation_error, reg_translation_error = compute_error(estimated_pose, query_pose)
                # A query counts as correctly localized when its translation
                # error is below RECALL_THRESHOLD.
                reg_correct = reg_translation_error < RECALL_THRESHOLD
                reg_matches.append(reg_correct)
                reg_rotation_errors.append(reg_rotation_error)
                reg_translation_errors.append(reg_translation_error)

            all_reg_recalls.append(np.nanmean(reg_matches))
            all_mean_reg_rotation_errors.append(np.nanmean(reg_rotation_errors))
            all_mean_reg_translation_errors.append(np.nanmean(reg_translation_errors))
            all_median_reg_rotation_errors.append(np.nanmedian(reg_rotation_errors))
            all_median_reg_translation_errors.append(np.nanmedian(reg_translation_errors))
            # The first timing of every pair is left out of the global statistics
            # (presumably warm-up - confirm).
            all_times.extend(times[1:])

    # Built once after all pairs are processed. The indentation inside the
    # string literal is part of the printed text and is kept unchanged.
    results_str = f"""Average REG Recall@1:  {np.nanmean(all_reg_recalls)*100:.2f}
        Average Mean RRE REG:  {np.nanmean(all_mean_reg_rotation_errors):.2f}
        Average Mean RTE REG:  {np.nanmean(all_mean_reg_translation_errors):.2f}
        Average Median RRE REG:  {np.nanmean(all_median_reg_rotation_errors):.2f}
        Average Median RTE REG:  {np.nanmean(all_median_reg_translation_errors):.2f}
        Mean inference time:     {np.nanmean(all_times)*1000:.2f} ms
        """
    return results_str

In [6]:
from albumentations.pytorch import ToTensorV2
import albumentations as A
from opr.datasets.augmentations import DefaultImageTransform

class ToTensorTransform:
    """Callable that runs an albumentations pipeline consisting only of ``ToTensorV2``."""

    def __init__(self):
        # Single-step pipeline; no resizing or normalization is configured here.
        self.transform = A.Compose([ToTensorV2()])

    def __call__(self, img: np.ndarray):
        """Run the pipeline on ``img`` and return its ``"image"`` entry."""
        transformed = self.transform(image=img)
        return transformed["image"]

# Test subset of the dataset with the sensors listed in SENSOR_SUITE.
dataset = ITLPCampus(
    dataset_root=DATASET_ROOT,
    subset="test",
    csv_file="test.csv",
    sensors=SENSOR_SUITE,
    load_semantics=True,
    exclude_dynamic_classes=True,
    image_transform=ToTensorTransform(),
    semantic_transform=ToTensorTransform(),
    late_image_transform=DefaultImageTransform(resize=(320, 192), train=False),
)

# Keep only rows of the test tracks and renumber them from zero; the previous
# index is preserved as a column because reset_index() is called without drop.
dataset.dataset_df = dataset.dataset_df.loc[dataset.dataset_df["track"].isin(TRACK_LIST)].reset_index()

# Sequential (unshuffled) loader, so batch order follows the dataframe row order.
dataloader = DataLoader(
    dataset,
    collate_fn=dataset.collate_fn,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    shuffle=False,
)

In [7]:
# Registration model: built from its config file, then loaded with the weights below.
REGISTRATION_MODEL_CONFIG_PATH = "../../configs/model/registration/hregnet_light_feats.yaml"
REGISTRATION_WEIGHTS_PATH = "../../weights/registration/hregnet_light_feats_nuscenes.pth"
reg_model_config = OmegaConf.load(REGISTRATION_MODEL_CONFIG_PATH)
reg_model = instantiate(reg_model_config)
# map_location="cpu" deserializes the checkpoint onto the CPU regardless of the
# device it was saved from; the model is moved to DEVICE right afterwards.
reg_model.load_state_dict(torch.load(REGISTRATION_WEIGHTS_PATH, map_location="cpu"))
reg_model = reg_model.to(DEVICE)
reg_model.eval();

In [8]:
# NCLT: place-recognition model loaded from the "nclt" weights file.

# Accumulators filled by inference(); they are reset here because inference()
# only appends to them.
all_reg_recalls = []
all_mean_reg_rotation_errors = []
all_mean_reg_translation_errors = []
all_median_reg_rotation_errors = []
all_median_reg_translation_errors = []
all_times = []
# Translation-error threshold below which a query counts as correctly localized.
RECALL_THRESHOLD = 25.0

PR_MODEL_CONFIG_PATH = "../../configs/model/place_recognition/multi-image_lidar_late-fusion.yaml"
PR_WEIGHTS_PATH = "../../weights/place_recognition/multi-image_lidar_late-fusion_nclt.pth"
pr_model_config = OmegaConf.load(PR_MODEL_CONFIG_PATH)
pr_model = instantiate(pr_model_config)
# map_location="cpu" deserializes the checkpoint onto the CPU regardless of the
# device it was saved from; the model is moved to DEVICE right afterwards.
pr_model.load_state_dict(torch.load(PR_WEIGHTS_PATH, map_location="cpu"))
pr_model = pr_model.to(DEVICE)
pr_model.eval();

In [9]:
# Compute the final descriptor of every dataset sample with the current pr_model.
descriptors = []
with torch.no_grad():
    for batch in tqdm(dataloader):
        batch = {k: v.to(DEVICE) for k, v in batch.items()}
        final_descriptor = pr_model(batch)["final_descriptor"]
        descriptors.append(final_descriptor.detach().cpu().numpy())
descriptors = np.concatenate(descriptors, axis=0)

dataset_df = dataset.dataset_df
# The descriptor rows are sliced by dataframe row position below, so both must line up.
assert len(descriptors) == len(dataset_df), "descriptor count does not match the number of dataset rows"

# `.indices` gives positional row numbers per track (unlike `.groups`, which
# gives index labels), so the slicing is correct whatever the dataframe index is.
for track, indices in dataset_df.groupby("track").indices.items():
    track_descriptors = descriptors[indices]
    track_index = faiss.IndexFlatL2(track_descriptors.shape[1])
    track_index.add(track_descriptors)
    # Overwrites any existing index.faiss in the track directory.
    faiss.write_index(track_index, f"{DATASET_ROOT}/{track}/index.faiss")
    print(f"Saved index {DATASET_ROOT}/{track}/index.faiss")

100%|██████████| 39/39 [01:00<00:00,  1.55s/it]

Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/itlp_campus_outdoor_part2/05_2023-08-15-day/index.faiss
Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/itlp_campus_outdoor_part2/06_2023-08-18-night/index.faiss
Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/itlp_campus_outdoor_part2/07_2023-10-04-day/index.faiss
Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/itlp_campus_outdoor_part2/08_2023-10-11-night/index.faiss





In [10]:

# Run the benchmark with the currently loaded models and print the summary.
# inference() appends to the global all_* lists, so the cell that resets those
# lists has to be re-run before this call is repeated.
print(inference())

156it [00:59,  2.62it/s]
152it [00:49,  3.06it/s]
150it [00:46,  3.21it/s]
152it [00:49,  3.09it/s]
152it [00:49,  3.04it/s]
150it [00:48,  3.12it/s]
152it [00:48,  3.10it/s]
156it [00:48,  3.19it/s]
150it [00:47,  3.16it/s]
152it [00:48,  3.11it/s]
156it [00:49,  3.17it/s]
152it [00:48,  3.16it/s]

Average REG Recall@1:  96.61
        Average Mean RRE REG:  10.64
        Average Mean RTE REG:  5.90
        Average Median RRE REG:  5.31
        Average Median RTE REG:  2.62
        Mean inference time:     119.56 ms
        





In [11]:
# MIPT finetune: place-recognition model loaded from the "itlp-finetune" weights file.

# Accumulators filled by inference(); they are reset here because inference()
# only appends to them.
all_reg_recalls = []
all_mean_reg_rotation_errors = []
all_mean_reg_translation_errors = []
all_median_reg_rotation_errors = []
all_median_reg_translation_errors = []
all_times = []
# Translation-error threshold below which a query counts as correctly localized.
RECALL_THRESHOLD = 25.0

PR_MODEL_CONFIG_PATH = "../../configs/model/place_recognition/multi-image_lidar_late-fusion.yaml"
PR_WEIGHTS_PATH = "../../weights/place_recognition/multi-image_lidar_late-fusion_itlp-finetune.pth"
pr_model_config = OmegaConf.load(PR_MODEL_CONFIG_PATH)
pr_model = instantiate(pr_model_config)
# map_location="cpu" deserializes the checkpoint onto the CPU regardless of the
# device it was saved from; the model is moved to DEVICE right afterwards.
pr_model.load_state_dict(torch.load(PR_WEIGHTS_PATH, map_location="cpu"))
pr_model = pr_model.to(DEVICE)
pr_model.eval();

In [12]:
# Compute the final descriptor of every dataset sample with the current pr_model.
descriptors = []
with torch.no_grad():
    for batch in tqdm(dataloader):
        batch = {k: v.to(DEVICE) for k, v in batch.items()}
        final_descriptor = pr_model(batch)["final_descriptor"]
        descriptors.append(final_descriptor.detach().cpu().numpy())
descriptors = np.concatenate(descriptors, axis=0)

dataset_df = dataset.dataset_df
# The descriptor rows are sliced by dataframe row position below, so both must line up.
assert len(descriptors) == len(dataset_df), "descriptor count does not match the number of dataset rows"

# `.indices` gives positional row numbers per track (unlike `.groups`, which
# gives index labels), so the slicing is correct whatever the dataframe index is.
for track, indices in dataset_df.groupby("track").indices.items():
    track_descriptors = descriptors[indices]
    track_index = faiss.IndexFlatL2(track_descriptors.shape[1])
    track_index.add(track_descriptors)
    # Overwrites any existing index.faiss in the track directory.
    faiss.write_index(track_index, f"{DATASET_ROOT}/{track}/index.faiss")
    print(f"Saved index {DATASET_ROOT}/{track}/index.faiss")

100%|██████████| 39/39 [01:05<00:00,  1.69s/it]

Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/itlp_campus_outdoor_part2/05_2023-08-15-day/index.faiss
Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/itlp_campus_outdoor_part2/06_2023-08-18-night/index.faiss
Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/itlp_campus_outdoor_part2/07_2023-10-04-day/index.faiss
Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/itlp_campus_outdoor_part2/08_2023-10-11-night/index.faiss





In [13]:
# Run the benchmark with the currently loaded models and print the summary.
# inference() appends to the global all_* lists, so the cell that resets those
# lists has to be re-run before this call is repeated.
print(inference())

156it [00:49,  3.15it/s]
152it [00:49,  3.05it/s]
150it [00:47,  3.15it/s]
152it [00:50,  3.03it/s]
152it [00:48,  3.10it/s]
150it [00:48,  3.11it/s]
152it [00:48,  3.13it/s]
156it [00:48,  3.22it/s]
150it [00:47,  3.16it/s]
152it [00:48,  3.15it/s]
156it [00:48,  3.19it/s]
152it [00:49,  3.10it/s]

Average REG Recall@1:  99.56
        Average Mean RRE REG:  9.12
        Average Mean RTE REG:  3.50
        Average Median RRE REG:  4.86
        Average Median RTE REG:  2.55
        Mean inference time:     119.55 ms
        



