In [1]:
%load_ext autoreload
%autoreload 2

# PlaceRecognitionPipeline with semantics

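A module that implements an algorithm for optimizing the position and orientation of a vehicle in space based on a sequence of multimodal data using neural network methods.

This notebook first builds a faiss index of descriptors for a database track of the ITLP-Campus dataset and then evaluates `PlaceRecognitionPipeline` on a query track, reporting Recall@1, distance and angle errors, and inference time.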

In [2]:
from time import perf_counter, time

import faiss
from hydra.utils import instantiate
import numpy as np
from omegaconf import OmegaConf
from scipy.spatial.transform import Rotation
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm

from opr.datasets.itlp import ITLPCampus
from opr.pipelines.place_recognition import PlaceRecognitionPipeline

In [3]:
# --- Model -------------------------------------------------------------------
# Config that is loaded with OmegaConf and instantiated with hydra, plus the
# checkpoint whose state dict is loaded into the instantiated model.
MODEL_CONFIG_PATH = "../configs/model/place_recognition/multi-image_multi-semantic_lidar_late-fusion.yaml"
WEIGHTS_PATH = "../weights/place_recognition/multi-image_multi-semantic_lidar_late-fusion_nclt.pth"

# --- Data --------------------------------------------------------------------
# NOTE(review): absolute paths tied to one docker environment; adjust locally.
# The database track is the one that gets indexed with faiss; the query track
# is the one evaluated against it.
DATABASE_TRACK_DIR = "/home/docker_opr/Datasets/ITLP-Campus-data/subsampled_data/indoor/00_2023-10-25-night"
QUERY_TRACK_DIR = "/home/docker_opr/Datasets/ITLP-Campus-data/subsampled_data/indoor/01_2023-11-09-twilight"

# Sensors requested from the ITLPCampus dataset.
SENSOR_SUITE = ["front_cam", "back_cam", "lidar"]

# --- Runtime -----------------------------------------------------------------
DEVICE = "cuda"
NUM_WORKERS = 4
BATCH_SIZE = 64

In [4]:
def pose_to_matrix(pose):
    """Convert a single pose into a 4x4 homogeneous transformation matrix.

    Args:
        pose: 7-element sequence ``[tx, ty, tz, qx, qy, qz, qw]``; the first three
            values are the translation, the remaining four a quaternion that is
            handed to ``Rotation.from_quat`` as-is.

    Returns:
        A 4x4 NumPy array with the rotation matrix in the upper-left 3x3 block,
        the translation in the last column and ``[0, 0, 0, 1]`` as the last row.
    """
    translation, quaternion = pose[:3], pose[3:]
    transform = np.eye(4)
    transform[:3, 3] = translation
    transform[:3, :3] = Rotation.from_quat(quaternion).as_matrix()
    return transform


def compute_error(estimated_pose, gt_pose):
    """Compute translation and rotation error between two poses.

    Both poses use the ``[tx, ty, tz, qx, qy, qz, qw]`` layout accepted by
    ``pose_to_matrix``.

    Args:
        estimated_pose: pose produced by the method under evaluation.
        gt_pose: ground-truth pose.

    Returns:
        ``(dist_error, angle_error)`` where ``dist_error`` is the Euclidean norm
        of the translation of ``inv(estimated) @ gt`` and ``angle_error`` is the
        rotation angle of that relative transform in degrees, folded so that
        angles ``a`` and ``180 - a`` give the same value.
    """
    estimated_matrix = pose_to_matrix(estimated_pose)
    gt_matrix = pose_to_matrix(gt_pose)

    # Relative transform that takes the estimated pose onto the ground truth.
    relative = np.linalg.inv(estimated_matrix) @ gt_matrix

    translation = relative[:3, 3]
    dist_error = np.sum(translation**2) ** 0.5

    # The norm of the rotation vector is the rotation angle in radians.
    rotvec = Rotation.from_matrix(relative[:3, :3]).as_rotvec()
    angle_deg = (np.sum(rotvec**2)**0.5) * 180 / np.pi

    # Fold the angle around 90 degrees: 0 and 180 both map to 0, 90 stays 90.
    # NOTE(review): this reports a 180-degree flip as zero error - confirm intended.
    angle_error = abs(90 - abs(angle_deg-90))
    return dist_error, angle_error

## Prepare faiss database index

In [10]:
# Dataset over the database track; its samples are turned into descriptors and
# indexed with faiss below.
db_dataset = ITLPCampus(
    dataset_root=DATABASE_TRACK_DIR,
    # Use the shared constant (as the query dataset does) so that database and
    # query descriptors are always computed from the same set of sensors.
    sensors=SENSOR_SUITE,
    mink_quantization_size=0.5,
    load_semantics=True,
)

In [11]:
# Batched, in-order iteration over the database track. Order must stay fixed
# (shuffle=False) so that descriptor rows line up with dataset indices.
db_dataloader = DataLoader(
    db_dataset,
    collate_fn=db_dataset.collate_fn,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    shuffle=False,
)


In [12]:
# Build the model from its config, restore the pretrained weights and switch to
# inference mode on the target device.
model_config = OmegaConf.load(MODEL_CONFIG_PATH)
model = instantiate(model_config)

# Deserialize onto the CPU first: without map_location, a checkpoint saved from a
# GPU session cannot be loaded on a machine where that device is unavailable.
# The tensors are copied into the model's parameters, which are moved to DEVICE below.
# NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
state_dict = torch.load(WEIGHTS_PATH, map_location="cpu")
model.load_state_dict(state_dict)

model = model.to(DEVICE)
model.eval();

In [13]:
# Run the model over the whole database track and collect one descriptor per
# sample; gradients are not needed for inference.
descriptor_batches = []
with torch.no_grad():
    for raw_batch in tqdm(db_dataloader):
        # Move every entry of the collated batch to the inference device.
        device_batch = {name: value.to(DEVICE) for name, value in raw_batch.items()}
        model_output = model(device_batch)
        descriptor_batches.append(model_output["final_descriptor"].detach().cpu().numpy())

# Stack the per-batch arrays along the sample axis.
descriptors = np.concatenate(descriptor_batches, axis=0)

  0%|          | 0/20 [00:00<?, ?it/s]

100%|██████████| 20/20 [00:12<00:00,  1.60it/s]


In [14]:
# Flat L2 faiss index sized to the descriptor length, filled with every
# database descriptor (row i corresponds to the i-th database sample).
descriptor_dim = descriptors.shape[1]
index = faiss.IndexFlatL2(descriptor_dim)
index.add(descriptors)

In [15]:
# Persist the index inside the database track directory.
# NOTE(review): presumably this is the file PlaceRecognitionPipeline looks for
# under database_dir - confirm against the pipeline implementation.
index_path = f"{DATABASE_TRACK_DIR}/index.faiss"
faiss.write_index(index, index_path)


## Test

In [16]:
# Place recognition pipeline over the database track prepared above, reusing
# the already instantiated model.
pipe = PlaceRecognitionPipeline(
    model=model,
    model_weights_path=WEIGHTS_PATH,
    database_dir=DATABASE_TRACK_DIR,
    device=DEVICE,
)


In [19]:
# Dataset over the query track; each sample is fed to the pipeline one by one.
# Settings mirror the database dataset so descriptors are comparable.
query_dataset = ITLPCampus(
    dataset_root=QUERY_TRACK_DIR,
    load_semantics=True,
    mink_quantization_size=0.5,
    sensors=SENSOR_SUITE,
)


In [20]:
# A retrieval counts as a match when the distance error returned by
# compute_error is below this value (same units as the pose translation).
PR_THRESHOLD = 25.0

pr_matches = []
dist_errors = []
angle_errors = []
times = []

for query in tqdm(query_dataset):
    # perf_counter is monotonic and high-resolution, so the measured latency is
    # not affected by system clock adjustments the way time() is.
    start = perf_counter()
    output = pipe.infer(query)
    times.append(perf_counter() - start)

    # Compare the pose returned by the pipeline with the query's own pose.
    dist_error, angle_error = compute_error(output["pose"], query["pose"])
    pr_matches.append(dist_error < PR_THRESHOLD)
    dist_errors.append(dist_error)
    angle_errors.append(angle_error)

times = times[1:]  # the first query is always slower

  0%|          | 4/1310 [00:00<01:55, 11.33it/s]

100%|██████████| 1310/1310 [01:48<00:00, 12.11it/s]


In [21]:
# Aggregate the per-query results collected in the evaluation loop.
recall_at_1 = np.mean(pr_matches) * 100
mean_dist, median_dist = np.mean(dist_errors), np.median(dist_errors)
mean_angle, median_angle = np.mean(angle_errors), np.median(angle_errors)

print(f"Recall@1: {recall_at_1:.2f}")
print(f"Mean distance error: {mean_dist:.2f}, mean angle error: {mean_angle:.2f}")
print(f"Median distance error: {median_dist:.2f}, median angle error: {median_angle:.2f}")

Recall@1: 70.69
Mean distance error: 42.83, mean angle error: 8.94
Median distance error: 1.27, median angle error: 5.23


In [22]:
# Per-query latencies were recorded in seconds; report them in milliseconds.
mean_ms = np.mean(times) * 1000
median_ms = np.median(times) * 1000
print(f"Mean inference time: {mean_ms:.2f} ms, median inference time: {median_ms:.2f} ms")

Mean inference time: 42.70 ms, median inference time: 42.57 ms
