In [1]:
%load_ext autoreload
%autoreload 2

# PlaceRecognitionPipeline invariant to weather changes

A module that implements an algorithm for generating global vector representations (descriptors) of multimodal outdoor data that are invariant to changes in weather conditions and seasons.

This invariance is achieved by using semantic segmentation masks and highlighting special elements of the scene.

In [2]:
from copy import copy
from time import time
from pathlib import Path

import faiss

import pandas as pd

from tqdm import tqdm
import matplotlib.pyplot as plt

from hydra.utils import instantiate
import numpy as np
from omegaconf import OmegaConf
from scipy.spatial.transform import Rotation
import torch
from torch.utils.data import DataLoader

from opr.datasets import NCLTDataset
from opr.pipelines.place_recognition import PlaceRecognitionPipeline

In [3]:
def pose_to_matrix(pose):
    """Build a 4x4 homogeneous pose matrix from a [tx ty tz qx qy qz qw] pose.

    The first three entries are the translation; the remaining four are the
    orientation quaternion in (x, y, z, w) order.
    """
    translation, quaternion = pose[:3], pose[3:]
    transform = np.eye(4)
    # Upper-left 3x3 block holds the rotation, last column holds the translation.
    transform[:3, :3] = Rotation.from_quat(quaternion).as_matrix()
    transform[:3, 3] = translation
    return transform


def compute_error(estimated_pose, gt_pose):
    """Return (distance error, angle error in degrees) between two poses.

    Both poses use the [tx ty tz qx qy qz qw] format. The errors are measured
    on the relative transform inv(estimated) @ ground truth.
    """
    relative = np.linalg.inv(pose_to_matrix(estimated_pose)) @ pose_to_matrix(gt_pose)

    # Euclidean length of the relative translation.
    offset = relative[:3, 3]
    dist_error = np.sum(offset**2) ** 0.5

    # Length of the relative rotation vector, converted from radians to degrees.
    rotation_vector = Rotation.from_matrix(relative[:3, :3]).as_rotvec()
    angle_deg = (np.sum(rotation_vector**2) ** 0.5) * 180 / np.pi

    # Fold the angle around 90 degrees, so that e.g. 180 maps back to 0.
    angle_error = abs(90 - abs(angle_deg - 90))
    return dist_error, angle_error


You can **download the dataset**:

- Kaggle:
  - [NCLT_OpenPlaceRecognition](https://www.kaggle.com/datasets/creatorofuniverses/nclt-iprofi-hack-23)
- Hugging Face:
  - [NCLT_OpenPlaceRecognition](https://huggingface.co/datasets/OPR-Project/NCLT_OpenPlaceRecognition)

To **download the model weights**, run the following command:

```bash
wget -O ../../weights/place_recognition/multimodal_semantic_with_soc_outdoor_nclt.pth https://huggingface.co/OPR-Project/PlaceRecognition-NCLT/resolve/main/multimodal_semantic_with_soc_outdoor_nclt.pth
```


In [4]:
!wget -O ../../weights/place_recognition/multimodal_semantic_with_soc_outdoor_nclt.pth https://huggingface.co/OPR-Project/PlaceRecognition-NCLT/resolve/main/multimodal_semantic_with_soc_outdoor_nclt.pth

--2025-04-18 15:01:30--  https://huggingface.co/OPR-Project/PlaceRecognition-NCLT/resolve/main/multimodal_semantic_with_soc_outdoor_nclt.pth
Resolving huggingface.co (huggingface.co)... 18.239.50.49, 18.239.50.16, 18.239.50.103, ...
Connecting to huggingface.co (huggingface.co)|18.239.50.49|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cdn-lfs-us-1.hf.co/repos/e8/30/e8306844a097b119f688c0cfcf564a9f584f52c28b0d3c5b11e560cb0c3e7eeb/36a58cec8b5434ade952afb95b565f8dee8e126cb24e36403648acbf2dda7bf6?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27multimodal_semantic_with_soc_outdoor_nclt.pth%3B+filename%3D%22multimodal_semantic_with_soc_outdoor_nclt.pth%22%3B&Expires=1744992090&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTc0NDk5MjA5MH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmhmLmNvL3JlcG9zL2U4LzMwL2U4MzA2ODQ0YTA5N2IxMTlmNjg4YzBjZmNmNTY0YTlmNTg0ZjUyYzI4YjBkM2M1YjExZTU2MGNiMGMzZTdlZWIvMzZhN

In [5]:
# Root of the preprocessed NCLT dataset; each sub-directory is treated as one track.
DATASET_ROOT = "/home/docker_opr/Datasets/OpenPlaceRecognition/NCLT_preprocessed"  # change to your dataset path

# Semantic annotation config. Given relative to the notebook, like the model
# config and weights below, so the notebook does not depend on where the
# repository is checked out.
SEMANTIC_ANNO = "../../configs/dataset/anno/oneformer.yaml"

# Data streams requested from the dataset: two camera images, their masks, and the lidar point cloud.
SENSOR_SUITE = ["image_Cam5", "image_Cam2", "mask_Cam5", "mask_Cam2", "pointcloud_lidar"]

# DataLoader settings and the device used for inference.
BATCH_SIZE = 32
NUM_WORKERS = 4
DEVICE = "cuda"

# Model definition and the matching pretrained weights.
MODEL_CONFIG_PATH = "../../configs/model/place_recognition/multimodal_semantic_with_soc_outdoor.yaml"
WEIGHTS_PATH = "../../weights/place_recognition/multimodal_semantic_with_soc_outdoor_nclt.pth"

In [6]:
# Every sub-directory of the dataset root is treated as one track.
TRACK_LIST = sorted(entry.name for entry in Path(DATASET_ROOT).iterdir() if entry.is_dir())
print(f"Found {len(TRACK_LIST)} tracks")
print(TRACK_LIST)

# Keep only the first two tracks for the evaluation below.
print("WARNING: track list limited")
TRACK_LIST = TRACK_LIST[:2]
print(TRACK_LIST)

Found 10 tracks
['2012-01-08', '2012-01-22', '2012-02-12', '2012-02-18', '2012-03-31', '2012-05-26', '2012-08-04', '2012-10-28', '2012-11-04', '2012-12-01']
['2012-01-08', '2012-01-22']


## Init model

In [7]:

# Build the network from its config file.
model_config = OmegaConf.load(MODEL_CONFIG_PATH)
model = instantiate(model_config)

# Load the checkpoint onto the CPU first so it can be restored regardless of the
# device it was saved from; the model is moved to DEVICE afterwards.
state_dict = torch.load(WEIGHTS_PATH, map_location="cpu")
model.load_state_dict(state_dict)
model = model.to(DEVICE)

# Switch to evaluation mode; the trailing semicolon suppresses the cell output.
model.eval();



## Calculate descriptors for databases

In [8]:
# Semantic annotation config, handed to the dataset through `anno`.
semantic_anno_cfg = OmegaConf.load(SEMANTIC_ANNO)

# Test subset of the dataset with the sensor streams listed in SENSOR_SUITE.
dataset_kwargs = dict(
    dataset_root=DATASET_ROOT,
    subset="test",
    data_to_load=SENSOR_SUITE,
    pointcloud_quantization_size=0.5,
    max_point_distance=None,
    load_soc=True,
    anno=semantic_anno_cfg,
    top_k_soc=5,
)
dataset = NCLTDataset(**dataset_kwargs)

# Samples are read in dataset order (no shuffling) and batched with the dataset's own collate function.
loader_kwargs = dict(
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    collate_fn=dataset.collate_fn,
)
dataloader = DataLoader(dataset, **loader_kwargs)


In [9]:
# Run the model over the whole dataloader and keep one descriptor array per batch.
descriptor_batches = []
with torch.no_grad():
    for batch in tqdm(dataloader):
        # Move every entry of the batch to the inference device.
        device_batch = {name: value.to(DEVICE) for name, value in batch.items()}
        model_output = model(device_batch)
        descriptor_batches.append(model_output["final_descriptor"].detach().cpu().numpy())

# Stack the per-batch arrays along the first axis.
descriptors = np.concatenate(descriptor_batches, axis=0)

  0%|          | 0/86 [00:00<?, ?it/s]

100%|██████████| 86/86 [01:58<00:00,  1.38s/it]


In [10]:
dataset_df = dataset.dataset_df

# Build and save one exact L2 FAISS index per track.
# `descriptors` is indexed by position, so use `GroupBy.indices` (positional
# row numbers) rather than `GroupBy.groups` (index labels): the two only
# coincide when the DataFrame has a default 0..n-1 index.
for track, positions in dataset_df.groupby("track").indices.items():
    track_descriptors = descriptors[positions]
    track_index = faiss.IndexFlatL2(track_descriptors.shape[1])
    track_index.add(track_descriptors)
    index_path = f"{DATASET_ROOT}/{track}/index.faiss"
    faiss.write_index(track_index, index_path)
    print(f"Saved index {index_path}")


Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/NCLT_preprocessed/2012-01-08/index.faiss
Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/NCLT_preprocessed/2012-01-22/index.faiss
Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/NCLT_preprocessed/2012-02-12/index.faiss
Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/NCLT_preprocessed/2012-02-18/index.faiss
Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/NCLT_preprocessed/2012-03-31/index.faiss
Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/NCLT_preprocessed/2012-05-26/index.faiss
Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/NCLT_preprocessed/2012-08-04/index.faiss
Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/NCLT_preprocessed/2012-10-28/index.faiss
Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/NCLT_preprocessed/2012-11-04/index.faiss
Saved index /home/docker_opr/Datasets/OpenPlaceRecognition/NCLT_preprocessed/2012-12-01/index.faiss


## Test

In [11]:
# A query counts as correctly recognised when its distance error is below this threshold.
PR_THRESHOLD = 25.0

test_csv = pd.read_csv(Path(DATASET_ROOT) / "test.csv", index_col=0)

# One entry per evaluated (database track, query track) pair.
all_recalls = []
all_mean_dist_errors = []
all_mean_angle_errors = []
all_median_dist_errors = []
all_median_angle_errors = []
# Per-query inference times, pooled over all pairs.
all_times = []

pose_columns = ["tx", "ty", "tz", "qx", "qy", "qz", "qw"]

for db_track in TRACK_LIST:
    pipe = PlaceRecognitionPipeline(
        database_dir=Path(DATASET_ROOT) / db_track,
        model=model,
        device=DEVICE,
    )
    # Keep only the test-subset rows of the database. This does not depend on
    # the query track, so it is done once per database instead of per query track.
    pipe.database_df = pipe.database_df[pipe.database_df["image"].isin(test_csv["image"])].reset_index(drop=True)

    for query_track in TRACK_LIST:
        if db_track == query_track:
            continue

        # Shallow copy so that narrowing `dataset_df` does not touch the shared `dataset`.
        query_dataset = copy(dataset)
        query_dataset.dataset_df = query_dataset.dataset_df[query_dataset.dataset_df["track"] == query_track]
        query_df = pd.read_csv(Path(DATASET_ROOT) / query_track / "track.csv", index_col=0)

        # filter out only test subset
        # NOTE(review): poses are looked up by position below, which assumes
        # query_df rows are in the same order as query_dataset — confirm.
        query_df = query_df[query_df["image"].isin(query_dataset.dataset_df["image"])].reset_index(drop=True)

        pr_matches = []
        dist_errors = []
        angle_errors = []
        times = []

        # Query/database index pairs with a small (< 10) or large (> 100) distance error.
        true_pairs = []
        false_pairs = []

        # `total` gives the progress bar a known length instead of a bare counter.
        for q_i, query in tqdm(enumerate(query_dataset), total=len(query_dataset.dataset_df)):
            query["pose"] = query_df.iloc[q_i][pose_columns].to_numpy()
            t = time()
            output = pipe.infer(query)
            times.append(time() - t)
            dist_error, angle_error = compute_error(output["pose"], query["pose"])
            pr_matches.append(dist_error < PR_THRESHOLD)
            dist_errors.append(dist_error)
            angle_errors.append(angle_error)
            if dist_error < 10:
                true_pairs.append((q_i, output["idx"]))
            elif dist_error > 100:
                false_pairs.append((q_i, output["idx"]))

        all_recalls.append(np.mean(pr_matches))
        all_mean_dist_errors.append(np.mean(dist_errors))
        all_mean_angle_errors.append(np.mean(angle_errors))
        all_median_dist_errors.append(np.median(dist_errors))
        all_median_angle_errors.append(np.median(angle_errors))
        all_times.extend(times[1:]) # drop the first iteration cause it is always slower

275it [00:39,  7.03it/s]
331it [00:43,  7.56it/s]


In [12]:
# Recall averaged over all evaluated (database, query) track pairs.
np.mean(all_recalls)


0.9313540236198847

In [13]:
# Per-pair mean distance error, averaged over all evaluated (database, query) track pairs.
np.mean(all_mean_dist_errors)

11.34987849526576

In [14]:
# Per-pair median distance error, averaged over all evaluated (database, query) track pairs.
np.mean(all_median_dist_errors)


4.616232466285878

In [15]:
# One summary line per metric, each averaged over all evaluated track pairs.
summary_lines = [
    f"Average Recall@1: {np.mean(all_recalls)*100:.2f}",
    f"Average mean dist error: {np.mean(all_mean_dist_errors):.2f}",
    f"Average mean angle error: {np.mean(all_mean_angle_errors):.2f}",
    f"Average median dist error: {np.mean(all_median_dist_errors):.2f}",
    f"Average median angle error: {np.mean(all_median_angle_errors):.2f}",
]
# Newline-separated, with a trailing newline after the last line.
results_str = "\n".join(summary_lines) + "\n"

In [16]:
# Show the formatted summary of the evaluation metrics.
print(results_str)

Average Recall@1: 93.14
Average mean dist error: 11.35
Average mean angle error: 11.52
Average median dist error: 4.62
Average median angle error: 6.39

