In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports edited modules
# before each cell runs, so changes to project source are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import matplotlib.pyplot as plt


In [3]:
import pprint
import sys
from datetime import datetime
from pathlib import Path
from typing import Dict, Literal

import torch
import hydra
import wandb
from hydra.utils import instantiate
from loguru import logger
from omegaconf import DictConfig, OmegaConf
from opr.datasets.dataloader_factory import make_dataloaders
from opr.trainers.place_recognition import MultimodalPlaceRecognitionTrainer
from opr.utils import set_seed
from torch.utils.data import DataLoader
from opr.datasets.itlp import ITLPCampus

INFO:albumentations.check_version:A new version of Albumentations is available: 1.4.24 (you have 1.4.10). Upgrade using: pip install --upgrade albumentations


In [4]:
from hydra import compose, initialize
from omegaconf import OmegaConf

# Compose the experiment configuration with Hydra's Compose API instead of the
# `@hydra.main` decorator, which is how Hydra configs are loaded in a notebook.
with initialize(config_path="../../configs", version_base=None):
    cfg = compose(config_name="finetune_itlp_multimodal_with_soc_outdoor")

# Show the composed config as YAML. Interpolations are printed unresolved
# (e.g. the sampler seed appears as `${seed}`).
config_yaml = OmegaConf.to_yaml(cfg)
print(config_yaml)

loss:
  _target_: opr.losses.BatchHardTripletMarginLoss
  margin: 0.2
optimizer:
  _target_: torch.optim.AdamW
  _convert_: all
  lr: 0.0001
  weight_decay: 0.0001
sampler:
  _target_: opr.samplers.BatchSampler
  batch_size: 32
  batch_size_limit: 64
  batch_expansion_rate: 1.4
  max_batches: null
  positives_per_group: 2
  seed: ${seed}
  drop_last: true
scheduler:
  _target_: torch.optim.lr_scheduler.MultiStepLR
  gamma: 0.1
  milestones:
  - 5
wandb:
  disabled: false
  project: OPR_MODEL_ZOO
modalities_weights:
  image: 1.0
  cloud: 1.0
  semantic: 1.0
  text: null
  final_descriptor: 1.0
debug: false
device: cuda
seed: 3121999
num_workers: 2
checkpoints_dir: checkpoints
exp_name: finetune_itlp_multimodal_semantic
epochs: 10
batch_expansion_threshold: 0.7
model:
  _target_: opr.models.place_recognition.base.LateFusionModel
  image_module:
    _target_: opr.models.place_recognition.base.ImageModel
    backbone:
      _target_: opr.modules.feature_extractors.ResNet18FPNFeatureExtract

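The ITLP Campus Outdoor dataset used in this notebook can be downloaded from either of the sources below; after downloading, point `DATASET_ROOT` (next cell) at the extracted directory: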

- Kaggle:
  - [ITLP Campus Outdoor](https://www.kaggle.com/datasets/alexandermelekhin/itlp-campus-outdoor)
- Hugging Face:
  - [ITLP Campus Outdoor](https://huggingface.co/datasets/OPR-Project/ITLP-Campus-Outdoor)


In [None]:
# Root directory of the ITLP Campus Outdoor dataset; used below both as
# `dataset_root` for the datasets and as the base path of the per-track
# `masks/` folders.
# NOTE(review): machine-specific absolute path — change it to your local copy.
DATASET_ROOT = "/home/docker_opr/Datasets/OpenPlaceRecognition/itlp_campus_outdoor"

## Init wandb, checkpoints directory and seed

In [6]:
# Start a Weights & Biases run unless logging is switched off, either by
# `debug: true` or by `wandb.disabled: true` in the config. Without a W&B run
# the fixed name "debug" is used instead.
if not cfg.debug and not cfg.wandb.disabled:
    # Upload a plain-dict copy of the config with interpolations resolved;
    # `throw_on_missing=True` raises if any mandatory value is still unset.
    config_dict = OmegaConf.to_container(cfg, resolve=True, throw_on_missing=True)
    wandb.init(
        name=cfg.exp_name,
        project=cfg.wandb.project,
        settings=wandb.Settings(start_method="thread"),
        config=config_dict,
    )
    run_name = wandb.run.name
else:
    run_name = "debug"

# Each run gets its own directory: "<cfg.checkpoints_dir>/<timestamp>_<run name>".
checkpoints_dir = (
    Path(cfg.checkpoints_dir) / f"{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}_{run_name}"
)
# `exist_ok=True` replaces the separate `exists()` check: creation is a single
# call, and a clash with an existing non-directory path raises immediately
# instead of surfacing later when a checkpoint is written.
checkpoints_dir.mkdir(parents=True, exist_ok=True)

# Seed the RNGs for repeatability; deterministic operators cannot be used here,
# hence `make_deterministic=False`.
set_seed(seed=cfg.seed, make_deterministic=False)
print(f"=> Seed: {cfg.seed}")

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mmelekhin-aa-work[0m. Use [1m`wandb login --relogin`[0m to force relogin


=> Seed: 3121999


## Init dataloaders

In [7]:
# Sensor and annotation configs shared by all three dataset splits.
# NOTE(review): machine-specific absolute paths — adjust to your checkout.
sensors_cfg = OmegaConf.load("/home/docker_opr/OpenPlaceRecognition/configs/dataset/sensors_cfg/husky.yaml")
anno_cfg = OmegaConf.load("/home/docker_opr/OpenPlaceRecognition/configs/dataset/anno/oneformer.yaml")


def make_itlp_dataset(subset: Literal["train", "val", "test"]) -> "ITLPCampus":
    """Build the ITLPCampus dataset for one split.

    The three splits are configured identically apart from the split name, so
    they share this single constructor call instead of three copies.

    Args:
        subset: Split name. It is passed as `subset` and also selects the
            annotation file "<subset>.csv".

    Returns:
        An `ITLPCampus` instance rooted at `DATASET_ROOT`, using both cameras
        and the lidar, with `load_semantics=False` and `load_soc=True`.
    """
    return ITLPCampus(
        dataset_root=DATASET_ROOT,
        subset=subset,
        csv_file=f"{subset}.csv",
        sensors=["front_cam", "back_cam", "lidar"],
        load_semantics=False,
        load_soc=True,
        sensors_cfg=sensors_cfg,
        anno=anno_cfg,
    )


train_dataset = make_itlp_dataset("train")
val_dataset = make_itlp_dataset("val")
test_dataset = make_itlp_dataset("test")
# Optional: restrict the test split to specific tracks by uncommenting the line below.
# test_dataset.dataset_df = test_dataset.dataset_df[test_dataset.dataset_df["track"].isin(["05_2023-08-15-day", "07_2023-10-04-day"])].reset_index(drop=True)

# Find training rows whose back-camera mask PNG is missing on disk.
# `no_masks` collects the dataframe index labels to drop; `no_masks_filenames`
# keeps the matching file names so they can be inspected afterwards.
no_masks = []
no_masks_filenames = []

train_df = train_dataset.dataset_df
# Iterate over the two needed columns directly rather than `iterrows()`,
# which builds a full Series for every row.
for index, track, back_cam_ts in zip(train_df.index, train_df["track"], train_df["back_cam_ts"]):
    filename = f"{back_cam_ts}.png"
    mask_path = f"{DATASET_ROOT}/{track}/masks/back_cam/{filename}"
    if not Path(mask_path).exists():
        no_masks.append(index)
        no_masks_filenames.append(filename)

# Both operations stay in place so the dataset keeps the same dataframe object.
train_df.drop(index=no_masks, inplace=True)
# `drop=True` discards the old index. Without it, `reset_index` inserts the
# old labels as an extra "index" column that only the training dataframe
# would carry.
train_df.reset_index(drop=True, inplace=True)

# Batch samplers for the train and val splits, both built from the same
# `cfg.sampler` node and bound to their own dataset.
train_sampler = instantiate(cfg.sampler, dataset=train_dataset)
val_sampler = instantiate(cfg.sampler, dataset=val_dataset)

# Loader options that are identical for every split.
shared_loader_kwargs = dict(num_workers=cfg.num_workers, pin_memory=True)

# Train and val draw batches through their batch sampler. The test loader
# instead walks the dataset with a fixed batch size, taken from the sampler's
# `batch_size_limit`.
dataloaders = {
    "train": DataLoader(
        train_dataset,
        batch_sampler=train_sampler,
        collate_fn=train_dataset.collate_fn,
        **shared_loader_kwargs,
    ),
    "val": DataLoader(
        val_dataset,
        batch_sampler=val_sampler,
        collate_fn=val_dataset.collate_fn,
        **shared_loader_kwargs,
    ),
    "test": DataLoader(
        test_dataset,
        batch_size=cfg.sampler.batch_size_limit,
        collate_fn=test_dataset.collate_fn,
        **shared_loader_kwargs,
    ),
}

In [8]:
# Sanity check: number of rows in the test split's dataframe.
len(dataloaders["test"].dataset.dataset_df)

610

## Init loss

In [9]:
# Build the loss from the `loss` config node (its `_target_` names the class;
# the remaining keys are passed as constructor arguments).
loss_fn = instantiate(cfg.loss)

## Init model

In [10]:
# Build the model from the `model` config node.
model = instantiate(cfg.model)

# Load the pretrained NCLT checkpoint as the starting point for fine-tuning.
# `map_location="cpu"` makes loading independent of the device the checkpoint
# was saved from; `load_state_dict` then copies the tensors into the model's
# own parameters.
# NOTE(review): `torch.load` unpickles the file — only load checkpoints from
# a trusted source.
ckpt = torch.load(
    "/home/docker_opr/OpenPlaceRecognition/weights/place_recognition/multimodal_with_soc_outdoor_nclt.pth",
    map_location="cpu",
)
model.load_state_dict(ckpt)

INFO:faiss.loader:Loading faiss with AVX2 support.
INFO:faiss.loader:Could not load library with AVX2 support due to:
ModuleNotFoundError("No module named 'faiss.swigfaiss_avx2'")
INFO:faiss.loader:Loading faiss.
INFO:faiss.loader:Successfully loaded faiss.


<All keys matched successfully>

## Init optimizer and scheduler

In [11]:
# Build the optimizer from the `optimizer` config node over all model parameters.
optimizer = instantiate(cfg.optimizer, params=model.parameters())
# Build the LR scheduler from the `scheduler` config node, bound to that optimizer.
scheduler = instantiate(cfg.scheduler, optimizer=optimizer)

In [12]:
# W&B logging is on only when neither debug mode nor `wandb.disabled` is set —
# the same condition that guards `wandb.init`, written in De Morgan form.
log_to_wandb = not (cfg.debug or cfg.wandb.disabled)

# Bundle the model, loss, optimizer and scheduler into the multimodal trainer.
trainer = MultimodalPlaceRecognitionTrainer(
    model=model,
    loss_fn=loss_fn,
    optimizer=optimizer,
    scheduler=scheduler,
    modalities_weights=cfg.modalities_weights,
    batch_expansion_threshold=cfg.batch_expansion_threshold,
    checkpoints_dir=checkpoints_dir,
    wandb_log=log_to_wandb,
    device=cfg.device,
)

In [13]:
# Run fine-tuning for `cfg.epochs` epochs, passing all three loaders to the trainer.
trainer.train(
    epochs=cfg.epochs,
    train_dataloader=dataloaders["train"],
    val_dataloader=dataloaders["val"],
    test_dataloader=dataloaders["test"],
)

[32m2024-12-27 16:37:43.446[0m | [1mINFO    [0m | [36mopr.trainers.place_recognition.unimodal[0m:[36mtrain[0m:[36m113[0m - [1m=====> Epoch:   1/10:[0m
[32m2024-12-27 16:37:43.448[0m | [1mINFO    [0m | [36mopr.trainers.place_recognition.multimodal[0m:[36m_loop_epoch[0m:[36m34[0m - [1m=> Train stage:[0m
[32m2024-12-27 16:42:49.546[0m | [1mINFO    [0m | [36mopr.trainers.place_recognition.multimodal[0m:[36m_loop_epoch[0m:[36m83[0m - [1mTrain time: 05:06[0m
[32m2024-12-27 16:42:49.549[0m | [1mINFO    [0m | [36mopr.trainers.place_recognition.multimodal[0m:[36m_loop_epoch[0m:[36m84[0m - [1mTrain stats: {'image': {'loss': 0.20206407147955585, 'avg_embedding_norm': 4.351874698292125, 'num_triplets': 32.0, 'num_non_zero_triplets': 27.246753246753247, 'non_zero_rate': 0.851461038961039, 'max_pos_pair_dist': 0.9376843811629655, 'max_neg_pair_dist': 0.9481883799874937, 'mean_pos_pair_dist': 0.702401421286843, 'mean_neg_pair_dist': 0.7570957324721597, '

In [14]:
# Restore the weights stored in "best.pth" inside this run's checkpoint
# directory (presumably written by `trainer.train` — confirm against the trainer).
# `map_location="cpu"` keeps loading independent of the device the file was
# saved from; `load_state_dict` copies the tensors into the model's parameters.
# `torch.load` accepts path-like objects, so the path is passed without `str()`.
best_ckpt = torch.load(checkpoints_dir / "best.pth", map_location="cpu")
trainer.model.load_state_dict(best_ckpt["model_state_dict"])

<All keys matched successfully>

In [15]:
# Evaluate the restored weights on the test split; the trainer logs the
# resulting metrics (e.g. Mean Recall@N).
trainer.test(dataloaders["test"])

[32m2024-12-27 17:50:31.267[0m | [1mINFO    [0m | [36mopr.trainers.place_recognition.unimodal[0m:[36mtest[0m:[36m172[0m - [1m=> Test stage:[0m
[32m2024-12-27 17:51:41.558[0m | [34m[1mDEBUG   [0m | [36mopr.trainers.place_recognition.unimodal[0m:[36mtest[0m:[36m194[0m - [34m[1mTest embeddings: (610, 768)[0m
[32m2024-12-27 17:51:41.844[0m | [1mINFO    [0m | [36mopr.trainers.place_recognition.unimodal[0m:[36mtest[0m:[36m235[0m - [1mTest time: 01:10[0m
[32m2024-12-27 17:51:41.847[0m | [1mINFO    [0m | [36mopr.trainers.place_recognition.unimodal[0m:[36mtest[0m:[36m236[0m - [1mMean Recall@N:
[0.99619714 0.99891757 1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.        ][0m
[32m2024-12-27 17:51:41.848[0m | [1mINFO    [0m | [36mopr.trainers.place_recognition.unimodal[0m:[3