In [1]:
%load_ext autoreload
%autoreload 2

from pathlib import Path
from pprint import pformat
import sys
sys.path.append('../')

from hloc import extract_features, match_features, localize_dma, visualization, pairs_from_exhaustive

# Pipeline for indoor localization

## Setup
Here we declare the paths to the dataset and the image pairs, and we choose the feature extractor and the matcher. This notebook follows the structure of the [InLoc](https://www.visuallocalization.net/datasets/) pipeline but runs on a custom dataset: put your data in `datasets/<data_folder>/` (here `datasets/drone_2/`), or change the path.

In [2]:
# Name of the dataset folder under ../datasets; the same name is used under ../outputs.
data_folder = "drone_2"

# Input locations -- change these if your dataset is somewhere else.
dataset = Path('../datasets') / data_folder
images = dataset / 'images'

# Everything this notebook produces is saved below this directory.
outputs = Path('../outputs') / data_folder
results = outputs / 'InLoc_hloc_superpoint+superglue_netvlad40.txt'  # the result file

loc_pairs = outputs / 'pairs-query.txt'  # query/reference image pairs used for matching and localization
features = outputs / 'features.h5'  # local features of reference and query images
matches = outputs / 'matches.h5'  # matches for the pairs listed in loc_pairs

In [3]:
# Show the standard configurations shipped with hloc, one titled block per kind.
for title, available_confs in (
    ('Configs for feature extractors:', extract_features.confs),
    ('Configs for feature matchers:', match_features.confs),
):
    print(title, pformat(available_confs), sep='\n')

Configs for feature extractors:
{'d2net-ss': {'model': {'multiscale': False, 'name': 'd2net'},
              'output': 'feats-d2net-ss',
              'preprocessing': {'grayscale': False, 'resize_max': 1600}},
 'dir': {'model': {'name': 'dir'},
         'output': 'global-feats-dir',
         'preprocessing': {'resize_max': 1024}},
 'disk': {'model': {'max_keypoints': 5000, 'name': 'disk'},
          'output': 'feats-disk',
          'preprocessing': {'grayscale': False, 'resize_max': 1600}},
 'netvlad': {'model': {'name': 'netvlad'},
             'output': 'global-feats-netvlad',
             'preprocessing': {'resize_max': 1024}},
 'openibl': {'model': {'name': 'openibl'},
             'output': 'global-feats-openibl',
             'preprocessing': {'resize_max': 1024}},
 'r2d2': {'model': {'max_keypoints': 5000, 'name': 'r2d2'},
          'output': 'feats-r2d2-n5000-r1024',
          'preprocessing': {'grayscale': False, 'resize_max': 1024}},
 'sift': {'model': {'name': 'dog'},
    

In [4]:
# Choose the extraction and matching configurations by name
# (any key printed in the listing above works; a hand-written dict is fine too).
feature_conf_name, matcher_conf_name = 'superpoint_inloc', 'superglue'
feature_conf = extract_features.confs[feature_conf_name]
matcher_conf = match_features.confs[matcher_conf_name]

## Extract local features for database and query images

In [5]:
# Reference (database) images: every file directly under images/mapping/altitude_4/,
# stored as POSIX-style paths relative to `images`.
# Path.iterdir() yields entries in arbitrary order, so the list is sorted to keep
# the image list (and the pairs later derived from it) reproducible across runs.
# Sub-directories are skipped because only image files can be fed to the extractor.
reference_dir = images / 'mapping/altitude_4/'
references = sorted(
    p.relative_to(images).as_posix()
    for p in reference_dir.iterdir()
    if p.is_file()
)

# Extract local features for the reference images into the shared feature file.
# The call is rebound to `features`; the saved output shows it evaluates to the
# same features.h5 path, so re-running this cell is harmless.
features = extract_features.main(feature_conf, images,
                                 image_list=references,
                                 feature_path=features)
print(features)

[2022/12/17 14:20:53 hloc INFO] Extracting local features with configuration:
{'model': {'max_keypoints': 4096, 'name': 'superpoint', 'nms_radius': 4},
 'output': 'feats-superpoint-n4096-r1600',
 'preprocessing': {'grayscale': True, 'resize_max': 1600}}


Loaded SuperPoint model


100%|███████████████████████████████████████████████████| 12/12 [00:02<00:00,  5.70it/s]
[2022/12/17 14:20:57 hloc INFO] Finished exporting features.


../outputs/drone_2/features.h5


## Match the query images
Here the localization pairs are generated exhaustively with `hloc/pairs_from_exhaustive.py`: every query image is paired with every reference image. To generate pairs from global descriptors (e.g. NetVLAD retrieval) instead, have a look at `hloc/pairs_from_retrieval.py`. These pairs are also used for the localization - see below.

In [6]:
# Query images: every file directly under images/query/, as POSIX-style paths
# relative to `images`. Sorted because Path.iterdir() yields entries in arbitrary
# order and the pair file below is written from this list; sub-directories are skipped.
query_dir = images / 'query/'
queries = sorted(
    p.relative_to(images).as_posix()
    for p in query_dir.iterdir()
    if p.is_file()
)

# Add the query features to the same feature file that holds the reference features.
features = extract_features.main(feature_conf, images,
                                 image_list=queries,
                                 feature_path=features)

# Pair every query with every reference image and write the pairs to `loc_pairs`.
pairs_from_exhaustive.main(loc_pairs, image_list=queries, ref_list=references)

# Match the local features of each pair; results are stored in `matches`.
print(loc_pairs, features, matches)
match_features.main(matcher_conf, loc_pairs, features=features, matches=matches)

[2022/12/17 14:21:02 hloc INFO] Extracting local features with configuration:
{'model': {'max_keypoints': 4096, 'name': 'superpoint', 'nms_radius': 4},
 'output': 'feats-superpoint-n4096-r1600',
 'preprocessing': {'grayscale': True, 'resize_max': 1600}}


Loaded SuperPoint model


100%|█████████████████████████████████████████████████████| 6/6 [00:00<00:00,  7.11it/s]
[2022/12/17 14:21:02 hloc INFO] Finished exporting features.
[2022/12/17 14:21:02 hloc INFO] Found 72 pairs.
[2022/12/17 14:21:02 hloc INFO] Matching local features with configuration:
{'model': {'name': 'superglue',
           'sinkhorn_iterations': 50,
           'weights': 'outdoor'},
 'output': 'matches-superglue'}


../outputs/drone_2/pairs-query.txt ../outputs/drone_2/features.h5 ../outputs/drone_2/matches.h5
Loaded SuperGlue model ("outdoor" weights)


100%|███████████████████████████████████████████████████| 72/72 [00:08<00:00,  8.29it/s]
[2022/12/17 14:21:12 hloc INFO] Finished exporting matches.


PosixPath('../outputs/drone_2/matches.h5')

## Localize!
Perform hierarchical localization using the precomputed pairs and matches. Unlike when localizing with Aachen, we do not need a 3D SfM model here: the dataset already has 3D lidar scans. The file `InLoc_hloc_superpoint+superglue_netvlad40.txt` will contain the estimated query poses.

In [13]:
# Run the localization step on the pairs, local features and matches computed
# above; `results` is the path of the result file declared in the setup cell.
# NOTE(review): the last saved run of this cell failed inside localize_dma with
# "ValueError: cannot reshape array of size 9 into shape (4,4)" right after
# printing ../datasets/drone_2/images/poses/0_camera_info.txt -- presumably that
# file holds a 3x3 matrix where a 4x4 pose is expected; confirm in localize_dma.
localize_dma.main(
    dataset, loc_pairs, features, matches, results,
    skip_matches=20)  # skip database images with too few matches

[2022/12/17 14:27:28 hloc INFO] Starting localization...


['query/altitude=4.0_01361.jpg', 'query/altitude=4.0_01362.jpg', 'query/altitude=4.0_01363.jpg', 'query/altitude=4.0_01364.jpg', 'query/altitude=4.0_01365.jpg', 'query/altitude=4.0_01366.jpg']


  0%|                                                             | 0/6 [00:00<?, ?it/s]

../datasets/drone_2/images
0 0
../datasets/drone_2/images/poses/0_camera_info.txt





ValueError: cannot reshape array of size 9 into shape (4,4)

## Visualization
We parse the localization logs and for each query image plot matches and inliers with a few database images.

In [None]:
# Use the inline backend so the figures are embedded as static images.
%matplotlib inline
# Plot matches and inliers from the localization result file.
# presumably n is the number of queries to draw and top_k_db the number of
# database images shown per query -- TODO confirm against hloc.visualization.
visualization.visualize_loc(results, images, n=len(queries), top_k_db=1, seed=2)

## Visualize camera poses

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def get_query_camera_poses(result_path):
    """Parse the estimated query poses from a localization result file.

    The file is read as a sequence of records: a line whose first
    whitespace-separated token is the query name, followed by a line holding
    the 16 numbers of a row-major 4x4 pose matrix. Blank lines are ignored.

    Lines are classified by whether every token parses as a float rather than
    by letter case, so mixed-case names (e.g. ``query/DJI_0057.JPG``) and
    numbers in scientific notation (e.g. ``1e-05``) are both handled.

    Args:
        result_path: path of the result file to read.

    Returns:
        dict mapping each query name to a dict with
            "Rt": the full (4, 4) matrix,
            "R":  its upper-left (3, 3) block,
            "t":  the first three entries of its last column.
        A name that is not followed by a pose line maps to an empty dict.

    Raises:
        ValueError: if a purely numeric line does not contain exactly 16
            values, or if a pose line appears before any query name.
    """
    query_poses = {}
    q_name = None
    with open(result_path) as file:
        for line_number, line in enumerate(file, start=1):
            tokens = line.split()
            if not tokens:
                continue  # tolerate blank lines, including a trailing one

            try:
                values = [float(token) for token in tokens]
            except ValueError:
                values = None  # at least one non-numeric token: a name line

            if values is None:
                q_name = tokens[0]
                query_poses[q_name] = {}
                continue

            if len(values) != 16:
                raise ValueError(
                    f"{result_path}:{line_number}: expected 16 values for a "
                    f"4x4 pose, got {len(values)}")
            if q_name is None:
                raise ValueError(
                    f"{result_path}:{line_number}: pose found before any query name")

            Rt = np.array(values).reshape((4, 4))
            query_poses[q_name]["Rt"] = Rt
            query_poses[q_name]["R"] = Rt[:3, :3]
            query_poses[q_name]["t"] = Rt[:3, -1]
    return query_poses

gallery_camera_poses = localize_dma.get_all_camera_poses(images)
query_est_poses = get_query_camera_poses(results)

gal_translations = np.array([gallery_camera_poses[key]["translation"] for key in gallery_camera_poses.keys()])
q_translations = np.array([query_est_poses[key]["t"] for key in query_est_poses.keys()])
#print(gallery_camera_poses)
#print(q_translations)

%matplotlib notebook

gx = gal_translations[:, 0]
gy = gal_translations[:, 1]
gz = gal_translations[:, 2]

qx = q_translations[:, 0]
qy = q_translations[:, 1]
qz = q_translations[:, 2]

fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(projection='3d')

ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z')
ax.scatter(gx, gz, gy, color="b")
ax.scatter(qx, qz, qy, color="r")
plt.show()


In [None]:
# List the matplotlib backends available to this kernel (handy when choosing
# between the `inline` and `notebook` backends used by the plotting cells).
%matplotlib --list

In [None]:
from hloc.utils.camera_pose_visualizer import CameraPoseVisualizer

%matplotlib notebook

# Gallery extrinsics, ordered by sorted image key.
keys = np.array(list(gallery_camera_poses.keys()))
sorted_keys = np.sort(keys)
print(sorted_keys)
gal_ext = np.array([gallery_camera_poses[key]["worldPose"] for key in sorted_keys])

# Estimated query extrinsics, in the order they were read from the result file.
q_ext = np.array([pose["Rt"] for pose in query_est_poses.values()])

# arguments: the minimum/maximum value of x, y and z
visualizer = CameraPoseVisualizer([-4, 3], [-6, 2], [-1, 1])

# Draw one pyramid per camera: gallery cameras in cyan, then query cameras in red.
# A Y/Z axis flip (ext @ diag(1, -1, -1, 1)) was tried here previously and is
# left disabled.
for header, extrinsics, color in ((None, gal_ext, 'c'), ("\nQUERY POSES:", q_ext, 'r')):
    if header is not None:
        print(header)
    for ext in extrinsics:
        print(ext)
        # arguments: extrinsic matrix, color, then two 0.5 scale values
        visualizer.extrinsic2pyramid(ext, color, 0.5, 0.5)

visualizer.show()

In [None]:
from platform import python_version

# Record the interpreter version this notebook was run with.
print(python_version())

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.patches as patches
from matplotlib.patches import ConnectionPatch

import cv2

# --- Configuration -----------------------------------------------------------
img_idx = '00'
# Path of the query image as it is written in the PatchNetVLAD predictions file.
img_path = f'./images/mapping/rgb_1920x1440_000{img_idx}.jpeg'
query_image_file = f'../datasets/drone_2/images/mapping/rgb_1920x1440_000{img_idx}.jpeg'
result_path = "../third_party/PatchNetVLAD/patchnetvlad/results/drone_2/PatchNetVLAD_predictions.txt"

kp_height, kp_width = 480, 640          # (height, width) the inlier coordinates are rescaled from
patch_sizes = (2, 5, 8)                 # patch side lengths, in the same 640x480 frame
patch_colors = ('r', 'c', 'g')          # box/marker colour per patch size
line_colors = ('red', 'cyan', 'green')  # connection colour per patch size
top_k = 3                               # number of retrieved images to visualise


# --- Helpers -----------------------------------------------------------------
def _image_size(path):
    """Return (height, width) of the image at `path`; raise if it cannot be read."""
    image = cv2.imread(path)
    if image is None:  # cv2.imread signals failure by returning None, not by raising
        raise FileNotFoundError(f'Could not read image: {path}')
    return image.shape[:2]


def _scaled_patch_sizes(height, width):
    """Return one (patch_height, patch_width) row per patch size, scaled to an image size."""
    return np.array([[int((size / kp_height) * height), int((size / kp_width) * width)]
                     for size in patch_sizes])


def _rescale_keypoints(keypoints, height, width):
    """Map keypoints from the kp_width x kp_height frame to (row, col) pixels of an image.

    Always returns an (n, 2) integer array, including for n == 0, so the result
    can be column-indexed safely.
    """
    scaled = [[int((kp[1] / kp_height) * height), int((kp[0] / kp_width) * width)]
              for kp in keypoints]
    return np.array(scaled, dtype=int).reshape(-1, 2)


def _draw_patches(ax, points_per_scale, sizes_per_scale, marker):
    """Draw a box and a marker for every point; return the (x, y) positions per scale."""
    xy_per_scale = []
    for points, (patch_h, patch_w), color in zip(points_per_scale, sizes_per_scale, patch_colors):
        for row, col in points:
            # Rectangle takes its lower-left corner, so shift by half the patch size
            # to centre the box on the point.
            ax.add_patch(patches.Rectangle((col - patch_w / 2, row - patch_h / 2), patch_w, patch_h,
                                           linewidth=1, edgecolor=color, facecolor='none'))
        xy_per_scale.append([(col, row) for row, col in points])
    for points, color in zip(points_per_scale, patch_colors):
        ax.scatter(points[:, 1], points[:, 0], marker=marker, c=color, s=2)
    return xy_per_scale


# --- Load the inliers and the retrieval results ------------------------------
# NOTE: allow_pickle=True unpickles arbitrary objects; only load .npy files you
# generated yourself or otherwise trust.
query_inliers = np.load(f"../third_party/PatchNetVLAD/patchnetvlad/inlier_indices/drone_2/query/inlier_indices_rgb_1920x1440_000{img_idx}.npy", allow_pickle=True)
print(query_image_file)
qheight, qwidth = _image_size(query_image_file)

# Collect the image names retrieved for this query, in file order.
db_images = []
with open(result_path) as file:
    for line in file:
        if img_path in line:
            retrieved_path = line.rstrip().split(", ")[1]
            db_images.append(retrieved_path.split("/")[-1].replace(".jpg", ""))
print(db_images)

top_db_images = db_images[:top_k]
# The inlier file depends only on img_idx, so it is loaded once rather than per
# retrieved image, and not at all when nothing was retrieved.
db_inliers = (
    np.load(f"../third_party/PatchNetVLAD/patchnetvlad/inlier_indices/drone_2/index/inlier_indices_rgb_1920x1440_000{img_idx}.npy", allow_pickle=True)
    if top_db_images else None
)

# --- One figure per retrieved image ------------------------------------------
for index, img_name in enumerate(top_db_images):
    db_image_file = f'../datasets/drone_2/images/query/{img_name}.jpg'
    print(db_image_file)
    dbheight, dbwidth = _image_size(db_image_file)

    print(db_inliers.shape)

    resized_db_path_sizes = _scaled_patch_sizes(dbheight, dbwidth)
    resized_q_path_sizes = _scaled_patch_sizes(qheight, qwidth)
    print(resized_q_path_sizes)
    print(resized_db_path_sizes)
    print(img_name)

    # Entry `index` of each inlier array holds one point set per patch size.
    q_points = [query_inliers[index][scale] for scale in range(len(patch_sizes))]
    db_points = [db_inliers[index][scale] for scale in range(len(patch_sizes))]

    print(*(points.shape for points in q_points))
    print(*(points.shape for points in db_points))

    # Skip this image when any patch size has no query inliers.
    if any(points.shape[0] == 0 for points in q_points):
        continue

    # Convert the points from the 640x480 frame to each image's own resolution.
    resized_q = [_rescale_keypoints(points, qheight, qwidth) for points in q_points]
    resized_db = [_rescale_keypoints(points, dbheight, dbwidth) for points in db_points]

    fig, axs = plt.subplots(1, 2, figsize=(8, 6))

    axs[0].set_title(f'query: rgb_1920x1440_000{img_idx}', fontsize=8)
    axs[0].imshow(mpimg.imread(query_image_file))
    xy_query = _draw_patches(axs[0], resized_q, resized_q_path_sizes, marker="o")

    axs[1].set_title(f'db: {img_name}', fontsize=8)
    axs[1].imshow(mpimg.imread(db_image_file))
    xy_db = _draw_patches(axs[1], resized_db, resized_db_path_sizes, marker=",")

    for ax in axs:
        ax.set_xticks([])
        ax.set_yticks([])

    plt.tight_layout()

    # Connect the i-th query point to the i-th database point at each patch size.
    # zip() stops at the shorter list, so unequal inlier counts cannot raise IndexError.
    for xy_q, xy_d, color in zip(xy_query, xy_db, line_colors):
        for xy_a, xy_b in zip(xy_q, xy_d):
            con = ConnectionPatch(linewidth=0.5, xyA=xy_a, xyB=xy_b, coordsA="data", coordsB="data",
                                  axesA=axs[0], axesB=axs[1], color=color)
            axs[1].add_artist(con)

In [None]:
# import importlib  
# pnvlad_feature_extract = importlib.import_module("third_party.Patch-NetVLAD.feature_extract")

# pnvlad_feature_extract()

#sys.path.append(str('../third_party/PatchNetVLAD'))
#%pip install faiss-gpu


#from third_party.PatchNetVLAD import feature_extract as pnv_feature_extract


#pnv_feature_extract.main()

#python feature_extract.py \
#  --config_path patchnetvlad/configs/performance.ini \
#  --dataset_file_path=pitts30k_imageNames_index.txt \
#  --dataset_root_dir=/path/to/your/pitts/dataset \
#  --output_features_dir patchnetvlad/output_features/pitts30k_index