# Demo EfficientLoFTR on a single pair of images

This notebook shows how to use the eloftr matcher with different model type and numerical precision on the pretrained weights.

In [1]:
%load_ext autoreload
%autoreload 2

## Outdoor Example

We recommend using our pre-trained model for input in outdoor environments because our model has only been trained on MegaDepth, and there exists a domain gap between indoor and outdoor data.

In [2]:
# Load example images
import os
from copy import deepcopy
import torch
import cv2
import numpy as np
import matplotlib.cm as cm
from src_eloftr.utils.plotting import make_matching_figure
from src_eloftr.loftr import LoFTR, full_default_cfg, opt_default_cfg, reparameter
from PIL import Image


# You can choose model type in ['full', 'opt']
model_type = 'full' # 'full' for best quality, 'opt' for best efficiency

# You can choose numerical precision in ['fp32', 'mp', 'fp16']. 'fp16' for best efficiency
precision = 'fp16' # Enjoy near-lossless precision with Mixed Precision (MP) / FP16 computation if you have a modern GPU (recommended NVIDIA architecture >= SM_70).

# You can also change the default values like thr. and npe (based on input image size)

if model_type == 'full':
    _default_cfg = deepcopy(full_default_cfg)
elif model_type == 'opt':
    _default_cfg = deepcopy(opt_default_cfg)
    
if precision == 'mp':
    _default_cfg['mp'] = True
elif precision == 'fp16':
    _default_cfg['half'] = True
    
print(_default_cfg)
matcher = LoFTR(config=_default_cfg)

matcher.load_state_dict(torch.load("src_eloftr/weights/eloftr_outdoor.ckpt")['state_dict'])
matcher = reparameter(matcher) # no reparameterization will lead to low performance

if precision == 'fp16':
    matcher = matcher.half()

matcher = matcher.eval().cuda()

{'backbone_type': 'RepVGG', 'align_corner': False, 'resolution': (8, 1), 'fine_window_size': 8, 'mp': False, 'replace_nan': True, 'half': True, 'backbone': {'block_dims': [64, 128, 256]}, 'coarse': {'d_model': 256, 'd_ffn': 256, 'nhead': 8, 'layer_names': ['self', 'cross', 'self', 'cross', 'self', 'cross', 'self', 'cross'], 'agg_size0': 4, 'agg_size1': 4, 'no_flash': False, 'rope': True, 'npe': [832, 832, 832, 832]}, 'match_coarse': {'thr': 0.2, 'border_rm': 2, 'dsmax_temperature': 0.1, 'skip_softmax': False, 'fp16matmul': False, 'train_coarse_percent': 0.2, 'train_pad_num_gt_min': 200}, 'match_fine': {'local_regress_temperature': 10.0, 'local_regress_slicedim': 8}}


In [9]:
def extract_correspondences_original(img0_pth, img1_pth, matcher=matcher):

    img0_raw = cv2.imread(img0_pth, cv2.IMREAD_GRAYSCALE)
    img1_raw = cv2.imread(img1_pth, cv2.IMREAD_GRAYSCALE)

    # img0_raw = cv2.resize(img0_raw, (img0_raw.shape[1]//32*32, img0_raw.shape[0]//32*32))  # input size shuold be divisible by 32
    # img1_raw = cv2.resize(img1_raw, (img1_raw.shape[1]//32*32, img1_raw.shape[0]//32*32))
    img0_raw = cv2.resize(img0_raw, (224, 224))  # input size shuold be divisible by 32
    img1_raw = cv2.resize(img1_raw, (224, 224))

    if precision == 'fp16':
        img0 = torch.from_numpy(img0_raw)[None][None].half().cuda() / 255.
        img1 = torch.from_numpy(img1_raw)[None][None].half().cuda() / 255.
        # img1 = torch.concat((img1, img1), 0)
        # img0 = torch.from_numpy(img0_raw)[None].half().permute(0,3,1,2).cuda() / 255. # self.half() is equivalent to self.to(torch.float16)
        # img1 = torch.from_numpy(img1_raw)[None].half().permute(0,3,1,2).cuda() / 255.

    batch = {'image0': img0, 'image1': img1}

    # Inference with EfficientLoFTR and get prediction
    with torch.no_grad():
        if precision == 'mp':
            with torch.autocast(enabled=True, device_type='cuda'):
                matcher(batch)
        else:
            matcher(batch)
        mkpts0 = batch['mkpts0_f'].cpu().numpy()
        mkpts1 = batch['mkpts1_f'].cpu().numpy()
        mconf = batch['mconf'].cpu().numpy()
    
    # if model_type == 'opt':
    #     print(mconf.max())
    #     mconf = (mconf - min(20.0, mconf.min())) / (max(30.0, mconf.max()) - min(20.0, mconf.min()))

    # color = cm.jet(mconf)
    # text = [
    #     'LoFTR',
    #     'Matches: {}'.format(len(mkpts0)),
    # ]

    # fig = make_matching_figure(img0_raw, img1_raw, mkpts0, mkpts1, color, text=text)

    return len(mkpts0) # number of corresponeces between 2 images


def extract_correspondences(img0_pth, img1_pth, matcher=matcher):
    '''
    input templates as rgb images ( not transform) and sam proposals as rgb image
    output: max number of corres outof 42 templates- jsut 42 templates to save time
    '''

    img0_raw = torch.tensor(np.array(Image.open(img0_pth))/255.0).permute(2,0,1).cuda()
    img1_raw = torch.tensor(np.array(Image.open(img1_pth))/255.0).permute(2,0,1).cuda()

    img0_raw = 0.2989 * img0_raw[0] + 0.5870 * img0_raw[1] + 0.114 * img0_raw[2]
    img1_raw = 0.2989 * img1_raw[0] + 0.5870 * img1_raw[1] + 0.114 * img1_raw[2]

    # img0_raw = cv2.imread(img0_pth, cv2.IMREAD_GRAYSCALE)
    # img1_raw = cv2.imread(img1_pth, cv2.IMREAD_GRAYSCALE)

    # img0_raw = cv2.resize(img0_raw, (img0_raw.shape[1]//32*32, img0_raw.shape[0]//32*32))  # input size shuold be divisible by 32
    # img1_raw = cv2.resize(img1_raw, (img1_raw.shape[1]//32*32, img1_raw.shape[0]//32*32))

    if precision == 'fp16':
        img0 = img0_raw[None][None].half() # self.half() is equivalent to self.to(torch.float16)
        # img0 = torch.concat((img0, img0), 0)
        img1 = img1_raw[None][None].half()
        # img0 = torch.concatenate((img0, img0), dim = 0)
        # img1 = torch.concatenate((img1, img1), dim = 0)
        # img1 = torch.concat((img1, img1), 0)
        # img0 = torch.from_numpy(img0_raw)[None].half().permute(0,3,1,2).cuda() / 255. # self.half() is equivalent to self.to(torch.float16)
        # img1 = torch.from_numpy(img1_raw)[None].half().permute(0,3,1,2).cuda() / 255.

    batch = {'image0': img0, 'image1': img1}

    # Inference with EfficientLoFTR and get prediction
    with torch.no_grad():
        if precision == 'mp':
            with torch.autocast(enabled=True, device_type='cuda'):
                matcher(batch)
        else:
            matcher(batch)
        mkpts0 = batch['mkpts0_f'].cpu().numpy()
        mkpts1 = batch['mkpts1_f'].cpu().numpy()
        mconf = batch['mconf'].cpu().numpy()
    
    # if model_type == 'opt':
    #     print(mconf.max())
    #     mconf = (mconf - min(20.0, mconf.min())) / (max(30.0, mconf.max()) - min(20.0, mconf.min()))

    # color = cm.jet(mconf)
    # text = [
    #     'LoFTR',
    #     'Matches: {}'.format(len(mkpts0)),
    # ]

    # fig = make_matching_figure(np.array(Image.open(img0_pth)), np.array(Image.open(img1_pth)), mkpts0, mkpts1, color, text=text)

    return len(mkpts0) # number of corresponeces between 2 images


def extract_correspondences_batches(img0_pth_list, img1_pth_list, matcher=matcher):
    '''
    input templates as rgb images ( not transform) and sam proposals as rgb image
    output: max number of corres outof 42 templates- jsut 42 templates to save time
    '''

    img0_list = list()
    img1_list = list()
    for img0_pth in img0_pth_list:
        img0_raw = torch.tensor(np.array(Image.open(img0_pth))/255.0).permute(2,0,1).cuda()
        img0_raw = 0.2989 * img0_raw[0] + 0.5870 * img0_raw[1] + 0.114 * img0_raw[2]
        img0_list.append(img0_raw[None].half())

    for img1_pth in img1_pth_list:
        img1_raw = torch.tensor(np.array(Image.open(img1_pth))/255.0).permute(2,0,1).cuda()
        img1_raw = 0.2989 * img1_raw[0] + 0.5870 * img1_raw[1] + 0.114 * img1_raw[2]
        img1_list.append(img1_raw[None].half())

    # img0_raw = cv2.imread(img0_pth, cv2.IMREAD_GRAYSCALE)
    # img1_raw = cv2.imread(img1_pth, cv2.IMREAD_GRAYSCALE)

    # img0_raw = cv2.resize(img0_raw, (img0_raw.shape[1]//32*32, img0_raw.shape[0]//32*32))  # input size shuold be divisible by 32
    # img1_raw = cv2.resize(img1_raw, (img1_raw.shape[1]//32*32, img1_raw.shape[0]//32*32))


    batch = {'image0': img0, 'image1': img1}

    # Inference with EfficientLoFTR and get prediction
    with torch.no_grad():
        if precision == 'mp':
            with torch.autocast(enabled=True, device_type='cuda'):
                matcher(batch)
        else:
            matcher(batch)
        mkpts0 = batch['mkpts0_f'].cpu().numpy()
        mkpts1 = batch['mkpts1_f'].cpu().numpy()
        mconf = batch['mconf'].cpu().numpy()
    
    # if model_type == 'opt':
    #     print(mconf.max())
    #     mconf = (mconf - min(20.0, mconf.min())) / (max(30.0, mconf.max()) - min(20.0, mconf.min()))

    # color = cm.jet(mconf)
    # text = [
    #     'LoFTR',
    #     'Matches: {}'.format(len(mkpts0)),
    # ]

    # fig = make_matching_figure(np.array(Image.open(img0_pth)), np.array(Image.open(img1_pth)), mkpts0, mkpts1, color, text=text)

    return len(mkpts0) # number of corresponeces between 2 images


def extract_correspondences_original_batches(img0_pth_list, img1_pth_list, matcher=matcher):

    img0_list = list()
    img1_list = list()
    for img0_pth in img0_pth_list:
        img0_raw = cv2.imread(img0_pth, cv2.IMREAD_GRAYSCALE)
        # img0_raw = cv2.resize(img0_raw, (img0_raw.shape[1]//32*32, img0_raw.shape[0]//32*32))  # input size shuold be divisible by 32
        img0_raw = cv2.resize(img0_raw,(224,224))
        img0_raw = torch.from_numpy(img0_raw)[None].half().cuda() / 255.
        img0_list.append(img0_raw)

    for img1_pth in img1_pth_list:
        img1_raw = cv2.imread(img1_pth, cv2.IMREAD_GRAYSCALE)
        # img1_raw = cv2.resize(img1_raw, (img1_raw.shape[1]//32*32, img1_raw.shape[0]//32*32))
        img1_raw = cv2.resize(img1_raw, (224,224))
        img1_raw = torch.from_numpy(img1_raw)[None].half().cuda() / 255.
        img1_list.append(img1_raw)    

    img0 = torch.stack(img0_list)
    img1 = torch.stack(img1_list)

    batch = {'image0': img0, 'image1': img1}

    # Inference with EfficientLoFTR and get prediction
    with torch.no_grad():
        if precision == 'mp':
            with torch.autocast(enabled=True, device_type='cuda'):
                matcher(batch)
        else:
            matcher(batch)
        mkpts0 = batch['mkpts0_f'].cpu().numpy()
        mkpts1 = batch['mkpts1_f'].cpu().numpy()
        mconf = batch['mconf'].cpu().numpy()
    
    # if model_type == 'opt':
    #     print(mconf.max())
    #     mconf = (mconf - min(20.0, mconf.min())) / (max(30.0, mconf.max()) - min(20.0, mconf.min()))

    # color = cm.jet(mconf)
    # text = [
    #     'LoFTR',
    #     'Matches: {}'.format(len(mkpts0)),
    # ]

    # fig = make_matching_figure(img0_raw, img1_raw, mkpts0, mkpts1, color, text=text)

    return len(mkpts0) # number of corresponeces between 2 images

In [18]:
import glob
import os
import numpy as np
# Load example images

# crops = sorted(glob.glob("/home/cuong.van-dam/CuongVanDam/do_an_tot_nghiep/cnos/foundpose_analysis/daoliuzhao/crops/*.png"))
crops = ["foundpose_analysis/daoliuzhao/crops/crop1.png", "foundpose_analysis/daoliuzhao/crops/crop2.png", "foundpose_analysis/daoliuzhao/crops/crop3.png"]
templates = sorted(glob.glob("/home/cuong.van-dam/CuongVanDam/do_an_tot_nghiep/cnos/foundpose_analysis/daoliuzhao/templates/train_pbr/obj_000001_original/*.png"))

used_templates = templates[:3]
num_correspondences = extract_correspondences_original_batches(crops, used_templates)
num_correspondences
# print(f"max num correpodences: {max(num_corres_temp_list)}")
# print(f"top 5 max num correpodences: {num_corres_temp_list[np.argsort(num_corres_temp_list)][-5:]}")


60

In [14]:
import glob
import os
import numpy as np
# Load example images

# crops = sorted(glob.glob("/home/cuong.van-dam/CuongVanDam/do_an_tot_nghiep/cnos/foundpose_analysis/daoliuzhao/crops/*.png"))
# crops = sorted(glob.glob("/home/cuong.van-dam/CuongVanDam/do_an_tot_nghiep/cnos/foundpose_analysis/daoliuzhao/crops/*.png"))
crops = ["foundpose_analysis/daoliuzhao/crops/crop1.png", "foundpose_analysis/daoliuzhao/crops/crop2.png", "foundpose_analysis/daoliuzhao/crops/crop3.png"]
templates = sorted(glob.glob("/home/cuong.van-dam/CuongVanDam/do_an_tot_nghiep/cnos/foundpose_analysis/daoliuzhao/templates/train_pbr/obj_000001_original/*.png"))

used_templates = templates[:3]
num_corres_list = list()

for i in range(len(crops)):
    print("crop name: ", crops[i].split("/")[-1])
    num_corres_temp = extract_correspondences_original(crops[i], used_templates[i], matcher=matcher)
    print(f"Num correpodences: {num_corres_temp}")


crop name:  crop1.png
Num correpodences: 10
crop name:  crop2.png
Num correpodences: 38
crop name:  crop3.png
Num correpodences: 12


In [None]:
import glob
import os
import numpy as np
# Load example images

# crops = sorted(glob.glob("/home/cuong.van-dam/CuongVanDam/do_an_tot_nghiep/cnos/foundpose_analysis/daoliuzhao/crops/*.png"))
crops = ["/home/cuong.van-dam/CuongVanDam/do_an_tot_nghiep/cnos/xoa_3.png"]
templates = sorted(glob.glob("/home/cuong.van-dam/CuongVanDam/do_an_tot_nghiep/cnos/foundpose_analysis/daoliuzhao/templates/train_pbr/obj_000001_original/*.png"))

num_corres_list = list()

for crop in crops[0:1]:
    print("crop name: ", crop.split("/")[-1])
    num_corres_temp_list = np.array([extract_correspondences(crop, temp, matcher=matcher) for temp in templates])
    print(f"max num correpodences: {max(num_corres_temp_list)}")
    print(f"top 5 max num correpodences: {num_corres_temp_list[np.argsort(num_corres_temp_list)][-5:]}")


crop name:  xoa_3.png
max num correpodences: 82
top 5 max num correpodences: [50 54 56 64 82]


In [10]:
original_list = [1, 5, 7, 9, 4, 10, 11]

# Get the ranks of the elements in the original list
ranks = sorted(range(len(original_list)), key=lambda i: -original_list[i]) 
ranks = [rank + 1 for rank in ranks]

print(original_list)
print(ranks)

[1, 5, 7, 9, 4, 10, 11]
[7, 6, 4, 3, 2, 5, 1]


In [1]:
'''
also check eloftetr if it has better features- then can test it with cnos to see if we have better matching
other wirse just based the numer of corres to rank again- or to put weight on the scores
'''

# to do now is to load input the same way as in cnos- input as templates of rgb and sam proposals based on the detections not the mask_images- 

'\nalso check eloftetr if it has better features- then can test it with cnos to see if we have better matching\nother wirse just based the numer of corres to rank again- or to put weight on the scores\n'

crop name:  xoa_3.png


RuntimeError: The size of tensor a (224) must match the size of tensor b (3) at non-singleton dimension 0

In [6]:
import glob
import os
import numpy as np
# Load example images

crops = sorted(glob.glob("/home/cuong.van-dam/CuongVanDam/do_an_tot_nghiep/cnos/foundpose_analysis/daoliuzhao/crops/*.png"))
templates = sorted(glob.glob("/home/cuong.van-dam/CuongVanDam/do_an_tot_nghiep/cnos/datasets/bop23_challenge/datasets/templates_pyrender/daoliuzhao/obj_000001/*.png"))

num_corres_list = list()

for crop in crops:
    print("crop name: ", crop.split("/")[-1])
    num_corres_temp_list = np.array([extract_correspondences(crop, temp, matcher=matcher) for temp in templates])
    print(f"max num correpodences: {max(num_corres_temp_list)}")
    print(f"top 5 max num correpodences: {num_corres_temp_list[np.argsort(num_corres_temp_list)][-5:]}")


crop name:  crop0.png
max num correpodences: 13
top 5 max num correpodences: [11 11 12 12 13]
crop name:  crop1.png
max num correpodences: 18
top 5 max num correpodences: [13 13 14 17 18]
crop name:  crop10.png
max num correpodences: 7
top 5 max num correpodences: [5 6 6 6 7]
crop name:  crop11.png
max num correpodences: 12
top 5 max num correpodences: [10 11 11 11 12]
crop name:  crop12.png
max num correpodences: 7
top 5 max num correpodences: [6 6 6 6 7]
crop name:  crop13.png
max num correpodences: 8
top 5 max num correpodences: [6 6 7 7 8]
crop name:  crop14.png
max num correpodences: 29
top 5 max num correpodences: [23 24 24 25 29]
crop name:  crop15.png
max num correpodences: 0
top 5 max num correpodences: [0 0 0 0 0]
crop name:  crop16.png
max num correpodences: 14
top 5 max num correpodences: [11 12 12 13 14]
crop name:  crop17.png
max num correpodences: 8
top 5 max num correpodences: [6 7 7 7 8]
crop name:  crop18.png


KeyboardInterrupt: 