In [2]:
# Define your main function to encapsulate the script logic
def run_superglue_demo(input_dir, output_dir, resize, superglue_mode='indoor'):
    """Match every frame of an image sequence against the first frame.

    The first frame returned by the streamer is run through SuperPoint once
    and kept as the fixed reference; each following frame is matched against
    it with SuperPoint + SuperGlue, and a visualization of the matches is
    optionally written to disk.

    Args:
        input_dir: Input source forwarded to ``VideoStreamer`` (an image
            directory in this notebook).
        output_dir: Directory that receives ``matches_<ref>_<frame>.png``
            files. Created if missing (including parents). Pass ``None`` to
            skip writing.
        resize: Resize setting forwarded to ``VideoStreamer``
            (presumably ``[width, height]`` -- confirm against VideoStreamer).
        superglue_mode: Name of the SuperGlue weights to load
            (stored under ``config['superglue']['weights']``).

    Returns:
        None. Results are printed and, when ``output_dir`` is set, written
        as PNG files.

    Raises:
        AssertionError: If the first frame cannot be read.
    """
    import cv2
    import matplotlib.cm as cm
    import torch
    from pathlib import Path
    from models.matching import Matching
    from models.utils import (AverageTimer, VideoStreamer,
                              make_matching_plot_fast, frame2tensor)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print('Running inference on device "{}"'.format(device))

    config = {
        'superpoint': {
            'nms_radius': 4,
            'keypoint_threshold': 0.005,
            'max_keypoints': -1
        },
        'superglue': {
            'weights': superglue_mode,
            'sinkhorn_iterations': 20,
            'match_threshold': 0.2,
        }
    }

    # Disable autograd only for the duration of the demo. A bare
    # torch.set_grad_enabled(False) would stay in effect for every cell that
    # runs afterwards in the same kernel.
    with torch.no_grad():
        matching = Matching(config).eval().to(device)
        keys = ['keypoints', 'scores', 'descriptors']

        # Positional arguments after `resize` are presumably skip, image glob
        # patterns and max length -- confirm against VideoStreamer.
        vs = VideoStreamer(input_dir, resize, 1, ['*.png', '*.jpg', '*.jpeg'], 1000000)
        try:
            frame, ret = vs.next_frame()
            assert ret, 'Error when reading the first frame (try different --input?)'

            # Reference frame: its SuperPoint output is computed once and
            # reused (with a '0' suffix on each key) for every later pair.
            frame_tensor = frame2tensor(frame, device)
            last_data = matching.superpoint({'image': frame_tensor})
            last_data = {k+'0': last_data[k] for k in keys}
            last_data['image0'] = frame_tensor
            last_frame = frame
            last_image_id = 0

            if output_dir is not None:
                print('==> Will write outputs to {}'.format(output_dir))
                # parents=True so nested output paths do not fail.
                Path(output_dir).mkdir(parents=True, exist_ok=True)

            timer = AverageTimer()

            while True:
                frame, ret = vs.next_frame()
                if not ret:
                    print('Finished demo_superglue.py')
                    break
                timer.update('data')
                # vs.i - 1 is used as the id of the frame just read
                # (presumably vs.i is the streamer's next index -- confirm).
                stem0, stem1 = last_image_id, vs.i - 1

                frame_tensor = frame2tensor(frame, device)
                pred = matching({**last_data, 'image1': frame_tensor})
                kpts0 = last_data['keypoints0'][0].cpu().numpy()
                kpts1 = pred['keypoints1'][0].cpu().numpy()
                matches = pred['matches0'][0].cpu().numpy()
                confidence = pred['matching_scores0'][0].cpu().numpy()
                timer.update('forward')

                # Entries of -1 mark reference keypoints without a match;
                # the remaining entries index into kpts1.
                valid = matches > -1
                mkpts0 = kpts0[valid]
                mkpts1 = kpts1[matches[valid]]
                color = cm.jet(confidence[valid])
                text = [
                    'SuperGlue',
                    'Keypoints: {}:{}'.format(len(kpts0), len(kpts1)),
                    'Matches: {}'.format(len(mkpts0))
                ]
                k_thresh = matching.superpoint.config['keypoint_threshold']
                m_thresh = matching.superglue.config['match_threshold']
                small_text = [
                    'Keypoint Threshold: {:.4f}'.format(k_thresh),
                    'Match Threshold: {:.2f}'.format(m_thresh),
                    'Image Pair: {:06}:{:06}'.format(stem0, stem1),
                ]
                out = make_matching_plot_fast(
                    last_frame, frame, kpts0, kpts1, mkpts0, mkpts1, color, text,
                    path=None, show_keypoints=True, small_text=small_text)

                timer.update('viz')
                timer.print()

                if output_dir is not None:
                    stem = 'matches_{:06}_{:06}'.format(stem0, stem1)
                    out_file = str(Path(output_dir, stem + '.png'))
                    print('\nWriting image to {}'.format(out_file))
                    cv2.imwrite(out_file, out)
        finally:
            # Release the streamer even if matching or writing raised.
            vs.cleanup()

# Demo settings: input sequence, output folder and resize setting
input_dir, output_dir = 'assets/freiburg_sequence/', 'dump_demo_sequence'
resize = [320, 240]

# Run the demo, passing every setting by keyword
run_superglue_demo(
    input_dir=input_dir,
    output_dir=output_dir,
    resize=resize,
)


Running inference on device "cuda"
Loaded SuperPoint model
Loaded SuperGlue model ("indoor" weights)


  self.load_state_dict(torch.load(str(path)))
  self.load_state_dict(torch.load(str(path)))


==> Processing image directory input: assets/freiburg_sequence/
==> Will write outputs to dump_demo_sequence
[Timer] data=0.005 forward=0.192 viz=0.008 total=0.205 sec {4.9 FPS} 
Writing image to dump_demo_sequence\matches_000000_000001.png
[Timer] data=0.008 forward=0.159 viz=0.010 total=0.178 sec {5.6 FPS} 
Writing image to dump_demo_sequence\matches_000000_000002.png
[Timer] data=0.011 forward=0.136 viz=0.011 total=0.159 sec {6.3 FPS} 
Writing image to dump_demo_sequence\matches_000000_000003.png
[Timer] data=0.013 forward=0.121 viz=0.012 total=0.145 sec {6.9 FPS} 
Writing image to dump_demo_sequence\matches_000000_000004.png
[Timer] data=0.014 forward=0.109 viz=0.012 total=0.136 sec {7.4 FPS} 
Writing image to dump_demo_sequence\matches_000000_000005.png
[Timer] data=0.015 forward=0.102 viz=0.013 total=0.129 sec {7.7 FPS} 
Writing image to dump_demo_sequence\matches_000000_000006.png
[Timer] data=0.016 forward=0.096 viz=0.013 total=0.124 sec {8.0 FPS} 
Writing image to dump_demo_se

In [67]:
import torch
import torchvision.transforms as T
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
from models.superpoint import SuperPoint
from models.superglue import SuperGlue
import pandas as pd

# Initialize models with their default configs and put them in inference mode.
# .eval() returns the module itself and makes layers such as BatchNorm/Dropout
# (if the networks contain any) use their inference behaviour, consistent with
# the `Matching(config).eval()` call in the sequence demo cell.
superpoint = SuperPoint(config={}).eval()
superglue = SuperGlue(config={}).eval()

def extract_feature_point(image):
    """Run SuperPoint on a single image.

    The image is converted to grayscale, turned into a tensor, given a batch
    dimension and passed to the module-level ``superpoint`` model.

    Returns:
        A ``(keypoints, descriptors, scores)`` tuple taken from the first
        batch entry of the prediction, each a detached copy. ``scores`` is
        ``None`` when the prediction has no ``"scores"`` entry.
    """
    to_gray_tensor = T.Compose([T.Grayscale(), T.ToTensor()])
    batch = to_gray_tensor(image).unsqueeze(0)  # leading batch dimension

    with torch.no_grad():
        output = superpoint({"image": batch})

    def _first_entry(name):
        # Detached copy of the first batch element of one prediction field.
        return output[name][0].clone().detach()

    keypoints = _first_entry("keypoints")
    descriptors = _first_entry("descriptors")
    scores = _first_entry("scores") if "scores" in output else None
    return keypoints, descriptors, scores

def match_features(image0, image1):
    """Extract SuperPoint features from both images and run SuperGlue on them.

    Returns:
        Whatever the module-level ``superglue`` model returns for the pair.

    Raises:
        ValueError: If feature extraction returned no scores for either image.
    """
    features = [extract_feature_point(img) for img in (image0, image1)]
    if any(scores is None for _, _, scores in features):
        raise ValueError("SuperPoint did not return scores for keypoints.")
    (kpts_a, desc_a, scores_a), (kpts_b, desc_b, scores_b) = features

    def _gray_batch(img):
        # Tensor first, then grayscale, then a leading batch dimension.
        return T.Grayscale()(T.ToTensor()(img)).unsqueeze(0)

    # Every per-image tensor gets a leading batch dimension.
    inputs = dict(
        keypoints0=kpts_a.unsqueeze(0),
        keypoints1=kpts_b.unsqueeze(0),
        descriptors0=desc_a.unsqueeze(0),
        descriptors1=desc_b.unsqueeze(0),
        scores0=scores_a.unsqueeze(0),
        scores1=scores_b.unsqueeze(0),
        image0=_gray_batch(image0),
        image1=_gray_batch(image1),
    )

    with torch.no_grad():
        return superglue(inputs)

def load_image(image_path):
    """Load an image from ``image_path`` and return it as an RGB PIL image.

    Args:
        image_path: Path of the image file to open.

    Returns:
        A new ``PIL.Image.Image`` in ``'RGB'`` mode.
    """
    # Image.open is lazy and keeps the file open; the context manager closes
    # it once convert() has produced an independent RGB copy.
    with Image.open(image_path) as source:
        return source.convert('RGB')

def display_keypoints(keypoints, title="keypoints"):
    """Show keypoint coordinates as a table with ``x`` and ``y`` columns.

    ``title`` is accepted but currently unused.
    """
    column_names = ['x', 'y']
    table = pd.DataFrame(keypoints, columns=column_names)
    display(table)
def keypoints_to_tensor(keypoints):
    """Return ``keypoints`` as a new torch tensor.

    Args:
        keypoints: Array-like data (nested lists, NumPy array, ...) or an
            existing ``torch.Tensor``.

    Returns:
        A tensor holding a copy of the data. Tensor inputs yield a detached
        copy, which is what ``torch.tensor(tensor)`` produces as well.
    """
    if isinstance(keypoints, torch.Tensor):
        # torch.tensor(tensor) emits a copy-construct UserWarning;
        # detach().clone() is the documented equivalent without the warning.
        return keypoints.detach().clone()
    return torch.tensor(keypoints)

def visualize_matches(image0, image1, keypoints0, keypoints1, matches,
                      output_path='feature_point.png'):
    """Plot both images side by side with their keypoints and save the figure.

    Args:
        image0: First image; converted with ``T.ToTensor()`` for plotting.
        image1: Second image; converted the same way.
        keypoints0: Indexable as ``[:, 0]`` (x) and ``[:, 1]`` (y), e.g. an
            (N, 2) NumPy array, for ``image0``.
        keypoints1: Same as ``keypoints0`` but for ``image1``.
        matches: Accepted for interface compatibility; match lines are not
            drawn by this function.
        output_path: Where the figure is saved. Defaults to
            ``'feature_point.png'``.

    Returns:
        None. The figure is written to ``output_path`` and then closed.
    """
    panels = (
        ('Image0 with Keypoints', image0, keypoints0),
        ('Image1 with Keypoints', image1, keypoints1),
    )

    fig, axes = plt.subplots(1, 2, figsize=(12, 6))
    try:
        for ax, (title, image, keypoints) in zip(axes, panels):
            # Move the channel axis last, which is the layout imshow expects.
            pixels = T.ToTensor()(image).permute(1, 2, 0).numpy()
            ax.imshow(pixels)
            ax.set_title(title)
            ax.scatter(keypoints[:, 0], keypoints[:, 1],
                       c='r', s=10, label='Keypoints')
        fig.savefig(output_path)
    finally:
        # Close this exact figure even if plotting or saving failed, so
        # repeated calls do not accumulate open figures.
        plt.close(fig)

# Load the two sample images
image0, image1 = (
    load_image(path) for path in (
        'assets/phototourism_sample_images/london_bridge_78916675_4568141288.jpg',
        'assets/scannet_sample_images/scene0711_00_frame-001995.jpg',
    )
)

# Detect features for each image, then match the pair
(K0, d0, s0), (K1, d1, s1) = (
    extract_feature_point(img) for img in (image0, image1)
)
matches = match_features(image0, image1)

# Save the keypoint figure
visualize_matches(
    image0,
    image1,
    K0.cpu().numpy(),
    K1.cpu().numpy(),
    matches=matches,
)

# Show the keypoint tables
print("Image0 Keypoints:")
display_keypoints(K0)

print("Image1 Keypoints:")
display_keypoints(K1)


Loaded SuperPoint model
Loaded SuperGlue model ("indoor" weights)


  self.load_state_dict(torch.load(str(path)))
  self.load_state_dict(torch.load(str(path)))


Image0 Keypoints:


Unnamed: 0,x,y
0,134.0,14.0
1,158.0,14.0
2,577.0,14.0
3,337.0,15.0
4,353.0,15.0
...,...,...
1822,248.0,759.0
1823,334.0,759.0
1824,410.0,759.0
1825,544.0,759.0


Image1 Keypoints:


Unnamed: 0,x,y
0,191.0,8.0
1,211.0,8.0
2,365.0,8.0
3,1104.0,8.0
4,295.0,9.0
...,...,...
1040,658.0,959.0
1041,922.0,959.0
1042,1054.0,959.0
1043,1115.0,959.0


In [64]:
# Preview the first ten keypoints of each image. No .squeeze(): it does
# nothing for an (N, 2) tensor and would drop the row axis if an image had
# exactly one keypoint, making [:10] slice coordinates instead of rows.
K0[:10], K1[:10]

(tensor([[134.,  14.],
         [158.,  14.],
         [577.,  14.],
         [337.,  15.],
         [353.,  15.],
         [754.,  16.],
         [400.,  22.],
         [967.,  22.],
         [513.,  23.],
         [575.,  24.]]),
 tensor([[ 191.,    8.],
         [ 211.,    8.],
         [ 365.,    8.],
         [1104.,    8.],
         [ 295.,    9.],
         [ 301.,    9.],
         [ 309.,    9.],
         [ 296.,   24.],
         [ 223.,   25.],
         [ 332.,   26.]]))

In [62]:
# Inspect the shapes of the keypoint tensors extracted for image0 and image1.
K0.shape, K1.shape

(torch.Size([1100, 2]), torch.Size([1045, 2]))

In [69]:
# Peek at the first ten SuperPoint scores returned for image0.
s0[:10]

tensor([0.0483, 0.0071, 0.0091, 0.0656, 0.0089, 0.0194, 0.0398, 0.0387, 0.0067,
        0.0231])