In [2]:
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from models.superpoint import SuperPoint
from models.superglue import SuperGlue
from torchvision import transforms


# Load models
def load_models(config):
    """Build the SuperPoint and SuperGlue networks and put both in eval mode.

    Args:
        config: Mapping with a ``'superpoint'`` entry and a ``'superglue'``
            entry; each entry is handed unchanged to the matching
            constructor.

    Returns:
        A ``(superpoint, superglue)`` tuple holding the two models.
    """
    detector = SuperPoint(config['superpoint'])
    matcher = SuperGlue(config['superglue'])
    # Inference only: switch both networks out of training mode.
    for network in (detector, matcher):
        network.eval()
    return detector, matcher
# Model settings: the 'superpoint' entry is passed to SuperPoint(...) and the
# 'superglue' entry to SuperGlue(...) by load_models().
config = dict(
    superpoint=dict(
        nms_radius=4,
        keypoint_threshold=0.005,  # adjusted threshold
        max_keypoints=-1,
    ),
    superglue=dict(
        weights='indoor',
        sinkhorn_iterations=20,
        match_threshold=0.2,
    ),
)

# Convert image to tensor
def image_to_tensor(image, device):
    """Turn an image array into a batched single-channel tensor on ``device``.

    Args:
        image: Image in a form accepted by ``transforms.ToPILImage`` (the
            script passes an RGB array produced by OpenCV).
        device: Target device for the resulting tensor.

    Returns:
        The transformed image with one extra leading (batch) dimension,
        moved to ``device``.
    """
    steps = [
        transforms.ToPILImage(),
        transforms.Grayscale(num_output_channels=1),  # single grey channel
        transforms.ToTensor(),
    ]
    pipeline = transforms.Compose(steps)
    single = pipeline(image)
    batched = single.unsqueeze(0)
    return batched.to(device)

# Extract features and descriptors
def extract_features(model, image_tensor):
    """Run ``model`` on one image tensor and print the shapes of its outputs.

    Args:
        model: Callable taking ``{'image': tensor}`` and returning a mapping
            with ``'keypoints'``, ``'scores'`` and ``'descriptors'`` entries,
            each indexable with ``[0]``.
        image_tensor: Input passed to the model under the ``'image'`` key.

    Returns:
        The model's output mapping, unchanged.
    """
    batch = {'image': image_tensor}
    with torch.inference_mode():
        output = model(batch)

    # Report the shape of the first element of every output field.
    report = (
        ("Keypoints shape:", 'keypoints'),
        ("Scores shape:", 'scores'),
        ("Descriptors shape:", 'descriptors'),
    )
    for caption, field in report:
        print(caption, output[field][0].shape)
    return output

# Match features between two sets of descriptors
def match_features(image1_tensor, keypoints1, desc1, scores1, image2_tensor, keypoints2, desc2, scores2, superglue_model):
    """Match two sets of keypoints with a SuperGlue-style model.

    Keypoints, descriptors and scores may be numpy arrays or torch tensors.
    Each is converted to ``float32`` on the model's device and given a
    leading batch dimension before being passed to the model.

    Args:
        image1_tensor: Image tensor for the first image (moved to the
            model's device and forwarded as ``'image0'``).
        keypoints1: Keypoints of the first image.
        desc1: Descriptors of the first image.
        scores1: Keypoint scores of the first image.
        image2_tensor: Image tensor for the second image (``'image1'``).
        keypoints2: Keypoints of the second image.
        desc2: Descriptors of the second image.
        scores2: Keypoint scores of the second image.
        superglue_model: Module exposing ``parameters()`` and callable on the
            assembled data dict; its output must provide ``'matches0'`` and
            ``'matching_scores0'``.

    Returns:
        ``(matches, num_matches, match_confidences)`` where ``matches`` and
        ``match_confidences`` are numpy arrays taken from the first batch
        element of the model output, and ``num_matches`` counts the entries
        of ``matches`` greater than ``-1``.
    """
    # Run everything on whatever device the model's weights live on.
    device = next(superglue_model.parameters()).device

    def _batched(values):
        # as_tensor accepts numpy arrays and tensors alike; unlike
        # torch.tensor it does not warn (or force a copy) when the caller
        # already holds a tensor. The result may share memory with `values`.
        return torch.as_tensor(values, dtype=torch.float32, device=device).unsqueeze(0)

    data = {
        'keypoints0': _batched(keypoints1),
        'keypoints1': _batched(keypoints2),
        'descriptors0': _batched(desc1),
        'descriptors1': _batched(desc2),
        'scores0': _batched(scores1),
        'scores1': _batched(scores2),
        'image0': image1_tensor.to(device),
        'image1': image2_tensor.to(device),
    }

    with torch.inference_mode():
        output = superglue_model(data)

    # Drop the batch dimension and bring the results back to numpy.
    matches = output['matches0'][0].cpu().numpy()
    match_confidences = output['matching_scores0'][0].cpu().numpy()

    # Entries greater than -1 are counted as matches.
    num_matches = np.sum(matches > -1)

    print(f"Number of matches found: {num_matches}")
    print(f"Matches array: {matches}")

    return matches, num_matches, match_confidences

# Visualize keypoints and matches
def visualize_matches(image1, keypoints1, image2, keypoints2, matches, scores1, scores2, confidences,
                      output_path='heheheha.png'):
    """Draw matched keypoints of two images side by side and save the figure.

    The images are placed next to each other (``image1`` on the left), the
    matched keypoints are scattered on top coloured by their keypoint score,
    and each match is drawn as a line coloured by its confidence with the
    ``jet`` colormap. The colorbar describes the line (confidence) colours.

    Args:
        image1: Left image array.
        keypoints1: Array of keypoints for ``image1``; column 0 is plotted as
            x and column 1 as y.
        image2: Right image array.
        keypoints2: Array of keypoints for ``image2``, same layout.
        matches: Integer array with one entry per keypoint of ``image1``;
            entries greater than ``-1`` index into ``keypoints2``.
        scores1: Per-keypoint scores for ``image1``.
        scores2: Per-keypoint scores for ``image2``.
        confidences: Per-keypoint match confidences aligned with ``matches``.
        output_path: Where the figure is written. Defaults to the file name
            this function has always used.
    """
    is_matched = matches > -1
    left_idx = np.flatnonzero(is_matched)
    right_idx = matches[is_matched]

    left_pts = keypoints1[left_idx]
    right_pts = keypoints2[right_idx]
    # Keypoints of the right image are shifted by the left image's width.
    x_shift = image1.shape[1]

    plt.figure(figsize=(15, 15))
    plt.imshow(_stack_side_by_side(image1, image2))

    plt.scatter(left_pts[:, 0], left_pts[:, 1],
                c=scores1[left_idx], cmap='jet', s=10, label='Keypoints 1')
    plt.scatter(right_pts[:, 0] + x_shift, right_pts[:, 1],
                c=scores2[right_idx], cmap='jet', s=10, label='Keypoints 2')

    for (x1, y1), (x2, y2), confidence in zip(left_pts, right_pts, confidences[left_idx]):
        plt.plot([x1, x2 + x_shift], [y1, y2], color=plt.cm.jet(confidence), lw=1)

    plt.legend()
    # The scatters are coloured by keypoint score, so a default colorbar would
    # show the wrong scale. Build a dedicated mappable matching the line
    # colours (jet over [0, 1]) for the 'Confidence' bar.
    confidence_scale = plt.cm.ScalarMappable(
        norm=plt.Normalize(vmin=0.0, vmax=1.0), cmap=plt.cm.jet)
    plt.colorbar(confidence_scale, ax=plt.gca(), label='Confidence')
    plt.savefig(output_path)  # Save the image with matches
    plt.close()  # Close the plot


def _stack_side_by_side(left, right):
    """Join two images horizontally, zero-padding the shorter one at the bottom."""
    height = max(left.shape[0], right.shape[0])

    def _pad_rows(img):
        missing = height - img.shape[0]
        if missing == 0:
            return img
        # Pad only the row axis; leave width (and channels, if any) alone.
        widths = [(0, missing)] + [(0, 0)] * (img.ndim - 1)
        return np.pad(img, widths, mode='constant')

    return np.hstack([_pad_rows(left), _pad_rows(right)])

def display_keypoints(keypoints, title="keypoints"):
    df = pd.DataFrame(keypoints, columns = ['x', 'y'])
    display(df)

def _load_bgr(path):
    """Read ``path`` with OpenCV, raising if the image cannot be loaded."""
    pixels = cv2.imread(path)
    if pixels is None:
        raise FileNotFoundError(f"Image at {path} could not be loaded.")
    return pixels


def _first_as_numpy(features, field):
    """Return the first element of ``features[field]`` as a CPU numpy array."""
    return features[field][0].cpu().numpy()


# Prefer the GPU when one is available.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'

# Build both networks and move them to the chosen device.
superpoint, superglue = load_models(config)
superpoint = superpoint.to(device)
superglue = superglue.to(device)

# First image: read, convert BGR -> RGB, tensorise, run SuperPoint.
image_path1 = 'assets/scannet_sample_images/scene0758_00_frame-000165.jpg'
image1 = _load_bgr(image_path1)
image1_rgb = cv2.cvtColor(image1, cv2.COLOR_BGR2RGB)
image_tensor1 = image_to_tensor(image1_rgb, device)

features1 = extract_features(superpoint, image_tensor1)
keypoints1 = _first_as_numpy(features1, 'keypoints')
scores1 = _first_as_numpy(features1, 'scores')
descriptors1 = _first_as_numpy(features1, 'descriptors')

# Second image: same pipeline.
image_path2 = 'assets/scannet_sample_images/scene0758_00_frame-000510.jpg'
image2 = _load_bgr(image_path2)
image2_rgb = cv2.cvtColor(image2, cv2.COLOR_BGR2RGB)
image_tensor2 = image_to_tensor(image2_rgb, device)

features2 = extract_features(superpoint, image_tensor2)
keypoints2 = _first_as_numpy(features2, 'keypoints')
scores2 = _first_as_numpy(features2, 'scores')
descriptors2 = _first_as_numpy(features2, 'descriptors')

# Match the two feature sets with SuperGlue.
matches, num_matches, confidences = match_features(
    image1_tensor=image_tensor1,
    keypoints1=keypoints1,
    desc1=descriptors1,
    scores1=scores1,
    image2_tensor=image_tensor2,
    keypoints2=keypoints2,
    desc2=descriptors2,
    scores2=scores2,
    superglue_model=superglue,
)

# Summary of the matching result.
print(f"Number of matched keypoints: {num_matches}")
print(f"value of confidences: {confidences}")

# Keep only the keypoints that take part in a match and show both tables.
is_matched = matches > -1
matched_keypoints1 = keypoints1[is_matched]
matched_keypoints2 = keypoints2[matches[is_matched]]
for matched_table in (matched_keypoints1, matched_keypoints2):
    display_keypoints(matched_table)

# Render the matches and save the figure.
visualize_matches(image1_rgb, keypoints1, image2_rgb, keypoints2, matches, scores1, scores2, confidences)


  self.load_state_dict(torch.load(str(path)))
  self.load_state_dict(torch.load(str(path)))


Loaded SuperPoint model
Loaded SuperGlue model ("indoor" weights)
Keypoints shape: torch.Size([1272, 2])
Scores shape: torch.Size([1272])
Descriptors shape: torch.Size([256, 1272])
Keypoints shape: torch.Size([1052, 2])
Scores shape: torch.Size([1052])
Descriptors shape: torch.Size([256, 1052])
Number of matches found: 268
Matches array: [-1 -1 -1 ... -1 -1 -1]
Number of matched keypoints: 268
value of confidences: [0.000000e+00 9.582194e-08 0.000000e+00 ... 0.000000e+00 0.000000e+00
 0.000000e+00]


Unnamed: 0,x,y
0,1078.0,140.0
1,1142.0,140.0
2,1167.0,143.0
3,953.0,144.0
4,1023.0,146.0
...,...,...
263,1118.0,943.0
264,1183.0,943.0
265,979.0,944.0
266,1175.0,958.0


Unnamed: 0,x,y
0,428.0,8.0
1,479.0,8.0
2,502.0,8.0
3,300.0,8.0
4,383.0,9.0
...,...,...
263,1054.0,780.0
264,1068.0,762.0
265,1016.0,848.0
266,1101.0,768.0
