In [1]:
import os
import numpy as np
import cv2
from glob import glob
import matplotlib.pyplot as plt
from collections import namedtuple
from copy import deepcopy
from tqdm import tqdm
import random
import torch
import pandas as pd
from models.superglue import SuperGlue
from models.superpoint import SuperPoint

In [2]:
best_config = {
    'superpoint': {
    'descriptor_dim': 256,
    'nms_radius': 4,
    'keypoint_threshold': 0.005,
    'max_keypoints': -1,
    'remove_borders': 4,
    },
    'superglue': {
    'descriptor_dim': 256,
    'weights': 'outdoor',
    'keypoint_encoder': [32, 64, 128, 256],
    'GNN_layers': ['self', 'cross'] * 9,
    'sinkhorn_iterations': 100,
    'match_threshold': 0.2,
    'max_keypoints': -1 
    }
}

best_superglue_config = {
    'descriptor_dim': 256,
    'weights': 'outdoor',
    'keypoint_encoder': [32, 64, 128, 256],
    'GNN_layers': ['self', 'cross'] * 9,
    'sinkhorn_iterations': 100,
    'match_threshold': 0.2,
    'max_keypoints': -1 
    }

In [3]:
def TensorFromCVKps(kps):
    keypoint_data = []
    for kp in kps:
        # Each keypoint is represented as (x, y, size, response)
        keypoint_data.append((kp.pt[0], kp.pt[1]))
    kypoints_tensor = torch.tensor(keypoint_data)
    return kypoints_tensor

def ArrayFromCvKps(kps):
    '''Convenience function to convert OpenCV keypoints into a simple numpy array.'''
    
    return np.array([kp.pt for kp in kps])

def ExtractRootSiftFeatures(image, detector, num_features):
    '''Compute RootSIFT features for a given image.'''
    # Convert the image to grayscale

    gray = cv2.cvtColor(cv2.imread(image), cv2.COLOR_RGB2GRAY)
    
    # Detect SIFT keypoints and descriptors
    keypoints, desc = detector.detectAndCompute(gray, None)
    
    # Apply RootSIFT normalization if descriptors exist
    if desc is not None:
        desc /= (np.linalg.norm(desc, axis=1, keepdims=True) + 1e-7)
        desc = np.sqrt(desc)

    
    responses = np.array([kp.response for kp in keypoints])
    
    # Normalize the responses using min-max scaling
    scores = (responses - responses.min()) / (responses.max() - responses.min())

    return keypoints[:num_features], desc[:num_features] if desc is not None else [], scores

def get_string_fundamental_matrix(cur_kp_1, cur_kp_2):
    F, inlier_mask = cv2.findFundamentalMat(
        cur_kp_1,
        cur_kp_2, 
        cv2.USAC_MAGSAC, 
        ransacReprojThreshold=0.5,
        confidence=0.99999,
        maxIters=10000)
    
    F = np.array(F)
    if F is None:
        F = np.zeros(9)
    if F.shape != (3,3):
        print(F.shape)
    F = F.reshape(-1)
    F = F[:9]

    F_string_format = " ".join(f"{num:.5e}" for num in F)

    return F_string_format


def find_F_for_2_images_sift_bf(image0_path, image1_path, sift_detector, bf):
    
    keypoints_1, descriptors_1, scores1 = ExtractRootSiftFeatures(image0_path, sift_detector, 2000)
    keypoints_2, descriptors_2, scores2 = ExtractRootSiftFeatures(image1_path, sift_detector, 2000)
    
    cv_matches = bf.match(descriptors_1, descriptors_2)
    matches = np.array([[m.queryIdx, m.trainIdx] for m in cv_matches])
    cur_kp_1 = ArrayFromCvKps([keypoints_1[m[0]] for m in matches])
    cur_kp_2 = ArrayFromCvKps([keypoints_2[m[1]] for m in matches])

    F, inlier_mask = cv2.findFundamentalMat(
        cur_kp_1,
        cur_kp_2, 
        cv2.USAC_MAGSAC, 
        ransacReprojThreshold=0.5,
        confidence=0.99999,
        maxIters=10000)
    
    F = np.array(F)
    if F is None:
        F = np.zeros(9)
    if F.shape != (3,3):
        print(F.shape)
    F = F.reshape(-1)
    F = F[:9]

    F_string_format = " ".join(f"{num:.5e}" for num in F)

    return F_string_format


def create_sample_id_data(image0_name, image1_name, scene_name, F_string):
    sample_id = f"{scene_name};{image0_name}-{image1_name}"
    data_row = {"sample_id": sample_id, "fundamental_matrix": F_string}
    return data_row

def frame2tensor(frame, device):
    return torch.from_numpy(frame/255.).float()[None, None].to(device)

def find_F_sift_superglue(image0_path, image1_path, sift_detector, super_glue):
    keypoints_1, descriptors_1, scores1 = ExtractRootSiftFeatures(image0_path, sift_detector, 2000)
    keypoints_2, descriptors_2, scores2 = ExtractRootSiftFeatures(image1_path, sift_detector, 2000)

    gray1 = cv2.cvtColor(cv2.imread(image0_path), cv2.COLOR_RGB2GRAY)
    gray2 = cv2.cvtColor(cv2.imread(image1_path), cv2.COLOR_RGB2GRAY)
    image1_data, image2_data = frame2tensor(gray1, get_device()), frame2tensor(gray2, get_device())

    
    data = {}
    data['keypoints0'], data['keypoints1'] = TensorFromCVKps(keypoints_1), TensorFromCVKps(keypoints_2)
    data['descriptors0'], data['descriptors1'] = torch.tensor(descriptors_1), torch.tensor(descriptors_2)
    data['image0'], data['image1'] = image1_data, image2_data
    data['scores0'], data['scores1'] = torch.tensor(scores1), torch.tensor(scores2)

    pred = super_glue(data)
    pred = {k: v[0].cpu().numpy() for k, v in pred.items()}
    kpts0, kpts1 = pred['keypoints0'], pred['keypoints1']
    matches, match_conf = pred['matches0'], pred['matching_scores0']

    # Write the matches to disk.
    out_matches = {'keypoints0': kpts0, 'keypoints1': kpts1,    ### Edited - write to disc
                    'matches': matches, 'match_confidence': match_conf }
    #np.savez(str(matches_path), **out_matches)

    AMOUNT = 10
    THRESH = 0.95
    while cnt < AMOUNT and THRESH >= 0.1:
        mkpts0 = []
        mkpts1 = []
        cnt = 0
        for i in range(len(matches)):
            is_valid = matches[i] > -1 and match_conf[i] >= THRESH
            if is_valid:
                cnt += 1
                mkpts0.append(kpts0[i])
                mkpts1.append(kpts1[matches[i]])
        THRESH -= 0.05

    mkpts0 = np.array(mkpts0)
    mkpts1 = np.array(mkpts1)

    F_string = get_string_fundamental_matrix(mkpts0, mkpts1)

    return F_string


def get_device():
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'
    return device


In [7]:
#### Sift Root + super Glue
import pandas as pd
input_csv = "/share/project_data/test.csv"
base_path = "/share/project_data/test_images"
test_samples = pd.read_csv(input_csv)
img_data_list = []
num_features = 5000
sift_detector = cv2.SIFT_create(num_features, contrastThreshold=-10000, edgeThreshold=-10000)
super_glue = SuperGlue(best_superglue_config).eval().to(get_device())


for index, row in tqdm(test_samples.iterrows(), total=len(test_samples)):
    image0_name, image1_name  = row['image_1_id'], row['image_2_id']
    scene_name = row['batch_id']
    image0_path = f"{base_path}/{scene_name}/{image0_name}.jpg"
    image1_path = f"{base_path}/{scene_name}/{image1_name}.jpg"

    F_string = find_F_sift_superglue(image0_path, image1_path, sift_detector, super_glue)
    img_data = create_sample_id_data(image0_name, image1_name, scene_name, F_string)
    img_data_list.append(img_data)

output = pd.DataFrame(img_data_list)
output.to_csv("/share/project_data/submit_result_bsift_super_glue.csv", index=False)
output

  self.load_state_dict(torch.load(str(path)))


Loaded SuperGlue model ("outdoor" weights)


  0%|          | 0/18840 [00:00<?, ?it/s]


[torch.Size([1, 2, 2000]), torch.Size([5000, 1])]


RuntimeError: Tensors must have same number of dimensions: got 3 and 2

In [11]:
num_features = 5000
gray1 = cv2.cvtColor(cv2.imread(image1_path), cv2.COLOR_RGB2GRAY)
image1_data = frame2tensor(gray1, get_device())
sift_detector = cv2.SIFT_create(num_features, contrastThreshold=-10000, edgeThreshold=-10000)
keypoints_1, descriptors_1, scores = ExtractRootSiftFeatures("/share/project_data/test_images/trevi_fountain/00644051_242819650.jpg", sift_detector, 10000)
torch.tensor(scores).shape


torch.Size([5000])

In [5]:
def create_super_point():
    default_config = {
        'descriptor_dim': 256,
        'nms_radius': 4,
        'keypoint_threshold': 0.005,
        'max_keypoints': -1,
        'remove_borders': 4,
    }
    super_point = SuperPoint(default_config)
    return super_point


def extract_feature_super_point(image_path, super_point):
    
    gray = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_RGB2GRAY)
    gray = gray.astype('float32')
    image1_data = frame2tensor(gray, 'cpu')
    pred = super_point({'image': image1_data})
    return pred                   


pred = extract_feature_super_point("/share/project_data/test_images/trevi_fountain/00644051_242819650.jpg", create_super_point())


  self.load_state_dict(torch.load(str(path)))


Loaded SuperPoint model


In [None]:
pred["scores"][0].unsqueeze(1).shape

  np.array(pred["scores"][0]).unsqueeze(1).shape


RuntimeError: Can't call numpy() on Tensor that requires grad. Use tensor.detach().numpy() instead.

In [10]:
pred_dict

{'keypoints0': [tensor([[ 339.,    8.],
          [ 425.,    8.],
          [ 713.,    8.],
          ...,
          [ 529., 1055.],
          [ 643., 1055.],
          [ 711., 1055.]])],
 'scores0': (tensor([0.0324, 0.0203, 0.0360,  ..., 0.0167, 0.0131, 0.0053],
         grad_fn=<IndexBackward0>),),
 'descriptors0': [tensor([[ 0.0043,  0.0456, -0.0140,  ..., -0.0109, -0.0232,  0.0585],
          [-0.0728,  0.0723, -0.0682,  ..., -0.0554, -0.1620, -0.1621],
          [-0.0619, -0.0498, -0.0140,  ...,  0.0285,  0.1492,  0.0531],
          ...,
          [-0.0204, -0.0708, -0.1483,  ...,  0.0700,  0.0280,  0.0339],
          [ 0.0628,  0.1057,  0.0418,  ..., -0.0289, -0.0469,  0.0078],
          [ 0.0849,  0.1272,  0.0585,  ...,  0.0896, -0.0176,  0.0112]],
         grad_fn=<SelectBackward0>)]}

In [None]:
import cv2
import torch
import numpy as np
import matplotlib.pyplot as plt
from dkm import DKM

def match_images(image_path1: str, image_path2: str):
    """
    Matches features between two images using Deep Kernel Matching (DKM).
    
    :param image_path1: Path to the first image
    :param image_path2: Path to the second image
    :return: matched keypoints (list of tuples)
    """
    # Load images
    img1 = cv2.imread(image_path1)
    img2 = cv2.imread(image_path2)
    img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2RGB)
    img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2RGB)
    
    # Load the pretrained DKM model
    model = DKM(pretrained=True).eval()
    
    # Convert images to torch tensors
    img1_tensor = torch.from_numpy(img1).permute(2, 0, 1).float() / 255.0
    img2_tensor = torch.from_numpy(img2).permute(2, 0, 1).float() / 255.0
    
    # Perform matching
    matches, _ = model.match(img1_tensor.unsqueeze(0), img2_tensor.unsqueeze(0))
    
    return matches

def draw_matches(image_path1: str, image_path2: str, matches):
    """
    Visualizes the matches between two images.
    
    :param image_path1: Path to the first image
    :param image_path2: Path to the second image
    :param matches: Matched keypoints
    """
    img1 = cv2.imread(image_path1)
    img2 = cv2.imread(image_path2)
    img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2RGB)
    img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2RGB)
    
    # Create a blank canvas to display both images side by side
    h1, w1 = img1.shape[:2]
    h2, w2 = img2.shape[:2]
    canvas = np.zeros((max(h1, h2), w1 + w2, 3), dtype=np.uint8)
    canvas[:h1, :w1, :] = img1
    canvas[:h2, w1:w1 + w2, :] = img2
    
    # Draw matches
    for (x1, y1), (x2, y2) in matches:
        x1, y1, x2, y2 = int(x1), int(y1), int(x2) + w1, int(y2)  # Offset x2 by w1
        cv2.circle(canvas, (x1, y1), 3, (0, 255, 0), -1)
        cv2.circle(canvas, (x2, y2), 3, (255, 0, 0), -1)
        cv2.line(canvas, (x1, y1), (x2, y2), (255, 255, 0), 1)
    
    plt.figure(figsize=(12, 6))
    plt.imshow(canvas)
    plt.axis('off')
    plt.show()

# Example usage
def main():
    image1 = "/share/project_data/test_images/trevi_fountain/00644051_242819650.jpg"
    image2 = "/share/project_data/test_images/trevi_fountain/03172778_3127678804.jpg"
    matches = match_images(image1, image2)
    draw_matches(image1, image2, matches)

if __name__ == "__main__":
    main()


ModuleNotFoundError: No module named 'dkm'