# convert weight to dict

In [26]:
import torch
torch_weight = torch.load("../superpoint_v1.pth", map_location="cpu")

# Re-key every checkpoint entry: "a.b.c" becomes "a/b/c:0".
tf_dict = {}
for torch_name, tensor in torch_weight.items():
    tf_name = torch_name.replace(".", "/") + ":0"
    if tensor.dim() == 4:
        # Axes (0, 1, 2, 3) -> (2, 3, 1, 0). Presumably these are conv kernels
        # going from (out, in, kh, kw) to (kh, kw, in, out) -- TODO confirm.
        tensor = tensor.permute(2, 3, 1, 0)
    tf_dict[tf_name] = tensor.numpy()

# Show the converted names in sorted order.
sorted(tf_dict)

# test whether tf result matches torch

In [2]:
# cv2 is needed by this cell; importing it here keeps the notebook runnable
# top-to-bottom on a fresh kernel (it was previously imported only in a later cell).
import cv2

from superpoint_forward import SuperPointFrontend

# PyTorch reference run on a 100-pixel-wide vertical strip of the test image.
net = SuperPointFrontend("../superpoint_v1.pth", "3", nms_dist=4, conf_thresh=0.015, nn_thresh=0.7)
image = cv2.imread("../result.jpg")[:, -200: -100, :]
outs_torch = net.run(image)

In [3]:
import cv2
import numpy as np
import tensorflow.compat.v1 as tf


class SuperPoint(object):
    """Thin wrapper that runs a frozen TensorFlow graph on a single image.

    The graph is loaded from a serialized ``GraphDef`` file and evaluated in a
    dedicated ``tf.Session``. Call :meth:`close` (or use the instance as a
    context manager) to release the session when done.
    """

    # Names of the tensors fed to / fetched from the imported graph.
    INPUT_NAME = 'input:0'
    SEMI_NAME = 'semi:0'
    DESC_NAME = 'desc:0'

    def __init__(self, frozen_graph, device_id, memory_limit=1024):
        """Load the frozen graph and open a session on one GPU.

        Args:
            frozen_graph: path to a file containing a serialized GraphDef.
            device_id: index (int or numeric string) into the list of GPUs
                reported by TensorFlow.
            memory_limit: value passed to ``VirtualDeviceConfiguration``
                (megabytes according to the TensorFlow documentation).

        Raises:
            IndexError: if ``device_id`` does not refer to a visible GPU.
        """
        gpus = tf.config.experimental.list_physical_devices('GPU')
        try:
            gpu = gpus[int(device_id)]
        except IndexError:
            # Same exception type as before, but say what went wrong.
            raise IndexError(
                "device_id %r is out of range: TensorFlow sees %d GPU(s)"
                % (device_id, len(gpus))) from None
        tf.config.experimental.set_visible_devices(gpu, 'GPU')
        tf.config.experimental.set_virtual_device_configuration(
            gpu,
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=memory_limit)])
        self.graph = tf.Graph()
        graph_def = tf.GraphDef()
        with tf.gfile.GFile(frozen_graph, 'rb') as fid:
            serialized_graph = fid.read()
            graph_def.ParseFromString(serialized_graph)

        with self.graph.as_default():
            # name='' keeps the tensor names exactly as stored in the file.
            tf.import_graph_def(graph_def, name='')

        self.sess = tf.Session(graph=self.graph)

    def close(self):
        """Close the underlying session. Safe to call more than once."""
        sess = getattr(self, 'sess', None)
        if sess is not None:
            sess.close()
            self.sess = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def run_single_image(self, image):
        """Run the graph on one image.

        Args:
            image: either a colour image accepted by
                ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)`` or an already
                grayscale 2-D array.

        Returns:
            The list ``[semi, desc]`` returned by ``Session.run`` for
            ``SEMI_NAME`` and ``DESC_NAME``.
        """
        if np.ndim(image) == 2:
            # Already single-channel: cvtColor would reject it.
            img = image
        else:
            img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        img = img.astype(np.float32) / 255
        h, w = img.shape[0], img.shape[1]
        # Feed as a (1, 1, h, w) float32 batch.
        inp = img.reshape((1, 1, h, w))
        outs = self.sess.run([self.SEMI_NAME, self.DESC_NAME], feed_dict={
                                     self.INPUT_NAME: inp})
        return outs
        

In [12]:
# Run the frozen TensorFlow graph on the same image strip used for the torch run.
tf_model = SuperPoint(frozen_graph="./superpoint.pb", device_id="3")
image = cv2.imread("../result.jpg")[:, -200:-100]
outs_tf = tf_model.run_single_image(image)

In [20]:
# Display the second element of the PyTorch output for visual comparison
# with the TensorFlow result.
outs_torch[1]

tensor([[[[-4.9164e-02, -4.7101e-02, -3.6929e-02,  ..., -7.3134e-03,
           -7.6460e-03, -2.2979e-02],
          [-6.5429e-02, -9.6710e-02, -8.4849e-02,  ..., -5.3827e-02,
           -6.3304e-02, -4.8841e-02],
          [-2.3553e-02, -3.5312e-02, -1.9066e-02,  ...,  8.6734e-03,
            4.8708e-03,  5.3168e-03],
          ...,
          [ 2.0157e-02,  7.7801e-03,  3.1948e-02,  ...,  2.1021e-02,
            3.6994e-02,  2.5118e-03],
          [-1.1968e-02, -6.9366e-02, -4.9406e-02,  ..., -4.8562e-02,
           -4.0915e-02, -4.8467e-02],
          [ 3.4102e-02,  1.8187e-02,  2.6040e-02,  ...,  6.0794e-02,
            3.9990e-02,  1.9080e-02]],

         [[-1.2968e-01, -1.2205e-01, -1.2273e-01,  ..., -1.0544e-01,
           -1.0159e-01, -6.8733e-02],
          [-1.7958e-01, -1.5226e-01, -1.7932e-01,  ..., -1.6897e-01,
           -1.5500e-01, -1.2248e-01],
          [-1.0561e-01, -7.2009e-02, -9.1663e-02,  ..., -1.2706e-01,
           -1.2728e-01, -1.2605e-01],
          ...,
     

In [24]:
# Size of axis 1 of the second TensorFlow output.
outs_tf[1].shape[1]

256

# Test all: compare descriptor sampling with torch grid_sample, TensorFlow GridSample and cv2.remap

In [1]:
# Reload imported modules automatically before each cell executes, so edits
# to local .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
import os
# Expose only physical GPU 3 to CUDA for this kernel.
# NOTE(review): presumably this must run before torch / TensorFlow initialise
# CUDA, and the visible device is then addressed as index 0 -- confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = '3'

In [2]:
# cv2 is used below; import it here so the cell works on the fresh kernel
# this section starts from.
import cv2
import torch

from superpoint_forward import SuperPointFrontend

net = SuperPointFrontend("../superpoint_v1.pth", "0", nms_dist=4, conf_thresh=0.015, nn_thresh=0.7)
image = cv2.imread("../result.jpg")[:, -200: -100, :]
coarse_desc, samp_pts = net.run(image)
# Keep only the first descriptor channel for the comparison.
coarse_desc = coarse_desc[:, :1, :, :]
h, w = image.shape[0], image.shape[1]
# Untouched copy of the sample points (before normalisation), used by the
# TensorFlow / OpenCV cells further down.
samp_pts_not_normal = samp_pts.detach().clone()
# torch grid_sample works on a normalised grid: row 0 is scaled by the image
# width and row 1 by the image height, mapping [0, size] onto [-1, 1].
samp_pts[0, :] = (samp_pts[0, :] / (float(w) / 2.)) - 1.
samp_pts[1, :] = (samp_pts[1, :] / (float(h) / 2.)) - 1.
samp_pts = samp_pts.transpose(0, 1).contiguous()
samp_pts = samp_pts.view(1, 1, -1, 2)
samp_pts = samp_pts.float()
desc = torch.nn.functional.grid_sample(coarse_desc, samp_pts, align_corners=False)
print(desc.squeeze()[:10])

tensor([ 0.0226,  0.0479,  0.0261,  0.0413,  0.0428, -0.0215, -0.0176,  0.0046,
        -0.0004, -0.0096], device='cuda:0', grad_fn=<SliceBackward>)


In [19]:
# Un-normalised sample points as a NumPy array with one point per row.
xy = samp_pts_not_normal.cpu().numpy().T
# Move the first column to the end; for two-column (x, y) rows this gives (y, x).
yx = np.concatenate((xy[:, 1:], xy[:, :1]), axis=-1)

In [18]:
# First batch item of the coarse descriptor, channels moved to the last axis.
img = coarse_desc.detach().cpu().numpy()[0].transpose(1, 2, 0)
# Upsample to the image size so pixel coordinates can index it directly
# (torch avoids this step by sampling with a normalised grid).
img = cv2.resize(img, (w, h))
img = tf.constant(img[np.newaxis, ..., np.newaxis])
coords = tf.constant(yx[np.newaxis, np.newaxis].astype(np.float32))
out = GridSample([img, coords], borderMode='constant')
print(out.numpy().squeeze().T.tolist()[:10])

# Alternative kept for reference: sample the coarse map directly with rescaled coordinates.
# img = coarse_desc.cpu().detach().numpy().transpose([0, 2, 3, 1])
# img = tf.constant(img)
# coords = np.expand_dims(yx/[h, w]*[73, 12], (0, 1)).astype(np.float32)
# coords = tf.constant(coords)
# out = GridSample([img, coords])
# print(out.numpy().squeeze().transpose()[:10])

[0.02591796964406967, 0.05062885209918022, 0.029128430411219597, 0.04363219439983368, 0.046342384070158005, -0.02533014863729477, -0.01879284344613552, 0.006482865661382675, -6.19070342509076e-05, -0.00987747497856617]


In [27]:
import cv2
img = coarse_desc.cpu().detach().numpy().transpose([0, 2, 3, 1])[0]
# Upsample the coarse map to the image size. Use the (w, h) measured from the
# loaded image instead of a hard-coded 100x590 so the cell follows the input.
# (torch avoids this step by sampling with a normalised grid.)
src = cv2.resize(img, (w, h))
src = src[:, :, None]
# remap takes an (x, y) map; xy holds one sample point per row.
map_xy = xy[None, :, :].astype(np.float32)
# BORDER_CONSTANT (== 0) fills out-of-image samples with zeros.
desc = cv2.remap(src, map_xy, None, cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT)
desc[0, :10]

array([ 2.5917970e-02,  5.0628852e-02,  2.9128430e-02,  4.3632194e-02,
        4.6342384e-02, -2.5330149e-02, -1.8792843e-02,  6.4828657e-03,
       -6.1907034e-05, -9.8774750e-03], dtype=float32)

In [33]:
# Re-run of the cv2.remap check from the cell above.
import cv2
img = coarse_desc.cpu().detach().numpy().transpose([0, 2, 3, 1])[0]
# Upsample the coarse map to the image size. Use the (w, h) measured from the
# loaded image instead of a hard-coded 100x590 so the cell follows the input.
src = cv2.resize(img, (w, h))
src = src[:, :, None]
# remap takes an (x, y) map; xy holds one sample point per row.
map_xy = xy[None, :, :].astype(np.float32)
# BORDER_CONSTANT (== 0) fills out-of-image samples with zeros.
desc = cv2.remap(src, map_xy, None, cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT)
desc[0, :10]

array([ 2.5917970e-02,  5.0628852e-02,  2.9128430e-02,  4.3632194e-02,
        4.6342384e-02, -2.5330149e-02, -1.8792843e-02,  6.4828657e-03,
       -6.1907034e-05, -9.8774750e-03], dtype=float32)

In [5]:
import tensorflow as tf

def sample(img, coords):
    """
    Look up pixels of ``img`` at the given coordinates (no interpolation).

    Args:
        img: tensor of shape b x h x w x c.
        coords: tensor of shape b x h2 x w2 x 2 holding (y, x) pairs with
            integer values. Coordinates outside the image are clipped to
            the nearest valid pixel.
    Return:
        tensor of shape b x h2 x w2 x c.
    """
    img_dims = img.get_shape().as_list()[1:]   # h, w, c
    n_batch = tf.shape(img)[0]
    out_hw = coords.get_shape().as_list()[1:3]  # h2, w2
    assert None not in out_hw, coords.get_shape()
    last_valid = tf.constant([img_dims[0] - 1, img_dims[1] - 1], dtype=tf.float32)

    # Clamp into the image (borderMode==repeat), then convert to integer indices.
    pixel_idx = tf.cast(tf.clip_by_value(coords, 0., last_valid), tf.int32)

    # One batch index per output location: b x h2 x w2 x 1.
    batch_idx = tf.reshape(tf.range(n_batch, dtype=tf.int32), [-1, 1, 1, 1])
    batch_idx = tf.tile(batch_idx, [1, out_hw[0], out_hw[1], 1])

    # (batch, y, x) triples: b x h2 x w2 x 3.
    gather_idx = tf.concat([batch_idx, pixel_idx], axis=3)
    return tf.gather_nd(img, gather_idx)


def GridSample(inputs, borderMode='repeat'):
    """
    Bilinearly sample images at real-valued coordinates, as described in
    `Spatial Transformer Networks <http://arxiv.org/abs/1506.02025>`_.

    Equivalent to `torch.nn.functional.grid_sample` up to a non-trivial
    coordinate transformation. A floating point coordinate (1.0, 1.0)
    returns the pixel value at pixel (1, 1), which may not be what you need.

    Args:
        inputs (list): [images, coords]. images has shape NHWC.
            coords has shape (N, H', W', 2); the last dimension holds a
            real-valued (y, x) coordinate.
        borderMode: either "repeat" or "constant" (zero-filled)
    Returns:
        tf.Tensor: a tensor named ``output`` of shape (N, H', W', C).
    """
    image, mapping = inputs
    assert image.get_shape().ndims == 4 and mapping.get_shape().ndims == 4
    input_shape = image.get_shape().as_list()[1:]
    assert None not in input_shape, \
        "Images in GridSample layer must have fully-defined shape"
    assert borderMode in ('repeat', 'constant')

    raw_mapping = mapping
    clamped = tf.maximum(mapping, 0.0)
    floor_yx = tf.floor(clamped)
    ceil_yx = floor_yx + 1

    # Fractional offset from the lower corner and its complement (b x h2 x w2 x 2).
    frac = clamped - floor_yx
    inv_frac = 1.0 - frac

    floor_y, floor_x = tf.split(floor_yx, 2, 3)
    ceil_y, ceil_x = tf.split(ceil_yx, 2, 3)

    # The two mixed corners of the surrounding pixel cell.
    floor_y_ceil_x = tf.concat([floor_y, ceil_x], 3)
    ceil_y_floor_x = tf.concat([ceil_y, floor_x], 3)

    frac_y, frac_x = tf.split(frac, 2, 3)
    inv_frac_y, inv_frac_x = tf.split(inv_frac, 2, 3)

    # Each entry: (corner coordinates, first weight, second weight).
    corners = (
        (floor_yx, inv_frac_x, inv_frac_y),
        (ceil_yx, frac_x, frac_y),
        (floor_y_ceil_x, inv_frac_y, frac_x),
        (ceil_y_floor_x, frac_y, inv_frac_x),
    )
    blended = tf.add_n(
        [sample(image, corner) * first * second for corner, first, second in corners],
        name='sampled')

    if borderMode != 'constant':
        return tf.identity(blended, name='output')

    # Zero-fill every location whose original coordinate lies outside the image.
    max_coor = tf.constant([input_shape[0] - 1, input_shape[1] - 1], dtype=tf.float32)
    in_bounds = tf.logical_and(tf.greater_equal(raw_mapping, 0.0),
                               tf.less_equal(raw_mapping, max_coor))  # bxh2xw2x2
    in_bounds = tf.expand_dims(tf.reduce_all(in_bounds, [3]), 3)  # bxh2xw2x1 boolean
    blended = blended * tf.cast(in_bounds, tf.float32)
    return tf.identity(blended, name='output')