In [1]:
from trainer import *

with open('configs/test_config.yaml') as file:
    config = yaml.full_load(file)
 
trainer = Trainer(config)

  return torch._C._cuda_getDeviceCount() > 0


Saving checkpoint..
Initializing samples..


In [2]:
dataset = trainer.dataset
batch   = next(iter(trainer.train_loader))

# Create a sample

In [3]:
tgt_img  = batch['tgt']         # [b, 3, H, W]
ref_imgs = batch['ref_imgs']    # [[b, 3, H, W], [b, 3, H, W]]
K        = batch['intrinsics']  # [b, 3, 4]
depth    = batch['groundtruth'] # [b, H, W]

In [4]:
pose = batch['oxts']
pose[0]

tensor([[ 0.0368, -0.0061, -3.0574,  0.9661,  0.0742, -0.0188],
        [ 0.0295,  0.0196,  2.6650,  0.5989, -0.3135, -0.0055],
        [-0.0064,  0.0139,  2.1024, -0.0079,  0.0057,  0.0032],
        [ 0.0275,  0.0673,  3.0010,  1.1015, -0.1613,  0.0574]],
       dtype=torch.float64)

# The Inverse Warp

The goal is to create a target image by transforming source images. But to get around splatting, we have to:

1. Transform target to source
2. As transformed pixels are continuous, use bilinear sampling (differentiable) to sample from source images.
3. As we know which pixel in target corresponds to which "transformed-and-sampled" source image we can create a target image.

### Resources
- [X] https://www.youtube.com/watch?v=lNYhWBPEeaY&list=PLyqSpQzTE6M-T5ZrthkU763MHKIKCa0sX&index=7
- [X] https://github.com/nianticlabs/monodepth2/issues/87
- [ ] https://www.cse.huji.ac.il/course/2006/impr/lectures2006/Tirgul8_LK.pdf

In [5]:
def meshgrid(B, H, W, dtype, device, normalized=False):
    """
    Create meshgrid with a specific resolution
    Parameters
    ----------
    B : int
        Batch size
    H : int
        Height size
    W : int
        Width size
    dtype : torch.dtype
        Meshgrid type
    device : torch.device
        Meshgrid device
    normalized : bool
        True if grid is normalized between -1 and 1
    Returns
    -------
    xs : torch.Tensor [B,1,W]
        Meshgrid in dimension x
    ys : torch.Tensor [B,H,1]
        Meshgrid in dimension y
    """
    if normalized:
        xs = torch.linspace(-1, 1, W, device=device, dtype=dtype)
        ys = torch.linspace(-1, 1, H, device=device, dtype=dtype)
    else:
        xs = torch.linspace(0, W-1, W, device=device, dtype=dtype)
        ys = torch.linspace(0, H-1, H, device=device, dtype=dtype)
    ys, xs = torch.meshgrid([ys, xs])
    return xs.repeat([B, 1, 1]), ys.repeat([B, 1, 1])

def image_grid(B, H, W, dtype, device, normalized=False):
    """
    Create an image grid with a specific resolution
    Parameters
    ----------
    B : int
        Batch size
    H : int
        Height size
    W : int
        Width size
    dtype : torch.dtype
        Meshgrid type
    device : torch.device
        Meshgrid device
    normalized : bool
        True if grid is normalized between -1 and 1
    Returns
    -------
    grid : torch.Tensor [B,3,H,W]
        Image grid containing a meshgrid in x, y and 1
    """
    xs, ys = meshgrid(B, H, W, dtype, device, normalized=normalized)
    ones = torch.ones_like(xs)
    grid = torch.stack([xs, ys, ones], dim=1)
    return grid

In [6]:
flat_grid = image_grid(4, depth[0].shape[0], depth[0].shape[1], depth.dtype, depth.device, normalized=False).view(4, 3, -1)

In [7]:
def Kinv():
    """Inverse intrinsics (for lifting)"""
    Kinv = K.clone()
    Kinv[:, 0, 0] = 1. / K[:, 0, 0]
    Kinv[:, 1, 1] = 1. / K[:, 1, 1]
    Kinv[:, 0, 2] = -1. * K[:, 0, 2]/ K[:, 0, 0]
    Kinv[:, 1, 2] = -1. * K[:, 1, 2]/ K[:, 1, 1]
    return Kinv

In [8]:
Kinv()

tensor([[[ 0.0010,  0.0000, -0.7240],
         [ 0.0000,  0.0010, -0.2536],
         [ 0.0000,  0.0000,  1.0000]],

        [[ 0.0010,  0.0000, -0.7237],
         [ 0.0000,  0.0010, -0.2517],
         [ 0.0000,  0.0000,  1.0000]],

        [[ 0.0010,  0.0000, -0.7252],
         [ 0.0000,  0.0011, -0.2506],
         [ 0.0000,  0.0000,  1.0000]],

        [[ 0.0010,  0.0000, -0.7237],
         [ 0.0000,  0.0010, -0.2517],
         [ 0.0000,  0.0000,  1.0000]]], dtype=torch.float64)

In [9]:
K[..., :3, :3].inverse()

tensor([[[ 0.0010,  0.0000, -0.7240],
         [ 0.0000,  0.0010, -0.2536],
         [ 0.0000,  0.0000,  1.0000]],

        [[ 0.0010,  0.0000, -0.7237],
         [ 0.0000,  0.0010, -0.2517],
         [ 0.0000,  0.0000,  1.0000]],

        [[ 0.0010,  0.0000, -0.7252],
         [ 0.0000,  0.0011, -0.2506],
         [ 0.0000,  0.0000,  1.0000]],

        [[ 0.0010,  0.0000, -0.7237],
         [ 0.0000,  0.0010, -0.2517],
         [ 0.0000,  0.0000,  1.0000]]], dtype=torch.float64)

In [10]:
K

tensor([[[959.1977,   0.0000, 694.4383],
         [  0.0000, 952.9324, 241.6793],
         [  0.0000,   0.0000,   1.0000]],

        [[960.1149,   0.0000, 694.7923],
         [  0.0000, 954.8911, 240.3547],
         [  0.0000,   0.0000,   1.0000]],

        [[956.9475,   0.0000, 693.9767],
         [  0.0000, 952.2352, 238.6081],
         [  0.0000,   0.0000,   1.0000]],

        [[960.1149,   0.0000, 694.7923],
         [  0.0000, 954.8911, 240.3547],
         [  0.0000,   0.0000,   1.0000]]], dtype=torch.float64)

In [11]:
# Estimate the outward rays in the camera frame
xnorm = (Kinv.bmm(flat_grid)).view(B, 3, H, W)
# Scale rays to metric depth
Xc = xnorm * depth


AttributeError: 'function' object has no attribute 'bmm'