## inverse depth sample

In [2]:
import numpy as np
def inv_depths(start_depth, end_depth, num_depths):
    """Return depth planes spaced uniformly in inverse depth (disparity).

    Args:
      start_depth: Distance of the near plane; always the last value returned.
      end_depth: Distance of the far plane; always the first value returned.
      num_depths: Number of depth planes to produce, end points included.
    Returns:
      A 1-D array of depths ordered from far to near, which is the order
      needed for back-to-front compositing.
    """
    near_disparity = 1.0 / start_depth
    far_disparity = 1.0 / end_depth
    # Evenly spaced in disparity, running from the near plane to the far plane.
    disparities = np.linspace(near_disparity, far_disparity, num_depths)
    # Flip so the far plane comes first, then convert disparity back to depth.
    far_to_near = disparities[::-1]
    return 1.0 / far_to_near
# Example: 10 depth planes between near=1 and far=100, returned far-to-near.
inv_depths(1, 100, 10)

array([100.        ,   8.33333333,   4.34782609,   2.94117647,
         2.22222222,   1.78571429,   1.49253731,   1.28205128,
         1.12359551,   1.        ])

## construct a meshgrid

In [None]:
import torch
def meshgrid_torch(batch, height, width, device, permute):
    """Build a batch of homogeneous pixel-coordinate grids.

    Args:
      batch: Number of copies of the grid to stack along the first dimension.
      height: Number of rows in the grid.
      width: Number of columns in the grid.
      device: Device the resulting tensor is moved to.
      permute: If true, return channels-last [batch, height, width, 3];
          otherwise return channels-first [batch, 3, height, width].
    Returns:
      A tensor whose three channels are (x, y, 1) for every pixel, with x in
      [0, width - 1] and y in [0, height - 1].
    """
    col_values = torch.linspace(0.0, width - 1, width)
    row_values = torch.linspace(0.0, height - 1, height)
    # 'ij' indexing: the first output varies along rows (y), the second along columns (x).
    row_grid, col_grid = torch.meshgrid(row_values, col_values, indexing='ij')
    homogeneous = torch.stack(
        [col_grid, row_grid, torch.ones_like(col_grid)], dim=0
    )

    batched = homogeneous.unsqueeze(0).repeat(batch, 1, 1, 1).to(device=device)
    return batched.permute(0, 2, 3, 1) if permute else batched

## transform coordinates from pixel to camera space

In [None]:
def pix2cam_torch(depth, pixel_coords, intrinsics, is_homogeneous=True):
    """Back-project pixel coordinates into the camera frame.

    Args:
      depth: [batch, height, width]
      pixel_coords: homogeneous pixel coordinates [batch, 3, height, width]
      intrinsics: camera intrinsics [batch, 3, 3]
      is_homogeneous: if true, append a row of ones so the result is in
          homogeneous coordinates.
    Returns:
      Coords in the camera frame [batch, 3 (4 if homogeneous), height, width]
    """
    batch, height, width = depth.shape
    # Flatten the spatial dimensions so the back-projection is one batched matmul.
    depth = torch.reshape(depth, [batch, 1, -1])
    pixel_coords = torch.reshape(pixel_coords, [batch, 3, -1])
    # K^-1 * pixel gives a ray per pixel; scaling by depth gives the 3-D point.
    cam_coords = torch.matmul(torch.inverse(intrinsics), pixel_coords) * depth

    if is_homogeneous:
        # The concatenation must stay inside this branch: `ones` only exists
        # here, so running it unconditionally fails for is_homogeneous=False.
        ones = torch.ones(
            [batch, 1, height * width],
            dtype=cam_coords.dtype,
            device=cam_coords.device,
        )
        cam_coords = torch.cat([cam_coords, ones], dim=1)
    cam_coords = torch.reshape(cam_coords, [batch, -1, height, width])
    return cam_coords

## transform coordinates from camera to pixel space

In [None]:
def cam2pixel_torch(cam_coords, proj):
    """Project homogeneous camera-frame points onto the image plane.

    Args:
      cam_coords: [batch, 4, height, width]
      proj: [batch, 4, 4]
    Returns:
      Pixel coordinates [batch, height, width, 2], obtained by dividing the
      projected x and y by the projected z.
    """
    batch, _, height, width = cam_coords.shape
    # Collapse the spatial dimensions so the projection is a single batched matmul.
    flat_points = cam_coords.reshape(batch, 4, -1)
    projected = proj @ flat_points

    numerator_xy = projected[:, :2, :]
    denominator_z = projected[:, 2:3, :]
    # The small epsilon keeps the perspective divide finite when z is exactly zero.
    normalized = numerator_xy / (denominator_z + 1e-10)

    image_plane = normalized.reshape(batch, 2, height, width)
    return image_plane.permute(0, 2, 3, 1)