Skip to content

Commit

Permalink
trilinear interp gradients by coords (#650)
Browse files Browse the repository at this point in the history
Signed-off-by: operel <operel@nvidia.com>

dtype bug fix

Signed-off-by: operel <operel@nvidia.com>

trigger CI

Signed-off-by: operel <operel@nvidia.com>

mr fixes

Signed-off-by: operel <operel@nvidia.com>

Signed-off-by: operel <operel@nvidia.com>
Co-authored-by: operel <operel@nvidia.com>
  • Loading branch information
orperel and operel committed Nov 9, 2022
1 parent be0d000 commit 17491c8
Show file tree
Hide file tree
Showing 2 changed files with 140 additions and 17 deletions.
71 changes: 55 additions & 16 deletions kaolin/ops/spc/points.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,66 +174,105 @@ class InterpolateTrilinear(torch.autograd.Function):
@staticmethod
def forward(ctx, coords, pidx, point_hierarchy, trinkets, feats, level):

feats_out = _C.ops.spc.interpolate_trilinear_cuda(coords.contiguous(), pidx.contiguous(),
point_hierarchy.contiguous(), trinkets.contiguous(),
feats_out = _C.ops.spc.interpolate_trilinear_cuda(coords.contiguous(), pidx.contiguous(),
point_hierarchy.contiguous(), trinkets.contiguous(),
feats.contiguous(), level)

ctx.save_for_backward(coords, pidx, point_hierarchy, trinkets)
ctx.save_for_backward(coords, pidx, point_hierarchy, trinkets, feats)
ctx.level = level
ctx.feats_shape = feats.shape
ctx.coords_shape = coords.shape
return feats_out

@staticmethod
def backward(ctx, grad_output):
coords, pidx, point_hierarchy, trinkets = ctx.saved_tensors
coords, pidx, point_hierarchy, trinkets, feats = ctx.saved_tensors

level = ctx.level
mask = pidx > -1
selected_points = point_hierarchy.index_select(0, pidx[mask])
selected_trinkets = trinkets.index_select(0, pidx[mask])

# TODO(ttakikawa): Support backprop with respect to coords
is_needs_grad_by_coords = ctx.needs_input_grad[0]
is_needs_grad_by_features = ctx.needs_input_grad[4]

grad_feats = None
if ctx.needs_input_grad[4]:
if is_needs_grad_by_features:
# TODO(ttakikawa): Write a fused kernel
grad_feats = torch.zeros(ctx.feats_shape, device=grad_output.device, dtype=grad_output.dtype)
coeffs = coords_to_trilinear_coeffs(coords[mask], selected_points[:, None].repeat(1, coords.shape[1], 1), level).type(grad_output.dtype)
grad_feats.index_add_(0, selected_trinkets.reshape(-1),
(coeffs[..., None] * grad_output[mask][..., None, :]).sum(1).reshape(-1, ctx.feats_shape[-1]))
return None, None, None, None, grad_feats, None
grad_per_corner = (coeffs[..., None] * grad_output[mask][..., None, :]).sum(1)
grad_feats.index_add_(0, selected_trinkets.reshape(-1),
grad_per_corner.reshape(-1, ctx.feats_shape[-1]).to(grad_feats.dtype))

# TODO (operel): May want to reimplement with CUDA
grad_coords = None
if is_needs_grad_by_coords:
# Let N be the number of intersected cells in a batch (e.g. pidx > -1)
# Let D be the features dimensionality
# Shape (N, 3), xyz coords of intersected cells in range [0, 2^lod]
coords_ = (2 ** level) * (coords[mask].reshape(-1, 3) * 0.5 + 0.5)
# Shape (N, 3), quantized xyz coords of intersected cells in range [0, 2^lod]
points_ = selected_points[:, None].repeat(1, coords.shape[1], 1).reshape(-1, 3)
# Shape (N, 3), local cell coordinates in range [0.0, 1.0]
x_ = coords_ - points_
# Shape (N, 3), 1.0 - local cell coordinates in range [0.0, 1.0]
_x = 1.0 - x_
# Shape (N, 8 x 3) tensor of @(coeffs)/@(xyz) where
# coeffs is the tensor of c000, c001, .. c111, the trilinear interp coefficients
# (see coords_to_trilinear_coeffs), and xyz is the coords
grad_coeffs_by_xyz = torch.stack([
-_x[:, 1] * _x[:, 2], -_x[:, 0] * _x[:, 2], -_x[:, 0] * _x[:, 1],
-_x[:, 1] * x_[:, 2], -_x[:, 0] * x_[:, 2], _x[:, 0] * _x[:, 1],
-x_[:, 1] * _x[:, 2], _x[:, 0] * _x[:, 2], -_x[:, 0] * x_[:, 1],
-x_[:, 1] * x_[:, 2], _x[:, 0] * x_[:, 2], _x[:, 0] * x_[:, 1],
_x[:, 1] * _x[:, 2], -x_[:, 0] * _x[:, 2], -x_[:, 0] * _x[:, 1],
_x[:, 1] * x_[:, 2], -x_[:, 0] * x_[:, 2], x_[:, 0] * _x[:, 1],
x_[:, 1] * _x[:, 2], x_[:, 0] * _x[:, 2], -x_[:, 0] * x_[:, 1],
x_[:, 1] * x_[:, 2], x_[:, 0] * x_[:, 2], x_[:, 0] * x_[:, 1]
], dim=1).to(dtype=grad_output.dtype, device=grad_output.device)
# Shape (N, 8, 3) tensor of @(coeffs)/@(xyz)
grad_coeffs_by_xyz = grad_coeffs_by_xyz.reshape(-1, 8, 3)
# Shape (N, D, 8) tensor of @(feats_out)/@(coeffs)
grad_fout_by_coeffs = feats[selected_trinkets.long()].permute(0,2,1)
# Shape (N, D, 3) tensor of @(feats_out)/@(xyz), after applying chain rule
grad_fout_by_xyz = grad_fout_by_coeffs @ grad_coeffs_by_xyz
# Shape (N, 1, 3) tensor of @(out)/@(xyz) applying chain rule again
grad_coords = grad_output @ grad_fout_by_xyz
return grad_coords, None, None, None, grad_feats, None

def unbatched_interpolate_trilinear(coords, pidx, point_hierarchy, trinkets, feats, level):
r"""Performs trilinear interpolation on a SPC feature grid.
Args:
coords (torch.FloatTensor): 3D coordinates of shape
:math:`(\text{num_coords}, \text{num_samples}, 3)`
:math:`(\text{num_coords}, \text{num_samples}, 3)`
in normalized space [-1, 1]. ``num_samples`` indicates the number of
coordinates that are grouped inside the same SPC node for performance
optimization purposes. In many cases the ``pidx`` is
optimization purposes. In many cases the ``pidx`` is
generated from :func:`kaolin.ops.spc.unbatched_query`
and so the ``num_samples`` will be 1.
pidx (torch.IntTensor): Index to the point hierarchy which contains the voxel
which the coords exists in. Tensor of shape
:math:`(\text{num_coords})`.
which the coords exists in. Tensor of shape
:math:`(\text{num_coords})`.
This can be computed with :func:`kaolin.ops.spc.unbatched_query`.
point_hierarchy (torch.ShortTensor):
point_hierarchy (torch.ShortTensor):
The point hierarchy of shape :math:`(\text{num_points}, 3)`.
See :ref:`point_hierarchies <spc_points>` for a detailed description.
trinkets (torch.IntTensor): An indirection pointer (in practice, an index) to the feature
tensor of shape :math:`(\text{num_points}, 8)`.
feats (torch.Tensor): Floating point feature vectors to interpolate of shape
feats (torch.Tensor): Floating point feature vectors to interpolate of shape
:math:`(\text{num_feats}, \text{feature_dim})`.
level (int): The level of SPC to interpolate on.
Returns:
(torch.FloatTensor):
(torch.FloatTensor):
Interpolated feature vectors of shape :math:`(\text{num_voxels}, \text{num_samples}, \text{feature_dim})`.
"""
return InterpolateTrilinear.apply(coords, pidx, point_hierarchy, trinkets, feats, level)
Expand Down
86 changes: 85 additions & 1 deletion tests/python/kaolin/ops/spc/test_points.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ def test_interpolate_trilinear_backward(self, points):
feats = torch.rand([pyramid_dual[0, level], 16], device='cuda')
feats.requires_grad_(True)
if feats.grad is not None:
feats.grad.deatch_()
feats.grad.detach()
feats.grad.zero_()

corner_feats = feats.index_select(0, trinkets[pidx].view(-1)).view(-1, 8, 16)
Expand All @@ -194,3 +194,87 @@ def test_interpolate_trilinear_backward(self, points):
grad = feats.grad.clone()

assert torch.allclose(grad, expected_grad, rtol=1e-5, atol=1e-5)

def test_interpolate_trilinear_by_coords_backward(self, points):
w = torch.rand(points.shape, device='cuda')
x = points + w

level = 3

octree = unbatched_points_to_octree(points, level)
length = torch.tensor([len(octree)], dtype=torch.int32)
_, pyramid, prefix = scan_octrees(octree, length)
point_hierarchy = generate_points(octree, pyramid, prefix)

pyramid = pyramid[0]
point_hierarchy_dual, pyramid_dual = unbatched_make_dual(point_hierarchy, pyramid)
trinkets, parents = unbatched_make_trinkets(point_hierarchy, pyramid, point_hierarchy_dual, pyramid_dual)

coords = (x / (2 ** level)) * 2.0 - 1.0
pidx = unbatched_query(octree, prefix, coords, level, with_parents=False)
feats = torch.rand([pyramid_dual[0, level], 16], device='cuda')

# w is the relative position inside a cell
w.requires_grad_(True)
if w.grad is not None:
w.grad.detach()
w.grad.zero_()

# (5, 8, 16)
corner_feats = feats.index_select(0, trinkets[pidx].view(-1)).view(-1, 8, 16)

# (5, 8)
expected_coeffs = torch.stack([
(1 - w[:, 0]) * (1 - w[:, 1]) * (1 - w[:, 2]),
(1 - w[:, 0]) * (1 - w[:, 1]) * w[:, 2],
(1 - w[:, 0]) * w[:, 1] * (1 - w[:, 2]),
(1 - w[:, 0]) * w[:, 1] * w[:, 2],
w[:, 0] * (1 - w[:, 1]) * (1 - w[:, 2]),
w[:, 0] * (1 - w[:, 1]) * w[:, 2],
w[:, 0] * w[:, 1] * (1 - w[:, 2]),
w[:, 0] * w[:, 1] * w[:, 2]
], dim=-1)
expected_coeffs = expected_coeffs.requires_grad_(True) # prevents element0 error
expected_results = (corner_feats * expected_coeffs[..., None]).sum(1)
loss = expected_results.sum()
loss.backward()
expected_grad = w.grad.clone()

coords.requires_grad_(True)
if coords.grad is not None:
coords.grad.detach()
coords.grad.zero_()
results = unbatched_interpolate_trilinear(coords[:, None], pidx.int(), point_hierarchy, trinkets, feats, level)
loss = results[:, 0].sum()
loss.backward()
coords_grad = coords.grad.clone()

assert torch.allclose(coords_grad, expected_grad, rtol=1e-4, atol=1e-3)

def test_interpolate_trilinear_by_coords_toggleable(self, points):
# Test that features only grad does not generate coords grad
w = torch.rand(points.shape, device='cuda')
x = points + w

level = 3

octree = unbatched_points_to_octree(points, level)
length = torch.tensor([len(octree)], dtype=torch.int32)
_, pyramid, prefix = scan_octrees(octree, length)
point_hierarchy = generate_points(octree, pyramid, prefix)

pyramid = pyramid[0]
point_hierarchy_dual, pyramid_dual = unbatched_make_dual(point_hierarchy, pyramid)
trinkets, parents = unbatched_make_trinkets(point_hierarchy, pyramid, point_hierarchy_dual, pyramid_dual)

coords = (x / (2 ** level)) * 2.0 - 1.0
pidx = unbatched_query(octree, prefix, coords, level, with_parents=False)
feats = torch.rand([pyramid_dual[0, level], 16], device='cuda')

feats.requires_grad_(True)
coords.requires_grad_(False)
results = unbatched_interpolate_trilinear(coords[:, None], pidx.int(), point_hierarchy, trinkets, feats, level)
loss = results[:, 0].sum()
loss.backward()

assert coords.grad is None

0 comments on commit 17491c8

Please sign in to comment.