Skip to content

Commit

Permalink
add mesh2spc (#696)
Browse files Browse the repository at this point in the history
Signed-off-by: Clement Fuji Tsang <cfujitsang@nvidia.com>
Co-authored-by: Charles Loop <cloop@nvidia.com>
  • Loading branch information
Caenorst and charlesloopNV committed Mar 15, 2023
1 parent 436826b commit ddb4ffe
Show file tree
Hide file tree
Showing 11 changed files with 952 additions and 606 deletions.
3 changes: 2 additions & 1 deletion kaolin/csrc/bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
ops_mesh.def("unbatched_mesh_intersection_cuda", &unbatched_mesh_intersection_cuda);
py::module ops_conversions = ops.def_submodule("conversions");
ops_conversions.def("unbatched_mcube_forward_cuda", &unbatched_mcube_forward_cuda);
ops_conversions.def("mesh_to_spc", &mesh_to_spc);
ops_conversions.def("mesh_to_spc_cuda", &mesh_to_spc_cuda);
py::module ops_spc = ops.def_submodule("spc");
#if WITH_CUDA
ops_spc.def("query_cuda", &query_cuda);
Expand All @@ -57,6 +57,7 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
ops_spc.def("points_to_corners_cuda", &points_to_corners_cuda);
#endif // WITH_CUDA
ops_spc.def("points_to_octree", &points_to_octree);
ops_spc.def("morton_to_octree", &morton_to_octree);
ops_spc.def("scan_octrees_cuda", &scan_octrees_cuda);
ops_spc.def("generate_points_cuda", &generate_points_cuda);
ops_spc.def("Conv3d_forward", &Conv3d_forward);
Expand Down
139 changes: 14 additions & 125 deletions kaolin/csrc/ops/conversions/mesh_to_spc/mesh_to_spc.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES.
// Copyright (c) 2021,2023 NVIDIA CORPORATION & AFFILIATES.
// All rights reserved.

// Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -13,147 +13,36 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include <stdlib.h>
#include <stdio.h>

#include <ATen/ATen.h>

#include "../../../check.h"
#ifdef WITH_CUDA
#include "../../../spc_math.h"
#endif

namespace kaolin {

#define CHECK_TRIPLE(x) TORCH_CHECK(x.dim() == 2 && x.size(1) == 3, "input is not Nx3")
#define CHECK_PACKED_FLOAT3(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x); CHECK_FLOAT(x) CHECK_TRIPLE(x)
#define CHECK_PACKED_LONG3(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x); CHECK_LONG(x) CHECK_TRIPLE(x)

using namespace std;
using namespace at::indexing;

#ifdef WITH_CUDA
uint64_t GetStorageBytes(void* d_temp_storageA, morton_code* d_M0, morton_code* d_M1, uint32_t max_total_points);

uint32_t VoxelizeGPU(uint32_t npnts, float3* Pnts, uint32_t ntris, long3* Tris, uint32_t Level,
point_data* d_P, morton_code* d_M0, morton_code* d_M1,
uint32_t* d_Info, uint32_t* d_PrefixSum, uint32_t* d_info, uint32_t* d_psum,
float3* d_l0, float3* d_l1, float3* d_l2, float3* d_F,
uchar* d_axis, ushort* d_W, ushort2* d_pmin,
void* d_temp_storageA, uint64_t temp_storage_bytesA, uchar* d_Odata, int* d_Pyramid);

uint32_t PointToOctree(point_data* d_points, morton_code* d_morton, uint32_t* d_info, uint32_t* d_psum,
void* d_temp_storage, uint64_t temp_storage_bytes, uchar* d_octree, int* h_pyramid,
uint32_t psize, uint32_t level);
std::vector<at::Tensor> mesh_to_spc_cuda_impl(at::Tensor face_vertices, uint32_t Level);
#endif

at::Tensor points_to_octree(
at::Tensor points,
uint32_t level) {
#ifdef WITH_CUDA
uint32_t psize = points.size(0);
at::Tensor morton = at::zeros({KAOLIN_SPC_MAX_POINTS}, points.options().dtype(at::kLong));
at::Tensor info = at::zeros({KAOLIN_SPC_MAX_POINTS}, points.options().dtype(at::kInt));
at::Tensor psum = at::zeros({KAOLIN_SPC_MAX_POINTS}, points.options().dtype(at::kInt));
at::Tensor octree = at::zeros({KAOLIN_SPC_MAX_OCTREE}, points.options().dtype(at::kByte));
at::Tensor pyramid = at::zeros({2, level+2}, at::device(at::kCPU).dtype(at::kInt));

point_data* d_points = reinterpret_cast<point_data*>(points.data_ptr<short>());
morton_code* d_morton = reinterpret_cast<morton_code*>(morton.data_ptr<int64_t>());
uint32_t* d_info = reinterpret_cast<uint32_t*>(info.data_ptr<int>());
uint32_t* d_psum = reinterpret_cast<uint32_t*>(psum.data_ptr<int>());
uchar* d_octree = octree.data_ptr<uchar>();
int* h_pyramid = pyramid.data_ptr<int>();
void* d_temp_storage = NULL;
uint64_t temp_storage_bytes = GetStorageBytes(d_temp_storage, d_morton, d_morton, KAOLIN_SPC_MAX_POINTS);
at::Tensor temp_storage = at::zeros({(int64_t)temp_storage_bytes}, points.options().dtype(at::kByte));
d_temp_storage = (void*)temp_storage.data_ptr<uchar>();

uint32_t osize = PointToOctree(d_points, d_morton, d_info, d_psum, d_temp_storage, temp_storage_bytes,
d_octree, h_pyramid, psize, level);

return octree.index({Slice(KAOLIN_SPC_MAX_OCTREE - osize, None)});
#else
AT_ERROR("points_to_octree not built with CUDA");
#endif
}

at::Tensor mesh_to_spc(
at::Tensor vertices,
at::Tensor triangles,
std::vector<at::Tensor> mesh_to_spc_cuda(
at::Tensor face_vertices,
uint32_t Level) {
#ifdef WITH_CUDA
CHECK_PACKED_FLOAT3(vertices);
CHECK_PACKED_LONG3(triangles);

uint32_t npnts = vertices.size(0);
uint32_t ntris = triangles.size(0);

// allocate local GPU storage
at::Tensor P = at::zeros({KAOLIN_SPC_MAX_POINTS, 3}, vertices.options().dtype(at::kShort));

at::Tensor M0 = at::zeros({KAOLIN_SPC_MAX_POINTS}, vertices.options().dtype(at::kLong));
at::Tensor M1 = at::zeros({KAOLIN_SPC_MAX_POINTS}, vertices.options().dtype(at::kLong));

at::Tensor Info = at::zeros({KAOLIN_SPC_MAX_POINTS}, vertices.options().dtype(at::kInt));
at::Tensor PrefixSum = at::zeros({KAOLIN_SPC_MAX_POINTS}, vertices.options().dtype(at::kInt));
at::Tensor info = at::zeros({ntris}, vertices.options().dtype(at::kInt));
at::Tensor psum = at::zeros({ntris}, vertices.options().dtype(at::kInt));
CHECK_CUDA(face_vertices);
CHECK_CONTIGUOUS(face_vertices);
CHECK_DIMS(face_vertices, 3);
CHECK_SIZE(face_vertices, 1, 3);
CHECK_SIZE(face_vertices, 2, 3);

at::Tensor l0 = at::zeros({3*ntris}, vertices.options().dtype(at::kFloat));
at::Tensor l1 = at::zeros({3*ntris}, vertices.options().dtype(at::kFloat));
at::Tensor l2 = at::zeros({3*ntris}, vertices.options().dtype(at::kFloat));
at::Tensor F = at::zeros({3*ntris}, vertices.options().dtype(at::kFloat));
return mesh_to_spc_cuda_impl(face_vertices, Level);

at::Tensor axis = at::zeros({ntris}, vertices.options().dtype(at::kByte));
at::Tensor W = at::zeros({ntris}, vertices.options().dtype(at::kShort));
at::Tensor pmin = at::zeros({2*ntris}, vertices.options().dtype(at::kShort));

at::Tensor Odata = at::zeros({KAOLIN_SPC_MAX_OCTREE}, vertices.options().dtype(at::kByte));
at::Tensor Pyramid = at::zeros({2, Level+2}, at::device(at::kCPU).dtype(at::kInt));

// get tensor data pointers
point_data* d_P = reinterpret_cast<point_data*>(P.data_ptr<short>());
morton_code* d_M0 = reinterpret_cast<morton_code*>(M0.data_ptr<int64_t>());
morton_code* d_M1 = reinterpret_cast<morton_code*>(M1.data_ptr<int64_t>());

uint32_t* d_Info = reinterpret_cast<uint32_t*>(Info.data_ptr<int>());
uint32_t* d_PrefixSum = reinterpret_cast<uint32_t*>(PrefixSum.data_ptr<int>());
uint32_t* d_info = reinterpret_cast<uint32_t*>(info.data_ptr<int>());
uint32_t* d_psum = reinterpret_cast<uint32_t*>(psum.data_ptr<int>());

float3* d_Pnts = reinterpret_cast<float3*>(vertices.data_ptr<float>());
tri_index* d_Tris = reinterpret_cast<tri_index*>(triangles.data_ptr<int64_t>());

float3* d_l0 = reinterpret_cast<float3*>(l0.data_ptr<float>());
float3* d_l1 = reinterpret_cast<float3*>(l1.data_ptr<float>());
float3* d_l2 = reinterpret_cast<float3*>(l2.data_ptr<float>());
float3* d_F = reinterpret_cast<float3*>(F.data_ptr<float>());

uchar* d_axis = axis.data_ptr<uchar>();
ushort* d_W = reinterpret_cast<ushort*>(W.data_ptr<short>());
ushort2* d_pmin = reinterpret_cast<ushort2*>(pmin.data_ptr<short>());

uchar* d_Odata = Odata.data_ptr<uchar>();
int* h_Pyramid = Pyramid.data_ptr<int>();

// set up memory for DeviceScan and DeviceRadixSort calls
void* d_temp_storageA = NULL;
uint64_t temp_storage_bytesA = GetStorageBytes(d_temp_storageA, d_M0, d_M1, KAOLIN_SPC_MAX_POINTS);

at::Tensor temp_storageA = at::zeros({(int64_t)temp_storage_bytesA}, vertices.options().dtype(at::kByte));
d_temp_storageA = (void*)temp_storageA.data_ptr<uchar>();

// do cuda
uint32_t Osize = VoxelizeGPU(npnts, d_Pnts, ntris, d_Tris, Level,
d_P, d_M0, d_M1, d_Info, d_PrefixSum, d_info, d_psum,
d_l0, d_l1, d_l2, d_F,
d_axis, d_W, d_pmin, d_temp_storageA, temp_storage_bytesA, d_Odata, h_Pyramid);

return Odata.index({Slice(KAOLIN_SPC_MAX_OCTREE - Osize, None)});
#else
AT_ERROR("mesh_to_spc not built with CUDA");
KAOLIN_NO_CUDA_ERROR(__func__);
#endif

}



} // namespace kaolin
14 changes: 3 additions & 11 deletions kaolin/csrc/ops/conversions/mesh_to_spc/mesh_to_spc.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,23 +16,15 @@
#ifndef KAOLIN_OPS_CONVERSIONS_MESH_TO_SPC_MESH_TO_SPC_H_
#define KAOLIN_OPS_CONVERSIONS_MESH_TO_SPC_MESH_TO_SPC_H_

#ifdef WITH_CUDA
#include "../../../spc_math.h"
#endif

#include <ATen/ATen.h>

namespace kaolin {

at::Tensor points_to_octree(
at::Tensor points,
uint32_t level);

at::Tensor mesh_to_spc(
at::Tensor vertices,
at::Tensor triangles,
std::vector<at::Tensor> mesh_to_spc_cuda(
at::Tensor face_vertices,
uint32_t Level) ;


} // namespace kaolin

#endif // KAOLIN_OPS_CONVERSIONS_MESH_TO_SPC_H_

0 comments on commit ddb4ffe

Please sign in to comment.