Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed kinfu_remake to run in CUDA_ARCH >= 500 and CUDA_VERSION >= 9 #33

Merged
merged 1 commit into from Feb 28, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
13 changes: 8 additions & 5 deletions CMakeLists.txt
Expand Up @@ -10,13 +10,13 @@ endif()
project(kfusion C CXX)

# ---[ utility
list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/Modules/")
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/")
include(cmake/Utils.cmake)
include(cmake/Targets.cmake)

# ---[ find dependencies
find_package(OpenCV 2.4.8 REQUIRED COMPONENTS core viz highgui)
find_package(CUDA 5.0 REQUIRED)
find_package(OpenCV REQUIRED COMPONENTS core viz highgui)
find_package(CUDA REQUIRED)
find_package(OpenNI REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS} ${CUDA_INCLUDE_DIRS} ${OPENNI_INCLUDE_DIR})

Expand All @@ -28,8 +28,11 @@ endif()

# ---[ cuda settings
set(HAVE_CUDA 1)
#list(APPEND CUDA_NVCC_FLAGS "-arch;compute_20")
list(APPEND CUDA_NVCC_FLAGS "-gencode;arch=compute_20,code=sm_20;-gencode;arch=compute_20,code=sm_21;-gencode;arch=compute_30,code=sm_30;-gencode;arch=compute_35,code=sm_35;-gencode;arch=compute_50,code=sm_50")
if (CUDA_VERSION_MAJOR GREATER 8)
list(APPEND CUDA_NVCC_FLAGS "-gencode;arch=compute_30,code=sm_30;-gencode;arch=compute_35,code=sm_35;-gencode;arch=compute_50,code=sm_50")
else()
list(APPEND CUDA_NVCC_FLAGS "-gencode;arch=compute_20,code=sm_20;-gencode;arch=compute_20,code=sm_21;-gencode;arch=compute_30,code=sm_30;-gencode;arch=compute_35,code=sm_35;-gencode;arch=compute_50,code=sm_50")
endif()

if(UNIX OR APPLE)
list(APPEND CUDA_NVCC_FLAGS "-Xcompiler;-fPIC;")
Expand Down
2 changes: 1 addition & 1 deletion kfusion/CMakeLists.txt
@@ -1,3 +1,3 @@
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "--ftz=true;--prec-div=false;--prec-sqrt=false")
list(APPEND CUDA_NVCC_FLAGS "--ftz=true;--prec-div=false;--prec-sqrt=false")
add_module_library(kfusion)
target_link_libraries(kfusion ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY} ${OpenCV_LIBS} ${OPENNI_LIBRARY})
46 changes: 32 additions & 14 deletions kfusion/src/cuda/device.hpp
Expand Up @@ -50,16 +50,34 @@ __kf_device__ float3 kfusion::device::Reprojector::operator()(int u, int v, floa
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// packing/unpacking tsdf volume element

__kf_device__ ushort2 kfusion::device::pack_tsdf (float tsdf, int weight)
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 500
#define DIVISOR 32767

__kf_device__ kfusion::device::TsdfVolume::elem_type kfusion::device::pack_tsdf (float tsdf, int weight)
{
int fixedp = max (-DIVISOR, min (DIVISOR, __float2int_rz (tsdf * DIVISOR)));
return make_short2 (fixedp, weight);
}

__kf_device__ float kfusion::device::unpack_tsdf (TsdfVolume::elem_type value, int& weight)
{
weight = value.y;
return __int2float_rn (value.x) / DIVISOR;
}

__kf_device__ float kfusion::device::unpack_tsdf (TsdfVolume::elem_type value) { return static_cast<float>(value.x) / DIVISOR;}
#else
__kf_device__ kfusion::device::TsdfVolume::elem_type kfusion::device::pack_tsdf (float tsdf, int weight)
{ return make_ushort2 (__float2half_rn (tsdf), weight); }

__kf_device__ float kfusion::device::unpack_tsdf(ushort2 value, int& weight)
__kf_device__ float kfusion::device::unpack_tsdf (TsdfVolume::elem_type value, int& weight)
{
weight = value.y;
return __half2float (value.x);
weight = value.y;
return __half2float ((__half)value.x);
}
__kf_device__ float kfusion::device::unpack_tsdf (ushort2 value) { return __half2float (value.x); }

__kf_device__ float kfusion::device::unpack_tsdf (TsdfVolume::elem_type value) { return __half2float ((__half)value.x); }
#endif

////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// Utility
Expand All @@ -84,11 +102,11 @@ namespace kfusion
struct gmem
{
template<typename T> __kf_device__ static T LdCs(T *ptr);
template<typename T> __kf_device__ static void StCs(const T& val, T *ptr);
template<typename T> __kf_device__ static void StCs(const T& val, T*& ptr);
};

template<> __kf_device__ ushort2 gmem::LdCs(ushort2* ptr);
template<> __kf_device__ void gmem::StCs(const ushort2& val, ushort2* ptr);
template<> __kf_device__ TsdfVolume::elem_type gmem::LdCs(TsdfVolume::elem_type* ptr);
template<> __kf_device__ void gmem::StCs(const TsdfVolume::elem_type& val, TsdfVolume::elem_type*& ptr);
}
}

Expand All @@ -101,23 +119,23 @@ namespace kfusion
#define _ASM_PTR_ "r"
#endif

template<> __kf_device__ ushort2 kfusion::device::gmem::LdCs(ushort2* ptr)
template<> __kf_device__ kfusion::device::TsdfVolume::elem_type kfusion::device::gmem::LdCs(TsdfVolume::elem_type* ptr)
{
ushort2 val;
TsdfVolume::elem_type val;
asm("ld.global.cs.v2.u16 {%0, %1}, [%2];" : "=h"(reinterpret_cast<ushort&>(val.x)), "=h"(reinterpret_cast<ushort&>(val.y)) : _ASM_PTR_(ptr));
return val;
}

template<> __kf_device__ void kfusion::device::gmem::StCs(const ushort2& val, ushort2* ptr)
template<> __kf_device__ void kfusion::device::gmem::StCs(const TsdfVolume::elem_type& val, TsdfVolume::elem_type*& ptr)
{
short cx = val.x, cy = val.y;
asm("st.global.cs.v2.u16 [%0], {%1, %2};" : : _ASM_PTR_(ptr), "h"(reinterpret_cast<ushort&>(cx)), "h"(reinterpret_cast<ushort&>(cy)));
asm("st.global.cs.v2.u16 [%0], {%1, %2};" : "="_ASM_PTR_(ptr) : "h"(reinterpret_cast<ushort&>(cx)), "h"(reinterpret_cast<ushort&>(cy)));
}
#undef _ASM_PTR_

#else
template<> __kf_device__ ushort2 kfusion::device::gmem::LdCs(ushort2* ptr) { return *ptr; }
template<> __kf_device__ void kfusion::device::gmem::StCs(const ushort2& val, ushort2* ptr) { *ptr = val; }
template<> __kf_device__ kfusion::device::TsdfVolume::elem_type kfusion::device::gmem::LdCs(TsdfVolume::elem_type* ptr) { return *ptr; }
template<> __kf_device__ void kfusion::device::gmem::StCs(const TsdfVolume::elem_type& val, TsdfVolume::elem_type*& ptr) { *ptr = val; }
#endif


Expand Down
1 change: 0 additions & 1 deletion kfusion/src/cuda/imgproc.cu
Expand Up @@ -265,7 +265,6 @@ namespace kfusion
float xl = (x - c.x) * finv.x;
float yl = (y - c.y) * finv.y;
float lambda = sqrtf (xl * xl + yl * yl + 1);

dists(y, x) = __float2half_rn(depth(y, x) * lambda * 0.001f); //meters
}
}
Expand Down
9 changes: 4 additions & 5 deletions kfusion/src/cuda/tsdf_volume.cu
Expand Up @@ -17,10 +17,10 @@ namespace kfusion

if (x < tsdf.dims.x && y < tsdf.dims.y)
{
ushort2 *beg = tsdf.beg(x, y);
ushort2 *end = beg + tsdf.dims.x * tsdf.dims.y * tsdf.dims.z;
TsdfVolume::elem_type *beg = tsdf.beg(x, y);
TsdfVolume::elem_type *end = beg + tsdf.dims.x * tsdf.dims.y * tsdf.dims.z;

for(ushort2* pos = beg; pos != end; pos = tsdf.zstep(pos))
for(TsdfVolume::elem_type* pos = beg; pos != end; pos = tsdf.zstep(pos))
*pos = pack_tsdf (0.f, 0);
}
}
Expand Down Expand Up @@ -109,7 +109,7 @@ namespace kfusion
}
}

void kfusion::device::integrate(const PtrStepSz<ushort>& dists, TsdfVolume& volume, const Aff3f& aff, const Projector& proj)
void kfusion::device::integrate(const Dists& dists, TsdfVolume& volume, const Aff3f& aff, const Projector& proj)
{
TsdfIntegrator ti;
ti.dists_size = make_int2(dists.cols, dists.rows);
Expand Down Expand Up @@ -499,7 +499,6 @@ namespace kfusion
{
int W;
float F = fetch(x, y, z, W);

if (W != 0 && F != 1.f)
{
V.z = (z + 0.5f) * volume.voxel_size.z;
Expand Down
2 changes: 1 addition & 1 deletion kfusion/src/device_memory.cpp
Expand Up @@ -82,7 +82,7 @@ void kfusion::cuda::DeviceMemory::create(size_t sizeBytes_arg)

sizeBytes_ = sizeBytes_arg;

cudaSafeCall( cudaMalloc(&data_, sizeBytes_) );
cudaSafeCall( cudaMallocManaged(&data_, sizeBytes_) );

//refcount_ = (int*)cv::fastMalloc(sizeof(*refcount_));
refcount_ = new int;
Expand Down
18 changes: 13 additions & 5 deletions kfusion/src/internal.hpp
@@ -1,5 +1,6 @@
#pragma once

#include <cuda_fp16.h>
#include <kfusion/cuda/device_array.hpp>
#include "safe_call.hpp"

Expand All @@ -15,7 +16,11 @@ namespace kfusion
typedef unsigned short ushort;
typedef unsigned char uchar;

#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 500
typedef PtrStepSz<__half> Dists;
#else
typedef PtrStepSz<ushort> Dists;
#endif
typedef DeviceArray2D<ushort> Depth;
typedef DeviceArray2D<Normal> Normals;
typedef DeviceArray2D<Point> Points;
Expand All @@ -29,16 +34,19 @@ namespace kfusion
struct TsdfVolume
{
public:
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 500
typedef short2 elem_type;
#else
typedef ushort2 elem_type;

#endif
elem_type *const data;
const int3 dims;
const float3 voxel_size;
const float trunc_dist;
const int max_weight;

TsdfVolume(elem_type* data, int3 dims, float3 voxel_size, float trunc_dist, int max_weight);
//TsdfVolume(const TsdfVolume&);
TsdfVolume(const TsdfVolume&);

__kf_device__ elem_type* operator()(int x, int y, int z);
__kf_device__ const elem_type* operator() (int x, int y, int z) const ;
Expand Down Expand Up @@ -111,9 +119,9 @@ namespace kfusion
void raycast(const TsdfVolume& volume, const Aff3f& aff, const Mat3f& Rinv,
const Reprojector& reproj, Points& points, Normals& normals, float step_factor, float delta_factor);

__kf_device__ ushort2 pack_tsdf(float tsdf, int weight);
__kf_device__ float unpack_tsdf(ushort2 value, int& weight);
__kf_device__ float unpack_tsdf(ushort2 value);
__kf_device__ TsdfVolume::elem_type pack_tsdf(float tsdf, int weight);
__kf_device__ float unpack_tsdf(TsdfVolume::elem_type value, int& weight);
__kf_device__ float unpack_tsdf(TsdfVolume::elem_type value);


//image proc functions
Expand Down
3 changes: 3 additions & 0 deletions kfusion/src/precomp.cpp
Expand Up @@ -24,6 +24,9 @@ std::ostream& operator << (std::ostream& os, const kfusion::Intr& intr)
kfusion::device::TsdfVolume::TsdfVolume(elem_type* _data, int3 _dims, float3 _voxel_size, float _trunc_dist, int _max_weight)
: data(_data), dims(_dims), voxel_size(_voxel_size), trunc_dist(_trunc_dist), max_weight(_max_weight) {}

kfusion::device::TsdfVolume::TsdfVolume(const TsdfVolume& other)
: data(other.data), dims(other.dims), voxel_size(other.voxel_size), trunc_dist(other.trunc_dist), max_weight(other.max_weight) {}

//kfusion::device::TsdfVolume::elem_type* kfusionl::device::TsdfVolume::operator()(int x, int y, int z)
//{ return data + x + y*dims.x + z*dims.y*dims.x; }
//
Expand Down
13 changes: 6 additions & 7 deletions kfusion/src/tsdf_volume.cpp
Expand Up @@ -70,7 +70,7 @@ void kfusion::cuda::TsdfVolume::clear()
device::Vec3i dims = device_cast<device::Vec3i>(dims_);
device::Vec3f vsz = device_cast<device::Vec3f>(getVoxelSize());

device::TsdfVolume volume(data_.ptr<ushort2>(), dims, vsz, trunc_dist_, max_weight_);
device::TsdfVolume volume(data_.ptr<device::TsdfVolume::elem_type>(), dims, vsz, trunc_dist_, max_weight_);
device::clear_volume(volume);
}

Expand All @@ -84,7 +84,7 @@ void kfusion::cuda::TsdfVolume::integrate(const Dists& dists, const Affine3f& ca
device::Vec3f vsz = device_cast<device::Vec3f>(getVoxelSize());
device::Aff3f aff = device_cast<device::Aff3f>(vol2cam);

device::TsdfVolume volume(data_.ptr<ushort2>(), dims, vsz, trunc_dist_, max_weight_);
device::TsdfVolume volume(data_.ptr<device::TsdfVolume::elem_type>(), dims, vsz, trunc_dist_, max_weight_);
device::integrate(dists, volume, aff, proj);
}

Expand All @@ -102,9 +102,8 @@ void kfusion::cuda::TsdfVolume::raycast(const Affine3f& camera_pose, const Intr&
device::Vec3i dims = device_cast<device::Vec3i>(dims_);
device::Vec3f vsz = device_cast<device::Vec3f>(getVoxelSize());

device::TsdfVolume volume(data_.ptr<ushort2>(), dims, vsz, trunc_dist_, max_weight_);
device::TsdfVolume volume(data_.ptr<device::TsdfVolume::elem_type>(), dims, vsz, trunc_dist_, max_weight_);
device::raycast(volume, aff, Rinv, reproj, depth, n, raycast_step_factor_, gradient_delta_factor_);

}

void kfusion::cuda::TsdfVolume::raycast(const Affine3f& camera_pose, const Intr& intr, Cloud& points, Normals& normals)
Expand All @@ -122,7 +121,7 @@ void kfusion::cuda::TsdfVolume::raycast(const Affine3f& camera_pose, const Intr&
device::Vec3i dims = device_cast<device::Vec3i>(dims_);
device::Vec3f vsz = device_cast<device::Vec3f>(getVoxelSize());

device::TsdfVolume volume(data_.ptr<ushort2>(), dims, vsz, trunc_dist_, max_weight_);
device::TsdfVolume volume(data_.ptr<device::TsdfVolume::elem_type>(), dims, vsz, trunc_dist_, max_weight_);
device::raycast(volume, aff, Rinv, reproj, p, n, raycast_step_factor_, gradient_delta_factor_);
}

Expand All @@ -139,7 +138,7 @@ DeviceArray<Point> kfusion::cuda::TsdfVolume::fetchCloud(DeviceArray<Point>& clo
device::Vec3f vsz = device_cast<device::Vec3f>(getVoxelSize());
device::Aff3f aff = device_cast<device::Aff3f>(pose_);

device::TsdfVolume volume((ushort2*)data_.ptr<ushort2>(), dims, vsz, trunc_dist_, max_weight_);
device::TsdfVolume volume((device::TsdfVolume::elem_type*)data_.ptr<device::TsdfVolume::elem_type>(), dims, vsz, trunc_dist_, max_weight_);
size_t size = extractCloud(volume, aff, b);

return DeviceArray<Point>((Point*)cloud_buffer.ptr(), size);
Expand All @@ -155,6 +154,6 @@ void kfusion::cuda::TsdfVolume::fetchNormals(const DeviceArray<Point>& cloud, De
device::Aff3f aff = device_cast<device::Aff3f>(pose_);
device::Mat3f Rinv = device_cast<device::Mat3f>(pose_.rotation().inv(cv::DECOMP_SVD));

device::TsdfVolume volume((ushort2*)data_.ptr<ushort2>(), dims, vsz, trunc_dist_, max_weight_);
device::TsdfVolume volume((device::TsdfVolume::elem_type*)data_.ptr<device::TsdfVolume::elem_type>(), dims, vsz, trunc_dist_, max_weight_);
device::extractNormals(volume, c, aff, Rinv, gradient_delta_factor_, (float4*)normals.ptr());
}