Skip to content

Commit

Permalink
[registration] Add OMP based Multi-threading to IterativeClosestPoint (
Browse files Browse the repository at this point in the history
…#4520)

* Make ICP multi-threaded

* Use unsigned int for num_threads_

* Keep correspondences ordered with inplace_merge

* Apply clang-format to correspondence_estimation.(h|hpp) and icp.h

* Use move and emplace_back

* remove unnecessary include

* Resolve omp build errors on gcc (Class members cannot be shared variables)

* Wrap omp_get_thread_num() in ifdef _OPENMP

* Formatting

* Remove std::move per clang-tidy suggestion

* Fix typo

---------

Co-authored-by: Markus Vieth <39675748+mvieth@users.noreply.github.com>
  • Loading branch information
koide3 and mvieth committed May 5, 2024
1 parent eb76a6e commit 91bc0fd
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 12 deletions.
22 changes: 22 additions & 0 deletions registration/include/pcl/registration/correspondence_estimation.h
Expand Up @@ -137,6 +137,23 @@ class CorrespondenceEstimationBase : public PCLBase<PointSource> {
return (target_);
}

/** \brief Set the number of threads to use.
* \param nr_threads the number of hardware threads to use (0 sets the value back to
* automatic)
*/
void
setNumberOfThreads(unsigned int nr_threads)
{
#ifdef _OPENMP
num_threads_ = nr_threads != 0 ? nr_threads : omp_get_num_procs();
#else
if (nr_threads != 1) {
PCL_WARN("OpenMP is not available. Keeping number of threads unchanged at 1\n");
}
num_threads_ = 1;
#endif
}

/** \brief See if this rejector requires source normals */
virtual bool
requiresSourceNormals() const
Expand Down Expand Up @@ -362,6 +379,8 @@ class CorrespondenceEstimationBase : public PCLBase<PointSource> {
/** \brief A flag which, if set, means the tree operating on the source cloud
* will never be recomputed*/
bool force_no_recompute_reciprocal_{false};

unsigned int num_threads_{1};
};

/** \brief @b CorrespondenceEstimation represents the base class for
Expand Down Expand Up @@ -479,6 +498,9 @@ class CorrespondenceEstimation
Ptr copy(new CorrespondenceEstimation<PointSource, PointTarget, Scalar>(*this));
return (copy);
}

protected:
using CorrespondenceEstimationBase<PointSource, PointTarget, Scalar>::num_threads_;
};
} // namespace registration
} // namespace pcl
Expand Down
10 changes: 10 additions & 0 deletions registration/include/pcl/registration/icp.h
Expand Up @@ -261,6 +261,16 @@ class IterativeClosestPoint : public Registration<PointSource, PointTarget, Scal
return (use_reciprocal_correspondence_);
}

/** \brief Set the number of threads to use.
* \param nr_threads the number of hardware threads to use (0 sets the value back to
* automatic)
*/
void
setNumberOfThreads(unsigned int nr_threads)
{
correspondence_estimation_->setNumberOfThreads(nr_threads);
}

protected:
/** \brief Apply a rigid transform to a given dataset. Here we check whether
* the dataset has surface normals in addition to XYZ, and rotate normals as well.
Expand Down
Expand Up @@ -153,12 +153,18 @@ CorrespondenceEstimation<PointSource, PointTarget, Scalar>::determineCorresponde

pcl::Indices index(1);
std::vector<float> distance(1);
pcl::Correspondence corr;
unsigned int nr_valid_correspondences = 0;
std::vector<pcl::Correspondences> per_thread_correspondences(num_threads_);
for (auto& corrs : per_thread_correspondences) {
corrs.reserve(2 * indices_->size() / num_threads_);
}
double max_dist_sqr = max_distance * max_distance;

#pragma omp parallel for default(none) \
shared(max_dist_sqr, per_thread_correspondences) firstprivate(index, distance) \
num_threads(num_threads_)
// Iterate over the input set of source indices
for (const auto& idx : (*indices_)) {
for (int i = 0; i < static_cast<int>(indices_->size()); i++) {
const auto& idx = (*indices_)[i];
// Check if the template types are the same. If true, avoid a copy.
// Both point types MUST be registered using the POINT_CLOUD_REGISTER_POINT_STRUCT
// macro!
Expand All @@ -167,13 +173,44 @@ CorrespondenceEstimation<PointSource, PointTarget, Scalar>::determineCorresponde
if (distance[0] > max_dist_sqr)
continue;

pcl::Correspondence corr;
corr.index_query = idx;
corr.index_match = index[0];
corr.distance = distance[0];
correspondences[nr_valid_correspondences++] = corr;

#ifdef _OPENMP
const int thread_num = omp_get_thread_num();
#else
const int thread_num = 0;
#endif

per_thread_correspondences[thread_num].emplace_back(corr);
}

correspondences.resize(nr_valid_correspondences);
if (num_threads_ == 1) {
correspondences = std::move(per_thread_correspondences.front());
}
else {
const unsigned int nr_correspondences = std::accumulate(
per_thread_correspondences.begin(),
per_thread_correspondences.end(),
static_cast<unsigned int>(0),
[](const auto sum, const auto& corr) { return sum + corr.size(); });
correspondences.resize(nr_correspondences);

// Merge per-thread correspondences while keeping them ordered
auto insert_loc = correspondences.begin();
for (const auto& corrs : per_thread_correspondences) {
const auto new_insert_loc = std::move(corrs.begin(), corrs.end(), insert_loc);
std::inplace_merge(correspondences.begin(),
insert_loc,
insert_loc + corrs.size(),
[](const auto& lhs, const auto& rhs) {
return lhs.index_query < rhs.index_query;
});
insert_loc = new_insert_loc;
}
}
deinitCompute();
}

Expand All @@ -197,12 +234,18 @@ CorrespondenceEstimation<PointSource, PointTarget, Scalar>::
std::vector<float> distance(1);
pcl::Indices index_reciprocal(1);
std::vector<float> distance_reciprocal(1);
pcl::Correspondence corr;
unsigned int nr_valid_correspondences = 0;
int target_idx = 0;
std::vector<pcl::Correspondences> per_thread_correspondences(num_threads_);
for (auto& corrs : per_thread_correspondences) {
corrs.reserve(2 * indices_->size() / num_threads_);
}

#pragma omp parallel for default(none) \
shared(max_dist_sqr, per_thread_correspondences) \
firstprivate(index, distance, index_reciprocal, distance_reciprocal) \
num_threads(num_threads_)
// Iterate over the input set of source indices
for (const auto& idx : (*indices_)) {
for (int i = 0; i < static_cast<int>(indices_->size()); i++) {
const auto& idx = (*indices_)[i];
// Check if the template types are the same. If true, avoid a copy.
// Both point types MUST be registered using the POINT_CLOUD_REGISTER_POINT_STRUCT
// macro!
Expand All @@ -213,20 +256,53 @@ CorrespondenceEstimation<PointSource, PointTarget, Scalar>::
if (distance[0] > max_dist_sqr)
continue;

target_idx = index[0];
const auto target_idx = index[0];
const auto& pt_tgt =
detail::pointCopyOrRef<PointSource, PointTarget>(target_, target_idx);

tree_reciprocal_->nearestKSearch(pt_tgt, 1, index_reciprocal, distance_reciprocal);
if (distance_reciprocal[0] > max_dist_sqr || idx != index_reciprocal[0])
continue;

pcl::Correspondence corr;
corr.index_query = idx;
corr.index_match = index[0];
corr.distance = distance[0];
correspondences[nr_valid_correspondences++] = corr;

#ifdef _OPENMP
const int thread_num = omp_get_thread_num();
#else
const int thread_num = 0;
#endif

per_thread_correspondences[thread_num].emplace_back(corr);
}

if (num_threads_ == 1) {
correspondences = std::move(per_thread_correspondences.front());
}
else {
const unsigned int nr_correspondences = std::accumulate(
per_thread_correspondences.begin(),
per_thread_correspondences.end(),
static_cast<unsigned int>(0),
[](const auto sum, const auto& corr) { return sum + corr.size(); });
correspondences.resize(nr_correspondences);

// Merge per-thread correspondences while keeping them ordered
auto insert_loc = correspondences.begin();
for (const auto& corrs : per_thread_correspondences) {
const auto new_insert_loc = std::move(corrs.begin(), corrs.end(), insert_loc);
std::inplace_merge(correspondences.begin(),
insert_loc,
insert_loc + corrs.size(),
[](const auto& lhs, const auto& rhs) {
return lhs.index_query < rhs.index_query;
});
insert_loc = new_insert_loc;
}
}
correspondences.resize(nr_valid_correspondences);

deinitCompute();
}

Expand Down

0 comments on commit 91bc0fd

Please sign in to comment.