Skip to content

Commit

Permalink
Avoid additional memory allocation by calling the predicate twice instead…
Browse files Browse the repository at this point in the history
… in parallel::partition_copy.
  • Loading branch information
taeguk committed Jun 25, 2017
1 parent 9644d14 commit 5a1d921
Showing 1 changed file with 17 additions and 21 deletions.
38 changes: 17 additions & 21 deletions hpx/parallel/algorithms/partition.hpp
Expand Up @@ -29,7 +29,6 @@
#include <hpx/parallel/util/loop.hpp>
#include <hpx/parallel/util/projection_identity.hpp>
#include <hpx/parallel/util/scan_partitioner.hpp>
#include <hpx/parallel/util/zip_iterator.hpp>

#include <algorithm>
#include <cstddef>
Expand Down Expand Up @@ -344,7 +343,6 @@ namespace hpx { namespace parallel { inline namespace v1
parallel(ExPolicy && policy, FwdIter1 first, FwdIter1 last,
FwdIter2 dest_true, FwdIter3 dest_false, Pred && pred, Proj && proj)
{
typedef hpx::util::zip_iterator<FwdIter1, bool*> zip_iterator;
typedef util::detail::algorithm_result<
ExPolicy, std::pair<FwdIter2, FwdIter3>
> result;
Expand All @@ -358,49 +356,45 @@ namespace hpx { namespace parallel { inline namespace v1

difference_type count = std::distance(first, last);

boost::shared_array<bool> flags(new bool[count]);
output_iterator_offset init = { 0, 0 };

using hpx::util::get;
using hpx::util::make_zip_iterator;
typedef util::scan_partitioner<
ExPolicy, std::pair<FwdIter2, FwdIter3>,
output_iterator_offset
> scan_partitioner_type;

auto f1 =
[pred, proj, flags, policy]
[pred, proj, policy]
(
zip_iterator part_begin, std::size_t part_size
FwdIter1 part_begin, std::size_t part_size
) -> output_iterator_offset
{
HPX_UNUSED(flags);
HPX_UNUSED(policy);

std::size_t true_count = 0;

// MSVC complains if pred or proj is captured by ref below
util::loop_n<ExPolicy>(
part_begin, part_size,
[pred, proj, &true_count](zip_iterator it) mutable
[pred, proj, &true_count](FwdIter1 it) mutable
{
using hpx::util::invoke;
bool f = invoke(pred, invoke(proj, get<0>(*it)));
bool f = invoke(pred, invoke(proj, *it));

if ((get<1>(*it) = f))
if (f)
++true_count;
});

return output_iterator_offset( true_count, part_size - true_count );
};
auto f3 =
[dest_true, dest_false, flags, policy](
zip_iterator part_begin, std::size_t part_size,
[pred, proj, policy, dest_true, dest_false](
FwdIter1 part_begin, std::size_t part_size,
hpx::shared_future<output_iterator_offset> curr,
hpx::shared_future<output_iterator_offset> next
) mutable
{
HPX_UNUSED(flags);
HPX_UNUSED(policy);

next.get(); // rethrow exceptions
Expand All @@ -411,20 +405,24 @@ namespace hpx { namespace parallel { inline namespace v1
std::advance(dest_true, count_true);
std::advance(dest_false, count_false);

// MSVC complains if pred or proj is captured by ref below
util::loop_n<ExPolicy>(
part_begin, part_size,
[&dest_true, &dest_false](zip_iterator it) mutable
[pred, proj, &dest_true, &dest_false](FwdIter1 it) mutable
{
if(get<1>(*it))
*dest_true++ = get<0>(*it);
using hpx::util::invoke;
bool f = invoke(pred, invoke(proj, *it));

if (f)
*dest_true++ = *it;
else
*dest_false++ = get<0>(*it);
*dest_false++ = *it;
});
};

return scan_partitioner_type::call(
std::forward<ExPolicy>(policy),
make_zip_iterator(first, flags.get()), count, init,
first, count, init,
// step 1 performs first part of scan algorithm
std::move(f1),
// step 2 propagates the partition results from left
Expand All @@ -441,15 +439,13 @@ namespace hpx { namespace parallel { inline namespace v1
// step 3 runs final accumulation on each partition
std::move(f3),
// step 4 use this return value
[dest_true, dest_false, count, flags](
[dest_true, dest_false, count](
std::vector<
hpx::shared_future<output_iterator_offset>
> && items,
std::vector<hpx::future<void> > &&) mutable
-> std::pair<FwdIter2, FwdIter3>
{
HPX_UNUSED(flags);

output_iterator_offset count_pair = items.back().get();
std::size_t count_true = get<0>(count_pair);
std::size_t count_false = get<1>(count_pair);
Expand Down

0 comments on commit 5a1d921

Please sign in to comment.