Skip to content

Commit

Permalink
Merge pull request #1396 from STEllAR-GROUP/scan_fix
Browse files Browse the repository at this point in the history
Parallel scan algorithms with different initial values
  • Loading branch information
hkaiser committed Mar 6, 2015
2 parents 46c855b + 0e04086 commit 3ae7b4c
Show file tree
Hide file tree
Showing 8 changed files with 213 additions and 158 deletions.
3 changes: 1 addition & 2 deletions hpx/parallel/algorithms/copy.hpp
Expand Up @@ -344,7 +344,7 @@ namespace hpx { namespace parallel { HPX_INLINE_NAMESPACE(v1)
Iter dest, F && f)
{
typedef hpx::util::zip_iterator<FwdIter, char*> zip_iterator;
typedef detail::algorithm_result<ExPolicy, FwdIter> result;
typedef detail::algorithm_result<ExPolicy, Iter> result;
typedef typename std::iterator_traits<FwdIter>::difference_type
difference_type;
typedef typename detail::algorithm_result<ExPolicy, Iter>::type
Expand All @@ -355,7 +355,6 @@ namespace hpx { namespace parallel { HPX_INLINE_NAMESPACE(v1)

difference_type count = std::distance(first, last);


boost::shared_array<char> flags(new char[count]);
std::size_t init = 0;

Expand Down
67 changes: 12 additions & 55 deletions hpx/parallel/algorithms/exclusive_scan.hpp
Expand Up @@ -13,6 +13,7 @@

#include <hpx/parallel/config/inline_namespace.hpp>
#include <hpx/parallel/execution_policy.hpp>
#include <hpx/parallel/algorithms/inclusive_scan.hpp>
#include <hpx/parallel/algorithms/detail/algorithm_result.hpp>
#include <hpx/parallel/algorithms/detail/dispatch.hpp>
#include <hpx/parallel/util/partitioner.hpp>
Expand Down Expand Up @@ -49,55 +50,6 @@ namespace hpx { namespace parallel { HPX_INLINE_NAMESPACE(v1)
return dest;
}

// Sequential exclusive scan over exactly `count` elements.
//
// For each i in [0, count) writes to dest[i] the running value accumulated
// from `init` and first[0..i) using `op`, i.e. dest[0] receives `init` itself
// and the i-th input element is *not* part of dest[i].
//
// first  - start of the input sequence (must provide `count` elements)
// count  - number of elements to process
// dest   - start of the output sequence; may be the same iterator as `first`
// init   - initial value of the running accumulation
// op     - binary operation invoked as op(running_value, *first)
//
// Returns the accumulation over all `count` input elements (the value that
// would have been written to dest[count]).
template <typename InIter, typename OutIter, typename T, typename Op>
T sequential_exclusive_scan_n(InIter first, std::size_t count,
    OutIter dest, T init, Op && op)
{
    for (/**/; count-- != 0; (void) ++first, ++dest)
    {
        // Fold the current input element in *before* storing to dest: when
        // the scan runs in place (dest == first) the store below overwrites
        // *first, which would otherwise lose the input value.
        T next = op(init, *first);
        *dest = init;
        init = next;
    }
    return init;
}

///////////////////////////////////////////////////////////////////////
// Final pass of the scan: combines every element of the intermediate buffer
// `data` with a per-chunk value taken from `r` and stores the result in the
// output range starting at `dest`.
//
// policy      - execution policy forwarded to the partitioner
// r           - futures holding the value to combine into each chunk
//               (presumably one entry per chunk -- confirm against
//               util::partitioner::call_with_data)
// data        - intermediate results, read element-wise in lock-step with dest
// count       - number of elements to process
// dest        - start of the output range
// op          - binary operation invoked as op(data_element, chunk_value)
// chunk_sizes - chunk sizes forwarded unchanged to the partitioner
//
// Returns whatever call_with_data produces for this policy; the value
// delivered by the completion callback is `dest` advanced by `count`.
template <typename ExPolicy, typename T, typename OutIter, typename Op>
typename detail::algorithm_result<ExPolicy, OutIter>::type
exclusive_scan_helper(ExPolicy const& policy,
std::vector<hpx::shared_future<T> >&& r,
boost::shared_array<T> data, std::size_t count,
OutIter dest, Op && op, std::vector<std::size_t> const& chunk_sizes)
{
// Iterates the intermediate buffer and the output range together.
typedef hpx::util::zip_iterator<T*, OutIter> zip_iterator;
// NOTE(review): `reference` is not used anywhere in this function.
typedef typename zip_iterator::reference reference;

using hpx::util::make_zip_iterator;
return
util::partitioner<ExPolicy, OutIter, void>::call_with_data(
policy, make_zip_iterator(data.get(), dest), count,
// Chunk callback: `op` is captured by copy via [=].
[=](hpx::shared_future<T>&& val,
zip_iterator part_begin, std::size_t part_size)
{
// Value to be combined into every element of this chunk.
T const& v = val.get();
parallel::util::loop_n(part_begin, part_size,
[&](zip_iterator d)
{
using hpx::util::get;
// output element = op(intermediate element, chunk value)
*get<1>(d.get_iterator_tuple()) =
op(*get<0>(d.get_iterator_tuple()), v);
});
},
// Completion callback: `data` is captured but never used in the body --
// presumably only to keep the buffer alive until all chunks have run
// (TODO confirm).
[dest, count, data](
std::vector<future<void> > && r) mutable -> OutIter
{
// Report the end of the written output range.
std::advance(dest, count);
return dest;
},
chunk_sizes, std::move(r)
);
}

///////////////////////////////////////////////////////////////////////
template <typename OutIter>
struct exclusive_scan
Expand Down Expand Up @@ -129,15 +81,18 @@ namespace hpx { namespace parallel { HPX_INLINE_NAMESPACE(v1)

typedef typename std::iterator_traits<FwdIter>::difference_type
difference_type;
difference_type count = std::distance(first, last) - 1;

*dest++ = init;
if (count == 0)
return result::get(std::move(dest));

difference_type count = std::distance(first, last);
boost::shared_array<T> data(new T[count]);

// The overall scan algorithm is performed by executing 2
// subsequent parallel steps. The first calculates the scan
// results for each partition and the second produces the
// overall result

using hpx::util::make_zip_iterator;
return
util::scan_partitioner<ExPolicy, OutIter, T>::call(
Expand All @@ -146,9 +101,11 @@ namespace hpx { namespace parallel { HPX_INLINE_NAMESPACE(v1)
[=](zip_iterator part_begin, std::size_t part_size) -> T
{
using hpx::util::get;
return sequential_exclusive_scan_n(
get<0>(part_begin.get_iterator_tuple()), part_size,
get<1>(part_begin.get_iterator_tuple()), init, op);
T part_init = get<0>(*part_begin);
get<1>(*part_begin++) = part_init;
return sequential_inclusive_scan_n(
get<0>(part_begin.get_iterator_tuple()), part_size-1,
get<1>(part_begin.get_iterator_tuple()), part_init, op);
},
// step 2 propagates the partition results from left
// to right
Expand All @@ -163,7 +120,7 @@ namespace hpx { namespace parallel { HPX_INLINE_NAMESPACE(v1)
{
// run the final copy step and produce the required
// result
return exclusive_scan_helper(policy, std::move(r),
return scan_copy_helper(policy, std::move(r),
data, count, dest, op, chunk_sizes);
}
);
Expand Down
44 changes: 24 additions & 20 deletions hpx/parallel/algorithms/find.hpp
Expand Up @@ -49,11 +49,15 @@ namespace hpx { namespace parallel { HPX_INLINE_NAMESPACE(v1)
parallel(ExPolicy const& policy, InIter first, InIter last,
T const& val)
{
typedef detail::algorithm_result<ExPolicy, InIter> result;
typedef typename std::iterator_traits<InIter>::value_type type;
typedef typename std::iterator_traits<InIter>::difference_type
difference_type;

difference_type count = std::distance(first, last);
if (count <= 0)
return result::get(std::move(last));

util::cancellation_token<std::size_t> tok(count);

return util::partitioner<ExPolicy, InIter, void>::
Expand Down Expand Up @@ -178,11 +182,15 @@ namespace hpx { namespace parallel { HPX_INLINE_NAMESPACE(v1)
static typename detail::algorithm_result<ExPolicy, FwdIter>::type
parallel(ExPolicy const& policy, FwdIter first, FwdIter last, F && f)
{
typedef typename std::iterator_traits<FwdIter>::value_type type;
typedef typename std::iterator_traits<FwdIter>::difference_type
typedef detail::algorithm_result<ExPolicy, FwdIter> result;
typedef typename std::iterator_traits<InIter>::value_type type;
typedef typename std::iterator_traits<InIter>::difference_type
difference_type;

difference_type count = std::distance(first, last);
if (count <= 0)
return result::get(std::move(last));

util::cancellation_token<std::size_t> tok(count);

return util::partitioner<ExPolicy, FwdIter, void>::
Expand Down Expand Up @@ -326,11 +334,15 @@ namespace hpx { namespace parallel { HPX_INLINE_NAMESPACE(v1)
static typename detail::algorithm_result<ExPolicy, FwdIter>::type
parallel(ExPolicy const& policy, FwdIter first, FwdIter last, F && f)
{
typedef typename std::iterator_traits<FwdIter>::value_type type;
typedef typename std::iterator_traits<FwdIter>::difference_type
typedef detail::algorithm_result<ExPolicy, FwdIter> result;
typedef typename std::iterator_traits<InIter>::value_type type;
typedef typename std::iterator_traits<InIter>::difference_type
difference_type;

difference_type count = std::distance(first, last);
if (count <= 0)
return result::get(std::move(last));

util::cancellation_token<std::size_t> tok(count);

return util::partitioner<ExPolicy, FwdIter, void>::
Expand Down Expand Up @@ -472,23 +484,18 @@ namespace hpx { namespace parallel { HPX_INLINE_NAMESPACE(v1)
parallel(ExPolicy const& policy, FwdIter first1, FwdIter last1,
FwdIter2 first2, FwdIter2 last2, Pred && op)
{
typedef detail::algorithm_result<ExPolicy, FwdIter> result;
typedef typename std::iterator_traits<FwdIter>::reference reference;
typedef typename std::iterator_traits<FwdIter>::difference_type
difference_type;

difference_type diff = std::distance(first2, last2);
if (diff <= 0)
{
return detail::algorithm_result<ExPolicy, FwdIter>::get(
std::move(last1));
}
return result::get(std::move(last1));

difference_type count = std::distance(first1, last1);
if (diff > count)
{
return detail::algorithm_result<ExPolicy, FwdIter>::get(
std::move(last1));
}
return result::get(std::move(last1));

util::cancellation_token<
difference_type, std::greater<difference_type>
Expand Down Expand Up @@ -764,23 +771,20 @@ namespace hpx { namespace parallel { HPX_INLINE_NAMESPACE(v1)
parallel(ExPolicy const& policy, InIter first, InIter last,
FwdIter s_first, FwdIter s_last, Pred && op)
{
typedef detail::algorithm_result<ExPolicy, InIter> result;
typedef typename std::iterator_traits<InIter>::reference reference;
typedef typename std::iterator_traits<InIter>::difference_type
difference_type;
typedef typename std::iterator_traits<FwdIter>::difference_type
s_difference_type;

s_difference_type diff = std::distance(s_first, s_last);
if(diff <= 0) {
return detail::algorithm_result<ExPolicy, InIter>::get(
std::move(last));
}
if(diff <= 0)
return result::get(std::move(last));

difference_type count = std::distance(first, last);
if(diff > count) {
return detail::algorithm_result<ExPolicy, InIter>::get(
std::move(last));
}
if(diff > count)
return result::get(std::move(last));

util::cancellation_token<difference_type> tok(count);

Expand Down
10 changes: 6 additions & 4 deletions hpx/parallel/algorithms/inclusive_scan.hpp
Expand Up @@ -64,7 +64,7 @@ namespace hpx { namespace parallel { HPX_INLINE_NAMESPACE(v1)
///////////////////////////////////////////////////////////////////////
template <typename ExPolicy, typename T, typename OutIter, typename Op>
typename detail::algorithm_result<ExPolicy, OutIter>::type
inclusive_scan_helper(ExPolicy const& policy,
scan_copy_helper(ExPolicy const& policy,
std::vector<hpx::shared_future<T> >&& r,
boost::shared_array<T> data, std::size_t count,
OutIter dest, Op && op, std::vector<std::size_t> const& chunk_sizes)
Expand Down Expand Up @@ -145,9 +145,11 @@ namespace hpx { namespace parallel { HPX_INLINE_NAMESPACE(v1)
[=](zip_iterator part_begin, std::size_t part_size) -> T
{
using hpx::util::get;
T part_init = get<0>(*part_begin);
get<1>(*part_begin++) = part_init;
return sequential_inclusive_scan_n(
get<0>(part_begin.get_iterator_tuple()), part_size,
get<1>(part_begin.get_iterator_tuple()), init, op);
get<0>(part_begin.get_iterator_tuple()), part_size-1,
get<1>(part_begin.get_iterator_tuple()), part_init, op);
},
// step 2 propagates the partition results from left
// to right
Expand All @@ -162,7 +164,7 @@ namespace hpx { namespace parallel { HPX_INLINE_NAMESPACE(v1)
{
// run the final copy step and produce the required
// result
return inclusive_scan_helper(policy, std::move(r),
return scan_copy_helper(policy, std::move(r),
data, count, dest, op, chunk_sizes);
}
);
Expand Down
34 changes: 14 additions & 20 deletions hpx/parallel/algorithms/transform_exclusive_scan.hpp
Expand Up @@ -15,7 +15,7 @@
#include <hpx/parallel/execution_policy.hpp>
#include <hpx/parallel/algorithms/detail/algorithm_result.hpp>
#include <hpx/parallel/algorithms/detail/dispatch.hpp>
#include <hpx/parallel/algorithms/exclusive_scan.hpp>
#include <hpx/parallel/algorithms/transform_inclusive_scan.hpp>
#include <hpx/parallel/util/partitioner.hpp>
#include <hpx/parallel/util/scan_partitioner.hpp>
#include <hpx/parallel/util/loop.hpp>
Expand Down Expand Up @@ -50,19 +50,6 @@ namespace hpx { namespace parallel { HPX_INLINE_NAMESPACE(v1)
return dest;
}

// Sequential transform-exclusive scan over exactly `count` elements.
//
// For each i in [0, count) writes to dest[i] the running value accumulated
// from `init` and conv(first[0]) .. conv(first[i-1]) using `op`; dest[0]
// receives `init` itself and the i-th converted element is *not* part of
// dest[i].
//
// first  - start of the input sequence (must provide `count` elements)
// count  - number of elements to process
// dest   - start of the output sequence; may be the same iterator as `first`
// conv   - unary conversion applied to each input element before `op`
// init   - initial value of the running accumulation
// op     - binary operation invoked as op(running_value, conv(*first))
//
// Returns the accumulation over all `count` converted input elements.
template <typename InIter, typename OutIter, typename Conv, typename T,
    typename Op>
T sequential_transform_exclusive_scan_n(InIter first, std::size_t count,
    OutIter dest, Conv && conv, T init, Op && op)
{
    for (/**/; count-- != 0; (void) ++first, ++dest)
    {
        // Read and convert the input element *before* storing to dest: when
        // the scan runs in place (dest == first) the store below overwrites
        // *first, which would otherwise lose the input value.
        T next = op(init, conv(*first));
        *dest = init;
        init = next;
    }
    return init;
}

///////////////////////////////////////////////////////////////////////
template <typename OutIter>
struct transform_exclusive_scan
Expand Down Expand Up @@ -97,7 +84,12 @@ namespace hpx { namespace parallel { HPX_INLINE_NAMESPACE(v1)

typedef typename std::iterator_traits<FwdIter>::difference_type
difference_type;
difference_type count = std::distance(first, last);
difference_type count = std::distance(first, last) - 1;

*dest++ = init;
if (count == 0)
return result::get(std::move(dest));

boost::shared_array<T> data(new T[count]);

// The overall scan algorithm is performed by executing 2
Expand All @@ -113,10 +105,12 @@ namespace hpx { namespace parallel { HPX_INLINE_NAMESPACE(v1)
[=](zip_iterator part_begin, std::size_t part_size) -> T
{
using hpx::util::get;
return sequential_transform_exclusive_scan_n(
get<0>(part_begin.get_iterator_tuple()), part_size,
get<1>(part_begin.get_iterator_tuple()),
conv, init, op);
T part_init = conv(get<0>(*part_begin));
get<1>(*part_begin++) = part_init;
return sequential_transform_inclusive_scan_n(
get<0>(part_begin.get_iterator_tuple()), part_size-1,
get<1>(part_begin.get_iterator_tuple()), conv,
part_init, op);
},
// step 2 propagates the partition results from left
// to right
Expand All @@ -132,7 +126,7 @@ namespace hpx { namespace parallel { HPX_INLINE_NAMESPACE(v1)
{
// run the final copy step and produce the required
// result
return exclusive_scan_helper(policy, std::move(r),
return scan_copy_helper(policy, std::move(r),
data, count, dest, op, chunk_sizes);
}
);
Expand Down
14 changes: 8 additions & 6 deletions hpx/parallel/algorithms/transform_inclusive_scan.hpp
Expand Up @@ -92,12 +92,12 @@ namespace hpx { namespace parallel { HPX_INLINE_NAMESPACE(v1)
{
typedef detail::algorithm_result<ExPolicy, OutIter> result;
typedef hpx::util::zip_iterator<FwdIter, T*> zip_iterator;
typedef typename std::iterator_traits<FwdIter>::difference_type
difference_type;

if (first == last)
return result::get(std::move(dest));

typedef typename std::iterator_traits<FwdIter>::difference_type
difference_type;
difference_type count = std::distance(first, last);
boost::shared_array<T> data(new T[count]);

Expand All @@ -114,10 +114,12 @@ namespace hpx { namespace parallel { HPX_INLINE_NAMESPACE(v1)
[=](zip_iterator part_begin, std::size_t part_size) -> T
{
using hpx::util::get;
T part_init = conv(get<0>(*part_begin));
get<1>(*part_begin++) = part_init;
return sequential_transform_inclusive_scan_n(
get<0>(part_begin.get_iterator_tuple()), part_size,
get<1>(part_begin.get_iterator_tuple()),
conv, init, op);
get<0>(part_begin.get_iterator_tuple()), part_size-1,
get<1>(part_begin.get_iterator_tuple()), conv,
part_init, op);
},
// step 2 propagates the partition results from left
// to right
Expand All @@ -133,7 +135,7 @@ namespace hpx { namespace parallel { HPX_INLINE_NAMESPACE(v1)
{
// run the final copy step and produce the required
// result
return inclusive_scan_helper(policy, std::move(r),
return scan_copy_helper(policy, std::move(r),
data, count, dest, op, chunk_sizes);
}
);
Expand Down

0 comments on commit 3ae7b4c

Please sign in to comment.