From 334a80d10187582ede3eaebb36efc0c850ba2716 Mon Sep 17 00:00:00 2001 From: Maurizio Drocco Date: Wed, 30 Jan 2019 14:41:24 -0800 Subject: [PATCH 01/19] [pnnl/SHAD#152] local-parallel min/max algorithms --- include/shad/core/impl/minimum_maximum_ops.h | 118 +++++++++++++------ test/unit_tests/core/CMakeLists.txt | 4 +- 2 files changed, 86 insertions(+), 36 deletions(-) diff --git a/include/shad/core/impl/minimum_maximum_ops.h b/include/shad/core/impl/minimum_maximum_ops.h index a6bc0bc1..555993d9 100755 --- a/include/shad/core/impl/minimum_maximum_ops.h +++ b/include/shad/core/impl/minimum_maximum_ops.h @@ -36,6 +36,8 @@ #include "shad/distributed_iterator_traits.h" #include "shad/runtime/runtime.h" +#include "impl_patterns.h" + namespace shad { namespace impl { @@ -80,7 +82,6 @@ ForwardIt max_element(distributed_sequential_tag&& policy, ForwardIt first, return res_it->first; } -// todo drop DefaultConstructible requirement template ForwardIt max_element(distributed_parallel_tag&& policy, ForwardIt first, ForwardIt last, Compare comp) { @@ -100,17 +101,31 @@ ForwardIt max_element(distributed_parallel_tag&& policy, ForwardIt first, h, locality, [](rt::Handle&, const std::tuple& args, res_t* result) { + using local_iterator_t = typename itr_traits::local_iterator_type; auto gbegin = std::get<0>(args); auto gend = std::get<1>(args); - auto local_range = itr_traits::local_range(gbegin, gend); - auto begin = local_range.begin(); - auto end = local_range.end(); - auto lmax = std::max_element(begin, end, std::get<2>(args)); + auto op = std::get<2>(args); + + // map + auto nil_val = itr_traits::local_range(gbegin, gend).end(); + auto map_res = local_map( + gbegin, gend, + // map kernel + [&](local_iterator_t begin, local_iterator_t end) { + return std::max_element(begin, end, op); + }, + nil_val); + + // reduce + auto lmax = *std::max_element( + map_res.begin(), map_res.end(), + [&](const local_iterator_t& x, const local_iterator_t& y) { + return y != nil_val && op(*x, 
*y); + }); ForwardIt gres = itr_traits::iterator_from_local(gbegin, gend, lmax); - if (gres != gend) - *result = std::make_pair(gres, *lmax); - else - *result = std::make_pair(gres, value_t{}); + + // local solution + *result = std::make_pair(gres, lmax != nil_val ? *lmax : value_t{}); }, args, &res[i]); } @@ -162,7 +177,6 @@ ForwardIt min_element(distributed_sequential_tag&& policy, ForwardIt first, return res_it->first; } -// todo drop DefaultConstructible requirement template ForwardIt min_element(distributed_parallel_tag&& policy, ForwardIt first, ForwardIt last, Compare comp) { @@ -182,17 +196,31 @@ ForwardIt min_element(distributed_parallel_tag&& policy, ForwardIt first, h, locality, [](rt::Handle&, const std::tuple& args, res_t* result) { + using local_iterator_t = typename itr_traits::local_iterator_type; auto gbegin = std::get<0>(args); auto gend = std::get<1>(args); - auto local_range = itr_traits::local_range(gbegin, gend); - auto begin = local_range.begin(); - auto end = local_range.end(); - auto lmin = std::min_element(begin, end, std::get<2>(args)); + auto op = std::get<2>(args); + + // map + auto nil_val = itr_traits::local_range(gbegin, gend).end(); + auto map_res = local_map( + gbegin, gend, + // map kernel + [&](local_iterator_t begin, local_iterator_t end) { + return std::min_element(begin, end, op); + }, + nil_val); + + // reduce + auto lmin = *std::min_element( + map_res.begin(), map_res.end(), + [&](const local_iterator_t& x, const local_iterator_t& y) { + return x != nil_val && op(*x, *y); + }); ForwardIt gres = itr_traits::iterator_from_local(gbegin, gend, lmin); - if (gres != gend) - *result = std::make_pair(gres, *lmin); - else - *result = std::make_pair(gres, value_t{}); + + // local solution + *result = std::make_pair(gres, lmin != nil_val ? 
*lmin : value_t{}); }, args, &res[i]); } @@ -280,24 +308,46 @@ std::pair minmax_element( h, locality, [](rt::Handle&, const std::tuple& args, res_t* result) { + using local_iterator_t = typename itr_traits::local_iterator_type; auto gbegin = std::get<0>(args); auto gend = std::get<1>(args); - auto local_range = itr_traits::local_range(gbegin, gend); - auto begin = local_range.begin(); - auto end = local_range.end(); - auto lminmax = std::minmax_element(begin, end, std::get<2>(args)); - ForwardIt minit = - itr_traits::iterator_from_local(gbegin, gend, lminmax.first); - ForwardIt maxit = - itr_traits::iterator_from_local(gbegin, gend, lminmax.second); - if (minit != gend) { - *result = std::make_pair( - std::make_pair(*(lminmax.first), *(lminmax.second)), - std::make_pair(minit, maxit)); - } else { - *result = std::make_pair(std::make_pair(value_t{}, value_t{}), - std::make_pair(minit, maxit)); - } + auto op = std::get<2>(args); + + // map + auto nil_val = itr_traits::local_range(gbegin, gend).end(); + auto map_res = local_map( + gbegin, gend, + // map kernel + [&](local_iterator_t begin, local_iterator_t end) { + return std::minmax_element(begin, end, op); + }, + std::make_pair(nil_val, nil_val)); + + // reduce + auto map_res_min = + std::min_element( + map_res.begin(), map_res.end(), + [&](const std::pair& x, + const std::pair& y) { + return x.first != nil_val && op(*x.first, *y.first); + }) + ->first; + auto map_res_max = + std::max_element( + map_res.begin(), map_res.end(), + [&](const std::pair& x, + const std::pair& y) { + return y.second != nil_val && op(*x.second, *y.second); + }) + ->second; + + // local solution + *result = std::make_pair( + std::make_pair(map_res_min != nil_val ? *map_res_min : value_t{}, + map_res_max != nil_val ? 
*map_res_max : value_t{}), + std::make_pair( + itr_traits::iterator_from_local(gbegin, gend, map_res_min), + itr_traits::iterator_from_local(gbegin, gend, map_res_max))); }, args, &res[i]); } diff --git a/test/unit_tests/core/CMakeLists.txt b/test/unit_tests/core/CMakeLists.txt index 2bf2aa8c..af12cb18 100755 --- a/test/unit_tests/core/CMakeLists.txt +++ b/test/unit_tests/core/CMakeLists.txt @@ -4,8 +4,8 @@ set(tests shad_array_test unordered_set_test unordered_map_test - std_algorithm_test -# std_numeric_test +# std_algorithm_test +# std_numeric_test shad_algorithm_test ) From d8eb472c49ab362570e2c87fd168cb12d6ba54c4 Mon Sep 17 00:00:00 2001 From: Maurizio Drocco Date: Wed, 30 Jan 2019 14:56:38 -0800 Subject: [PATCH 02/19] [pnnl/SHAD#152] missing header --- include/shad/core/impl/impl_patterns.h | 97 ++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100755 include/shad/core/impl/impl_patterns.h diff --git a/include/shad/core/impl/impl_patterns.h b/include/shad/core/impl/impl_patterns.h new file mode 100755 index 00000000..2dc4c88d --- /dev/null +++ b/include/shad/core/impl/impl_patterns.h @@ -0,0 +1,97 @@ +//===------------------------------------------------------------*- C++ -*-===// +// +// SHAD +// +// The Scalable High-performance Algorithms and Data Structure Library +// +//===----------------------------------------------------------------------===// +// +// Copyright 2018 Battelle Memorial Institute +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy +// of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +// License for the specific language governing permissions and limitations +// under the License. +// +//===----------------------------------------------------------------------===// + +#ifndef INCLUDE_SHAD_CORE_IMPL_IMPL_PATTERNS_H +#define INCLUDE_SHAD_CORE_IMPL_IMPL_PATTERNS_H + +#include + +#include "shad/distributed_iterator_traits.h" +#include "shad/runtime/runtime.h" + +namespace shad { +namespace impl { + +template +struct mapped_t { + using itr_traits = distributed_iterator_traits; + using local_iterator_t = typename itr_traits::local_iterator_type; + using type = typename std::result_of::type; +}; + +template +auto local_map(ForwardIt gbegin, ForwardIt gend, MapF&& map_kernel, + const typename mapped_t::type& init) { + using itr_traits = distributed_iterator_traits; + auto local_range = itr_traits::local_range(gbegin, gend); + auto begin = local_range.begin(); + auto end = local_range.end(); + + // allocate partial results + using mapped_t = typename mapped_t::type; + auto range_len = std::distance(begin, end); + auto max_n_blocks = rt::impl::getConcurrency(); + auto block_size = (range_len + max_n_blocks - 1) / max_n_blocks; + std::vector map_res(max_n_blocks, init); + + rt::Handle map_h; + for (size_t block_id = 0, first = 0; + block_id < max_n_blocks && first < range_len; + ++block_id, first += block_size) { + auto map_args = std::make_tuple(block_id, block_size, begin, end, + map_kernel, &map_res[block_id]); + rt::asyncExecuteAt( + map_h, rt::thisLocality(), + [](rt::Handle&, const typeof(map_args)& map_args) { + size_t block_id = std::get<0>(map_args); + size_t block_size = std::get<1>(map_args); + auto begin = std::get<2>(map_args); + auto end = std::get<3>(map_args); + auto map_kernel = std::get<4>(map_args); + auto res_unit = std::get<5>(map_args); + // iteration-block boundaries + auto block_begin = begin; + std::advance(block_begin, block_id * block_size); + auto block_end = block_begin; + if (std::distance(block_begin, end) < 
block_size) + block_end = end; + else + std::advance(block_end, block_size); + // map over the block + auto m = map_kernel(block_begin, block_end); + *res_unit = m; + }, + map_args); + } + rt::waitForCompletion(map_h); + + // reduce + return map_res; +} + +} // namespace impl +} // namespace shad + +#endif /* INCLUDE_SHAD_CORE_IMPL_IMPL_PATTERNS_H */ From 86285630d68bcf1a6b7a0f804d24e23293dc788c Mon Sep 17 00:00:00 2001 From: Maurizio Drocco Date: Fri, 1 Feb 2019 10:15:02 -0800 Subject: [PATCH 03/19] [pnnl/SHAD#152] pattern-based min/max --- include/shad/core/impl/impl_patterns.h | 193 +++++-- include/shad/core/impl/minimum_maximum_ops.h | 529 +++++++++---------- 2 files changed, 405 insertions(+), 317 deletions(-) diff --git a/include/shad/core/impl/impl_patterns.h b/include/shad/core/impl/impl_patterns.h index 2dc4c88d..8c06c6d5 100755 --- a/include/shad/core/impl/impl_patterns.h +++ b/include/shad/core/impl/impl_patterns.h @@ -25,7 +25,11 @@ #ifndef INCLUDE_SHAD_CORE_IMPL_IMPL_PATTERNS_H #define INCLUDE_SHAD_CORE_IMPL_IMPL_PATTERNS_H +#include #include +#include +#include +#include #include "shad/distributed_iterator_traits.h" #include "shad/runtime/runtime.h" @@ -33,61 +37,156 @@ namespace shad { namespace impl { -template -struct mapped_t { - using itr_traits = distributed_iterator_traits; - using local_iterator_t = typename itr_traits::local_iterator_type; - using type = typename std::result_of::type; +#include +#include +#include +#include + +template +struct Apply { + template + static inline auto apply(F&& f, T&& t, A&&... a) { + return Apply::apply(::std::forward(f), ::std::forward(t), + ::std::get(::std::forward(t)), + ::std::forward(a)...); + } }; +template +struct Apply { + template + static inline auto apply(F&& f, T&&, A&&... 
a) { + return ::std::forward(f)(::std::forward(a)...); + } +}; + +template +inline auto apply_from(F&& f, T&& t) { + return Apply >::value>::apply( + ::std::forward(f), ::std::forward(t)); +} + +//////////////////////////////////////////////////////////////////////////////// +// +// distributed_folding_map applies map_kernel sequentially to each local +// portion, forwarding the solution from portion i to portion i + 1. +// +//////////////////////////////////////////////////////////////////////////////// +template +auto distributed_folding_map(ForwardIt first, ForwardIt last, MapF&& map_kernel, + const S& init_sol, Args&&... args) { + using itr_traits = distributed_iterator_traits; + auto localities = itr_traits::localities(first, last); + auto res = init_sol; + for (auto locality = localities.begin(), end = localities.end(); + locality != end; ++locality) { + auto d_args = std::make_tuple(map_kernel, first, last, res, args...); + rt::executeAtWithRet( + locality, + [](const typeof(d_args)& d_args, S* result) { + *result = apply_from<1>(::std::get<0>(d_args), + ::std::forward(d_args)); + }, + d_args, &res); + } + return res; +} + +//////////////////////////////////////////////////////////////////////////////// +// +// local_folding_map applies map_kernel over the local portion. +// +//////////////////////////////////////////////////////////////////////////////// template -auto local_map(ForwardIt gbegin, ForwardIt gend, MapF&& map_kernel, - const typename mapped_t::type& init) { +auto local_folding_map(ForwardIt first, ForwardIt last, MapF&& map_kernel) { + return map_kernel(first, last); +} + +//////////////////////////////////////////////////////////////////////////////// +// +// distributed_map applies map_kernel to each local portion and returns an +// iterable collection of partial results (one for each locality that owns a +// portion of the input range). +// +// The return type of map_kernel must be DefaultConstructible. 
+// +//////////////////////////////////////////////////////////////////////////////// +template +auto distributed_map(ForwardIt first, ForwardIt last, MapF&& map_kernel, + Args&&... args) { using itr_traits = distributed_iterator_traits; - auto local_range = itr_traits::local_range(gbegin, gend); - auto begin = local_range.begin(); - auto end = local_range.end(); + auto localities = itr_traits::localities(first, last); + using mapped_t = typename std::result_of::type; + size_t i = 0; + rt::Handle h; + auto d_args = std::make_tuple(map_kernel, first, last, args...); + std::vector res(localities.size()); + for (auto locality = localities.begin(), end = localities.end(); + locality != end; ++locality, ++i) { + rt::asyncExecuteAtWithRet( + h, locality, + [](rt::Handle&, const typeof(d_args)& d_args, mapped_t* result) { + *result = apply_from<1>(::std::get<0>(d_args), + ::std::forward(d_args)); + }, + d_args, &res[i]); + } + rt::waitForCompletion(h); + return res; +} + +//////////////////////////////////////////////////////////////////////////////// +// +// local_map applies map_kernel over a partitioning of a local portion and +// returns an iterable collection of partial results. +// +// The return type of map_kernel must be DefaultConstructible. 
+// +//////////////////////////////////////////////////////////////////////////////// +template +auto local_map(ForwardIt first, ForwardIt last, MapF&& map_kernel) { + using mapped_t = + typename std::result_of::type; // allocate partial results - using mapped_t = typename mapped_t::type; - auto range_len = std::distance(begin, end); - auto max_n_blocks = rt::impl::getConcurrency(); - auto block_size = (range_len + max_n_blocks - 1) / max_n_blocks; - std::vector map_res(max_n_blocks, init); - - rt::Handle map_h; - for (size_t block_id = 0, first = 0; - block_id < max_n_blocks && first < range_len; - ++block_id, first += block_size) { - auto map_args = std::make_tuple(block_id, block_size, begin, end, - map_kernel, &map_res[block_id]); - rt::asyncExecuteAt( - map_h, rt::thisLocality(), - [](rt::Handle&, const typeof(map_args)& map_args) { - size_t block_id = std::get<0>(map_args); - size_t block_size = std::get<1>(map_args); - auto begin = std::get<2>(map_args); - auto end = std::get<3>(map_args); - auto map_kernel = std::get<4>(map_args); - auto res_unit = std::get<5>(map_args); - // iteration-block boundaries - auto block_begin = begin; - std::advance(block_begin, block_id * block_size); - auto block_end = block_begin; - if (std::distance(block_begin, end) < block_size) - block_end = end; - else - std::advance(block_end, block_size); - // map over the block - auto m = map_kernel(block_begin, block_end); - *res_unit = m; - }, - map_args); + auto range_len = std::distance(first, last); + auto n_blocks = std::min(rt::impl::getConcurrency(), (size_t)range_len); + std::vector map_res(n_blocks); + + if (n_blocks) { + auto block_size = (range_len + n_blocks - 1) / n_blocks; + + rt::Handle map_h; + for (size_t block_id = 0; block_id < n_blocks; ++block_id) { + auto map_args = std::make_tuple(block_id, block_size, first, last, + map_kernel, &map_res[block_id]); + rt::asyncExecuteAt( + map_h, rt::thisLocality(), + [](rt::Handle&, const typeof(map_args)& map_args) { + size_t 
block_id = std::get<0>(map_args); + size_t block_size = std::get<1>(map_args); + auto begin = std::get<2>(map_args); + auto end = std::get<3>(map_args); + auto map_kernel = std::get<4>(map_args); + auto res_unit = std::get<5>(map_args); + // iteration-block boundaries + auto block_begin = begin; + std::advance(block_begin, block_id * block_size); + auto block_end = block_begin; + if (std::distance(block_begin, end) < block_size) + block_end = end; + else + std::advance(block_end, block_size); + // map over the block + auto m = map_kernel(block_begin, block_end); + *res_unit = m; + }, + map_args); + } + rt::waitForCompletion(map_h); } - rt::waitForCompletion(map_h); - // reduce return map_res; } diff --git a/include/shad/core/impl/minimum_maximum_ops.h b/include/shad/core/impl/minimum_maximum_ops.h index 555993d9..f77af77f 100755 --- a/include/shad/core/impl/minimum_maximum_ops.h +++ b/include/shad/core/impl/minimum_maximum_ops.h @@ -41,326 +41,315 @@ namespace shad { namespace impl { -// todo drop DefaultConstructible requirement +// contract: template ForwardIt max_element(distributed_sequential_tag&& policy, ForwardIt first, ForwardIt last, Compare comp) { - if (first == last) return last; using itr_traits = distributed_iterator_traits; - auto localities = itr_traits::localities(first, last); - using value_t = - typename std::remove_const::type; - using res_t = std::pair; - auto args = std::make_tuple(first, last, comp); - std::vector res(localities.size()); - size_t i = 0; - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality, ++i) { - rt::executeAtWithRet( - locality, - [](const std::tuple& args, - res_t* result) { - auto gbegin = std::get<0>(args); - auto gend = std::get<1>(args); - auto local_range = itr_traits::local_range(gbegin, gend); - auto begin = local_range.begin(); - auto end = local_range.end(); - auto lmax = std::max_element(begin, end, std::get<2>(args)); - ForwardIt gres = 
itr_traits::iterator_from_local(gbegin, gend, lmax); - if (gres != gend) - *result = std::make_pair(gres, *lmax); - else - *result = std::make_pair(gres, value_t{}); - }, - args, &res[i]); - } + using value_t = typename itr_traits::value_type; + static_assert(std::is_default_constructible::value); - auto res_it = std::max_element( - res.begin(), res.end(), [&](const res_t& x, const res_t& y) { - return y.first != last && comp(x.second, y.second); - }); - return res_it->first; + if (first == last) return last; + + auto map_res = distributed_folding_map( + // range + first, last, + // kernel + [](ForwardIt first, ForwardIt last, + const std::pair& partial_solution, Compare comp) { + using local_iterator_t = typename itr_traits::local_iterator_type; + + // local processing + auto lrange = itr_traits::local_range(first, last); + auto lmax = std::max_element(lrange.begin(), lrange.end(), comp); + + // update the partial solution + auto nil_val = itr_traits::local_range(first, last).end(); + if (lmax != nil_val && (partial_solution.first == last || + comp(partial_solution.second, *lmax))) { + auto gmax = itr_traits::iterator_from_local(first, last, lmax); + return std::make_pair(gmax, *lmax); + } + return partial_solution; + }, + // initial solution + std::make_pair(last, value_t{}), + // map arguments + comp); + + return map_res.first; } template ForwardIt max_element(distributed_parallel_tag&& policy, ForwardIt first, ForwardIt last, Compare comp) { - if (first == last) return last; using itr_traits = distributed_iterator_traits; - auto localities = itr_traits::localities(first, last); - using value_t = - typename std::remove_const::type; - using res_t = std::pair; - std::vector res(localities.size()); - size_t i = 0; - rt::Handle h; - auto args = std::make_tuple(first, last, comp); - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality, ++i) { - rt::asyncExecuteAtWithRet( - h, locality, - [](rt::Handle&, const std::tuple& 
args, - res_t* result) { - using local_iterator_t = typename itr_traits::local_iterator_type; - auto gbegin = std::get<0>(args); - auto gend = std::get<1>(args); - auto op = std::get<2>(args); - - // map - auto nil_val = itr_traits::local_range(gbegin, gend).end(); - auto map_res = local_map( - gbegin, gend, - // map kernel - [&](local_iterator_t begin, local_iterator_t end) { - return std::max_element(begin, end, op); - }, - nil_val); - - // reduce - auto lmax = *std::max_element( - map_res.begin(), map_res.end(), - [&](const local_iterator_t& x, const local_iterator_t& y) { - return y != nil_val && op(*x, *y); - }); - ForwardIt gres = itr_traits::iterator_from_local(gbegin, gend, lmax); - - // local solution - *result = std::make_pair(gres, lmax != nil_val ? *lmax : value_t{}); - }, - args, &res[i]); - } - rt::waitForCompletion(h); + using value_t = typename itr_traits::value_type; + static_assert(std::is_default_constructible::value); + + if (first == last) return last; + + // distributed map + auto map_res = distributed_map( + // range + first, last, + // kernel + [](ForwardIt first, ForwardIt last, Compare comp) { + using local_iterator_t = typename itr_traits::local_iterator_type; + + // local map + auto lrange = itr_traits::local_range(first, last); + auto map_res = local_map( + // range + lrange.begin(), lrange.end(), + // kernel + [&](local_iterator_t b, local_iterator_t e) { + return std::max_element(b, e, comp); + }); + + // local reduce + auto nil_val = itr_traits::local_range(first, last).end(); + auto lmax_it = std::max_element( + map_res.begin(), map_res.end(), + [&](const local_iterator_t& x, const local_iterator_t& y) { + return y != nil_val && comp(*x, *y); + }); + + // local solution + auto lmax = lmax_it != map_res.end() ? *lmax_it : nil_val; + ForwardIt gres = itr_traits::iterator_from_local(first, last, lmax); + return std::make_pair(gres, lmax != nil_val ? 
*lmax : value_t{}); + }, + // map arguments + comp); + + // reduce + using map_res_t = typeof(map_res); + using res_t = typename map_res_t::value_type; auto res_it = std::max_element( - res.begin(), res.end(), [&](const res_t& x, const res_t& y) { + map_res.begin(), map_res.end(), [&](const res_t& x, const res_t& y) { return y.first != last && comp(x.second, y.second); }); return res_it->first; } -// todo drop DefaultConstructible requirement template ForwardIt min_element(distributed_sequential_tag&& policy, ForwardIt first, ForwardIt last, Compare comp) { - if (first == last) return last; using itr_traits = distributed_iterator_traits; - auto localities = itr_traits::localities(first, last); - using value_t = - typename std::remove_const::type; - using res_t = std::pair; - auto args = std::make_tuple(first, last, comp); - std::vector res(localities.size()); - size_t i = 0; - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality, ++i) { - rt::executeAtWithRet( - locality, - [](const std::tuple& args, - res_t* result) { - auto gbegin = std::get<0>(args); - auto gend = std::get<1>(args); - auto local_range = itr_traits::local_range(gbegin, gend); - auto begin = local_range.begin(); - auto end = local_range.end(); - auto lmin = std::min_element(begin, end, std::get<2>(args)); - ForwardIt gres = itr_traits::iterator_from_local(gbegin, gend, lmin); - if (gres != gend) - *result = std::make_pair(gres, *lmin); - else - *result = std::make_pair(gres, value_t{}); - }, - args, &res[i]); - } - auto res_it = std::min_element( - res.begin(), res.end(), [&](const res_t& x, const res_t& y) { - return x.first != last && comp(x.second, y.second); - }); - return res_it->first; + using value_t = typename itr_traits::value_type; + static_assert(std::is_default_constructible::value); + + if (first == last) return last; + + auto map_res = distributed_folding_map( + // range + first, last, + // kernel + [](ForwardIt first, ForwardIt last, + const 
std::pair& partial_solution, Compare comp) { + using local_iterator_t = typename itr_traits::local_iterator_type; + + // local processing + auto lrange = itr_traits::local_range(first, last); + auto lmin = std::min_element(lrange.begin(), lrange.end(), comp); + + // update the partial solution + auto nil_val = itr_traits::local_range(first, last).end(); + if (lmin != nil_val && (partial_solution.first == last || + comp(*lmin, partial_solution.second))) { + auto gmin = itr_traits::iterator_from_local(first, last, lmin); + return std::make_pair(gmin, *lmin); + } + return partial_solution; + }, + // initial solution + std::make_pair(last, value_t{}), + // map arguments + comp); + + return map_res.first; } template ForwardIt min_element(distributed_parallel_tag&& policy, ForwardIt first, ForwardIt last, Compare comp) { - if (first == last) return last; using itr_traits = distributed_iterator_traits; - auto localities = itr_traits::localities(first, last); - using value_t = - typename std::remove_const::type; - using res_t = std::pair; - size_t i = 0; - rt::Handle h; - auto args = std::make_tuple(first, last, comp); - std::vector res(localities.size()); - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality, ++i) { - rt::asyncExecuteAtWithRet( - h, locality, - [](rt::Handle&, const std::tuple& args, - res_t* result) { - using local_iterator_t = typename itr_traits::local_iterator_type; - auto gbegin = std::get<0>(args); - auto gend = std::get<1>(args); - auto op = std::get<2>(args); - - // map - auto nil_val = itr_traits::local_range(gbegin, gend).end(); - auto map_res = local_map( - gbegin, gend, - // map kernel - [&](local_iterator_t begin, local_iterator_t end) { - return std::min_element(begin, end, op); - }, - nil_val); - - // reduce - auto lmin = *std::min_element( - map_res.begin(), map_res.end(), - [&](const local_iterator_t& x, const local_iterator_t& y) { - return x != nil_val && op(*x, *y); - }); - ForwardIt gres = 
itr_traits::iterator_from_local(gbegin, gend, lmin); - - // local solution - *result = std::make_pair(gres, lmin != nil_val ? *lmin : value_t{}); - }, - args, &res[i]); - } - rt::waitForCompletion(h); + using value_t = typename itr_traits::value_type; + static_assert(std::is_default_constructible::value); + + if (first == last) return last; + + // distributed map + auto map_res = distributed_map( + // range + first, last, + // kernel + [](ForwardIt first, ForwardIt last, Compare comp) { + using local_iterator_t = typename itr_traits::local_iterator_type; + + // local map + auto lrange = itr_traits::local_range(first, last); + auto map_res = local_map( + // range + lrange.begin(), lrange.end(), + // kernel + [&](local_iterator_t b, local_iterator_t e) { + return std::min_element(b, e, comp); + }); + + // local reduce + auto nil_val = itr_traits::local_range(first, last).end(); + auto lmin_it = std::min_element( + map_res.begin(), map_res.end(), + [&](const local_iterator_t& x, const local_iterator_t& y) { + return x != nil_val && comp(*x, *y); + }); + + // local solution + auto lmin = lmin_it != map_res.end() ? *lmin_it : nil_val; + ForwardIt gres = itr_traits::iterator_from_local(first, last, lmin); + return std::make_pair(gres, lmin != nil_val ? 
*lmin : value_t{}); + }, + // map arguments + comp); + + // reduce + using map_res_t = typeof(map_res); + using res_t = typename map_res_t::value_type; auto res_it = std::min_element( - res.begin(), res.end(), [&](const res_t& x, const res_t& y) { + map_res.begin(), map_res.end(), [&](const res_t& x, const res_t& y) { return x.first != last && comp(x.second, y.second); }); return res_it->first; } -// todo drop DefaultConstructible requirement template std::pair minmax_element( distributed_sequential_tag&& policy, ForwardIt first, ForwardIt last, Compare comp) { - if (first == last) return std::make_pair(last, last); using itr_traits = distributed_iterator_traits; - auto localities = itr_traits::localities(first, last); - using value_t = - typename std::remove_const::type; - using res_t = - std::pair, std::pair>; - auto args = std::make_tuple(first, last, comp); - std::vector res(localities.size()); - size_t i = 0; - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality, ++i) { - rt::executeAtWithRet( - locality, - [](const std::tuple& args, - res_t* result) { - auto gbegin = std::get<0>(args); - auto gend = std::get<1>(args); - auto local_range = itr_traits::local_range(gbegin, gend); - auto begin = local_range.begin(); - auto end = local_range.end(); - auto lminmax = std::minmax_element(begin, end, std::get<2>(args)); - ForwardIt minit = - itr_traits::iterator_from_local(gbegin, gend, lminmax.first); - ForwardIt maxit = - itr_traits::iterator_from_local(gbegin, gend, lminmax.second); - - if (minit != gend) { - *result = std::make_pair( - std::make_pair(*(lminmax.first), *(lminmax.second)), - std::make_pair(minit, maxit)); - } else { - *result = std::make_pair(std::make_pair(value_t{}, value_t{}), - std::make_pair(minit, maxit)); - } - }, - args, &res[i]); - } - auto res_min = std::min_element( - res.begin(), res.end(), [&](const res_t& x, const res_t& y) { - return x.second.first != last && comp(x.first.first, 
y.first.first); - }); - auto res_max = std::max_element( - res.begin(), res.end(), [&](const res_t& x, const res_t& y) { - return y.second.first != last && comp(x.first.second, y.first.second); - }); - return std::make_pair(res_min->second.first, res_max->second.second); + using value_t = typename itr_traits::value_type; + static_assert(std::is_default_constructible::value); + + struct sol_t { + ForwardIt min, max; + value_t min_val, max_val; + }; + + if (first == last) return std::make_pair(last, last); + + auto map_res = distributed_folding_map( + // range + first, last, + // kernel + [](ForwardIt first, ForwardIt last, const sol_t& partial_solution, + Compare comp) { + using local_iterator_t = typename itr_traits::local_iterator_type; + + // local processing + auto lrange = itr_traits::local_range(first, last); + auto lminmax = std::minmax_element(lrange.begin(), lrange.end(), comp); + + // update the partial solution + auto nil_val = itr_traits::local_range(first, last).end(); + auto res = partial_solution; + if (lminmax.first != nil_val && + (partial_solution.min == last || + comp(*lminmax.first, partial_solution.min_val))) { + auto gmin = + itr_traits::iterator_from_local(first, last, lminmax.first); + res.min = gmin; + res.min_val = *lminmax.first; + } + if (lminmax.second != nil_val && + (partial_solution.max == last || + comp(*lminmax.second, partial_solution.max_val))) { + auto gmax = + itr_traits::iterator_from_local(first, last, lminmax.second); + res.max = gmax; + res.max_val = *lminmax.second; + } + return res; + }, + // initial solution + sol_t{last, last, value_t{}, value_t{}}, + // map arguments + comp); + + return std::make_pair(map_res.min, map_res.max); } -// todo drop DefaultConstructible requirement template std::pair minmax_element( distributed_parallel_tag&& policy, ForwardIt first, ForwardIt last, Compare comp) { if (first == last) return std::make_pair(last, last); using itr_traits = distributed_iterator_traits; - auto localities = 
itr_traits::localities(first, last); - using value_t = - typename std::remove_const::type; - using res_t = - std::pair, std::pair>; - std::vector res(localities.size()); - size_t i = 0; - rt::Handle h; - auto args = std::make_tuple(first, last, comp); - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality, ++i) { - rt::asyncExecuteAtWithRet( - h, locality, - [](rt::Handle&, const std::tuple& args, - res_t* result) { - using local_iterator_t = typename itr_traits::local_iterator_type; - auto gbegin = std::get<0>(args); - auto gend = std::get<1>(args); - auto op = std::get<2>(args); - - // map - auto nil_val = itr_traits::local_range(gbegin, gend).end(); - auto map_res = local_map( - gbegin, gend, - // map kernel - [&](local_iterator_t begin, local_iterator_t end) { - return std::minmax_element(begin, end, op); - }, - std::make_pair(nil_val, nil_val)); - - // reduce - auto map_res_min = - std::min_element( - map_res.begin(), map_res.end(), - [&](const std::pair& x, - const std::pair& y) { - return x.first != nil_val && op(*x.first, *y.first); - }) - ->first; - auto map_res_max = - std::max_element( - map_res.begin(), map_res.end(), - [&](const std::pair& x, - const std::pair& y) { - return y.second != nil_val && op(*x.second, *y.second); - }) - ->second; - - // local solution - *result = std::make_pair( - std::make_pair(map_res_min != nil_val ? *map_res_min : value_t{}, - map_res_max != nil_val ? 
*map_res_max : value_t{}), - std::make_pair( - itr_traits::iterator_from_local(gbegin, gend, map_res_min), - itr_traits::iterator_from_local(gbegin, gend, map_res_max))); - }, - args, &res[i]); - } - rt::waitForCompletion(h); + using value_t = typename itr_traits::value_type; + static_assert(std::is_default_constructible::value); + + struct sol_t { + ForwardIt min, max; + value_t min_val, max_val; + }; + + if (first == last) return std::make_pair(last, last); + + // distributed map + auto map_res = distributed_map( + // range + first, last, + // kernel + [](ForwardIt first, ForwardIt last, Compare comp) { + using local_iterator_t = typename itr_traits::local_iterator_type; + + // local map + auto lrange = itr_traits::local_range(first, last); + auto map_res = local_map( + // range + lrange.begin(), lrange.end(), + // kernel + [&](local_iterator_t b, local_iterator_t e) { + return std::minmax_element(b, e, comp); + }); + + // reduce + auto nil_val = itr_traits::local_range(first, last).end(); + auto lmin_it = std::min_element( + map_res.begin(), map_res.end(), + [&](const std::pair& x, + const std::pair& y) { + return x.first != nil_val && comp(*x.first, *y.first); + }); + auto lmax_it = std::max_element( + map_res.begin(), map_res.end(), + [&](const std::pair& x, + const std::pair& y) { + return y.second != nil_val && comp(*x.second, *y.second); + }); + + // local solution + auto lmin = lmin_it != map_res.end() ? lmin_it->first : nil_val; + auto lmax = lmax_it != map_res.end() ? lmax_it->second : nil_val; + return sol_t{itr_traits::iterator_from_local(first, last, lmin), + itr_traits::iterator_from_local(first, last, lmax), + lmin != nil_val ? *lmin : value_t{}, + lmax != nil_val ? 
*lmax : value_t{}}; + }, + // map arguments + comp); + + // reduce auto res_min = std::min_element( - res.begin(), res.end(), [&](const res_t& x, const res_t& y) { - return x.second.first != last && comp(x.first.first, y.first.first); + map_res.begin(), map_res.end(), [&](const sol_t& x, const sol_t& y) { + return x.min != last && comp(x.min_val, y.min_val); }); auto res_max = std::max_element( - res.begin(), res.end(), [&](const res_t& x, const res_t& y) { - return y.second.first != last && comp(x.first.second, y.first.second); + map_res.begin(), map_res.end(), [&](const sol_t& x, const sol_t& y) { + return y.max != last && comp(x.max_val, y.max_val); }); - return std::make_pair(res_min->second.first, res_max->second.second); + return std::make_pair(res_min->min, res_max->max); } } // namespace impl From 531c34fe027083ec45116ada98471270b1da0874 Mon Sep 17 00:00:00 2001 From: Maurizio Drocco Date: Fri, 1 Feb 2019 10:29:19 -0800 Subject: [PATCH 04/19] [pnnl/SHAD#152] fixes --- include/shad/core/impl/minimum_maximum_ops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/shad/core/impl/minimum_maximum_ops.h b/include/shad/core/impl/minimum_maximum_ops.h index f77af77f..df6aad68 100755 --- a/include/shad/core/impl/minimum_maximum_ops.h +++ b/include/shad/core/impl/minimum_maximum_ops.h @@ -264,7 +264,7 @@ std::pair minmax_element( } if (lminmax.second != nil_val && (partial_solution.max == last || - comp(*lminmax.second, partial_solution.max_val))) { + comp(partial_solution.max_val, *lminmax.second))) { auto gmax = itr_traits::iterator_from_local(first, last, lminmax.second); res.max = gmax; From e68c3dc204d31c3f7f85859687e1cacdb033de49 Mon Sep 17 00:00:00 2001 From: Maurizio Drocco Date: Fri, 1 Feb 2019 10:35:20 -0800 Subject: [PATCH 05/19] [pnnl/SHAD#152] comment --- include/shad/core/impl/impl_patterns.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/shad/core/impl/impl_patterns.h b/include/shad/core/impl/impl_patterns.h 
index 8c06c6d5..32e4c94b 100755 --- a/include/shad/core/impl/impl_patterns.h +++ b/include/shad/core/impl/impl_patterns.h @@ -42,6 +42,13 @@ namespace impl { #include #include +//////////////////////////////////////////////////////////////////////////////// +// +// apply_from is an utility function that performs compile-time unpacking of +// elements [i,N) from a N-ary tuple and invokes a callable on the unpacked +// elements. +// +//////////////////////////////////////////////////////////////////////////////// template struct Apply { template From 1f0ba044a13e4940b22e182847f63e73ddd944c7 Mon Sep 17 00:00:00 2001 From: Maurizio Drocco Date: Fri, 1 Feb 2019 10:36:54 -0800 Subject: [PATCH 06/19] [pnnl/SHAD#152] typo --- include/shad/core/impl/minimum_maximum_ops.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/shad/core/impl/minimum_maximum_ops.h b/include/shad/core/impl/minimum_maximum_ops.h index df6aad68..5d613b88 100755 --- a/include/shad/core/impl/minimum_maximum_ops.h +++ b/include/shad/core/impl/minimum_maximum_ops.h @@ -41,7 +41,6 @@ namespace shad { namespace impl { -// contract: template ForwardIt max_element(distributed_sequential_tag&& policy, ForwardIt first, ForwardIt last, Compare comp) { From edce916889a4dc7d04ad26516147c292c373325b Mon Sep 17 00:00:00 2001 From: Maurizio Drocco Date: Fri, 1 Feb 2019 10:41:46 -0800 Subject: [PATCH 07/19] [pnnl/SHAD#152] typo --- test/unit_tests/core/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/unit_tests/core/CMakeLists.txt b/test/unit_tests/core/CMakeLists.txt index af12cb18..2bf2aa8c 100755 --- a/test/unit_tests/core/CMakeLists.txt +++ b/test/unit_tests/core/CMakeLists.txt @@ -4,8 +4,8 @@ set(tests shad_array_test unordered_set_test unordered_map_test -# std_algorithm_test -# std_numeric_test + std_algorithm_test +# std_numeric_test shad_algorithm_test ) From 4df66a8251324c2c4027254fc7450a6779cfeed8 Mon Sep 17 00:00:00 2001 From: Maurizio Drocco Date: Fri, 1 Feb 
2019 11:19:16 -0800 Subject: [PATCH 08/19] [pnnl/SHAD#152] fixes --- include/shad/core/impl/impl_patterns.h | 19 ++++++++----------- include/shad/core/impl/minimum_maximum_ops.h | 12 ++++++++++++ 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/include/shad/core/impl/impl_patterns.h b/include/shad/core/impl/impl_patterns.h index 32e4c94b..f688eb61 100755 --- a/include/shad/core/impl/impl_patterns.h +++ b/include/shad/core/impl/impl_patterns.h @@ -99,16 +99,6 @@ auto distributed_folding_map(ForwardIt first, ForwardIt last, MapF&& map_kernel, return res; } -//////////////////////////////////////////////////////////////////////////////// -// -// local_folding_map applies map_kernel over the local portion. -// -//////////////////////////////////////////////////////////////////////////////// -template -auto local_folding_map(ForwardIt first, ForwardIt last, MapF&& map_kernel) { - return map_kernel(first, last); -} - //////////////////////////////////////////////////////////////////////////////// // // distributed_map applies map_kernel to each local portion and returns an @@ -122,9 +112,13 @@ template auto distributed_map(ForwardIt first, ForwardIt last, MapF&& map_kernel, Args&&... 
args) { using itr_traits = distributed_iterator_traits; - auto localities = itr_traits::localities(first, last); using mapped_t = typename std::result_of::type; +#if __cpp_static_assert >= 200410 + static_assert(std::is_default_constructible::value); +#endif + + auto localities = itr_traits::localities(first, last); size_t i = 0; rt::Handle h; auto d_args = std::make_tuple(map_kernel, first, last, args...); @@ -155,6 +149,9 @@ template auto local_map(ForwardIt first, ForwardIt last, MapF&& map_kernel) { using mapped_t = typename std::result_of::type; +#if __cpp_static_assert >= 200410 + static_assert(std::is_default_constructible::value); +#endif // allocate partial results auto range_len = std::distance(first, last); diff --git a/include/shad/core/impl/minimum_maximum_ops.h b/include/shad/core/impl/minimum_maximum_ops.h index 5d613b88..bac24b3a 100755 --- a/include/shad/core/impl/minimum_maximum_ops.h +++ b/include/shad/core/impl/minimum_maximum_ops.h @@ -46,7 +46,9 @@ ForwardIt max_element(distributed_sequential_tag&& policy, ForwardIt first, ForwardIt last, Compare comp) { using itr_traits = distributed_iterator_traits; using value_t = typename itr_traits::value_type; +#if __cpp_static_assert >= 200410 static_assert(std::is_default_constructible::value); +#endif if (first == last) return last; @@ -84,7 +86,9 @@ ForwardIt max_element(distributed_parallel_tag&& policy, ForwardIt first, ForwardIt last, Compare comp) { using itr_traits = distributed_iterator_traits; using value_t = typename itr_traits::value_type; +#if __cpp_static_assert >= 200410 static_assert(std::is_default_constructible::value); +#endif if (first == last) return last; @@ -137,7 +141,9 @@ ForwardIt min_element(distributed_sequential_tag&& policy, ForwardIt first, ForwardIt last, Compare comp) { using itr_traits = distributed_iterator_traits; using value_t = typename itr_traits::value_type; +#if __cpp_static_assert >= 200410 static_assert(std::is_default_constructible::value); +#endif if (first 
== last) return last; @@ -175,7 +181,9 @@ ForwardIt min_element(distributed_parallel_tag&& policy, ForwardIt first, ForwardIt last, Compare comp) { using itr_traits = distributed_iterator_traits; using value_t = typename itr_traits::value_type; +#if __cpp_static_assert >= 200410 static_assert(std::is_default_constructible::value); +#endif if (first == last) return last; @@ -229,7 +237,9 @@ std::pair minmax_element( Compare comp) { using itr_traits = distributed_iterator_traits; using value_t = typename itr_traits::value_type; +#if __cpp_static_assert >= 200410 static_assert(std::is_default_constructible::value); +#endif struct sol_t { ForwardIt min, max; @@ -286,7 +296,9 @@ std::pair minmax_element( if (first == last) return std::make_pair(last, last); using itr_traits = distributed_iterator_traits; using value_t = typename itr_traits::value_type; +#if __cpp_static_assert >= 200410 static_assert(std::is_default_constructible::value); +#endif struct sol_t { ForwardIt min, max; From 5ef116a3f6ce60408640ceb10f0e13f4e83aace4 Mon Sep 17 00:00:00 2001 From: Maurizio Drocco Date: Fri, 1 Feb 2019 11:30:12 -0800 Subject: [PATCH 09/19] [pnnl/SHAD#152] C++11 syntax for static_assert --- include/shad/core/impl/impl_patterns.h | 10 +++---- include/shad/core/impl/minimum_maximum_ops.h | 30 ++++++++------------ 2 files changed, 16 insertions(+), 24 deletions(-) diff --git a/include/shad/core/impl/impl_patterns.h b/include/shad/core/impl/impl_patterns.h index f688eb61..e750f00f 100755 --- a/include/shad/core/impl/impl_patterns.h +++ b/include/shad/core/impl/impl_patterns.h @@ -114,9 +114,8 @@ auto distributed_map(ForwardIt first, ForwardIt last, MapF&& map_kernel, using itr_traits = distributed_iterator_traits; using mapped_t = typename std::result_of::type; -#if __cpp_static_assert >= 200410 - static_assert(std::is_default_constructible::value); -#endif + static_assert(std::is_default_constructible::value, + "distributed_map requires DefaultConstructible value type"); auto 
localities = itr_traits::localities(first, last); size_t i = 0; @@ -149,9 +148,8 @@ template auto local_map(ForwardIt first, ForwardIt last, MapF&& map_kernel) { using mapped_t = typename std::result_of::type; -#if __cpp_static_assert >= 200410 - static_assert(std::is_default_constructible::value); -#endif + static_assert(std::is_default_constructible::value, + "local_map requires DefaultConstructible value type"); // allocate partial results auto range_len = std::distance(first, last); diff --git a/include/shad/core/impl/minimum_maximum_ops.h b/include/shad/core/impl/minimum_maximum_ops.h index bac24b3a..0e06c2b4 100755 --- a/include/shad/core/impl/minimum_maximum_ops.h +++ b/include/shad/core/impl/minimum_maximum_ops.h @@ -46,9 +46,8 @@ ForwardIt max_element(distributed_sequential_tag&& policy, ForwardIt first, ForwardIt last, Compare comp) { using itr_traits = distributed_iterator_traits; using value_t = typename itr_traits::value_type; -#if __cpp_static_assert >= 200410 - static_assert(std::is_default_constructible::value); -#endif + static_assert(std::is_default_constructible::value, + "max_element requires DefaultConstructible value type"); if (first == last) return last; @@ -86,9 +85,8 @@ ForwardIt max_element(distributed_parallel_tag&& policy, ForwardIt first, ForwardIt last, Compare comp) { using itr_traits = distributed_iterator_traits; using value_t = typename itr_traits::value_type; -#if __cpp_static_assert >= 200410 - static_assert(std::is_default_constructible::value); -#endif + static_assert(std::is_default_constructible::value, + "max_element requires DefaultConstructible value type"); if (first == last) return last; @@ -141,9 +139,8 @@ ForwardIt min_element(distributed_sequential_tag&& policy, ForwardIt first, ForwardIt last, Compare comp) { using itr_traits = distributed_iterator_traits; using value_t = typename itr_traits::value_type; -#if __cpp_static_assert >= 200410 - static_assert(std::is_default_constructible::value); -#endif + 
static_assert(std::is_default_constructible::value, + "min_element requires DefaultConstructible value type"); if (first == last) return last; @@ -181,9 +178,8 @@ ForwardIt min_element(distributed_parallel_tag&& policy, ForwardIt first, ForwardIt last, Compare comp) { using itr_traits = distributed_iterator_traits; using value_t = typename itr_traits::value_type; -#if __cpp_static_assert >= 200410 - static_assert(std::is_default_constructible::value); -#endif + static_assert(std::is_default_constructible::value, + "min_element requires DefaultConstructible value type"); if (first == last) return last; @@ -237,9 +233,8 @@ std::pair minmax_element( Compare comp) { using itr_traits = distributed_iterator_traits; using value_t = typename itr_traits::value_type; -#if __cpp_static_assert >= 200410 - static_assert(std::is_default_constructible::value); -#endif + static_assert(std::is_default_constructible::value, + "minmax_element requires DefaultConstructible value type"); struct sol_t { ForwardIt min, max; @@ -296,9 +291,8 @@ std::pair minmax_element( if (first == last) return std::make_pair(last, last); using itr_traits = distributed_iterator_traits; using value_t = typename itr_traits::value_type; -#if __cpp_static_assert >= 200410 - static_assert(std::is_default_constructible::value); -#endif + static_assert(std::is_default_constructible::value, + "minmax_element requires DefaultConstructible value type"); struct sol_t { ForwardIt min, max; From e8c7335597b33ea5a3a7e6a0c06606b20a69bd60 Mon Sep 17 00:00:00 2001 From: Maurizio Drocco Date: Mon, 4 Feb 2019 14:00:25 -0800 Subject: [PATCH 10/19] [pnnl/SHAD#152] pattern-based non-modifying sequence ops --- include/shad/core/impl/impl_patterns.h | 27 + .../core/impl/non_modifyng_sequence_ops.h | 907 ++++++++---------- 2 files changed, 452 insertions(+), 482 deletions(-) diff --git a/include/shad/core/impl/impl_patterns.h b/include/shad/core/impl/impl_patterns.h index e750f00f..64ea6690 100755 --- 
a/include/shad/core/impl/impl_patterns.h +++ b/include/shad/core/impl/impl_patterns.h @@ -99,6 +99,30 @@ auto distributed_folding_map(ForwardIt first, ForwardIt last, MapF&& map_kernel, return res; } +template +auto distributed_folding_map_early_termination(ForwardIt first, ForwardIt last, + MapF&& map_kernel, HaltF&& halt, + const S& init_sol, + Args&&... args) { + using itr_traits = distributed_iterator_traits; + auto localities = itr_traits::localities(first, last); + auto res = init_sol; + for (auto locality = localities.begin(), end = localities.end(); + locality != end; ++locality) { + auto d_args = std::make_tuple(map_kernel, first, last, res, args...); + rt::executeAtWithRet( + locality, + [](const typeof(d_args)& d_args, S* result) { + *result = apply_from<1>(::std::get<0>(d_args), + ::std::forward(d_args)); + }, + d_args, &res); + if (halt(res)) return res; + } + return res; +} + //////////////////////////////////////////////////////////////////////////////// // // distributed_map applies map_kernel to each local portion and returns an @@ -108,6 +132,7 @@ auto distributed_folding_map(ForwardIt first, ForwardIt last, MapF&& map_kernel, // The return type of map_kernel must be DefaultConstructible. // //////////////////////////////////////////////////////////////////////////////// +// TODO specialize mapped_t to support lambdas returning bool template auto distributed_map(ForwardIt first, ForwardIt last, MapF&& map_kernel, Args&&... 
args) { @@ -116,6 +141,8 @@ auto distributed_map(ForwardIt first, ForwardIt last, MapF&& map_kernel, const ForwardIt&, const ForwardIt&, Args...)>::type; static_assert(std::is_default_constructible::value, "distributed_map requires DefaultConstructible value type"); + static_assert(!std::is_same::value, + "distributed-map kernels returning bool are not supported (yet)"); auto localities = itr_traits::localities(first, last); size_t i = 0; diff --git a/include/shad/core/impl/non_modifyng_sequence_ops.h b/include/shad/core/impl/non_modifyng_sequence_ops.h index 66e1cd49..239e7810 100644 --- a/include/shad/core/impl/non_modifyng_sequence_ops.h +++ b/include/shad/core/impl/non_modifyng_sequence_ops.h @@ -26,12 +26,16 @@ #define INCLUDE_SHAD_CORE_IMPL_NON_MODIFYING_SEQUENCE_OPS_H #include +#include #include #include + #include "shad/core/execution.h" #include "shad/distributed_iterator_traits.h" #include "shad/runtime/runtime.h" +#include "impl_patterns.h" + namespace shad { namespace impl { @@ -39,574 +43,513 @@ template bool all_of(distributed_sequential_tag&& policy, ForwardItr first, ForwardItr last, UnaryPredicate p) { using itr_traits = distributed_iterator_traits; - auto localities = itr_traits::localities(first, last); - - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality) { - bool result; - - rt::executeAtWithRet( - locality, - [](const std::tuple& args, - bool* result) { - auto begin = std::get<0>(args); - auto end = std::get<1>(args); - auto predicate = std::get<2>(args); - - auto local_range = itr_traits::local_range(begin, end); - *result = - std::all_of(local_range.begin(), local_range.end(), predicate); - }, - std::make_tuple(first, last, p), &result); - - if (!result) { - return false; - } - } - - return true; + + return distributed_folding_map_early_termination( + // range + first, last, + // kernel + [](ForwardItr first, ForwardItr last, const bool partial_solution, + UnaryPredicate p) { + // local 
processing + auto lrange = itr_traits::local_range(first, last); + auto local_res = std::all_of(lrange.begin(), lrange.end(), p); + // update the partial solution + return local_res; + }, + // halt condition + [](const bool x) { return !x; }, + // initial solution + true, + // map arguments + p); } template bool all_of(distributed_parallel_tag&& policy, ForwardItr first, ForwardItr last, UnaryPredicate p) { using itr_traits = distributed_iterator_traits; - auto localities = itr_traits::localities(first, last); - - rt::Handle H; - - std::vector results(localities.size()); - size_t i = 0; - - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality) { - rt::asyncExecuteAtWithRet( - H, locality, - [](rt::Handle&, - const std::tuple& args, - char* result) { - auto begin = std::get<0>(args); - auto end = std::get<1>(args); - auto predicate = std::get<2>(args); - - auto local_range = itr_traits::local_range(begin, end); - *result = - std::all_of(local_range.begin(), local_range.end(), predicate) - ? 
1 - : 0; - }, - std::make_tuple(first, last, p), &results[i]); - ++i; - } - - rt::waitForCompletion(H); - - return std::all_of(results.begin(), results.end(), - [](char v) -> bool { return v == 1; }); + using value_t = typename itr_traits::value_type; + + // distributed map + auto map_res = distributed_map( + // range + first, last, + // kernel + [](ForwardItr first, ForwardItr last, UnaryPredicate p) -> uint8_t { + using local_iterator_t = typename itr_traits::local_iterator_type; + + // local map + auto lrange = itr_traits::local_range(first, last); + + auto map_res = local_map( + // range + lrange.begin(), lrange.end(), + // kernel + [&](local_iterator_t b, local_iterator_t e) { + return std::all_of(b, e, p); + }); + + // local reduce + return std::all_of(map_res.begin(), map_res.end(), + [](bool x) { return x; }); + }, + // map arguments + p); + + // reduce + return std::all_of(map_res.begin(), map_res.end(), [](bool x) { return x; }); } template bool any_of(distributed_sequential_tag&& policy, ForwardItr first, ForwardItr last, UnaryPredicate p) { using itr_traits = distributed_iterator_traits; - auto localities = itr_traits::localities(first, last); - - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality) { - bool result; - - rt::executeAtWithRet( - locality, - [](const std::tuple& args, - bool* result) { - auto begin = std::get<0>(args); - auto end = std::get<1>(args); - auto predicate = std::get<2>(args); - - auto local_range = itr_traits::local_range(begin, end); - *result = - std::any_of(local_range.begin(), local_range.end(), predicate); - }, - std::make_tuple(first, last, p), &result); - - if (result) { - return true; - } - } - - return false; + + return distributed_folding_map_early_termination( + // range + first, last, + // kernel + [](ForwardItr first, ForwardItr last, const bool partial_solution, + UnaryPredicate p) { + // local processing + auto lrange = itr_traits::local_range(first, last); + auto 
local_res = std::any_of(lrange.begin(), lrange.end(), p); + // update the partial solution + return local_res; + }, + // halt condition + [](const bool x) { return x; }, + // initial solution + false, + // map arguments + p); } template bool any_of(distributed_parallel_tag&& policy, ForwardItr first, ForwardItr last, UnaryPredicate p) { using itr_traits = distributed_iterator_traits; - auto localities = itr_traits::localities(first, last); - - rt::Handle H; - - std::vector results(localities.size()); - size_t i = 0; - - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality) { - rt::asyncExecuteAtWithRet( - H, locality, - [](rt::Handle&, - const std::tuple& args, - char* result) { - auto begin = std::get<0>(args); - auto end = std::get<1>(args); - auto predicate = std::get<2>(args); - - auto local_range = itr_traits::local_range(begin, end); - *result = - std::any_of(local_range.begin(), local_range.end(), predicate) - ? 1 - : 0; - }, - std::make_tuple(first, last, p), &results[i]); - ++i; - } - - rt::waitForCompletion(H); - - return std::any_of(results.begin(), results.end(), - [](char v) -> bool { return v == 1; }); + using value_t = typename itr_traits::value_type; + + // distributed map + auto map_res = distributed_map( + // range + first, last, + // kernel + [](ForwardItr first, ForwardItr last, UnaryPredicate p) -> uint8_t { + using local_iterator_t = typename itr_traits::local_iterator_type; + + // local map + auto lrange = itr_traits::local_range(first, last); + + auto map_res = local_map( + // range + lrange.begin(), lrange.end(), + // kernel + [&](local_iterator_t b, local_iterator_t e) { + return std::any_of(b, e, p); + }); + + // local reduce + return std::any_of(map_res.begin(), map_res.end(), + [](bool x) { return x; }); + }, + // map arguments + p); + + // reduce + return std::any_of(map_res.begin(), map_res.end(), [](bool x) { return x; }); } template -ForwardItr find(distributed_parallel_tag&& policy, 
ForwardItr first, +ForwardItr find(distributed_sequential_tag&& policy, ForwardItr first, ForwardItr last, const T& value) { using itr_traits = distributed_iterator_traits; - auto localities = itr_traits::localities(first, last); - - rt::Handle H; - - std::vector results(localities.size(), last); - size_t i = 0; - - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality) { - rt::asyncExecuteAtWithRet( - H, locality, - [](rt::Handle&, const std::tuple& args, - ForwardItr* result) { - auto begin = std::get<0>(args); - *result = std::get<1>(args); - auto value = std::get<2>(args); - - auto local_range = itr_traits::local_range(begin, *result); - auto local_res = - std::find(local_range.begin(), local_range.end(), value); - - if (local_res != local_range.end()) { - *result = std::move(itr_traits::iterator_from_local( - std::get<0>(args), std::get<1>(args), local_res)); - } - }, - std::make_tuple(first, last, value), &results[i]); - ++i; - } - - rt::waitForCompletion(H); - - auto resultPos = - std::find_if(std::begin(results), std::end(results), - [&](const ForwardItr& o) -> bool { return last != o; }); - if (resultPos != results.end()) last = std::move(*resultPos); - - return last; + + return distributed_folding_map_early_termination( + // range + first, last, + // kernel + [](ForwardItr first, ForwardItr last, const ForwardItr partial_solution, + const T& value) { + // local processing + auto lrange = itr_traits::local_range(first, last); + auto local_res = std::find(lrange.begin(), lrange.end(), value); + // update the partial solution + return (local_res != lrange.end()) + ? 
itr_traits::iterator_from_local(first, last, local_res) + : partial_solution; + }, + // halt condition + [&](const ForwardItr x) { return x != last; }, + // initial solution + last, + // map arguments + value); } template -ForwardItr find(distributed_sequential_tag&& policy, ForwardItr first, +ForwardItr find(distributed_parallel_tag&& policy, ForwardItr first, ForwardItr last, const T& value) { using itr_traits = distributed_iterator_traits; - auto localities = itr_traits::localities(first, last); - - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality) { - ForwardItr result = last; - - rt::executeAtWithRet( - locality, - [](const std::tuple& args, - ForwardItr* result) { - auto begin = std::get<0>(args); - *result = std::get<1>(args); - auto value = std::get<2>(args); - - auto local_range = itr_traits::local_range(begin, *result); - auto local_res = - std::find(local_range.begin(), local_range.end(), value); - - if (local_res != local_range.end()) { - *result = std::move(itr_traits::iterator_from_local( - std::get<0>(args), std::get<1>(args), local_res)); - } - }, - std::make_tuple(first, last, value), &result); - - if (result != last) { - return result; - } - } - - return last; + using value_t = typename itr_traits::value_type; + + // distributed map + auto map_res = distributed_map( + // range + first, last, + // kernel + [](ForwardItr first, ForwardItr last, const T& value) { + using local_iterator_t = typename itr_traits::local_iterator_type; + + // local map + auto lrange = itr_traits::local_range(first, last); + + auto map_res = local_map( + // range + lrange.begin(), lrange.end(), + // kernel + [&](local_iterator_t b, local_iterator_t e) { + auto res = std::find(b, e, value); + return res != e ? res : lrange.end(); + }); + + // local reduce + auto found = std::find_if( + map_res.begin(), map_res.end(), + [&](const local_iterator_t& i) { return i != lrange.end(); }); + return found != map_res.end() + ? 
itr_traits::iterator_from_local(first, last, *found) + : last; + }, + // map arguments + value); + + // reduce + auto found = std::find_if(map_res.begin(), map_res.end(), + [&](ForwardItr i) { return i != last; }); + return found != map_res.end() ? *found : last; } template ForwardItr find_if(distributed_sequential_tag&& policy, ForwardItr first, ForwardItr last, UnaryPredicate p) { using itr_traits = distributed_iterator_traits; - auto localities = itr_traits::localities(first, last); - - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality) { - ForwardItr result = last; - - rt::executeAtWithRet( - locality, - [](const std::tuple& args, - ForwardItr* result) { - auto begin = std::get<0>(args); - *result = std::get<1>(args); - auto predicate = std::get<2>(args); - - auto local_range = itr_traits::local_range(begin, *result); - auto local_res = - std::find_if(local_range.begin(), local_range.end(), predicate); - - if (local_res != local_range.end()) { - *result = std::move(itr_traits::iterator_from_local( - std::get<0>(args), std::get<1>(args), local_res)); - } - }, - std::make_tuple(first, last, p), &result); - - if (result != last) { - return result; - } - } - - return last; + + return distributed_folding_map_early_termination( + // range + first, last, + // kernel + [](ForwardItr first, ForwardItr last, const ForwardItr partial_solution, + UnaryPredicate p) { + // local processing + auto lrange = itr_traits::local_range(first, last); + auto local_res = std::find_if(lrange.begin(), lrange.end(), p); + // update the partial solution + return (local_res != lrange.end()) + ? 
itr_traits::iterator_from_local(first, last, local_res) + : partial_solution; + }, + // halt condition + [&](const ForwardItr x) { return x != last; }, + // initial solution + last, + // map arguments + p); } template ForwardItr find_if(distributed_parallel_tag&& policy, ForwardItr first, ForwardItr last, UnaryPredicate p) { using itr_traits = distributed_iterator_traits; - auto localities = itr_traits::localities(first, last); - - rt::Handle H; - - std::vector results(localities.size(), last); - size_t i = 0; - - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality) { - rt::asyncExecuteAtWithRet( - H, locality, - [](rt::Handle&, - const std::tuple& args, - ForwardItr* result) { - auto begin = std::get<0>(args); - *result = std::get<1>(args); - auto predicate = std::get<2>(args); - - auto local_range = itr_traits::local_range(begin, *result); - auto local_res = - std::find_if(local_range.begin(), local_range.end(), predicate); - - if (local_res != local_range.end()) { - *result = std::move(itr_traits::iterator_from_local( - std::get<0>(args), std::get<1>(args), local_res)); - } - }, - std::make_tuple(first, last, p), &results[i]); - - ++i; - } - - rt::waitForCompletion(H); - - auto resultPos = - std::find_if(std::begin(results), std::end(results), - [&](const ForwardItr& o) -> bool { return last != o; }); - if (resultPos != results.end()) last = std::move(*resultPos); - - return last; + using value_t = typename itr_traits::value_type; + + // distributed map + auto map_res = distributed_map( + // range + first, last, + // kernel + [](ForwardItr first, ForwardItr last, UnaryPredicate p) { + using local_iterator_t = typename itr_traits::local_iterator_type; + + // local map + auto lrange = itr_traits::local_range(first, last); + + auto map_res = local_map( + // range + lrange.begin(), lrange.end(), + // kernel + [&](local_iterator_t b, local_iterator_t e) { + auto res = std::find_if(b, e, p); + return res != e ? 
res : lrange.end(); + }); + + // local reduce + auto found = std::find_if( + map_res.begin(), map_res.end(), + [&](const local_iterator_t& i) { return i != lrange.end(); }); + return found != map_res.end() + ? itr_traits::iterator_from_local(first, last, *found) + : last; + }, + // map arguments + p); + + // reduce + auto found = std::find_if(map_res.begin(), map_res.end(), + [&](ForwardItr i) { return i != last; }); + return found != map_res.end() ? *found : last; } template ForwardItr find_if_not(distributed_sequential_tag&& policy, ForwardItr first, ForwardItr last, UnaryPredicate p) { using itr_traits = distributed_iterator_traits; - auto localities = itr_traits::localities(first, last); - - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality) { - ForwardItr result = last; - - rt::executeAtWithRet( - locality, - [](const std::tuple& args, - ForwardItr* result) { - auto begin = std::get<0>(args); - *result = std::get<1>(args); - auto predicate = std::get<2>(args); - - auto local_range = itr_traits::local_range(begin, *result); - auto local_res = std::find_if_not(local_range.begin(), - local_range.end(), predicate); - - if (local_res != local_range.end()) { - *result = std::move(itr_traits::iterator_from_local( - std::get<0>(args), std::get<1>(args), local_res)); - } - }, - std::make_tuple(first, last, p), &result); - - if (result != last) { - return result; - } - } - - return last; + + return distributed_folding_map_early_termination( + // range + first, last, + // kernel + [](ForwardItr first, ForwardItr last, const ForwardItr partial_solution, + UnaryPredicate p) { + // local processing + auto lrange = itr_traits::local_range(first, last); + auto local_res = std::find_if_not(lrange.begin(), lrange.end(), p); + // update the partial solution + return (local_res != lrange.end()) + ? 
itr_traits::iterator_from_local(first, last, local_res) + : partial_solution; + }, + // halt condition + [&](const ForwardItr x) { return x != last; }, + // initial solution + last, + // map arguments + p); } template ForwardItr find_if_not(distributed_parallel_tag&& policy, ForwardItr first, ForwardItr last, UnaryPredicate p) { using itr_traits = distributed_iterator_traits; - auto localities = itr_traits::localities(first, last); - - rt::Handle H; - - std::vector results(localities.size(), last); - size_t i = 0; - - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality) { - rt::asyncExecuteAtWithRet( - H, locality, - [](rt::Handle&, - const std::tuple& args, - ForwardItr* result) { - auto begin = std::get<0>(args); - *result = std::get<1>(args); - auto predicate = std::get<2>(args); - - auto local_range = itr_traits::local_range(begin, *result); - auto local_res = std::find_if_not(local_range.begin(), - local_range.end(), predicate); - - if (local_res != local_range.end()) { - *result = std::move(itr_traits::iterator_from_local( - std::get<0>(args), std::get<1>(args), local_res)); - } - }, - std::make_tuple(first, last, p), &results[i]); - - ++i; - } - - rt::waitForCompletion(H); - - auto resultPos = - std::find_if(std::begin(results), std::end(results), - [&](const ForwardItr& o) -> bool { return last != o; }); - if (resultPos != results.end()) last = std::move(*resultPos); - - return last; + using value_t = typename itr_traits::value_type; + + // distributed map + auto map_res = distributed_map( + // range + first, last, + // kernel + [](ForwardItr first, ForwardItr last, UnaryPredicate p) { + using local_iterator_t = typename itr_traits::local_iterator_type; + + // local map + auto lrange = itr_traits::local_range(first, last); + + auto map_res = local_map( + // range + lrange.begin(), lrange.end(), + // kernel + [&](local_iterator_t b, local_iterator_t e) { + auto res = std::find_if_not(b, e, p); + return res != e ? 
res : lrange.end(); + }); + + // local reduce + auto found = std::find_if( + map_res.begin(), map_res.end(), + [&](const local_iterator_t& i) { return i != lrange.end(); }); + return found != map_res.end() + ? itr_traits::iterator_from_local(first, last, *found) + : last; + }, + // map arguments + p); + + // reduce + auto found = std::find_if(map_res.begin(), map_res.end(), + [&](ForwardItr i) { return i != last; }); + return found != map_res.end() ? *found : last; } template void for_each(distributed_sequential_tag&& policy, ForwardItr first, ForwardItr last, UnaryPredicate p) { using itr_traits = distributed_iterator_traits; - auto localities = itr_traits::localities(first, last); - - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality) { - rt::executeAt( - locality, - [](const std::tuple& args) { - auto begin = std::get<0>(args); - auto end = std::get<1>(args); - auto predicate = std::get<2>(args); - - auto local_range = itr_traits::local_range(begin, end); - std::for_each(local_range.begin(), local_range.end(), predicate); - }, - std::make_tuple(first, last, p)); - } + + return distributed_folding_map( + // range + first, last, + // kernel + [](ForwardItr first, ForwardItr last, nullptr_t, UnaryPredicate p) { + // local processing + auto lrange = itr_traits::local_range(first, last); + auto local_res = std::for_each(lrange.begin(), lrange.end(), p); + }, + // initial solution + nullptr, + // map arguments + p); } template void for_each(distributed_parallel_tag&& policy, ForwardItr first, ForwardItr last, UnaryPredicate p) { using itr_traits = distributed_iterator_traits; - auto localities = itr_traits::localities(first, last); - - rt::Handle H; - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality) { - rt::asyncExecuteAt( - H, locality, - [](rt::Handle&, - const std::tuple& args) { - auto begin = std::get<0>(args); - auto end = std::get<1>(args); - auto predicate = 
std::get<2>(args); - - auto local_range = itr_traits::local_range(begin, end); - std::for_each(local_range.begin(), local_range.end(), predicate); - }, - std::make_tuple(first, last, p)); - } - - rt::waitForCompletion(H); + using value_t = typename itr_traits::value_type; + + // distributed map + auto map_res = distributed_map( + // range + first, last, + // kernel + [](ForwardItr first, ForwardItr last, UnaryPredicate p) { + using local_iterator_t = typename itr_traits::local_iterator_type; + + // local map + auto lrange = itr_traits::local_range(first, last); + + local_map( + // range + lrange.begin(), lrange.end(), + // kernel + [&](local_iterator_t b, local_iterator_t e) { + std::for_each(b, e, p); + }); + }, + // map arguments + p); } template typename shad::distributed_iterator_traits::difference_type count( - distributed_parallel_tag&& policy, InputItr first, InputItr last, + distributed_sequential_tag&& policy, InputItr first, InputItr last, const T& value) { using itr_traits = distributed_iterator_traits; - using difference_type = typename itr_traits::difference_type; - auto localities = itr_traits::localities(first, last); - - rt::Handle H; - - std::vector results(localities.size()); - size_t i = 0; - - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality) { - rt::asyncExecuteAtWithRet( - H, locality, - [](rt::Handle&, const std::tuple& args, - difference_type* result) { - auto begin = std::get<0>(args); - auto end = std::get<1>(args); - auto value = std::get<2>(args); - - auto local_range = itr_traits::local_range(begin, end); - *result = std::count(local_range.begin(), local_range.end(), value); - }, - std::make_tuple(first, last, value), &results[i]); - - ++i; - } - - rt::waitForCompletion(H); - - return std::accumulate(results.begin(), results.end(), difference_type(0)); + using res_t = + typename shad::distributed_iterator_traits::difference_type; + + return distributed_folding_map( + // range + first, last, 
+ // kernel + [](InputItr first, InputItr last, res_t cnt, const T& value) { + // local processing + auto lrange = itr_traits::local_range(first, last); + auto local_res = std::count(lrange.begin(), lrange.end(), value); + // update the partial solution + return cnt + local_res; + }, + // initial solution + res_t{0}, + // map arguments + value); } template typename shad::distributed_iterator_traits::difference_type count( - distributed_sequential_tag&& policy, InputItr first, InputItr last, + distributed_parallel_tag&& policy, InputItr first, InputItr last, const T& value) { using itr_traits = distributed_iterator_traits; - using difference_type = typename itr_traits::difference_type; - auto localities = itr_traits::localities(first, last); - - difference_type result = 0; - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality) { - difference_type delta = 0; - - rt::executeAtWithRet( - locality, - [](const std::tuple& args, - difference_type* result) { - auto begin = std::get<0>(args); - auto end = std::get<1>(args); - auto value = std::get<2>(args); - - auto local_range = itr_traits::local_range(begin, end); - *result = std::count(local_range.begin(), local_range.end(), value); - }, - std::make_tuple(first, last, value), &delta); - - result += delta; - } - - return result; + using res_t = + typename shad::distributed_iterator_traits::difference_type; + + // distributed map + auto map_res = distributed_map( + // range + first, last, + // kernel + [](InputItr first, InputItr last, const T& value) { + using local_iterator_t = typename itr_traits::local_iterator_type; + + // local map + auto lrange = itr_traits::local_range(first, last); + + auto map_res = local_map( + // range + lrange.begin(), lrange.end(), + // kernel + [&](local_iterator_t b, local_iterator_t e) { + return std::count(b, e, value); + }); + + // local reduce + return std::accumulate( + map_res.begin(), map_res.end(), res_t{0}, + [](const res_t& acc, const 
res_t& x) { return acc + x; }); + }, + // map arguments + value); + + // reduce + return std::accumulate( + map_res.begin(), map_res.end(), res_t{0}, + [](const res_t& acc, const res_t& x) { return acc + x; }); } template typename shad::distributed_iterator_traits::difference_type count_if( - distributed_parallel_tag&& policy, InputItr first, InputItr last, - UnaryPredicate predicate) { + distributed_sequential_tag&& policy, InputItr first, InputItr last, + UnaryPredicate p) { using itr_traits = distributed_iterator_traits; - using difference_type = typename itr_traits::difference_type; - auto localities = itr_traits::localities(first, last); - - rt::Handle H; - - std::vector results(localities.size()); - size_t i = 0; - - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality) { - rt::asyncExecuteAtWithRet( - H, locality, - [](rt::Handle&, - const std::tuple& args, - difference_type* result) { - auto begin = std::get<0>(args); - auto end = std::get<1>(args); - auto predicate = std::get<2>(args); - - auto local_range = itr_traits::local_range(begin, end); - *result = - std::count_if(local_range.begin(), local_range.end(), predicate); - }, - std::make_tuple(first, last, predicate), &results[i]); - - ++i; - } - - rt::waitForCompletion(H); - - return std::accumulate(results.begin(), results.end(), difference_type(0)); + using res_t = + typename shad::distributed_iterator_traits::difference_type; + + return distributed_folding_map( + // range + first, last, + // kernel + [](InputItr first, InputItr last, res_t cnt, UnaryPredicate p) { + // local processing + auto lrange = itr_traits::local_range(first, last); + auto local_res = std::count_if(lrange.begin(), lrange.end(), p); + // update the partial solution + return cnt + local_res; + }, + // initial solution + res_t{0}, + // map arguments + p); } template typename shad::distributed_iterator_traits::difference_type count_if( - distributed_sequential_tag&& policy, InputItr first, 
InputItr last, - UnaryPredicate predicate) { + distributed_parallel_tag&& policy, InputItr first, InputItr last, + UnaryPredicate p) { using itr_traits = distributed_iterator_traits; - using difference_type = typename itr_traits::difference_type; - auto localities = itr_traits::localities(first, last); - - difference_type result = 0; - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality) { - difference_type delta = 0; - - rt::executeAtWithRet( - locality, - [](const std::tuple& args, - difference_type* result) { - auto begin = std::get<0>(args); - auto end = std::get<1>(args); - auto predicate = std::get<2>(args); - - auto local_range = itr_traits::local_range(begin, end); - *result = - std::count_if(local_range.begin(), local_range.end(), predicate); - }, - std::make_tuple(first, last, predicate), &delta); - - result += delta; - } - - return result; + using res_t = + typename shad::distributed_iterator_traits::difference_type; + + // distributed map + auto map_res = distributed_map( + // range + first, last, + // kernel + [](InputItr first, InputItr last, UnaryPredicate p) { + using local_iterator_t = typename itr_traits::local_iterator_type; + + // local map + auto lrange = itr_traits::local_range(first, last); + + auto map_res = local_map( + // range + lrange.begin(), lrange.end(), + // kernel + [&](local_iterator_t b, local_iterator_t e) { + return std::count_if(b, e, p); + }); + + // local reduce + return std::accumulate( + map_res.begin(), map_res.end(), res_t{0}, + [](const res_t& acc, const res_t& x) { return acc + x; }); + }, + // map arguments + p); + + // reduce + return std::accumulate( + map_res.begin(), map_res.end(), res_t{0}, + [](const res_t& acc, const res_t& x) { return acc + x; }); } } // namespace impl From 9778d3fe61e05fa179d5208d93c0aadb2f2e5921 Mon Sep 17 00:00:00 2001 From: Maurizio Drocco Date: Mon, 4 Feb 2019 15:37:07 -0800 Subject: [PATCH 11/19] [pnnl/SHAD#152] fixes --- 
include/shad/core/impl/impl_patterns.h | 42 ++++++++++--------- include/shad/core/impl/minimum_maximum_ops.h | 10 ++--- .../core/impl/non_modifyng_sequence_ops.h | 5 ++- 3 files changed, 30 insertions(+), 27 deletions(-) diff --git a/include/shad/core/impl/impl_patterns.h b/include/shad/core/impl/impl_patterns.h index 64ea6690..fd012edf 100755 --- a/include/shad/core/impl/impl_patterns.h +++ b/include/shad/core/impl/impl_patterns.h @@ -30,6 +30,7 @@ #include #include #include +#include #include "shad/distributed_iterator_traits.h" #include "shad/runtime/runtime.h" @@ -37,11 +38,6 @@ namespace shad { namespace impl { -#include -#include -#include -#include - //////////////////////////////////////////////////////////////////////////////// // // apply_from is an utility function that performs compile-time unpacking of @@ -80,8 +76,8 @@ inline auto apply_from(F&& f, T&& t) { // //////////////////////////////////////////////////////////////////////////////// template -auto distributed_folding_map(ForwardIt first, ForwardIt last, MapF&& map_kernel, - const S& init_sol, Args&&... args) { +S distributed_folding_map(ForwardIt first, ForwardIt last, MapF&& map_kernel, + const S& init_sol, Args&&... args) { using itr_traits = distributed_iterator_traits; auto localities = itr_traits::localities(first, last); auto res = init_sol; @@ -101,10 +97,9 @@ auto distributed_folding_map(ForwardIt first, ForwardIt last, MapF&& map_kernel, template -auto distributed_folding_map_early_termination(ForwardIt first, ForwardIt last, - MapF&& map_kernel, HaltF&& halt, - const S& init_sol, - Args&&... args) { +S distributed_folding_map_early_termination(ForwardIt first, ForwardIt last, + MapF&& map_kernel, HaltF&& halt, + const S& init_sol, Args&&... 
args) { using itr_traits = distributed_iterator_traits; auto localities = itr_traits::localities(first, last); auto res = init_sol; @@ -134,15 +129,18 @@ auto distributed_folding_map_early_termination(ForwardIt first, ForwardIt last, //////////////////////////////////////////////////////////////////////////////// // TODO specialize mapped_t to support lambdas returning bool template -auto distributed_map(ForwardIt first, ForwardIt last, MapF&& map_kernel, - Args&&... args) { +std::vector< + typename std::result_of::type> +distributed_map(ForwardIt first, ForwardIt last, MapF&& map_kernel, + Args&&... args) { using itr_traits = distributed_iterator_traits; - using mapped_t = typename std::result_of::type; + using mapped_t = + typename std::result_of::type; static_assert(std::is_default_constructible::value, "distributed_map requires DefaultConstructible value type"); - static_assert(!std::is_same::value, - "distributed-map kernels returning bool are not supported (yet)"); + static_assert( + !std::is_same::value, + "distributed-map kernels returning bool are not supported (yet)"); auto localities = itr_traits::localities(first, last); size_t i = 0; @@ -171,12 +169,16 @@ auto distributed_map(ForwardIt first, ForwardIt last, MapF&& map_kernel, // The return type of map_kernel must be DefaultConstructible. 
// //////////////////////////////////////////////////////////////////////////////// +// TODO specialize mapped_t to support lambdas returning bool template -auto local_map(ForwardIt first, ForwardIt last, MapF&& map_kernel) { - using mapped_t = - typename std::result_of::type; +std::vector::type> +local_map(ForwardIt first, ForwardIt last, MapF&& map_kernel) { + using mapped_t = typename std::result_of::type; static_assert(std::is_default_constructible::value, "local_map requires DefaultConstructible value type"); + static_assert( + !std::is_same::value, + "distributed-map kernels returning bool are not supported (yet)"); // allocate partial results auto range_len = std::distance(first, last); diff --git a/include/shad/core/impl/minimum_maximum_ops.h b/include/shad/core/impl/minimum_maximum_ops.h index 0e06c2b4..6231d946 100755 --- a/include/shad/core/impl/minimum_maximum_ops.h +++ b/include/shad/core/impl/minimum_maximum_ops.h @@ -203,11 +203,11 @@ ForwardIt min_element(distributed_parallel_tag&& policy, ForwardIt first, // local reduce auto nil_val = itr_traits::local_range(first, last).end(); - auto lmin_it = std::min_element( - map_res.begin(), map_res.end(), - [&](const local_iterator_t& x, const local_iterator_t& y) { - return x != nil_val && comp(*x, *y); - }); + auto lmin_it = + std::min_element(map_res.begin(), map_res.end(), + [&](local_iterator_t x, local_iterator_t y) { + return x != nil_val && comp(*x, *y); + }); // local solution auto lmin = lmin_it != map_res.end() ? 
*lmin_it : nil_val; diff --git a/include/shad/core/impl/non_modifyng_sequence_ops.h b/include/shad/core/impl/non_modifyng_sequence_ops.h index 239e7810..56679ec5 100644 --- a/include/shad/core/impl/non_modifyng_sequence_ops.h +++ b/include/shad/core/impl/non_modifyng_sequence_ops.h @@ -29,6 +29,7 @@ #include #include #include +#include #include "shad/core/execution.h" #include "shad/distributed_iterator_traits.h" @@ -85,7 +86,7 @@ bool all_of(distributed_parallel_tag&& policy, ForwardItr first, // range lrange.begin(), lrange.end(), // kernel - [&](local_iterator_t b, local_iterator_t e) { + [&](const local_iterator_t &b, const local_iterator_t &e) -> uint8_t { return std::all_of(b, e, p); }); @@ -146,7 +147,7 @@ bool any_of(distributed_parallel_tag&& policy, ForwardItr first, // range lrange.begin(), lrange.end(), // kernel - [&](local_iterator_t b, local_iterator_t e) { + [&](local_iterator_t b, local_iterator_t e) -> uint8_t { return std::any_of(b, e, p); }); From d7ee2485bc784ffeb1eac4276d512a369bc71ce2 Mon Sep 17 00:00:00 2001 From: Maurizio Drocco Date: Mon, 4 Feb 2019 15:57:45 -0800 Subject: [PATCH 12/19] [pnnl/SHAD#152] fix --- include/shad/core/impl/non_modifyng_sequence_ops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/shad/core/impl/non_modifyng_sequence_ops.h b/include/shad/core/impl/non_modifyng_sequence_ops.h index 56679ec5..3b929d76 100644 --- a/include/shad/core/impl/non_modifyng_sequence_ops.h +++ b/include/shad/core/impl/non_modifyng_sequence_ops.h @@ -381,7 +381,7 @@ void for_each(distributed_sequential_tag&& policy, ForwardItr first, // range first, last, // kernel - [](ForwardItr first, ForwardItr last, nullptr_t, UnaryPredicate p) { + [](ForwardItr first, ForwardItr last, std::nullptr_t, UnaryPredicate p) { // local processing auto lrange = itr_traits::local_range(first, last); auto local_res = std::for_each(lrange.begin(), lrange.end(), p); From a591ea61473a9588f28793817ea4bb25f6edaedd Mon Sep 17 00:00:00 2001 
From: Maurizio Drocco Date: Mon, 4 Feb 2019 17:34:55 -0800 Subject: [PATCH 13/19] [pnnl/SHAD#152] pattern-based single-range modifying-sequence ops --- include/shad/core/impl/impl_patterns.h | 77 +++++ .../shad/core/impl/modifyng_sequence_ops.h | 289 ++++++++---------- .../core/impl/non_modifyng_sequence_ops.h | 20 +- 3 files changed, 217 insertions(+), 169 deletions(-) diff --git a/include/shad/core/impl/impl_patterns.h b/include/shad/core/impl/impl_patterns.h index fd012edf..fbae1340 100755 --- a/include/shad/core/impl/impl_patterns.h +++ b/include/shad/core/impl/impl_patterns.h @@ -95,6 +95,24 @@ S distributed_folding_map(ForwardIt first, ForwardIt last, MapF&& map_kernel, return res; } +template +void distributed_folding_map_void(ForwardIt first, ForwardIt last, + MapF&& map_kernel, Args&&... args) { + using itr_traits = distributed_iterator_traits; + auto localities = itr_traits::localities(first, last); + for (auto locality = localities.begin(), end = localities.end(); + locality != end; ++locality) { + auto d_args = std::make_tuple(map_kernel, first, last, args...); + rt::executeAt( + locality, + [](const typeof(d_args)& d_args) { + apply_from<1>(::std::get<0>(d_args), + ::std::forward(d_args)); + }, + d_args); + } +} + template S distributed_folding_map_early_termination(ForwardIt first, ForwardIt last, @@ -161,6 +179,27 @@ distributed_map(ForwardIt first, ForwardIt last, MapF&& map_kernel, return res; } +template +void distributed_map_void(ForwardIt first, ForwardIt last, MapF&& map_kernel, + Args&&... 
args) { + using itr_traits = distributed_iterator_traits; + auto localities = itr_traits::localities(first, last); + size_t i = 0; + rt::Handle h; + auto d_args = std::make_tuple(map_kernel, first, last, args...); + for (auto locality = localities.begin(), end = localities.end(); + locality != end; ++locality, ++i) { + rt::asyncExecuteAt( + h, locality, + [](rt::Handle&, const typeof(d_args)& d_args) { + apply_from<1>(::std::get<0>(d_args), + ::std::forward(d_args)); + }, + d_args); + } + rt::waitForCompletion(h); +} + //////////////////////////////////////////////////////////////////////////////// // // local_map applies map_kernel over a partitioning of a local portion and @@ -221,6 +260,44 @@ local_map(ForwardIt first, ForwardIt last, MapF&& map_kernel) { return map_res; } +template +void local_map_void(ForwardIt first, ForwardIt last, MapF&& map_kernel) { + // allocate partial results + auto range_len = std::distance(first, last); + auto n_blocks = std::min(rt::impl::getConcurrency(), (size_t)range_len); + + if (n_blocks) { + auto block_size = (range_len + n_blocks - 1) / n_blocks; + + rt::Handle map_h; + for (size_t block_id = 0; block_id < n_blocks; ++block_id) { + auto map_args = + std::make_tuple(block_id, block_size, first, last, map_kernel); + rt::asyncExecuteAt( + map_h, rt::thisLocality(), + [](rt::Handle&, const typeof(map_args)& map_args) { + size_t block_id = std::get<0>(map_args); + size_t block_size = std::get<1>(map_args); + auto begin = std::get<2>(map_args); + auto end = std::get<3>(map_args); + auto map_kernel = std::get<4>(map_args); + // iteration-block boundaries + auto block_begin = begin; + std::advance(block_begin, block_id * block_size); + auto block_end = block_begin; + if (std::distance(block_begin, end) < block_size) + block_end = end; + else + std::advance(block_end, block_size); + // map over the block + map_kernel(block_begin, block_end); + }, + map_args); + } + rt::waitForCompletion(map_h); + } +} + } // namespace impl } // 
namespace shad diff --git a/include/shad/core/impl/modifyng_sequence_ops.h b/include/shad/core/impl/modifyng_sequence_ops.h index c8b562de..52b447fd 100755 --- a/include/shad/core/impl/modifyng_sequence_ops.h +++ b/include/shad/core/impl/modifyng_sequence_ops.h @@ -42,47 +42,46 @@ template void fill(distributed_parallel_tag&& policy, ForwardIt first, ForwardIt last, const T& value) { using itr_traits = distributed_iterator_traits; - auto localities = itr_traits::localities(first, last); - rt::Handle H; - - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality) { - rt::asyncExecuteAt( - H, locality, - [](rt::Handle&, const std::tuple& args) { - auto begin = std::get<0>(args); - auto end = std::get<1>(args); - auto value = std::get<2>(args); - - auto local_range = itr_traits::local_range(begin, end); - std::fill(local_range.begin(), local_range.end(), value); - }, - std::make_tuple(first, last, value)); - } - - rt::waitForCompletion(H); + // distributed map + distributed_map_void( + // range + first, last, + // kernel + [](ForwardIt first, ForwardIt last, const T& value) { + using local_iterator_t = typename itr_traits::local_iterator_type; + + // local map + auto lrange = itr_traits::local_range(first, last); + + local_map_void( + // range + lrange.begin(), lrange.end(), + // kernel + [&](local_iterator_t b, local_iterator_t e) { + std::fill(b, e, value); + }); + }, + // map arguments + value); } template void fill(distributed_sequential_tag&& policy, ForwardIt first, ForwardIt last, const T& value) { using itr_traits = distributed_iterator_traits; - auto localities = itr_traits::localities(first, last); - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality) { - rt::executeAt(locality, - [](const std::tuple& args) { - auto begin = std::get<0>(args); - auto end = std::get<1>(args); - auto value = std::get<2>(args); - - auto local_range = itr_traits::local_range(begin, end); - 
std::fill(local_range.begin(), local_range.end(), value); - }, - std::make_tuple(first, last, value)); - } + distributed_folding_map_void( + // range + first, last, + // kernel + [](ForwardIt first, ForwardIt last, const T& value) { + // local processing + auto lrange = itr_traits::local_range(first, last); + std::fill(lrange.begin(), lrange.end(), value); + }, + // map arguments + value); } template @@ -149,167 +148,143 @@ ForwardIt2 transform(distributed_sequential_tag&& policy, ForwardIt1 first1, template void generate(distributed_parallel_tag&& policy, ForwardIt first, ForwardIt last, Generator generator) { - using T = typename ForwardIt::value_type; using itr_traits = distributed_iterator_traits; - auto localities = itr_traits::localities(first, last); - - rt::Handle H; - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality) { - rt::asyncExecuteAt( - H, locality, - [](rt::Handle&, - const std::tuple& args) { - auto begin = std::get<0>(args); - auto end = std::get<1>(args); - auto generator = std::get<2>(args); - auto local_range = itr_traits::local_range(begin, end); - auto lbegin = local_range.begin(); - auto lend = local_range.end(); - - // call the generator to align with the offset - auto it = itr_traits::iterator_from_local(begin, end, lbegin); - for (auto calls = std::distance(begin, it); calls; --calls) - generator(); - - std::generate(lbegin, lend, generator); - }, - std::make_tuple(first, last, generator)); - } - rt::waitForCompletion(H); + // distributed map + distributed_map_void( + // range + first, last, + // kernel + [](ForwardIt first, ForwardIt last, Generator generator) { + using local_iterator_t = typename itr_traits::local_iterator_type; + + // local map + auto lrange = itr_traits::local_range(first, last); + + local_map_void( + // range + lrange.begin(), lrange.end(), + // kernel + [&](local_iterator_t b, local_iterator_t e) { + std::generate(b, e, generator); + }); + }, + // map arguments + 
generator); } template void generate(distributed_sequential_tag&& policy, ForwardIt first, ForwardIt last, Generator generator) { - using T = typename ForwardIt::value_type; using itr_traits = distributed_iterator_traits; - auto localities = itr_traits::localities(first, last); - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality) { - rt::executeAt(locality, - [](const std::tuple& args) { - auto begin = std::get<0>(args); - auto end = std::get<1>(args); - auto generator = std::get<2>(args); - auto local_range = itr_traits::local_range(begin, end); - auto lbegin = local_range.begin(); - auto lend = local_range.end(); - - // call the generator to align with the offset - auto it = - itr_traits::iterator_from_local(begin, end, lbegin); - for (auto calls = std::distance(begin, it); calls; --calls) - generator(); - - std::generate(lbegin, lend, generator); - }, - std::make_tuple(first, last, generator)); - } + distributed_folding_map_void( + // range + first, last, + // kernel + [](ForwardIt first, ForwardIt last, Generator generator) { + // local processing + auto lrange = itr_traits::local_range(first, last); + std::generate(lrange.begin(), lrange.end(), generator); + }, + // map arguments + generator); } template void replace(distributed_parallel_tag&& policy, ForwardIt first, ForwardIt last, const T& old_value, const T& new_value) { using itr_traits = distributed_iterator_traits; - auto localities = itr_traits::localities(first, last); - rt::Handle H; - - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality) { - rt::asyncExecuteAt( - H, locality, - [](rt::Handle&, const std::tuple& args) { - auto begin = std::get<0>(args); - auto end = std::get<1>(args); - auto old_value = std::get<2>(args); - auto new_value = std::get<3>(args); - - auto local_range = itr_traits::local_range(begin, end); - std::replace(local_range.begin(), local_range.end(), old_value, - new_value); - }, - 
std::make_tuple(first, last, old_value, new_value)); - } - - rt::waitForCompletion(H); + // distributed map + distributed_map_void( + // range + first, last, + // kernel + [](ForwardIt first, ForwardIt last, const T& old_value, + const T& new_value) { + using local_iterator_t = typename itr_traits::local_iterator_type; + + // local map + auto lrange = itr_traits::local_range(first, last); + + local_map_void( + // range + lrange.begin(), lrange.end(), + // kernel + [&](local_iterator_t b, local_iterator_t e) { + std::replace(b, e, old_value, new_value); + }); + }, + // map arguments + old_value, new_value); } template void replace(distributed_sequential_tag&& policy, ForwardIt first, ForwardIt last, const T& old_value, const T& new_value) { using itr_traits = distributed_iterator_traits; - auto localities = itr_traits::localities(first, last); - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality) { - rt::executeAt(locality, - [](const std::tuple& args) { - auto begin = std::get<0>(args); - auto end = std::get<1>(args); - auto old_value = std::get<2>(args); - auto new_value = std::get<3>(args); - - auto local_range = itr_traits::local_range(begin, end); - std::replace(local_range.begin(), local_range.end(), - old_value, new_value); - }, - std::make_tuple(first, last, old_value, new_value)); - } + distributed_folding_map_void( + // range + first, last, + // kernel + [](ForwardIt first, ForwardIt last, const T& old_value, + const T& new_value) { + // local processing + auto lrange = itr_traits::local_range(first, last); + std::replace(lrange.begin(), lrange.end(), old_value, new_value); + }, + // map arguments + old_value, new_value); } template void replace_if(distributed_parallel_tag&& policy, ForwardIt first, ForwardIt last, UnaryPredicate p, const T& new_value) { using itr_traits = distributed_iterator_traits; - auto localities = itr_traits::localities(first, last); - - rt::Handle H; - for (auto locality = 
localities.begin(), end = localities.end(); - locality != end; ++locality) { - rt::asyncExecuteAt( - H, locality, - [](rt::Handle&, - const std::tuple& args) { - auto begin = std::get<0>(args); - auto end = std::get<1>(args); - auto p = std::get<2>(args); - auto new_value = std::get<3>(args); - - auto local_range = itr_traits::local_range(begin, end); - std::replace_if(local_range.begin(), local_range.end(), p, new_value); - }, - std::make_tuple(first, last, p, new_value)); - } - - rt::waitForCompletion(H); + // distributed map + distributed_map_void( + // range + first, last, + // kernel + [](ForwardIt first, ForwardIt last, UnaryPredicate p, + const T& new_value) { + using local_iterator_t = typename itr_traits::local_iterator_type; + + // local map + auto lrange = itr_traits::local_range(first, last); + + local_map_void( + // range + lrange.begin(), lrange.end(), + // kernel + [&](local_iterator_t b, local_iterator_t e) { + std::replace_if(b, e, p, new_value); + }); + }, + // map arguments + p, new_value); } template void replace_if(distributed_sequential_tag&& policy, ForwardIt first, ForwardIt last, UnaryPredicate p, const T& new_value) { using itr_traits = distributed_iterator_traits; - auto localities = itr_traits::localities(first, last); - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality) { - rt::executeAt( - locality, - [](const std::tuple& args) { - auto begin = std::get<0>(args); - auto end = std::get<1>(args); - auto p = std::get<2>(args); - auto new_value = std::get<3>(args); - - auto local_range = itr_traits::local_range(begin, end); - std::replace_if(local_range.begin(), local_range.end(), p, new_value); - }, - std::make_tuple(first, last, p, new_value)); - } + distributed_folding_map_void( + // range + first, last, + // kernel + [](ForwardIt first, ForwardIt last, UnaryPredicate p, + const T& new_value) { + // local processing + auto lrange = itr_traits::local_range(first, last); + 
std::replace_if(lrange.begin(), lrange.end(), p, new_value); + }, + // map arguments + p, new_value); } } // namespace impl diff --git a/include/shad/core/impl/non_modifyng_sequence_ops.h b/include/shad/core/impl/non_modifyng_sequence_ops.h index 3b929d76..1d6d1b5e 100644 --- a/include/shad/core/impl/non_modifyng_sequence_ops.h +++ b/include/shad/core/impl/non_modifyng_sequence_ops.h @@ -26,10 +26,10 @@ #define INCLUDE_SHAD_CORE_IMPL_NON_MODIFYING_SEQUENCE_OPS_H #include +#include #include #include #include -#include #include "shad/core/execution.h" #include "shad/distributed_iterator_traits.h" @@ -86,9 +86,8 @@ bool all_of(distributed_parallel_tag&& policy, ForwardItr first, // range lrange.begin(), lrange.end(), // kernel - [&](const local_iterator_t &b, const local_iterator_t &e) -> uint8_t { - return std::all_of(b, e, p); - }); + [&](const local_iterator_t& b, const local_iterator_t& e) + -> uint8_t { return std::all_of(b, e, p); }); // local reduce return std::all_of(map_res.begin(), map_res.end(), @@ -377,17 +376,15 @@ void for_each(distributed_sequential_tag&& policy, ForwardItr first, ForwardItr last, UnaryPredicate p) { using itr_traits = distributed_iterator_traits; - return distributed_folding_map( + distributed_folding_map_void( // range first, last, // kernel - [](ForwardItr first, ForwardItr last, std::nullptr_t, UnaryPredicate p) { + [](ForwardItr first, ForwardItr last, UnaryPredicate p) { // local processing auto lrange = itr_traits::local_range(first, last); - auto local_res = std::for_each(lrange.begin(), lrange.end(), p); + std::for_each(lrange.begin(), lrange.end(), p); }, - // initial solution - nullptr, // map arguments p); } @@ -396,10 +393,9 @@ template void for_each(distributed_parallel_tag&& policy, ForwardItr first, ForwardItr last, UnaryPredicate p) { using itr_traits = distributed_iterator_traits; - using value_t = typename itr_traits::value_type; // distributed map - auto map_res = distributed_map( + distributed_map_void( // range 
first, last, // kernel @@ -409,7 +405,7 @@ void for_each(distributed_parallel_tag&& policy, ForwardItr first, // local map auto lrange = itr_traits::local_range(first, last); - local_map( + local_map_void( // range lrange.begin(), lrange.end(), // kernel From f7a6dbb3c7d7bed32bc5aca7512e0c9f508c559b Mon Sep 17 00:00:00 2001 From: Maurizio Drocco Date: Tue, 5 Feb 2019 14:46:57 -0800 Subject: [PATCH 14/19] [pnnl/SHAD#152] pattern-based single-range numeric ops --- include/shad/core/impl/numeric_ops.h | 741 +++++++++++++-------------- include/shad/core/numeric.h | 413 ++++++++------- 2 files changed, 564 insertions(+), 590 deletions(-) diff --git a/include/shad/core/impl/numeric_ops.h b/include/shad/core/impl/numeric_ops.h index 85ff76aa..25e1dd7c 100644 --- a/include/shad/core/impl/numeric_ops.h +++ b/include/shad/core/impl/numeric_ops.h @@ -28,67 +28,60 @@ #include #include #include + #include "shad/core/execution.h" #include "shad/distributed_iterator_traits.h" #include "shad/runtime/runtime.h" +#include "impl_patterns.h" + namespace shad { namespace impl { template void iota(ForwardIterator first, ForwardIterator last, const T& value) { using itr_traits = distributed_iterator_traits; - auto localities = itr_traits::localities(first, last); - size_t next_value = value; - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality) { - rt::executeAtWithRet( - locality, - [](const std::tuple& args, - size_t* result) { - auto begin = std::get<0>(args); - auto end = std::get<1>(args); - auto value = std::get<2>(args); - auto local_range = itr_traits::local_range(begin, end); - auto lbeg = local_range.begin(); - while(lbeg != local_range.end()) { - *lbeg++ = value; - ++value; - } - *result = value; - }, - std::make_tuple(first, last, next_value), &next_value); - } + distributed_folding_map( + // range + first, last, + // kernel + [](ForwardIterator first, ForwardIterator last, T res) { + // local processing + auto lrange = 
itr_traits::local_range(first, last); + for (auto it = lrange.begin(); it != lrange.end(); ++it) { + *it = res++; + } + // update the partial solution + return res; + }, + // initial solution + value); } template -T accumulate(InputIt first, InputIt last, T init, - BinaryOperation op) { +T accumulate(InputIt first, InputIt last, T init, BinaryOperation op) { using itr_traits = distributed_iterator_traits; - auto localities = itr_traits::localities(first, last); - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality) { - rt::executeAtWithRet( - locality, - [](const std::tuple& args, - T* result) { - auto begin = std::get<0>(args); - auto end = std::get<1>(args); - auto init = std::get<2>(args); - auto op = std::get<3>(args); - auto local_range = itr_traits::local_range(begin, end); - *result = std::accumulate(local_range.begin(), - local_range.end(), init, op); - }, - std::make_tuple(first, last, init, op), &init); - } - return init; + + return distributed_folding_map( + // range + first, last, + // kernel + [](InputIt first, InputIt last, T res, BinaryOperation op) { + // local processing + auto lrange = itr_traits::local_range(first, last); + res = std::accumulate(lrange.begin(), lrange.end(), res, op); + // update the partial solution + return res; + }, + // initial solution + init, + // map arguments + op); } -template< class InputIt1, class InputIt2, class T > -T inner_product(InputIt1 first1, InputIt1 last1, - InputIt2 first2, T init) { +template +T inner_product(InputIt1 first1, InputIt1 last1, InputIt2 first2, T init) { using itr_traits = distributed_iterator_traits; auto localities = itr_traits::localities(first1, last1); auto args = std::make_pair(first2, init); @@ -96,13 +89,12 @@ T inner_product(InputIt1 first1, InputIt1 last1, locality != end; ++locality) { rt::executeAtWithRet( locality, - [](const std::tuple>& args, + [](const std::tuple>& args, std::pair* result) { auto first2 = std::get<2>(args).first; auto 
init = std::get<2>(args).second; - auto local_range = itr_traits::local_range(std::get<0>(args), - std::get<1>(args)); + auto local_range = + itr_traits::local_range(std::get<0>(args), std::get<1>(args)); auto begin = local_range.begin(); auto end = local_range.end(); while (begin != end) { @@ -117,11 +109,9 @@ T inner_product(InputIt1 first1, InputIt1 last1, return args.second; } - -template< class InputIt1, class InputIt2, class T, - class BinaryOperation1, class BinaryOperation2> -T inner_product(InputIt1 first1, InputIt1 last1, - InputIt2 first2, T init, +template +T inner_product(InputIt1 first1, InputIt1 last1, InputIt2 first2, T init, BinaryOperation1 op1, BinaryOperation2 op2) { using itr_traits = distributed_iterator_traits; auto localities = itr_traits::localities(first1, last1); @@ -130,16 +120,15 @@ T inner_product(InputIt1 first1, InputIt1 last1, locality != end; ++locality) { rt::executeAtWithRet( locality, - [](const std::tuple, + [](const std::tuple, BinaryOperation1, BinaryOperation2>& args, std::pair* result) { auto first2 = std::get<2>(args).first; auto init = std::get<2>(args).second; auto op1 = std::get<3>(args); auto op2 = std::get<4>(args); - auto local_range = itr_traits::local_range(std::get<0>(args), - std::get<1>(args)); + auto local_range = + itr_traits::local_range(std::get<0>(args), std::get<1>(args)); auto begin = local_range.begin(); auto end = local_range.end(); while (begin != end) { @@ -155,25 +144,25 @@ T inner_product(InputIt1 first1, InputIt1 last1, } template -OutputIt adjacent_difference(distributed_sequential_tag&&, - InputIt first, InputIt last, - OutputIt d_first, BinaryOperation op) { +OutputIt adjacent_difference(distributed_sequential_tag&&, InputIt first, + InputIt last, OutputIt d_first, + BinaryOperation op) { using itr_traits = distributed_iterator_traits; auto localities = itr_traits::localities(first, last); using value_t = typename itr_traits::value_type; auto startingLoc = localities.begin(); value_t acc; auto 
res = std::make_pair(d_first, acc); - for (auto locality = startingLoc, end = localities.end(); - locality != end; ++locality) { + for (auto locality = startingLoc, end = localities.end(); locality != end; + ++locality) { rt::executeAtWithRet( locality, - [](const std::tuple& args, + [](const std::tuple& args, std::pair* result) { auto d_first = std::get<2>(args); - auto local_range = itr_traits::local_range(std::get<0>(args), - std::get<1>(args)); + auto local_range = + itr_traits::local_range(std::get<0>(args), std::get<1>(args)); auto begin = local_range.begin(); auto end = local_range.end(); if (begin == end) { @@ -186,24 +175,24 @@ OutputIt adjacent_difference(distributed_sequential_tag&&, *d_first = acc; } else { *d_first = op(acc, std::get<4>(args)); - } + } while (++begin != end) { - value_t val = *begin; - *++d_first = val - std::move(acc); - acc = std::move(val); + value_t val = *begin; + *++d_first = val - std::move(acc); + acc = std::move(val); } *result = std::make_pair(++d_first, acc); }, std::make_tuple(first, last, res.first, startingLoc, res.second, op), - &res); + &res); } return d_first; } template -OutputIt adjacent_difference(distributed_parallel_tag&& policy, - InputIt first, InputIt last, - OutputIt d_first, BinaryOperation op) { +OutputIt adjacent_difference(distributed_parallel_tag&& policy, InputIt first, + InputIt last, OutputIt d_first, + BinaryOperation op) { if (first == last) return d_first; using itr_traits = distributed_iterator_traits; auto localities = itr_traits::localities(first, last); @@ -211,17 +200,17 @@ OutputIt adjacent_difference(distributed_parallel_tag&& policy, auto startingLoc = localities.begin(); value_t acc; uint32_t numLoc = localities.size(); - std::vectorres(numLoc); + std::vector res(numLoc); rt::Handle h; size_t i = 0; - for (auto locality = startingLoc, end = localities.end(); - locality != end; ++locality, ++i) { + for (auto locality = startingLoc, end = localities.end(); locality != end; + ++locality, ++i) 
{ rt::asyncExecuteAtWithRet( h, locality, - [](rt::Handle&, const std::tuple& args, + [](rt::Handle&, + const std::tuple& args, OutputIt* result) { - auto gbegin = std::get<0>(args); auto gend = std::get<1>(args); auto local_range = itr_traits::local_range(gbegin, gend); @@ -242,41 +231,39 @@ OutputIt adjacent_difference(distributed_parallel_tag&& policy, std::advance(d_first, (std::distance(gbegin, it))); value_t val = *d_first; *d_first = op(*begin, *it); - } + } while (++begin != end) { - value_t val = *begin; - *++d_first = op(val, std::move(acc)); - acc = std::move(val); + value_t val = *begin; + *++d_first = op(val, std::move(acc)); + acc = std::move(val); } *result = ++d_first; }, - std::make_tuple(first, last, d_first, startingLoc, op), - &res[i]); + std::make_tuple(first, last, d_first, startingLoc, op), &res[i]); } rt::waitForCompletion(h); - return res[numLoc-1]; + return res[numLoc - 1]; } - template -OutputIt partial_sum(InputIt first, InputIt last, - OutputIt d_first, BinaryOperation op) { +OutputIt partial_sum(InputIt first, InputIt last, OutputIt d_first, + BinaryOperation op) { using itr_traits = distributed_iterator_traits; auto localities = itr_traits::localities(first, last); using value_t = typename itr_traits::value_type; auto startingLoc = localities.begin(); value_t acc; auto res = std::make_pair(d_first, acc); - for (auto locality = startingLoc, end = localities.end(); - locality != end; ++locality) { + for (auto locality = startingLoc, end = localities.end(); locality != end; + ++locality) { rt::executeAtWithRet( locality, - [](const std::tuple& args, + [](const std::tuple& args, std::pair* result) { auto d_first = std::get<2>(args); - auto local_range = itr_traits::local_range(std::get<0>(args), - std::get<1>(args)); + auto local_range = + itr_traits::local_range(std::get<0>(args), std::get<1>(args)); auto begin = local_range.begin(); auto end = local_range.end(); if (begin == end) { @@ -296,94 +283,74 @@ OutputIt partial_sum(InputIt 
first, InputIt last, *result = std::make_pair(++d_first, acc); }, std::make_tuple(first, last, res.first, startingLoc, res.second, op), - &res); + &res); } return d_first; } template -T reduce(distributed_sequential_tag&& policy, - InputIt first, InputIt last, T init, - BinaryOperation op) { - using itr_traits = distributed_iterator_traits; - auto localities = itr_traits::localities(first, last); - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality) { - rt::executeAtWithRet( - locality, - [](const std::tuple& args, - T* result) { - auto local_range = itr_traits::local_range(std::get<0>(args), - std::get<1>(args)); - auto begin = local_range.begin(); - auto end = local_range.end(); - auto init = std::get<2>(args); - auto op = std::get<3>(args); - for (; begin != end; ++begin) { - init = op(std::move(init), *begin); - } - *result = init; - }, - std::make_tuple(first, last, init, op), &init); - } - return init; +T reduce(distributed_sequential_tag&& policy, InputIt first, InputIt last, + T init, BinaryOperation op) { + return impl::accumulate(first, last, init, op); } template -T reduce(distributed_parallel_tag&& policy, - InputIt first, InputIt last, T init, +T reduce(distributed_parallel_tag&& policy, InputIt first, InputIt last, T init, BinaryOperation op) { using itr_traits = distributed_iterator_traits; - auto localities = itr_traits::localities(first, last); - rt::Handle h; - std::vector results(localities.size()); - size_t i = 0; - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality, ++i) { - rt::asyncExecuteAtWithRet( - h, locality, - [](rt::Handle &h, - const std::tuple& args, - T* result) { - auto local_range = itr_traits::local_range(std::get<0>(args), - std::get<1>(args)); - auto begin = local_range.begin(); - auto end = local_range.end(); - auto op = std::get<2>(args); - T acc = *begin; - while (++begin != end) { - acc = op(std::move(acc), *begin); - } - *result = acc; 
- }, - std::make_tuple(first, last, op), &results[i]); - } - rt::waitForCompletion(h); - for (auto lval : results) { - init = op(std::move(init), lval); - } - return init; + + // distributed map + auto map_res = distributed_map( + // range + first, last, + // kernel + [](InputIt first, InputIt last, BinaryOperation op) { + using local_iterator_t = typename itr_traits::local_iterator_type; + + // local map + auto lrange = itr_traits::local_range(first, last); + auto map_res = local_map( + // range + lrange.begin(), lrange.end(), + // kernel + [&](local_iterator_t b, local_iterator_t e) { + auto res = *b; + while (++b != e) res = op(std::move(res), *b); + return res; + }); + + // local reduce + auto b = map_res.begin(); + auto res = *b; + while (++b != map_res.end()) res = op(std::move(res), *b); + return res; + }, + // map arguments + op); + + // TODO parallel + // reduce + return std::accumulate(map_res.begin(), map_res.end(), init, op); } -template -OutputIt exclusive_scan(distributed_sequential_tag&& policy, - InputIt first, InputIt last, OutputIt d_first, - BinaryOperation op, T init) { +template +OutputIt exclusive_scan(distributed_sequential_tag&& policy, InputIt first, + InputIt last, OutputIt d_first, BinaryOperation op, + T init) { using itr_traits = distributed_iterator_traits; auto localities = itr_traits::localities(first, last); auto startingLoc = localities.begin(); auto res = std::make_pair(d_first, init); - for (auto locality = startingLoc, end = localities.end(); - locality != end; ++locality) { + for (auto locality = startingLoc, end = localities.end(); locality != end; + ++locality) { rt::executeAtWithRet( locality, - [](const std::tuple& args, + [](const std::tuple& + args, std::pair* result) { auto d_first = std::get<2>(args); - auto local_range = itr_traits::local_range(std::get<0>(args), - std::get<1>(args)); + auto local_range = + itr_traits::local_range(std::get<0>(args), std::get<1>(args)); auto begin = local_range.begin(); auto end = 
local_range.end(); if (begin == end) { @@ -405,11 +372,10 @@ OutputIt exclusive_scan(distributed_sequential_tag&& policy, return d_first; } -template -OutputIt exclusive_scan(distributed_parallel_tag&& policy, - InputIt first, InputIt last, OutputIt d_first, - BinaryOperation op, T init) { +template +OutputIt exclusive_scan(distributed_parallel_tag&& policy, InputIt first, + InputIt last, OutputIt d_first, BinaryOperation op, + T init) { using itr_traits = distributed_iterator_traits; auto localities = itr_traits::localities(first, last); auto startingLoc = localities.begin(); @@ -417,12 +383,12 @@ OutputIt exclusive_scan(distributed_parallel_tag&& policy, std::vector> res(numLoc); rt::Handle h; size_t i = 0; - for (auto locality = startingLoc, end = localities.end(); - locality != end; ++locality, ++i) { + for (auto locality = startingLoc, end = localities.end(); locality != end; + ++locality, ++i) { rt::asyncExecuteAtWithRet( h, locality, - [](rt::Handle&, const std::tuple& args, + [](rt::Handle&, + const std::tuple& args, std::pair* result) { auto d_first = std::get<2>(args); auto df = d_first; @@ -454,7 +420,7 @@ OutputIt exclusive_scan(distributed_parallel_tag&& policy, auto acc = init; OutputIt chunk_end = d_first; using outitr_traits = distributed_iterator_traits; - for (i=0; i& args) { + const std::tuple& args) { auto gbegin = std::get<0>(args); auto gend = std::get<1>(args); auto local_range = outitr_traits::local_range(std::get<0>(args), @@ -475,7 +440,7 @@ OutputIt exclusive_scan(distributed_parallel_tag&& policy, BinaryOperation op = std::get<2>(args); auto acc = std::get<3>(args); *begin = acc; - for (++begin; begin!= end; ++begin) { + for (++begin; begin != end; ++begin) { *begin = op(std::move(acc), *begin); } }, @@ -489,25 +454,24 @@ OutputIt exclusive_scan(distributed_parallel_tag&& policy, } template -OutputIt inclusive_scan(distributed_sequential_tag&& policy, - InputIt first, InputIt last, OutputIt d_first, - BinaryOperation op) { +OutputIt 
inclusive_scan(distributed_sequential_tag&& policy, InputIt first, + InputIt last, OutputIt d_first, BinaryOperation op) { using itr_traits = distributed_iterator_traits; auto localities = itr_traits::localities(first, last); auto startingLoc = localities.begin(); using value_t = typename itr_traits::value_type; value_t acc; auto res = std::make_pair(d_first, acc); - for (auto locality = startingLoc, end = localities.end(); - locality != end; ++locality) { + for (auto locality = startingLoc, end = localities.end(); locality != end; + ++locality) { rt::executeAtWithRet( locality, - [](const std::tuple& args, + [](const std::tuple& args, std::pair* result) { auto d_first = std::get<2>(args); - auto local_range = itr_traits::local_range(std::get<0>(args), - std::get<1>(args)); + auto local_range = + itr_traits::local_range(std::get<0>(args), std::get<1>(args)); auto begin = local_range.begin(); auto end = local_range.end(); if (begin == end) { @@ -529,16 +493,14 @@ OutputIt inclusive_scan(distributed_sequential_tag&& policy, *result = std::make_pair(++d_first, acc); }, std::make_tuple(first, last, res.first, startingLoc, res.second, op), - &res); + &res); } return d_first; } -template -OutputIt inclusive_scan(distributed_parallel_tag&& policy, - InputIt first, InputIt last, OutputIt d_first, - BinaryOperation op) { +template +OutputIt inclusive_scan(distributed_parallel_tag&& policy, InputIt first, + InputIt last, OutputIt d_first, BinaryOperation op) { using itr_traits = distributed_iterator_traits; using value_t = typename itr_traits::value_type; auto localities = itr_traits::localities(first, last); @@ -550,12 +512,12 @@ OutputIt inclusive_scan(distributed_parallel_tag&& policy, std::vector> res(numLoc); rt::Handle h; size_t i = 0; - for (auto locality = startingLoc, end = localities.end(); - locality != end; ++locality, ++i) { + for (auto locality = startingLoc, end = localities.end(); locality != end; + ++locality, ++i) { rt::asyncExecuteAtWithRet( h, locality, - 
[](rt::Handle&, const std::tuple& args, + [](rt::Handle&, + const std::tuple& args, std::pair* result) { auto d_first = std::get<2>(args); auto df = d_first; @@ -586,7 +548,7 @@ OutputIt inclusive_scan(distributed_parallel_tag&& policy, auto d_f = res[0].first; value_t acc = res[0].second; OutputIt chunk_end = d_first; - for (i=1; i& args) { + [](rt::Handle&, const std::tuple& args) { auto gbegin = std::get<0>(args); auto gend = std::get<1>(args); - auto local_range = itr_traits::local_range(std::get<0>(args), - std::get<1>(args)); + auto local_range = + itr_traits::local_range(std::get<0>(args), std::get<1>(args)); auto begin = local_range.begin(); auto end = local_range.end(); BinaryOperation op = std::get<2>(args); auto acc = std::get<3>(args); - for (auto it = begin; it!= end; ++it) { + for (auto it = begin; it != end; ++it) { *it = op(*it, std::move(acc)); } }, @@ -617,25 +579,24 @@ OutputIt inclusive_scan(distributed_parallel_tag&& policy, return chunk_end; } -template -OutputIt inclusive_scan(distributed_sequential_tag&& policy, - InputIt first, InputIt last, OutputIt d_first, - BinaryOperation op, T init) { +template +OutputIt inclusive_scan(distributed_sequential_tag&& policy, InputIt first, + InputIt last, OutputIt d_first, BinaryOperation op, + T init) { using itr_traits = distributed_iterator_traits; auto localities = itr_traits::localities(first, last); auto startingLoc = localities.begin(); auto res = std::make_pair(d_first, init); - for (auto locality = startingLoc, end = localities.end(); - locality != end; ++locality) { + for (auto locality = startingLoc, end = localities.end(); locality != end; + ++locality) { rt::executeAtWithRet( locality, - [](const std::tuple& args, + [](const std::tuple& + args, std::pair* result) { auto d_first = std::get<2>(args); - auto local_range = itr_traits::local_range(std::get<0>(args), - std::get<1>(args)); + auto local_range = + itr_traits::local_range(std::get<0>(args), std::get<1>(args)); auto begin = 
local_range.begin(); auto end = local_range.end(); if (begin == end) { @@ -644,24 +605,22 @@ OutputIt inclusive_scan(distributed_sequential_tag&& policy, } BinaryOperation op = std::get<4>(args); T acc = op(std::get<3>(args), *begin); - *d_first = acc; + *d_first = acc; while (++begin != end) { acc = op(std::move(acc), *begin); *++d_first = acc; } *result = std::make_pair(++d_first, acc); }, - std::make_tuple(first, last, res.first, res.second, op), - &res); + std::make_tuple(first, last, res.first, res.second, op), &res); } return d_first; } -template -OutputIt inclusive_scan(distributed_parallel_tag&& policy, - InputIt first, InputIt last, OutputIt d_first, - BinaryOperation op, T init) { +template +OutputIt inclusive_scan(distributed_parallel_tag&& policy, InputIt first, + InputIt last, OutputIt d_first, BinaryOperation op, + T init) { using itr_traits = distributed_iterator_traits; auto localities = itr_traits::localities(first, last); auto startingLoc = localities.begin(); @@ -669,12 +628,12 @@ OutputIt inclusive_scan(distributed_parallel_tag&& policy, std::vector> res(numLoc); rt::Handle h; size_t i = 0; - for (auto locality = startingLoc, end = localities.end(); - locality != end; ++locality, ++i) { + for (auto locality = startingLoc, end = localities.end(); locality != end; + ++locality, ++i) { rt::asyncExecuteAtWithRet( h, locality, - [](rt::Handle&, const std::tuple& args, + [](rt::Handle&, + const std::tuple& args, std::pair* result) { auto d_first = std::get<2>(args); auto df = d_first; @@ -706,7 +665,7 @@ OutputIt inclusive_scan(distributed_parallel_tag&& policy, auto acc = init; OutputIt chunk_end = d_first; using outitr_traits = distributed_iterator_traits; - for (i=0; i& args) { + [](rt::Handle&, + const std::tuple& args) { auto gbegin = std::get<0>(args); auto gend = std::get<1>(args); - auto local_range = itr_traits::local_range(std::get<0>(args), - std::get<1>(args)); + auto local_range = + itr_traits::local_range(std::get<0>(args), 
std::get<1>(args)); auto begin = local_range.begin(); auto end = local_range.end(); BinaryOperation op = std::get<2>(args); auto acc = std::get<3>(args); - for (auto it = begin; it!= end; ++it) { + for (auto it = begin; it != end; ++it) { *it = op(std::move(acc), *it); } }, @@ -737,78 +696,84 @@ OutputIt inclusive_scan(distributed_parallel_tag&& policy, return chunk_end; } +//////////////////////////////////////////////////////////////////////////////// +// +// transform_reduce +// +//////////////////////////////////////////////////////////////////////////////// +// single range - sequential template -T transform_reduce(distributed_sequential_tag&& policy, - ForwardIt first, ForwardIt last, T init, - BinaryOp op, UnaryOp uop) { +T transform_reduce(distributed_sequential_tag&& policy, ForwardIt first, + ForwardIt last, T init, BinaryOp op, UnaryOp uop) { using itr_traits = distributed_iterator_traits; - auto localities = itr_traits::localities(first, last); - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality) { - rt::executeAtWithRet( - locality, - [](const std::tuple& args, - T* result) { - auto local_range = itr_traits::local_range(std::get<0>(args), - std::get<1>(args)); - auto begin = local_range.begin(); - auto end = local_range.end(); - auto init = std::get<2>(args); - auto op = std::get<3>(args); - auto uop = std::get<4>(args); - for (; begin != end; ++begin) { - init = op(std::move(init), uop(*begin)); - } - *result = init; - }, - std::make_tuple(first, last, init, op, uop), &init); - } - return init; + + return distributed_folding_map( + // range + first, last, + // kernel + [](ForwardIt first, ForwardIt last, T res, BinaryOp op, UnaryOp uop) { + // local processing + auto lrange = itr_traits::local_range(first, last); + for (auto b = lrange.begin(); b != lrange.end(); ++b) { + res = op(std::move(res), uop(*b)); + } + // update the partial solution + return res; + }, + // initial solution + init, + // map 
arguments + op, uop); + + // } +// single range - parallel template -T transform_reduce(distributed_parallel_tag&& policy, - ForwardIt first, ForwardIt last, T init, - BinaryOp op, UnaryOp uop) { +T transform_reduce(distributed_parallel_tag&& policy, ForwardIt first, + ForwardIt last, T init, BinaryOp op, UnaryOp uop) { using itr_traits = distributed_iterator_traits; - auto localities = itr_traits::localities(first, last); - rt::Handle h; - std::vector results(localities.size()); - size_t i = 0; - for (auto locality = localities.begin(), end = localities.end(); - locality != end; ++locality, ++i) { - rt::asyncExecuteAtWithRet( - h, locality, - [](rt::Handle &h, - const std::tuple& args, - T* result) { - auto local_range = itr_traits::local_range(std::get<0>(args), - std::get<1>(args)); - auto begin = local_range.begin(); - auto end = local_range.end(); - auto op = std::get<2>(args); - auto uop = std::get<3>(args); - T acc = uop(*begin); - while (++begin != end) { - acc = op(std::move(acc), uop(*begin)); - } - *result = acc; - }, - std::make_tuple(first, last, op, uop), &results[i]); - } - rt::waitForCompletion(h); - for (auto lval : results) { - init = op(std::move(init), lval); - } - return init; + + // distributed map + auto map_res = distributed_map( + // range + first, last, + // kernel + [](ForwardIt first, ForwardIt last, BinaryOp op, UnaryOp uop) { + using local_iterator_t = typename itr_traits::local_iterator_type; + + // local map + auto lrange = itr_traits::local_range(first, last); + auto map_res = local_map( + // range + lrange.begin(), lrange.end(), + // kernel + [&](local_iterator_t b, local_iterator_t e) { + auto res = *b; + while (++b != e) res = op(std::move(res), uop(*b)); + return res; + }); + + // local reduce + auto b = map_res.begin(); + auto res = *b; + while (++b != map_res.end()) res = op(std::move(res), *b); + return res; + }, + // map arguments + op, uop); + + // TODO parallel + // reduce + return std::accumulate(map_res.begin(), 
map_res.end(), init, op); } -template -T transform_reduce(distributed_sequential_tag&& policy, - ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, - T init, BinaryOp1 op1, BinaryOp2 op2) { +// two ranges - sequential +template +T transform_reduce(distributed_sequential_tag&& policy, ForwardIt1 first1, + ForwardIt1 last1, ForwardIt2 first2, T init, BinaryOp1 op1, + BinaryOp2 op2) { using itr_traits = distributed_iterator_traits; auto localities = itr_traits::localities(first1, last1); std::pair res = std::make_pair(first2, init); @@ -816,11 +781,11 @@ T transform_reduce(distributed_sequential_tag&& policy, locality != end; ++locality) { rt::executeAtWithRet( locality, - [](const std::tuple& args, + [](const std::tuple& args, std::pair* result) { - auto local_range = itr_traits::local_range(std::get<0>(args), - std::get<1>(args)); + auto local_range = + itr_traits::local_range(std::get<0>(args), std::get<1>(args)); auto begin = local_range.begin(); auto end = local_range.end(); auto first2 = std::get<2>(args); @@ -829,7 +794,7 @@ T transform_reduce(distributed_sequential_tag&& policy, auto op2 = std::get<5>(args); for (; begin != end; ++begin, ++first2) { init = op(std::move(init), op2(*begin, *first2)); - } + } *result = std::make_pair(first2, init); }, std::make_tuple(first1, last1, res.first, res.second, op1, op2), &res); @@ -837,11 +802,12 @@ T transform_reduce(distributed_sequential_tag&& policy, return res.second; } -template -T transform_reduce(distributed_parallel_tag&& policy, - ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, - T init, BinaryOp1 op1, BinaryOp2 op2) { +// two ranges - parallel +template +T transform_reduce(distributed_parallel_tag&& policy, ForwardIt1 first1, + ForwardIt1 last1, ForwardIt2 first2, T init, BinaryOp1 op1, + BinaryOp2 op2) { using itr_traits = distributed_iterator_traits; auto localities = itr_traits::localities(first1, last1); rt::Handle h; @@ -851,9 +817,9 @@ T transform_reduce(distributed_parallel_tag&& policy, 
locality != end; ++locality, ++i) { rt::asyncExecuteAtWithRet( h, locality, - [](rt::Handle &h, - const std::tuple& args, + [](rt::Handle& h, + const std::tuple& args, T* result) { auto gbegin = std::get<0>(args); auto gend = std::get<1>(args); @@ -881,27 +847,26 @@ T transform_reduce(distributed_parallel_tag&& policy, return init; } -template +template OutputIt transform_exclusive_scan(distributed_sequential_tag&& policy, - InputIt first, InputIt last, - OutputIt d_first, T init, - BinaryOperation op, + InputIt first, InputIt last, OutputIt d_first, + T init, BinaryOperation op, UnaryOperation uop) { using itr_traits = distributed_iterator_traits; auto localities = itr_traits::localities(first, last); auto startingLoc = localities.begin(); auto res = std::make_pair(d_first, init); - for (auto locality = startingLoc, end = localities.end(); - locality != end; ++locality) { + for (auto locality = startingLoc, end = localities.end(); locality != end; + ++locality) { rt::executeAtWithRet( locality, - [](const std::tuple& args, + [](const std::tuple& args, std::pair* result) { auto d_first = std::get<2>(args); - auto local_range = itr_traits::local_range(std::get<0>(args), - std::get<1>(args)); + auto local_range = + itr_traits::local_range(std::get<0>(args), std::get<1>(args)); auto begin = local_range.begin(); auto end = local_range.end(); if (begin == end) { @@ -924,12 +889,11 @@ OutputIt transform_exclusive_scan(distributed_sequential_tag&& policy, return res.first; } -template +template OutputIt transform_exclusive_scan(distributed_parallel_tag&& policy, - InputIt first, InputIt last, - OutputIt d_first, T init, - BinaryOperation op, + InputIt first, InputIt last, OutputIt d_first, + T init, BinaryOperation op, UnaryOperation uop) { using itr_traits = distributed_iterator_traits; auto localities = itr_traits::localities(first, last); @@ -938,13 +902,13 @@ OutputIt transform_exclusive_scan(distributed_parallel_tag&& policy, std::vector> res(numLoc); rt::Handle h; 
size_t i = 0; - for (auto locality = startingLoc, end = localities.end(); - locality != end; ++locality, ++i) { + for (auto locality = startingLoc, end = localities.end(); locality != end; + ++locality, ++i) { rt::asyncExecuteAtWithRet( h, locality, [](rt::Handle&, - const std::tuple& args, + const std::tuple& args, std::pair* result) { auto d_first = std::get<2>(args); auto df = d_first; @@ -978,7 +942,7 @@ OutputIt transform_exclusive_scan(distributed_parallel_tag&& policy, auto acc = init; OutputIt chunk_end = d_first; using outitr_traits = distributed_iterator_traits; - for (i=0; i& args) { + const std::tuple& args) { auto gbegin = std::get<0>(args); auto gend = std::get<1>(args); - auto local_range = itr_traits::local_range(std::get<0>(args), - std::get<1>(args)); + auto local_range = + itr_traits::local_range(std::get<0>(args), std::get<1>(args)); auto begin = local_range.begin(); auto end = local_range.end(); if (begin == end) return; BinaryOperation op = std::get<2>(args); auto acc = std::get<3>(args); *begin = acc; - for (++begin; begin!= end; ++begin) { + for (++begin; begin != end; ++begin) { *begin = op(std::move(acc), *begin); } }, @@ -1012,11 +975,10 @@ OutputIt transform_exclusive_scan(distributed_parallel_tag&& policy, return chunk_end; } -template +template OutputIt transform_inclusive_scan(distributed_sequential_tag&& policy, - InputIt first, InputIt last, - OutputIt d_first, + InputIt first, InputIt last, OutputIt d_first, BinaryOperation op, UnaryOperation uop) { using itr_traits = distributed_iterator_traits; auto localities = itr_traits::localities(first, last); @@ -1024,17 +986,16 @@ OutputIt transform_inclusive_scan(distributed_sequential_tag&& policy, using value_t = typename itr_traits::value_type; value_t acc; auto res = std::make_pair(d_first, acc); - for (auto locality = startingLoc, end = localities.end(); - locality != end; ++locality) { + for (auto locality = startingLoc, end = localities.end(); locality != end; + ++locality) { 
rt::executeAtWithRet( locality, - [](const std::tuple& args, std::pair* result) { auto d_first = std::get<2>(args); - auto local_range = itr_traits::local_range(std::get<0>(args), - std::get<1>(args)); + auto local_range = + itr_traits::local_range(std::get<0>(args), std::get<1>(args)); auto begin = local_range.begin(); auto end = local_range.end(); if (begin == end) { @@ -1056,18 +1017,17 @@ OutputIt transform_inclusive_scan(distributed_sequential_tag&& policy, } *result = std::make_pair(++d_first, acc); }, - std::make_tuple(first, last, res.first, startingLoc, - res.second, op, uop), - &res); + std::make_tuple(first, last, res.first, startingLoc, res.second, op, + uop), + &res); } return res.first; } -template +template OutputIt transform_inclusive_scan(distributed_parallel_tag&& policy, - InputIt first, InputIt last, - OutputIt d_first, + InputIt first, InputIt last, OutputIt d_first, BinaryOperation op, UnaryOperation uop) { using itr_traits = distributed_iterator_traits; using value_t = typename itr_traits::value_type; @@ -1080,13 +1040,13 @@ OutputIt transform_inclusive_scan(distributed_parallel_tag&& policy, std::vector> res(numLoc); rt::Handle h; size_t i = 0; - for (auto locality = startingLoc, end = localities.end(); - locality != end; ++locality, ++i) { + for (auto locality = startingLoc, end = localities.end(); locality != end; + ++locality, ++i) { rt::asyncExecuteAtWithRet( h, locality, [](rt::Handle&, - const std::tuple& args, + const std::tuple& args, std::pair* result) { auto d_first = std::get<2>(args); auto df = d_first; @@ -1119,7 +1079,7 @@ OutputIt transform_inclusive_scan(distributed_parallel_tag&& policy, value_t acc = res[0].second; OutputIt chunk_end = d_first; using outitr_traits = distributed_iterator_traits; - for (i=1; i& args) { + [](rt::Handle&, const std::tuple& args) { auto gbegin = std::get<0>(args); auto gend = std::get<1>(args); - auto local_range = itr_traits::local_range(std::get<0>(args), - std::get<1>(args)); + auto 
local_range = + itr_traits::local_range(std::get<0>(args), std::get<1>(args)); auto begin = local_range.begin(); auto end = local_range.end(); BinaryOperation op = std::get<2>(args); auto acc = std::get<3>(args); - for (auto it = begin; it!= end; ++it) { + for (auto it = begin; it != end; ++it) { *it = op(std::move(acc), *it); } }, @@ -1150,27 +1110,26 @@ OutputIt transform_inclusive_scan(distributed_parallel_tag&& policy, return chunk_end; } -template +template OutputIt transform_inclusive_scan(distributed_sequential_tag&& policy, - InputIt first, InputIt last, - OutputIt d_first, - BinaryOperation op, - UnaryOperation uop, T init) { + InputIt first, InputIt last, OutputIt d_first, + BinaryOperation op, UnaryOperation uop, + T init) { using itr_traits = distributed_iterator_traits; auto localities = itr_traits::localities(first, last); auto startingLoc = localities.begin(); auto res = std::make_pair(d_first, init); - for (auto locality = startingLoc, end = localities.end(); - locality != end; ++locality) { + for (auto locality = startingLoc, end = localities.end(); locality != end; + ++locality) { rt::executeAtWithRet( locality, - [](const std::tuple& args, + [](const std::tuple& args, std::pair* result) { auto d_first = std::get<2>(args); - auto local_range = itr_traits::local_range(std::get<0>(args), - std::get<1>(args)); + auto local_range = + itr_traits::local_range(std::get<0>(args), std::get<1>(args)); auto begin = local_range.begin(); auto end = local_range.end(); if (begin == end) { @@ -1180,26 +1139,24 @@ OutputIt transform_inclusive_scan(distributed_sequential_tag&& policy, BinaryOperation op = std::get<4>(args); UnaryOperation uop = std::get<5>(args); T acc = op(std::get<3>(args), uop(*begin)); - *d_first = acc; + *d_first = acc; while (++begin != end) { acc = op(std::move(acc), uop(*begin)); *++d_first = acc; } *result = std::make_pair(++d_first, acc); }, - std::make_tuple(first, last, res.first, res.second, op, uop), - &res); + std::make_tuple(first, 
last, res.first, res.second, op, uop), &res); } return res.second; } -template +template OutputIt transform_inclusive_scan(distributed_parallel_tag&& policy, - InputIt first, InputIt last, - OutputIt d_first, - BinaryOperation op, - UnaryOperation uop, T init) { + InputIt first, InputIt last, OutputIt d_first, + BinaryOperation op, UnaryOperation uop, + T init) { using itr_traits = distributed_iterator_traits; auto localities = itr_traits::localities(first, last); auto startingLoc = localities.begin(); @@ -1210,13 +1167,13 @@ OutputIt transform_inclusive_scan(distributed_parallel_tag&& policy, std::vector> res(numLoc); rt::Handle h; size_t i = 0; - for (auto locality = startingLoc, end = localities.end(); - locality != end; ++locality, ++i) { + for (auto locality = startingLoc, end = localities.end(); locality != end; + ++locality, ++i) { rt::asyncExecuteAtWithRet( h, locality, [](rt::Handle&, - const std::tuple& args, + const std::tuple& args, std::pair* result) { auto d_first = std::get<2>(args); auto df = d_first; @@ -1249,7 +1206,7 @@ OutputIt transform_inclusive_scan(distributed_parallel_tag&& policy, auto acc = res[0].second; OutputIt chunk_end = d_first; using outitr_traits = distributed_iterator_traits; - for (i=1; i& args) { + [](rt::Handle&, + const std::tuple& args) { auto gbegin = std::get<0>(args); auto gend = std::get<1>(args); - auto local_range = itr_traits::local_range(std::get<0>(args), - std::get<1>(args)); + auto local_range = + itr_traits::local_range(std::get<0>(args), std::get<1>(args)); auto begin = local_range.begin(); auto end = local_range.end(); BinaryOperation op = std::get<2>(args); auto acc = std::get<3>(args); - for (auto it = begin; it!= end; ++it) { + for (auto it = begin; it != end; ++it) { *it = op(std::move(acc), *it); } }, diff --git a/include/shad/core/numeric.h b/include/shad/core/numeric.h index 4c2203e6..d1f12b8d 100755 --- a/include/shad/core/numeric.h +++ b/include/shad/core/numeric.h @@ -44,9 +44,13 @@ void 
iota(ForwardIterator first, ForwardIterator last, T value) { return impl::iota(first, last, value); } +//////////////////////////////////////////////////////////////////////////////// +// +// accumulate +// +//////////////////////////////////////////////////////////////////////////////// template -T accumulate(InputIt first, InputIt last, T init, - BinaryOperation op) { +T accumulate(InputIt first, InputIt last, T init, BinaryOperation op) { return impl::accumulate(first, last, init, op); } @@ -55,75 +59,96 @@ T accumulate(InputIt first, InputIt last, T init) { return impl::accumulate(first, last, init, std::plus()); } +//////////////////////////////////////////////////////////////////////////////// +// +// inner product +// +//////////////////////////////////////////////////////////////////////////////// template -T inner_product(InputIt1 first1, InputIt1 last1, - InputIt2 first2, T init) { +T inner_product(InputIt1 first1, InputIt1 last1, InputIt2 first2, T init) { return impl::inner_product(first1, last1, first2, init); } -template -T inner_product(InputIt1 first1, InputIt1 last1, - InputIt2 first2, T init, +template +T inner_product(InputIt1 first1, InputIt1 last1, InputIt2 first2, T init, BinaryOperation1 op1, BinaryOperation2 op2) { return impl::inner_product(first1, last1, first2, init, op1, op2); } +//////////////////////////////////////////////////////////////////////////////// +// +// adjacent_difference +// +//////////////////////////////////////////////////////////////////////////////// template -OutputIt adjacent_difference(InputIt first, InputIt last, - OutputIt d_first) { +OutputIt adjacent_difference(InputIt first, InputIt last, OutputIt d_first) { using value_t = typename distributed_iterator_traits::value_type; - return impl::adjacent_difference(distributed_sequential_tag{}, - first, last, d_first, - std::minus()); + return impl::adjacent_difference(distributed_sequential_tag{}, first, last, + d_first, std::minus()); } template 
std::enable_if_t::value, OutputIt> -adjacent_difference(ExecutionPolicy&& policy, InputIt first, - InputIt last, OutputIt d_first) { +adjacent_difference(ExecutionPolicy&& policy, InputIt first, InputIt last, + OutputIt d_first) { using value_t = typename distributed_iterator_traits::value_type; - return impl::adjacent_difference(std::forward(policy), - first, last, d_first, - std::minus()); + return impl::adjacent_difference(std::forward(policy), first, + last, d_first, std::minus()); } template std::enable_if_t::value, OutputIt> -adjacent_difference(InputIt first, InputIt last, - OutputIt d_first, BinaryOperation op) { - return impl::adjacent_difference(distributed_sequential_tag{}, - first, last, d_first, op); +adjacent_difference(InputIt first, InputIt last, OutputIt d_first, + BinaryOperation op) { + return impl::adjacent_difference(distributed_sequential_tag{}, first, last, + d_first, op); } -template -OutputIt adjacent_difference(ExecutionPolicy&& policy, - InputIt first, InputIt last, - OutputIt d_first, BinaryOperation op) { - return impl::adjacent_difference(std::forward(policy), - first, last, d_first, op); +template +OutputIt adjacent_difference(ExecutionPolicy&& policy, InputIt first, + InputIt last, OutputIt d_first, + BinaryOperation op) { + return impl::adjacent_difference(std::forward(policy), first, + last, d_first, op); } +//////////////////////////////////////////////////////////////////////////////// +// +// partial sum +// +//////////////////////////////////////////////////////////////////////////////// template -OutputIt partial_sum(InputIt first, InputIt last, - OutputIt d_first) { +OutputIt partial_sum(InputIt first, InputIt last, OutputIt d_first) { using value_t = typename distributed_iterator_traits::value_type; return impl::partial_sum(first, last, d_first, std::plus()); } template -OutputIt partial_sum(InputIt first, InputIt last, - OutputIt d_first, BinaryOperation op) { +OutputIt partial_sum(InputIt first, InputIt last, OutputIt 
d_first, + BinaryOperation op) { return impl::partial_sum(first, last, d_first, op); } +//////////////////////////////////////////////////////////////////////////////// +// +// reduce +// +//////////////////////////////////////////////////////////////////////////////// +template +T reduce(ExecutionPolicy&& policy, ForwardIt first, ForwardIt last, T init, + BinaryOp binary_op) { + return impl::reduce(std::forward(policy), first, last, init, + binary_op); +} + template -typename std::iterator_traits::value_type reduce( - InputIt first, InputIt last) { +typename std::iterator_traits::value_type reduce(InputIt first, + InputIt last) { using val_t = typename std::iterator_traits::value_type; - return impl::reduce(distributed_sequential_tag{}, - first, last, val_t{}, std::plus()); + return reduce(distributed_sequential_tag{}, first, last, val_t{}, + std::plus()); } template @@ -131,254 +156,246 @@ std::enable_if_t::value, typename std::iterator_traits::value_type> reduce(ExecutionPolicy&& policy, ForwardIt first, ForwardIt last) { using val_t = typename std::iterator_traits::value_type; - return impl::reduce(std::forward(policy), - first, last, val_t{}, std::plus()); + return reduce(std::forward(policy), first, last, val_t{}, + std::plus()); } template -std::enable_if_t::value, T> -reduce(InputIt first, InputIt last, T init) { - return impl::reduce(distributed_sequential_tag{}, - first, last, init, std::plus()); +std::enable_if_t::value, T> reduce( + InputIt first, InputIt last, T init) { + return reduce(distributed_sequential_tag{}, first, last, init, + std::plus()); } template -std::enable_if_t::value, T> -reduce(ExecutionPolicy&& policy, - ForwardIt first, ForwardIt last, T init) { - return impl::reduce(std::forward(policy), - first, last, init, std::plus()); +std::enable_if_t::value, T> reduce( + ExecutionPolicy&& policy, ForwardIt first, ForwardIt last, T init) { + return reduce(std::forward(policy), first, last, init, + std::plus()); } template 
-std::enable_if_t::value, T> -reduce(InputIt first, InputIt last, T init, BinaryOp binary_op) { - return impl::reduce(distributed_sequential_tag{}, - first, last, init, binary_op); -} - -template -T reduce(ExecutionPolicy&& policy, - ForwardIt first, ForwardIt last, T init, BinaryOp binary_op) { - return impl::reduce(std::forward(policy), - first, last, init, binary_op); +std::enable_if_t::value, T> reduce( + InputIt first, InputIt last, T init, BinaryOp binary_op) { + return reduce(distributed_sequential_tag{}, first, last, init, binary_op); } +//////////////////////////////////////////////////////////////////////////////// +// +// exclusive_scan +// +//////////////////////////////////////////////////////////////////////////////// template -OutputIt exclusive_scan(InputIt first, InputIt last, - OutputIt d_first, T init) { - return impl::exclusive_scan(shad::distributed_sequential_tag{}, - first, last, d_first, std::plus(), init); +OutputIt exclusive_scan(InputIt first, InputIt last, OutputIt d_first, T init) { + return impl::exclusive_scan(shad::distributed_sequential_tag{}, first, last, + d_first, std::plus(), init); } template std::enable_if_t::value, ForwardIt2> -exclusive_scan(ExecutionPolicy&& policy, ForwardIt1 first, - ForwardIt1 last, ForwardIt2 d_first, T init) { - return impl::exclusive_scan(std::forward(policy), - first, last, d_first, std::plus(), init); +exclusive_scan(ExecutionPolicy&& policy, ForwardIt1 first, ForwardIt1 last, + ForwardIt2 d_first, T init) { + return impl::exclusive_scan(std::forward(policy), first, + last, d_first, std::plus(), init); } -template +template std::enable_if_t::value, OutputIt> -exclusive_scan(InputIt first, InputIt last, - OutputIt d_first, T init, BinaryOperation binary_op) { - return impl::exclusive_scan(shad::distributed_sequential_tag{}, - first, last, d_first, binary_op, init); +exclusive_scan(InputIt first, InputIt last, OutputIt d_first, T init, + BinaryOperation binary_op) { + return 
impl::exclusive_scan(shad::distributed_sequential_tag{}, first, last, + d_first, binary_op, init); } -template +template ForwardIt2 exclusive_scan(ExecutionPolicy&& policy, ForwardIt1 first, - ForwardIt1 last, ForwardIt2 d_first, - T init, BinaryOperation binary_op) { - return impl::exclusive_scan(std::forward(policy), - first, last, d_first, binary_op, init); + ForwardIt1 last, ForwardIt2 d_first, T init, + BinaryOperation binary_op) { + return impl::exclusive_scan(std::forward(policy), first, + last, d_first, binary_op, init); } +//////////////////////////////////////////////////////////////////////////////// +// +// inclusive_scan +// +//////////////////////////////////////////////////////////////////////////////// template -OutputIt inclusive_scan(InputIt first, - InputIt last, OutputIt d_first) { - return impl::inclusive_scan(shad::distributed_sequential_tag{}, - first, last, d_first, std::plus<>()); +OutputIt inclusive_scan(InputIt first, InputIt last, OutputIt d_first) { + return impl::inclusive_scan(shad::distributed_sequential_tag{}, first, last, + d_first, std::plus<>()); } template -std::enable_if_t::value, - ForwardIt2> -inclusive_scan(ExecutionPolicy&& policy, ForwardIt1 first, - ForwardIt1 last, ForwardIt2 d_first) { - return impl::inclusive_scan(std::forward(policy), - first, last, d_first, std::plus<>()); +std::enable_if_t::value, ForwardIt2> +inclusive_scan(ExecutionPolicy&& policy, ForwardIt1 first, ForwardIt1 last, + ForwardIt2 d_first) { + return impl::inclusive_scan(std::forward(policy), first, + last, d_first, std::plus<>()); } template std::enable_if_t::value, OutputIt> -inclusive_scan(InputIt first, InputIt last, - OutputIt d_first, BinaryOperation binary_op) { - return impl::inclusive_scan(shad::distributed_sequential_tag{}, - first, last, d_first, binary_op); +inclusive_scan(InputIt first, InputIt last, OutputIt d_first, + BinaryOperation binary_op) { + return impl::inclusive_scan(shad::distributed_sequential_tag{}, first, last, + d_first, 
binary_op); } template -std::enable_if_t::value, - ForwardIt2> -inclusive_scan(ExecutionPolicy&& policy, - ForwardIt1 first, ForwardIt1 last, +std::enable_if_t::value, ForwardIt2> +inclusive_scan(ExecutionPolicy&& policy, ForwardIt1 first, ForwardIt1 last, ForwardIt2 d_first, BinaryOperation binary_op) { - return impl::inclusive_scan(std::forward(policy), - first, last, d_first, binary_op); + return impl::inclusive_scan(std::forward(policy), first, + last, d_first, binary_op); } template std::enable_if_t::value, OutputIt> inclusive_scan(InputIt first, InputIt last, OutputIt d_first, BinaryOperation binary_op, T init) { - return impl::inclusive_scan(shad::distributed_sequential_tag{}, - first, last, d_first, binary_op, init); + return impl::inclusive_scan(shad::distributed_sequential_tag{}, first, last, + d_first, binary_op, init); } template -ForwardIt2 inclusive_scan(ExecutionPolicy&& policy, - ForwardIt1 first, ForwardIt1 last, ForwardIt2 d_first, +ForwardIt2 inclusive_scan(ExecutionPolicy&& policy, ForwardIt1 first, + ForwardIt1 last, ForwardIt2 d_first, BinaryOperation binary_op, T init) { - return impl::inclusive_scan(std::forward(policy), - first, last, d_first, binary_op, init); + return impl::inclusive_scan(std::forward(policy), first, + last, d_first, binary_op, init); } -template -T transform_reduce(InputIt1 first1, InputIt1 last1, - InputIt2 first2, T init) { - return impl::transform_reduce(distributed_sequential_tag{}, - first1, last1, first2, init, - std::plus<>(), std::multiplies<>()); +//////////////////////////////////////////////////////////////////////////////// +// +// transform_reduce +// +//////////////////////////////////////////////////////////////////////////////// +// single range +template +std::enable_if_t::value, T> +transform_reduce(ExecutionPolicy&& policy, ForwardIt first, ForwardIt last, + T init, BinaryOp binary_op, UnaryOp unary_op) { + return impl::transform_reduce(std::forward(policy), first, + last, init, binary_op, unary_op); } 
-template -std::enable_if_t::value, T> -transform_reduce(InputIt1 first1, InputIt1 last1, InputIt2 first2, - T init, BinaryOp1 binary_op1, BinaryOp2 binary_op2) { - return impl::transform_reduce(distributed_sequential_tag{}, - first1, last1, first2, init, - binary_op1, binary_op2); +T transform_reduce(ExecutionPolicy&& policy, ForwardIt1 first1, + ForwardIt1 last1, ForwardIt2 first2, T init, + BinaryOp1 binary_op1, BinaryOp2 binary_op2) { + return impl::transform_reduce(std::forward(policy), first1, + last1, first2, init, binary_op1, binary_op2); } -template -std::enable_if_t::value, T> -transform_reduce(InputIt first, InputIt last, - T init, BinaryOp binop, UnaryOp unary_op) { - return impl::transform_reduce(distributed_sequential_tag{}, - first, last, init, binop, unary_op); +template +T transform_reduce(InputIt1 first1, InputIt1 last1, InputIt2 first2, T init) { + return transform_reduce(distributed_sequential_tag{}, first1, last1, first2, + init, std::plus<>(), std::multiplies<>()); } -template -std::enable_if_t::value, T> -transform_reduce(ExecutionPolicy&& policy, - ForwardIt1 first1, ForwardIt1 last1, - ForwardIt2 first2, T init) { - return impl::transform_reduce(std::forward(policy), - first1, last1, first2, init, - std::plus<>(), std::multiplies<>()); +template +std::enable_if_t::value, T> +transform_reduce(InputIt1 first1, InputIt1 last1, InputIt2 first2, T init, + BinaryOp1 binary_op1, BinaryOp2 binary_op2) { + return transform_reduce(distributed_sequential_tag{}, first1, last1, first2, + init, binary_op1, binary_op2); } -template -T transform_reduce(ExecutionPolicy&& policy, - ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, - T init, BinaryOp1 binary_op1, BinaryOp2 binary_op2) { - return impl::transform_reduce(std::forward(policy), - first1, last1, first2, init, - binary_op1, binary_op2); +template +std::enable_if_t::value, T> +transform_reduce(InputIt first, InputIt last, T init, BinaryOp binop, + UnaryOp unary_op) { + return 
transform_reduce(distributed_sequential_tag{}, first, last, init, + binop, unary_op); } -template +template std::enable_if_t::value, T> -transform_reduce(ExecutionPolicy&& policy, - ForwardIt first, ForwardIt last, - T init, BinaryOp binary_op, UnaryOp unary_op) { - return impl::transform_reduce(std::forward(policy), - first, last, init, binary_op, unary_op); +transform_reduce(ExecutionPolicy&& policy, ForwardIt1 first1, ForwardIt1 last1, + ForwardIt2 first2, T init) { + return transform_reduce(std::forward(policy), first1, last1, + first2, init, std::plus<>(), std::multiplies<>()); } -template -OutputIt transform_exclusive_scan(InputIt first, InputIt last, - OutputIt d_first, T init, - BinaryOperation binary_op, +//////////////////////////////////////////////////////////////////////////////// +// +// transform_exclusive_scan +// +//////////////////////////////////////////////////////////////////////////////// +template +OutputIt transform_exclusive_scan(InputIt first, InputIt last, OutputIt d_first, + T init, BinaryOperation binary_op, UnaryOperation unary_op) { - return impl::transform_exclusive_scan(distributed_sequential_tag{}, - first, last, d_first, init, - binary_op, unary_op); + return impl::transform_exclusive_scan(distributed_sequential_tag{}, first, + last, d_first, init, binary_op, + unary_op); } -template -ForwardIt2 transform_exclusive_scan(ExecutionPolicy&& policy, - ForwardIt1 first, ForwardIt1 last, - ForwardIt2 d_first, T init, +template +ForwardIt2 transform_exclusive_scan(ExecutionPolicy&& policy, ForwardIt1 first, + ForwardIt1 last, ForwardIt2 d_first, T init, BinaryOperation binary_op, - UnaryOperation unary_op ) { + UnaryOperation unary_op) { return impl::transform_exclusive_scan(std::forward(policy), - first, last, d_first, init, - binary_op, unary_op); + first, last, d_first, init, binary_op, + unary_op); } -template -OutputIt transform_inclusive_scan(InputIt first, InputIt last, - OutputIt d_first, 
+//////////////////////////////////////////////////////////////////////////////// +// +// transform_inclusive_scan +// +//////////////////////////////////////////////////////////////////////////////// +template +OutputIt transform_inclusive_scan(InputIt first, InputIt last, OutputIt d_first, BinaryOperation binary_op, - UnaryOperation unary_op ) { - return impl::transform_inclusive_scan(distributed_sequential_tag{}, - first, last, d_first, - binary_op, unary_op ); + UnaryOperation unary_op) { + return impl::transform_inclusive_scan(distributed_sequential_tag{}, first, + last, d_first, binary_op, unary_op); } template -std::enable_if_t::value, - ForwardIt2> -transform_inclusive_scan(ExecutionPolicy&& policy, - ForwardIt1 first, ForwardIt1 last, - ForwardIt2 d_first, - BinaryOperation binary_op, - UnaryOperation unary_op ) { +std::enable_if_t::value, ForwardIt2> +transform_inclusive_scan(ExecutionPolicy&& policy, ForwardIt1 first, + ForwardIt1 last, ForwardIt2 d_first, + BinaryOperation binary_op, UnaryOperation unary_op) { return impl::transform_inclusive_scan(std::forward(policy), - first, last, d_first, - binary_op, unary_op ); + first, last, d_first, binary_op, + unary_op); } -template +template std::enable_if_t::value, OutputIt> -transform_inclusive_scan(InputIt first, InputIt last, - OutputIt d_first, - BinaryOperation binary_op, - UnaryOperation unary_op, +transform_inclusive_scan(InputIt first, InputIt last, OutputIt d_first, + BinaryOperation binary_op, UnaryOperation unary_op, T init) { - return impl::transform_inclusive_scan(distributed_sequential_tag{}, - first, last, d_first, - binary_op, unary_op, init); + return impl::transform_inclusive_scan(distributed_sequential_tag{}, first, + last, d_first, binary_op, unary_op, + init); } template -ForwardIt2 transform_inclusive_scan(ExecutionPolicy&& policy, - ForwardIt1 first, ForwardIt1 last, - ForwardIt2 d_first, + class BinaryOperation, class UnaryOperation, class T> +ForwardIt2 
transform_inclusive_scan(ExecutionPolicy&& policy, ForwardIt1 first, + ForwardIt1 last, ForwardIt2 d_first, BinaryOperation binary_op, UnaryOperation unary_op, T init) { return impl::transform_inclusive_scan(std::forward(policy), - first, last, d_first, - binary_op, unary_op, init); + first, last, d_first, binary_op, + unary_op, init); } } // namespace shad From be12aee0498dc800b0f6a76fae34b54ff89931cb Mon Sep 17 00:00:00 2001 From: Maurizio Drocco Date: Tue, 5 Feb 2019 14:56:19 -0800 Subject: [PATCH 15/19] [pnnl/SHAD#152] missing header --- include/shad/core/impl/numeric_ops.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/shad/core/impl/numeric_ops.h b/include/shad/core/impl/numeric_ops.h index 25e1dd7c..35c7e1e0 100644 --- a/include/shad/core/impl/numeric_ops.h +++ b/include/shad/core/impl/numeric_ops.h @@ -28,6 +28,7 @@ #include #include #include +#include #include "shad/core/execution.h" #include "shad/distributed_iterator_traits.h" From aad74a6e8c6bedb7dcc0a650e0f5f5c3f26dc853 Mon Sep 17 00:00:00 2001 From: Maurizio Drocco Date: Tue, 5 Feb 2019 15:16:19 -0800 Subject: [PATCH 16/19] [pnnl/SHAD#152] fixed generate test --- test/unit_tests/core/shad_algorithm_test.cc | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/test/unit_tests/core/shad_algorithm_test.cc b/test/unit_tests/core/shad_algorithm_test.cc index e93686b3..dd712415 100644 --- a/test/unit_tests/core/shad_algorithm_test.cc +++ b/test/unit_tests/core/shad_algorithm_test.cc @@ -418,17 +418,18 @@ TYPED_TEST(ATF, shad_transform) { TYPED_TEST(ATF, shad_generate) { using it_t = typename TypeParam::iterator; using val_t = typename TypeParam::value_type; - auto generator = [n = 42]() mutable { return n = std::negate{}(n); }; + auto flip_f = [n = 42]() mutable { return n = std::negate{}(n); }; this->test_void_with_policy( shad::distributed_sequential_tag{}, - shad::generate, - shad_test_stl::generate_, - shad_test_stl::ordered_checksum, generator); + shad::generate, 
+ shad_test_stl::generate_, + shad_test_stl::ordered_checksum, flip_f); + auto const_f = [n = 42]() mutable { return n; }; this->test_void_with_policy( shad::distributed_parallel_tag{}, - shad::generate, - shad_test_stl::generate_, - shad_test_stl::ordered_checksum, generator); + shad::generate, + shad_test_stl::generate_, + shad_test_stl::ordered_checksum, const_f); } // replace From b6a5bf7b54f38811ed7d75dfaff3a4ccfb2b0c6d Mon Sep 17 00:00:00 2001 From: Maurizio Drocco Date: Wed, 6 Feb 2019 14:35:59 -0800 Subject: [PATCH 17/19] [pnnl/SHAD#152] fixes --- include/shad/core/impl/numeric_ops.h | 41 ++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/include/shad/core/impl/numeric_ops.h b/include/shad/core/impl/numeric_ops.h index 35c7e1e0..539a8f27 100644 --- a/include/shad/core/impl/numeric_ops.h +++ b/include/shad/core/impl/numeric_ops.h @@ -60,6 +60,16 @@ void iota(ForwardIterator first, ForwardIterator last, const T& value) { value); } +namespace accumulate_impl { +template +T accumulate(InputIt first, InputIt last, T init, BinaryOperation op) { + for (; first != last; ++first) { + init = op(std::move(init), *first); // std::move since C++20 + } + return init; +} +} // namespace accumulate_impl + template T accumulate(InputIt first, InputIt last, T init, BinaryOperation op) { using itr_traits = distributed_iterator_traits; @@ -299,6 +309,8 @@ template T reduce(distributed_parallel_tag&& policy, InputIt first, InputIt last, T init, BinaryOperation op) { using itr_traits = distributed_iterator_traits; + static_assert(std::is_default_constructible::value, + "reduce requires DefaultConstructible value type"); // distributed map auto map_res = distributed_map( @@ -321,9 +333,12 @@ T reduce(distributed_parallel_tag&& policy, InputIt first, InputIt last, T init, }); // local reduce - auto b = map_res.begin(); - auto res = *b; - while (++b != map_res.end()) res = op(std::move(res), *b); + auto b = map_res.begin(), e = 
map_res.end(); + T res; + if (b != e) { + res = *b++; + res = std::accumulate(b, e, std::move(res), op); + } return res; }, // map arguments @@ -725,8 +740,6 @@ T transform_reduce(distributed_sequential_tag&& policy, ForwardIt first, init, // map arguments op, uop); - - // } // single range - parallel @@ -734,6 +747,9 @@ template T transform_reduce(distributed_parallel_tag&& policy, ForwardIt first, ForwardIt last, T init, BinaryOp op, UnaryOp uop) { using itr_traits = distributed_iterator_traits; + static_assert( + std::is_default_constructible::value, + "transform_reduce requires DefaultConstructible transformed value type"); // distributed map auto map_res = distributed_map( @@ -750,15 +766,18 @@ T transform_reduce(distributed_parallel_tag&& policy, ForwardIt first, lrange.begin(), lrange.end(), // kernel [&](local_iterator_t b, local_iterator_t e) { - auto res = *b; - while (++b != e) res = op(std::move(res), uop(*b)); + auto res = uop(*b++); + for (; b != e; b++) res = op(std::move(res), uop(*b)); return res; }); // local reduce - auto b = map_res.begin(); - auto res = *b; - while (++b != map_res.end()) res = op(std::move(res), *b); + auto b = map_res.begin(), e = map_res.end(); + T res{}; + if (b != e) { + res = *b++; + res = std::accumulate(b, e, std::move(res), op); + } return res; }, // map arguments @@ -1149,7 +1168,7 @@ OutputIt transform_inclusive_scan(distributed_sequential_tag&& policy, }, std::make_tuple(first, last, res.first, res.second, op, uop), &res); } - return res.second; + return res.first; } template Date: Wed, 6 Feb 2019 15:58:37 -0800 Subject: [PATCH 18/19] [pnnl/SHAD#152] fixed distributed_map semantics with empty portions --- include/shad/core/impl/impl_patterns.h | 39 ++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/include/shad/core/impl/impl_patterns.h b/include/shad/core/impl/impl_patterns.h index fbae1340..cc5f7a3d 100755 --- a/include/shad/core/impl/impl_patterns.h +++ 
b/include/shad/core/impl/impl_patterns.h @@ -65,7 +65,7 @@ struct Apply { template inline auto apply_from(F&& f, T&& t) { - return Apply >::value>::apply( + return Apply>::value>::apply( ::std::forward(f), ::std::forward(t)); } @@ -74,6 +74,8 @@ inline auto apply_from(F&& f, T&& t) { // distributed_folding_map applies map_kernel sequentially to each local // portion, forwarding the solution from portion i to portion i + 1. // +// There is *no* guarantee that map_kernel is not invoked on an empty range. +// //////////////////////////////////////////////////////////////////////////////// template S distributed_folding_map(ForwardIt first, ForwardIt last, MapF&& map_kernel, @@ -145,6 +147,16 @@ S distributed_folding_map_early_termination(ForwardIt first, ForwardIt last, // The return type of map_kernel must be DefaultConstructible. // //////////////////////////////////////////////////////////////////////////////// +template +struct optional_vector { + struct entry_t { + T value; + bool valid; + }; + optional_vector(size_t s) : data(s) {} + std::vector data; +}; + // TODO specialize mapped_t to support lambdas returning bool template std::vector< @@ -159,23 +171,38 @@ distributed_map(ForwardIt first, ForwardIt last, MapF&& map_kernel, static_assert( !std::is_same::value, "distributed-map kernels returning bool are not supported (yet)"); + using opt_mapped_t = typename optional_vector::entry_t; auto localities = itr_traits::localities(first, last); size_t i = 0; rt::Handle h; auto d_args = std::make_tuple(map_kernel, first, last, args...); - std::vector res(localities.size()); + optional_vector opt_res(localities.size()); for (auto locality = localities.begin(), end = localities.end(); locality != end; ++locality, ++i) { rt::asyncExecuteAtWithRet( h, locality, - [](rt::Handle&, const typeof(d_args)& d_args, mapped_t* result) { - *result = apply_from<1>(::std::get<0>(d_args), - ::std::forward(d_args)); + [](rt::Handle&, const typeof(d_args)& d_args, opt_mapped_t* result) 
{ + auto first = ::std::get<1>(d_args); + auto last = ::std::get<2>(d_args); + auto lrange = itr_traits::local_range(first, last); + if (lrange.begin() != lrange.end()) { + result->valid = true; + result->value = apply_from<1>( + ::std::get<0>(d_args), ::std::forward(d_args)); + } else { + result->valid = false; + } }, - d_args, &res[i]); + d_args, &opt_res.data[i]); } rt::waitForCompletion(h); + std::vector res; + for (auto& x : opt_res.data) + if (x.valid) { + printf("> pushing=%d\n", x.value); + res.push_back(x.value); + } return res; } From 5677753795a341b9c3bc984baf6ad9fcda3adff4 Mon Sep 17 00:00:00 2001 From: Maurizio Drocco Date: Wed, 6 Feb 2019 16:00:20 -0800 Subject: [PATCH 19/19] [pnnl/SHAD#152] typo --- include/shad/core/impl/impl_patterns.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/include/shad/core/impl/impl_patterns.h b/include/shad/core/impl/impl_patterns.h index cc5f7a3d..39a08c9a 100755 --- a/include/shad/core/impl/impl_patterns.h +++ b/include/shad/core/impl/impl_patterns.h @@ -199,10 +199,8 @@ distributed_map(ForwardIt first, ForwardIt last, MapF&& map_kernel, rt::waitForCompletion(h); std::vector res; for (auto& x : opt_res.data) - if (x.valid) { - printf("> pushing=%d\n", x.value); + if (x.valid) res.push_back(x.value); - } return res; }