From 0c9366b6c609c04e0281a637dc50af0c77f47004 Mon Sep 17 00:00:00 2001 From: Hartmut Kaiser Date: Tue, 21 Jul 2015 10:39:36 -0500 Subject: [PATCH 1/4] Streamlining transpose examples, adding transpose_await --- examples/transpose/CMakeLists.txt | 6 + examples/transpose/transpose.cpp | 55 +-- examples/transpose/transpose_await.cpp | 460 +++++++++++++++++++++ examples/transpose/transpose_smp.cpp | 6 +- examples/transpose/transpose_smp_block.cpp | 3 +- 5 files changed, 500 insertions(+), 30 deletions(-) create mode 100644 examples/transpose/transpose_await.cpp diff --git a/examples/transpose/CMakeLists.txt b/examples/transpose/CMakeLists.txt index 91a8daf56fed..21787d2b7fc6 100644 --- a/examples/transpose/CMakeLists.txt +++ b/examples/transpose/CMakeLists.txt @@ -13,6 +13,12 @@ set(example_programs transpose_serial_vector ) +if(HPX_WITH_AWAIT) + set(example_programs + ${example_programs} + transpose_await) +endif() + foreach(example_program ${example_programs}) set(sources ${example_program}.cpp) diff --git a/examples/transpose/transpose.cpp b/examples/transpose/transpose.cpp index 7b80640c5721..3d50a79f1cb7 100644 --- a/examples/transpose/transpose.cpp +++ b/examples/transpose/transpose.cpp @@ -5,8 +5,6 @@ #include #include -#include -#include #include #include @@ -172,8 +170,7 @@ typedef block_component::get_sub_block_action get_sub_block_action; HPX_REGISTER_ACTION(get_sub_block_action); void transpose(hpx::future A, hpx::future B, - hpx::future block_order, - hpx::future tile_size); + boost::uint64_t block_order, boost::uint64_t tile_size); double test_results(boost::uint64_t order, boost::uint64_t block_order, std::vector & trans, boost::uint64_t blocks_start, boost::uint64_t blocks_end); @@ -214,7 +211,7 @@ int hpx_main(boost::program_options::variables_map& vm) boost::uint64_t blocks_end = (id + 1) * num_local_blocks; // Actually allocate the block components in AGAS - for(boost::uint64_t b = 0; b < num_blocks; ++b) + for(boost::uint64_t b = 0; b != num_blocks; ++b) { // Allocate block if(b >= blocks_start && b < blocks_end) @@ -247,7 +244,6 @@ int hpx_main(boost::program_options::variables_map& vm) } using hpx::parallel::for_each; using hpx::parallel::par; - using hpx::parallel::task; // Fill the original matrix, set transpose to known garbage value. auto range = boost::irange(blocks_start, blocks_end); @@ -259,9 +255,9 @@ int hpx_main(boost::program_options::variables_map& vm) boost::shared_ptr B_ptr = hpx::get_ptr(B[b].get_gid()).get(); - for(boost::uint64_t i = 0; i < order; ++i) + for(boost::uint64_t i = 0; i != order; ++i) { - for(boost::uint64_t j = 0; j < block_order; ++j) + for(boost::uint64_t j = 0; j != block_order; ++j) { double col_val = COL_SHIFT * (b*block_order + j); A_ptr->data_[i * block_order + j] = col_val + ROW_SHIFT * i; @@ -282,15 +278,17 @@ int hpx_main(boost::program_options::variables_map& vm) auto range = boost::irange(blocks_start, blocks_end); - std::vector > block_futures; + std::vector > block_futures; block_futures.resize(num_local_blocks); for_each(par, boost::begin(range), boost::end(range), [&](boost::uint64_t b) { std::vector > phase_futures; - //phase_futures.resize(num_local_blocks); - auto phase_range = boost::irange(static_cast(0), num_blocks); + phase_futures.reserve(num_blocks); + + auto phase_range = boost::irange( + static_cast(0), num_blocks); for(boost::uint64_t phase: phase_range) { const boost::uint64_t block_size = block_order * block_order; @@ -298,19 +296,20 @@ int hpx_main(boost::program_options::variables_map& vm) const boost::uint64_t from_phase = b; const boost::uint64_t A_offset = from_phase * block_size; const boost::uint64_t B_offset = phase * block_size; + phase_futures.push_back( hpx::lcos::local::dataflow( &transpose , A[from_block].get_sub_block(A_offset, block_size) , B[b].get_sub_block(B_offset, block_size) - , hpx::make_ready_future(block_order) - , hpx::make_ready_future(tile_size) + , block_order + , tile_size ) ); } block_futures[b - blocks_start] = - hpx::when_all(phase_futures).share(); + hpx::when_all(phase_futures); } ); @@ -389,22 +388,23 @@ int main(int argc, char* argv[]) } void transpose(hpx::future Af, hpx::future Bf, - hpx::future block_order_fut, - hpx::future tile_size_fut) + boost::uint64_t block_order, boost::uint64_t tile_size) { const sub_block A(Af.get()); sub_block B(Bf.get()); - boost::uint64_t block_order(block_order_fut.get()); - boost::uint64_t tile_size(tile_size_fut.get()); + if(tile_size < block_order) { - for(boost::uint64_t i = 0; i < block_order; i += tile_size) + for(boost::uint64_t i = 0; i != block_order; i += tile_size) { - for(boost::uint64_t j = 0; j < block_order; j += tile_size) + for(boost::uint64_t j = 0; j != block_order; j += tile_size) { - for(boost::uint64_t it = i; it < (std::min)(block_order, i + tile_size); ++it) + boost::uint64_t max_i = (std::min)(block_order, i + tile_size); + boost::uint64_t max_j = (std::min)(block_order, j + tile_size); + + for(boost::uint64_t it = i; it != max_i; ++it) { - for(boost::uint64_t jt = j; jt < (std::min)(block_order, j + tile_size); ++jt) + for(boost::uint64_t jt = j; jt != max_j; ++jt) { B[it + block_order * jt] = A[jt + block_order * it]; } @@ -414,9 +414,9 @@ void transpose(hpx::future Af, hpx::future Bf, } else { - for(boost::uint64_t i = 0; i < block_order; ++i) + for(boost::uint64_t i = 0; i != block_order; ++i) { - for(boost::uint64_t j = 0; j < block_order; ++j) + for(boost::uint64_t j = 0; j != block_order; ++j) { B[i + block_order * j] = A[j + block_order * i]; } @@ -428,7 +428,7 @@ double test_results(boost::uint64_t order, boost::uint64_t block_order, std::vector & trans, boost::uint64_t blocks_start, boost::uint64_t blocks_end) { - using hpx::parallel::for_each; + using hpx::parallel::transform_reduce; using hpx::parallel::par; // Fill the original matrix, set transpose to known garbage value. @@ -437,7 +437,8 @@ double test_results(boost::uint64_t order, boost::uint64_t block_order, transform_reduce(par, boost::begin(range), boost::end(range), [&](boost::uint64_t b) -> double { - sub_block trans_block = trans[b].get_sub_block(0, order * block_order).get(); + sub_block trans_block = + trans[b].get_sub_block(0, order * block_order).get(); double errsq = 0.0; for(boost::uint64_t i = 0; i < order; ++i) { @@ -445,7 +446,7 @@ double test_results(boost::uint64_t order, boost::uint64_t block_order, for(boost::uint64_t j = 0; j < block_order; ++j) { double diff = trans_block[i * block_order + j] - - (col_val + ROW_SHIFT * (b * block_order + j)); + (col_val + ROW_SHIFT * (b * block_order + j)); errsq += diff * diff; } } diff --git a/examples/transpose/transpose_await.cpp b/examples/transpose/transpose_await.cpp new file mode 100644 index 000000000000..43fe4fd46067 --- /dev/null +++ b/examples/transpose/transpose_await.cpp @@ -0,0 +1,460 @@ +// Copyright (c) 2014 Thomas Heller +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include + +#include +#include + +#include + +#include +#include + +#define COL_SHIFT 1000.00 // Constant to shift column index +#define ROW_SHIFT 0.001 // Constant to shift row index + +bool verbose = false; + +char const* A_block_basename = "/transpose/block/A"; +char const* B_block_basename = "/transpose/block/B"; + +struct sub_block +{ + enum mode { + reference + , owning + }; + + sub_block() + : size_(0) + , data_(0) + , mode_(reference) + {} + + sub_block(double * data, boost::uint64_t size) + : size_(size) + , data_(data) + , mode_(reference) + {} + + ~sub_block() + { + if(data_ && mode_ == owning) + { + delete[] data_; + } + } + + sub_block(sub_block && other) + : size_(other.size_) + , data_(other.data_) + , mode_(other.mode_) + { + if(mode_ == owning) { other.data_ = 0; other.size_ = 0; } + } + + sub_block & operator=(sub_block && other) + { + size_ = other.size_; + data_ = other.data_; + mode_ = other.mode_; + if(mode_ == owning) { other.data_ = 0; other.size_ = 0; } + + return *this; + } + + double operator[](std::size_t i) const + { + HPX_ASSERT(data_); + return data_[i]; + } + + double & operator[](std::size_t i) + { + HPX_ASSERT(data_); + HPX_ASSERT(mode_ == reference); + return data_[i]; + } + + void load(hpx::serialization::input_archive & ar, unsigned version) + { + ar & size_; + if(size_ > 0) + { + data_ = new double[size_]; + hpx::serialization::array arr(data_, size_); + ar >> arr; + mode_ = owning; + } + } + + void save(hpx::serialization::output_archive & ar, unsigned version) const + { + ar & size_; + if(size_ > 0) + { + hpx::serialization::array arr(data_, size_); + ar << arr; + } + } + + HPX_SERIALIZATION_SPLIT_MEMBER() + + boost::uint64_t size_; + double * data_; + mode mode_; + + HPX_MOVABLE_BUT_NOT_COPYABLE(sub_block); +}; + +struct block_component + : hpx::components::simple_component_base +{ + block_component() {} + + block_component(boost::uint64_t size) + : data_(size) + {} + + sub_block get_sub_block(boost::uint64_t offset, boost::uint64_t size) + { + HPX_ASSERT(!data_.empty()); + return sub_block(&data_[offset], size); + } + + HPX_DEFINE_COMPONENT_ACTION(block_component, get_sub_block); + + std::vector data_; +}; + +struct block + : hpx::components::client_base +{ + typedef hpx::components::client_base base_type; + block() {} + + block(boost::uint64_t id, const char * base_name) + : base_type(hpx::find_id_from_basename(base_name, id)) + { + get_gid(); + } + + block(boost::uint64_t id, boost::uint64_t size, const char * base_name) + : base_type(hpx::new_(hpx::find_here(), size)) + { + hpx::register_id_with_basename(base_name, get_gid(), id); + } + + hpx::future + get_sub_block(boost::uint64_t offset, boost::uint64_t size) const + { + block_component::get_sub_block_action act; + return hpx::async(act, get_gid(), offset, size); + } +}; + +// The macros below are necessary to generate the code required for exposing +// our block_component type remotely. +// +// HPX_REGISTER_COMPONENT() exposes the component creation +// through hpx::new_<>(). +typedef hpx::components::simple_component block_component_type; +HPX_REGISTER_COMPONENT(block_component_type, block_component); + +// HPX_REGISTER_ACTION() exposes the component member function for remote +// invocation. +typedef block_component::get_sub_block_action get_sub_block_action; +HPX_REGISTER_ACTION(get_sub_block_action); + +void transpose(sub_block const A, sub_block B, + boost::uint64_t block_order, boost::uint64_t tile_size); + +double test_results(boost::uint64_t order, boost::uint64_t block_order, + std::vector & trans, boost::uint64_t blocks_start, + boost::uint64_t blocks_end); + +//////////////////////////////////////////////////////////////////////////////// +hpx::future transpose_phase( + std::vector const& A, std::vector& B, + boost::uint64_t block_order, boost::uint64_t b, + boost::uint64_t num_blocks, boost::uint64_t num_local_blocks, + boost::uint64_t block_size, boost::uint64_t tile_size) +{ + const boost::uint64_t from_phase = b; + const boost::uint64_t A_offset = from_phase * block_size; + + auto phase_range = boost::irange( + static_cast(0), num_blocks); + for(boost::uint64_t phase: phase_range) + { + const boost::uint64_t from_block = phase; + const boost::uint64_t B_offset = phase * block_size; + + hpx::future from = + A[from_block].get_sub_block(A_offset, block_size); + hpx::future to = + B[b].get_sub_block(B_offset, block_size); + + transpose(__await from, __await to, block_order, tile_size); + } +} + +/////////////////////////////////////////////////////////////////////////////// +int hpx_main(boost::program_options::variables_map& vm) +{ + { + hpx::id_type here = hpx::find_here(); + bool root = here == hpx::find_root_locality(); + + boost::uint64_t num_localities = hpx::get_num_localities().get(); + + boost::uint64_t order = vm["matrix_size"].as(); + boost::uint64_t iterations = vm["iterations"].as(); + boost::uint64_t num_local_blocks = vm["num_blocks"].as(); + boost::uint64_t tile_size = order; + + if(vm.count("tile_size")) + tile_size = vm["tile_size"].as(); + + verbose = vm.count("verbose") ? true : false; + + boost::uint64_t bytes = + static_cast(2.0 * sizeof(double) * order * order); + + boost::uint64_t num_blocks = num_localities * num_local_blocks; + + boost::uint64_t block_order = order / num_blocks; + boost::uint64_t col_block_size = order * block_order; + + boost::uint64_t id = hpx::get_locality_id(); + + std::vector A(num_blocks); + std::vector B(num_blocks); + + boost::uint64_t blocks_start = id * num_local_blocks; + boost::uint64_t blocks_end = (id + 1) * num_local_blocks; + + // Actually allocate the block components in AGAS + for(boost::uint64_t b = 0; b != num_blocks; ++b) + { + // Allocate block + if(b >= blocks_start && b != blocks_end) + { + A[b] = block(b, col_block_size, A_block_basename); + B[b] = block(b, col_block_size, B_block_basename); + } + // Retrieve the block by it's symbolic name + else + { + A[b] = block(b, A_block_basename); + B[b] = block(b, B_block_basename); + } + } + + if(root) + { + std::cout + << "Serial Matrix transpose: B = A^T\n" + << "Matrix order = " << order << "\n" + << "Matrix local columns = " << block_order << "\n" + << "Number of blocks = " << num_blocks << "\n" + << "Number of localities = " << num_localities << "\n"; + if(tile_size < order) + std::cout << "Tile size = " << tile_size << "\n"; + else + std::cout << "Untiled\n"; + std::cout + << "Number of iterations = " << iterations << "\n"; + } + using hpx::parallel::for_each; + using hpx::parallel::par; + + // Fill the original matrix, set transpose to known garbage value. + auto range = boost::irange(blocks_start, blocks_end); + for_each(par, boost::begin(range), boost::end(range), + [&](boost::uint64_t b) + { + boost::shared_ptr A_ptr = + hpx::get_ptr(A[b].get_gid()).get(); + boost::shared_ptr B_ptr = + hpx::get_ptr(B[b].get_gid()).get(); + + for(boost::uint64_t i = 0; i != order; ++i) + { + for(boost::uint64_t j = 0; j != block_order; ++j) + { + double col_val = COL_SHIFT * (b*block_order + j); + A_ptr->data_[i * block_order + j] = col_val + ROW_SHIFT * i; + B_ptr->data_[i * block_order + j] = -1.0; + } + } + } + ); + + double errsq = 0.0; + double avgtime = 0.0; + double maxtime = 0.0; + double mintime = 366.0 * 24.0*3600.0; // set the minimum time to a large value; + // one leap year should be enough + for(boost::uint64_t iter = 0; iter < iterations; ++iter) + { + hpx::util::high_resolution_timer t; + + auto range = boost::irange(blocks_start, blocks_end); + + const boost::uint64_t block_size = block_order * block_order; + for_each(par, boost::begin(range), boost::end(range), + [&](boost::uint64_t b) + { + transpose_phase(A, B, block_order, b, + num_blocks, num_local_blocks, block_size, tile_size + ); + }); + + double elapsed = t.elapsed(); + + if(iter > 0 || iterations == 1) // Skip the first iteration + { + avgtime = avgtime + elapsed; + maxtime = (std::max)(maxtime, elapsed); + mintime = (std::min)(mintime, elapsed); + } + + if(root) + errsq += test_results(order, block_order, B, blocks_start, blocks_end); + } // end of iter loop + + // Analyze and output results + + double epsilon = 1.e-8; + if(root) + { + if(errsq < epsilon) + { + std::cout << "Solution validates\n"; + avgtime = avgtime/static_cast( + (std::max)(iterations-1, static_cast(1))); + std::cout + << "Rate (MB/s): " << 1.e-6 * bytes/mintime << ", " + << "Avg time (s): " << avgtime << ", " + << "Min time (s): " << mintime << ", " + << "Max time (s): " << maxtime << "\n"; + + if(verbose) + std::cout << "Squared errors: " << errsq << "\n"; + } + else + { + std::cout + << "ERROR: Aggregate squared error " << errsq + << " exceeds threshold " << epsilon << "\n"; + hpx::terminate(); + } + } + } + + return hpx::finalize(); +} + +int main(int argc, char* argv[]) +{ + using namespace boost::program_options; + + options_description desc_commandline; + desc_commandline.add_options() + ("matrix_size", value()->default_value(1024), + "Matrix Size") + ("iterations", value()->default_value(10), + "# iterations") + ("tile_size", value(), + "Number of tiles to divide the individual matrix blocks for improved " + "cache and TLB performance") + ("num_blocks", value()->default_value(1), + "Number of blocks to divide the individual matrix blocks for " + "improved cache and TLB performance") + ( "verbose", "Verbose output") + ; + + // Initialize and run HPX, this example requires to run hpx_main on all + // localities + std::vector cfg; + cfg.push_back("hpx.run_hpx_main!=1"); + + return hpx::init(desc_commandline, argc, argv, cfg); +} + +void transpose(sub_block const A, sub_block B, + boost::uint64_t block_order, boost::uint64_t tile_size) +{ + if(tile_size < block_order) + { + for(boost::uint64_t i = 0; i != block_order; i += tile_size) + { + for(boost::uint64_t j = 0; j != block_order; j += tile_size) + { + boost::uint64_t max_i = (std::min)(block_order, i + tile_size); + boost::uint64_t max_j = (std::min)(block_order, j + tile_size); + + for(boost::uint64_t it = i; it != max_i; ++it) + { + for(boost::uint64_t jt = j; jt != max_j; ++jt) + { + B[it + block_order * jt] = A[jt + block_order * it]; + } + } + } + } + } + else + { + for(boost::uint64_t i = 0; i != block_order; ++i) + { + for(boost::uint64_t j = 0; j != block_order; ++j) + { + B[i + block_order * j] = A[j + block_order * i]; + } + } + } +} + +double test_results(boost::uint64_t order, boost::uint64_t block_order, + std::vector & trans, boost::uint64_t blocks_start, + boost::uint64_t blocks_end) +{ + using hpx::parallel::transform_reduce; + using hpx::parallel::par; + + // Fill the original matrix, set transpose to known garbage value. + auto range = boost::irange(blocks_start, blocks_end); + double errsq = + transform_reduce(par, boost::begin(range), boost::end(range), + [&](boost::uint64_t b) -> double + { + sub_block trans_block = + trans[b].get_sub_block(0, order * block_order).get(); + double errsq = 0.0; + for(boost::uint64_t i = 0; i < order; ++i) + { + double col_val = COL_SHIFT * i; + for(boost::uint64_t j = 0; j < block_order; ++j) + { + double diff = trans_block[i * block_order + j] - + (col_val + ROW_SHIFT * (b * block_order + j)); + errsq += diff * diff; + } + } + return errsq; + }, + 0.0, + [](double lhs, double rhs) { return lhs + rhs; } + ); + + if(verbose) + std::cout << " Squared sum of differences: " << errsq << "\n"; + + return errsq; +} diff --git a/examples/transpose/transpose_smp.cpp b/examples/transpose/transpose_smp.cpp index 3344a124cd80..367c3fec9071 100644 --- a/examples/transpose/transpose_smp.cpp +++ b/examples/transpose/transpose_smp.cpp @@ -86,9 +86,10 @@ int hpx_main(boost::program_options::variables_map& vm) for(boost::uint64_t j = 0; j < order; j += tile_size) { boost::uint64_t i_max = (std::min)(order, i + tile_size); + boost::uint64_t j_max = (std::min)(order, j + tile_size); + for(boost::uint64_t it = i; it < i_max; ++it) { - boost::uint64_t j_max = (std::min)(order, j + tile_size); for(boost::uint64_t jt = j; jt < j_max; ++jt) { B[it + order * jt] = A[jt + order * it]; @@ -189,7 +190,8 @@ double test_results(boost::uint64_t order, std::vector const & trans) double errsq = 0.0; for(boost::uint64_t j = 0; j < order; ++j) { - double diff = trans[i * order + j] - (COL_SHIFT*i + ROW_SHIFT * j); + double diff = trans[i * order + j] - + (COL_SHIFT*i + ROW_SHIFT * j); errsq += diff * diff; } return errsq; diff --git a/examples/transpose/transpose_smp_block.cpp b/examples/transpose/transpose_smp_block.cpp index 10d3a0d4bce2..bcd6511454c1 100644 --- a/examples/transpose/transpose_smp_block.cpp +++ b/examples/transpose/transpose_smp_block.cpp @@ -191,9 +191,10 @@ void transpose(sub_block A, sub_block B, boost::uint64_t block_order, for(boost::uint64_t j = 0; j < block_order; j += tile_size) { boost::uint64_t i_max = (std::min)(block_order, i + tile_size); + boost::uint64_t j_max = (std::min)(block_order, j + tile_size); + for(boost::uint64_t it = i; it < i_max; ++it) { - boost::uint64_t j_max = (std::min)(block_order, j + tile_size); for(boost::uint64_t jt = j; jt < j_max; ++jt) { B[it + block_order * jt] = A[jt + block_order * it]; From ea083d1f70a0bd14f82d1e92905162e9315e387e Mon Sep 17 00:00:00 2001 From: Hartmut Kaiser Date: Tue, 21 Jul 2015 12:13:22 -0500 Subject: [PATCH 2/4] Fixing transpose_await --- examples/transpose/transpose_await.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/examples/transpose/transpose_await.cpp b/examples/transpose/transpose_await.cpp index 43fe4fd46067..8bd0e75112af 100644 --- a/examples/transpose/transpose_await.cpp +++ b/examples/transpose/transpose_await.cpp @@ -177,8 +177,10 @@ double test_results(boost::uint64_t order, boost::uint64_t block_order, std::vector & trans, boost::uint64_t blocks_start, boost::uint64_t blocks_end); -//////////////////////////////////////////////////////////////////////////////// -hpx::future transpose_phase( +/////////////////////////////////////////////////////////////////////////////// +// The returned value type has to be the same as the return type used for +// __await below +hpx::future transpose_phase( std::vector const& A, std::vector& B, boost::uint64_t block_order, boost::uint64_t b, boost::uint64_t num_blocks, boost::uint64_t num_local_blocks, @@ -201,6 +203,8 @@ hpx::future transpose_phase( transpose(__await from, __await to, block_order, tile_size); } + + return sub_block(); } /////////////////////////////////////////////////////////////////////////////// @@ -312,7 +316,7 @@ int hpx_main(boost::program_options::variables_map& vm) { transpose_phase(A, B, block_order, b, num_blocks, num_local_blocks, block_size, tile_size - ); + ).get(); }); double elapsed = t.elapsed(); From d133a61d52cf3fb7e5b2ce53a3f84a32a0d34564 Mon Sep 17 00:00:00 2001 From: Hartmut Kaiser Date: Fri, 24 Jul 2015 12:53:01 -0500 Subject: [PATCH 3/4] get_shared_state() has been moved to namespace traits::detail --- hpx/lcos/local/promise.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hpx/lcos/local/promise.hpp b/hpx/lcos/local/promise.hpp index 52a25123f7d7..d8a8d910a2f4 100644 --- a/hpx/lcos/local/promise.hpp +++ b/hpx/lcos/local/promise.hpp @@ -505,7 +505,7 @@ namespace hpx { namespace lcos std::experimental::coroutine_handle rh) { // f.then([=](future result) mutable - lcos::detail::get_shared_state(f)->set_on_completed(rh); + traits::detail::get_shared_state(f)->set_on_completed(rh); } template From 831ed59e695d312c1532c9d747cf8d0398ba61bd Mon Sep 17 00:00:00 2001 From: Thomas Heller Date: Fri, 24 Jul 2015 21:21:13 +0200 Subject: [PATCH 4/4] Fixing copy&paste error --- examples/transpose/transpose_await.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/transpose/transpose_await.cpp b/examples/transpose/transpose_await.cpp index 8bd0e75112af..424844e0a016 100644 --- a/examples/transpose/transpose_await.cpp +++ b/examples/transpose/transpose_await.cpp @@ -262,7 +262,7 @@ int hpx_main(boost::program_options::variables_map& vm) if(root) { std::cout - << "Serial Matrix transpose: B = A^T\n" + << "Distributed HPX Matrix transpose (await): B = A^T\n" << "Matrix order = " << order << "\n" << "Matrix local columns = " << block_order << "\n" << "Number of blocks = " << num_blocks << "\n"