Skip to content

Commit

Permalink
Adding test
Browse files Browse the repository at this point in the history
- applying Vc build system settings to relevant files only
  • Loading branch information
hkaiser committed Sep 15, 2016
1 parent 09934f7 commit 98583d8
Show file tree
Hide file tree
Showing 15 changed files with 293 additions and 105 deletions.
4 changes: 4 additions & 0 deletions cmake/HPX_SetupVc.cmake
Expand Up @@ -39,6 +39,10 @@ if(Vc_FOUND)
link_directories(${Vc_LIB_DIR})
hpx_libraries(${Vc_LIBRARIES})

foreach(_flag ${Vc_DEFINITIONS})
add_definitions(${_flag})
endforeach()

hpx_add_config_define(HPX_HAVE_VC_DATAPAR)
endif()

15 changes: 10 additions & 5 deletions hpx/config/compiler_specific.hpp
Expand Up @@ -60,11 +60,12 @@
#endif

#if defined(_MSC_VER)
# define HPX_MSVC _MSC_VER
# define HPX_WINDOWS
# if defined(__NVCC__)
# define HPX_SINGLE_INHERITANCE __single_inheritance
# endif
# define HPX_MSVC _MSC_VER
# define HPX_WINDOWS
# if defined(__NVCC__)
# define HPX_SINGLE_INHERITANCE __single_inheritance
# endif
# define HPX_CDECL __cdecl
#endif

#if defined(__MINGW32__)
Expand All @@ -84,5 +85,9 @@
#define HPX_SINGLE_INHERITANCE /* empty */
#endif

// Default definition: if HPX_CDECL has not been defined by this point, make
// it expand to nothing so it can be used unconditionally in declarations.
#if !defined(HPX_CDECL)
#define HPX_CDECL
#endif

#endif

2 changes: 1 addition & 1 deletion hpx/hpx_main_impl.hpp
Expand Up @@ -25,7 +25,7 @@
//
// Note: this function is intentionally not marked as inline
//
int main(int argc, char** argv)
int HPX_CDECL main(int argc, char** argv)
{
// allow for unknown options
std::vector<std::string> const cfg = {
Expand Down
43 changes: 42 additions & 1 deletion hpx/parallel/algorithms/detail/predicates.hpp
Expand Up @@ -260,7 +260,7 @@ namespace hpx { namespace parallel { HPX_INLINE_NAMESPACE(v1) { namespace detail
{
template <typename T1, typename T2>
auto operator()(T1 const& t1, T2 const& t2) const
-> decltype(t1 == t2)
-> decltype(t1 < t2)
{
return t1 < t2;
}
Expand All @@ -285,6 +285,47 @@ namespace hpx { namespace parallel { HPX_INLINE_NAMESPACE(v1) { namespace detail
return (std::max)(t1, t2);
}
};

///////////////////////////////////////////////////////////////////////////
/// Addition function object whose operand types are deduced on every call.
///
/// The call operator accepts two (possibly different) operand types and
/// returns whatever type the expression `lhs + rhs` yields; it only
/// participates in overload resolution if that expression is well-formed.
struct plus
{
    template <typename Lhs, typename Rhs>
    auto operator()(Lhs const& lhs, Rhs const& rhs) const
    ->  decltype(lhs + rhs)
    {
        return lhs + rhs;
    }
};

/// Subtraction function object whose operand types are deduced on every call.
///
/// Returns the result of `lhs - rhs`, with the return type taken from that
/// expression; the operator is only viable if the expression is well-formed.
struct minus
{
    template <typename Lhs, typename Rhs>
    auto operator()(Lhs const& lhs, Rhs const& rhs) const
    ->  decltype(lhs - rhs)
    {
        return lhs - rhs;
    }
};

/// Multiplication function object whose operand types are deduced on every
/// call.
///
/// Returns the result of `lhs * rhs`, with the return type taken from that
/// expression; the operator is only viable if the expression is well-formed.
struct multiplies
{
    template <typename Lhs, typename Rhs>
    auto operator()(Lhs const& lhs, Rhs const& rhs) const
    ->  decltype(lhs * rhs)
    {
        return lhs * rhs;
    }
};

/// Division function object whose operand types are deduced on every call.
///
/// Returns the result of `lhs / rhs`, with the return type taken from that
/// expression (so two integers divide as integers); the operator is only
/// viable if the expression is well-formed.
struct divides
{
    template <typename Lhs, typename Rhs>
    auto operator()(Lhs const& lhs, Rhs const& rhs) const
    ->  decltype(lhs / rhs)
    {
        return lhs / rhs;
    }
};
}}}}

#endif
2 changes: 1 addition & 1 deletion hpx/parallel/algorithms/inner_product.hpp
Expand Up @@ -312,7 +312,7 @@ namespace hpx { namespace parallel { HPX_INLINE_NAMESPACE(v1)

return detail::inner_product<T>().call(
std::forward<ExPolicy>(policy), is_seq(), first1, last1, first2,
std::move(init), std::plus<T>(), std::multiplies<T>());
std::move(init), detail::plus(), detail::multiplies());
}

///////////////////////////////////////////////////////////////////////////
Expand Down
85 changes: 45 additions & 40 deletions hpx/parallel/datapar/detail/iterator_helpers.hpp
Expand Up @@ -138,7 +138,7 @@ namespace hpx { namespace parallel { namespace util
return hpx::util::invoke(f, &tmp);
}

template <typename F, typename Iter>
template <typename F>
HPX_HOST_DEVICE HPX_FORCEINLINE
static typename std::result_of<F&&(V*)>::type
callv(F && f, Iter& it)
Expand Down Expand Up @@ -169,8 +169,8 @@ namespace hpx { namespace parallel { namespace util
{
V11 tmp1(std::addressof(*it1), Vc::Aligned);
V12 tmp2(std::addressof(*it2), Vc::Aligned);
it1 += V11::Size;
it2 += V12::Size;
std::advance(it1, V11::Size);
std::advance(it2, V12::Size);
return hpx::util::invoke(f, &tmp1, &tmp2);
}

Expand All @@ -179,17 +179,19 @@ namespace hpx { namespace parallel { namespace util
static typename std::result_of<F&&(V1*, V2*)>::type
callv(F && f, Iter1& it1, Iter2& it2)
{
// if (data_alignment(it1) || data_alignment(it2))
// {
// V1 tmp1(std::addressof(*it1), Vc::Unaligned);
// V2 tmp2(std::addressof(*it2), Vc::Unaligned);
// return hpx::util::invoke(f, &tmp1, &tmp2);
// }
if (data_alignment(it1) || data_alignment(it2))
{
V1 tmp1(std::addressof(*it1), Vc::Unaligned);
V2 tmp2(std::addressof(*it2), Vc::Unaligned);
std::advance(it1, V1::Size);
std::advance(it2, V2::Size);
return hpx::util::invoke(f, &tmp1, &tmp2);
}

V1 tmp1(std::addressof(*it1), Vc::Aligned);
V2 tmp2(std::addressof(*it2), Vc::Aligned);
it1 += V1::Size;
it2 += V2::Size;
std::advance(it1, V1::Size);
std::advance(it2, V2::Size);
return hpx::util::invoke(f, &tmp1, &tmp2);
}
};
Expand All @@ -210,8 +212,8 @@ namespace hpx { namespace parallel { namespace util
auto ret = hpx::util::invoke(f, &tmp);
ret.store(std::addressof(*dest), Vc::Aligned);

it += V1::Size;
dest += ret.size();
std::advance(it, V1::Size);
std::advance(dest, ret.size());
}

template <typename F, typename InIter1, typename InIter2,
Expand All @@ -232,9 +234,9 @@ namespace hpx { namespace parallel { namespace util
auto ret = hpx::util::invoke(f, &tmp1, &tmp2);
ret.store(std::addressof(*dest), Vc::Aligned);

it1 += V1::Size;
it2 += V2::Size;
dest += ret.size();
std::advance(it1, V1::Size);
std::advance(it2, V2::Size);
std::advance(dest, ret.size());
}

///////////////////////////////////////////////////////////////////
Expand All @@ -247,19 +249,22 @@ namespace hpx { namespace parallel { namespace util

typedef Vc::Vector<value_type> V;

// if (data_alignment(it) || data_alignment(dest))
// {
// V tmp(std::addressof(*it), Vc::Unaligned);
// auto ret = hpx::util::invoke(f, &tmp);
// ret.store(std::addressof(*dest), Vc::Unaligned);
// }

V tmp(std::addressof(*it), Vc::Aligned);
auto ret = hpx::util::invoke(f, &tmp);
ret.store(std::addressof(*dest), Vc::Aligned);

it += V::Size;
dest += ret.size();
if (data_alignment(it) || data_alignment(dest))
{
V tmp(std::addressof(*it), Vc::Unaligned);
auto ret = hpx::util::invoke(f, &tmp);
ret.store(std::addressof(*dest), Vc::Unaligned);
std::advance(dest, ret.size());
}
else
{
V tmp(std::addressof(*it), Vc::Aligned);
auto ret = hpx::util::invoke(f, &tmp);
ret.store(std::addressof(*dest), Vc::Aligned);
std::advance(dest, ret.size());
}

std::advance(it, V::Size);
}

template <typename F, typename InIter1, typename InIter2,
Expand All @@ -275,23 +280,23 @@ namespace hpx { namespace parallel { namespace util
typedef Vc::Vector<value1_type> V1;
typedef Vc::Vector<value2_type> V2;

// if (data_alignment(it1) || data_alignment(it2) ||
// data_alignment(dest))
// {
// V1 tmp1(std::addressof(*it1), Vc::Unaligned);
// V2 tmp2(std::addressof(*it2), Vc::Unaligned);
// auto ret = hpx::util::invoke(f, &tmp1, &tmp2);
// ret.store(std::addressof(*dest), Vc::Unaligned);
// }
if (data_alignment(it1) || data_alignment(it2) ||
data_alignment(dest))
{
V1 tmp1(std::addressof(*it1), Vc::Unaligned);
V2 tmp2(std::addressof(*it2), Vc::Unaligned);
auto ret = hpx::util::invoke(f, &tmp1, &tmp2);
ret.store(std::addressof(*dest), Vc::Unaligned);
}

V1 tmp1(std::addressof(*it1), Vc::Aligned);
V2 tmp2(std::addressof(*it2), Vc::Aligned);
auto ret = hpx::util::invoke(f, &tmp1, &tmp2);
ret.store(std::addressof(*dest), Vc::Aligned);

it1 += V1::Size;
it2 += V2::Size;
dest += ret.size();
std::advance(it1, V1::Size);
std::advance(it2, V2::Size);
std::advance(dest, ret.size());
}
};
}
Expand Down
15 changes: 12 additions & 3 deletions hpx/parallel/datapar/loop.hpp
Expand Up @@ -124,7 +124,7 @@ namespace hpx { namespace parallel { namespace util
bool loop_optimization(parallel::v1::datapar_task_execution_policy,
Iter const& first1, Iter const& last1)
{
return detail::loop_optimization<Iter1>::call(first1, last1);
return detail::loop_optimization<Iter>::call(first1, last1);
}

///////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -366,9 +366,9 @@ namespace hpx { namespace parallel { namespace util
!iterators_datapar_compatible<InIter1, InIter2>::value ||
!iterator_datapar_compatible<InIter1>::value ||
!iterator_datapar_compatible<InIter2>::value,
std::pair<Iter1, Iter2>
std::pair<InIter1, InIter2>
>::type
call(Iter1 it1, Iter1 last1, Iter2 it2, F && f)
call(InIter1 it1, InIter1 last1, InIter2 it2, F && f)
{
return std::make_pair(std::move(it1), std::move(it2));
}
Expand All @@ -385,6 +385,15 @@ namespace hpx { namespace parallel { namespace util
std::forward<F>(f));
}

/// Two-sequence loop overload for the datapar task execution policy.
///
/// The policy argument is unnamed and serves only to select this overload;
/// the iterators and the function object are handed on to
/// detail::datapar_loop2<Iter1, Iter2>::call, whose result is returned.
template <typename Iter1, typename Iter2, typename F>
HPX_HOST_DEVICE HPX_FORCEINLINE std::pair<Iter1, Iter2>
loop2(parallel::v1::datapar_task_execution_policy, Iter1 first1, Iter1 last1,
    Iter2 first2, F && f)
{
    typedef detail::datapar_loop2<Iter1, Iter2> loop_impl;

    return loop_impl::call(first1, last1, first2, std::forward<F>(f));
}

///////////////////////////////////////////////////////////////////////////
template <typename ExPolicy, typename Iter, typename F>
HPX_HOST_DEVICE HPX_FORCEINLINE Iter
Expand Down
2 changes: 1 addition & 1 deletion src/runtime.cpp
Expand Up @@ -151,7 +151,7 @@ namespace hpx
message_handler_registrations;

///////////////////////////////////////////////////////////////////////////
HPX_EXPORT void new_handler()
HPX_EXPORT void HPX_CDECL new_handler()
{
HPX_THROW_EXCEPTION(out_of_memory, "new_handler",
"new allocator failed to allocate memory");
Expand Down
12 changes: 12 additions & 0 deletions tests/performance/local/CMakeLists.txt
Expand Up @@ -161,6 +161,18 @@ if(HPX_WITH_EXAMPLES_OPENMP)
PROPERTIES LINK_FLAGS ${OpenMP_CXX_FLAGS})
endif()

# Apply the Vc compiler settings to the inner_product benchmark target only.
#
# COMPILE_FLAGS is a single string-valued target property, so each flag has to
# be appended to it. set_target_properties() replaces the property value, which
# inside a loop would leave only the last flag in effect and would let the
# architecture flags discard the compile flags set just before.
if(HPX_WITH_VC_DATAPAR)
  if(NOT MSVC)
    foreach(_flag ${Vc_COMPILE_FLAGS})
      set_property(TARGET inner_product_exe
        APPEND_STRING PROPERTY COMPILE_FLAGS " ${_flag}")
    endforeach()
  endif()

  foreach(_flag ${Vc_ARCHITECTURE_FLAGS})
    set_property(TARGET inner_product_exe
      APPEND_STRING PROPERTY COMPILE_FLAGS " ${_flag}")
  endforeach()
endif()

add_hpx_pseudo_target(tests.performance.local.htts_v2)
add_subdirectory(htts_v2)
add_hpx_pseudo_dependencies(tests.performance tests.performance.local.htts_v2)
Expand Down
8 changes: 6 additions & 2 deletions tests/performance/local/inner_product.cpp
Expand Up @@ -90,10 +90,14 @@ int hpx_main(boost::program_options::variables_map& vm)
}
else
{
std::uint64_t tr_time_par = measure_inner_product(
test_count, hpx::parallel::par, data1, data2);
// warm up caches
measure_inner_product(hpx::parallel::par, data1, data2);

// do measurements
std::uint64_t tr_time_datapar = measure_inner_product(
test_count, hpx::parallel::datapar_execution, data1, data2);
std::uint64_t tr_time_par = measure_inner_product(
test_count, hpx::parallel::par, data1, data2);

if (csvoutput)
{
Expand Down

0 comments on commit 98583d8

Please sign in to comment.