Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dynamic dispatch infrastructure for class member functions #37257

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
52 changes: 50 additions & 2 deletions src/AggregateFunctions/AggregateFunctionSum.h
Expand Up @@ -14,6 +14,7 @@
#include <AggregateFunctions/IAggregateFunction.h>

#include <Common/config.h>
#include <Common/TargetSpecific.h>

#if USE_EMBEDDED_COMPILER
# include <llvm/IR/IRBuilder.h>
Expand Down Expand Up @@ -58,8 +59,11 @@ struct AggregateFunctionSumData
}

/// Vectorized version
MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(addManyImpl,
MULTITARGET_FH(
template <typename Value>
void NO_SANITIZE_UNDEFINED NO_INLINE addMany(const Value * __restrict ptr, size_t start, size_t end)
void NO_SANITIZE_UNDEFINED NO_INLINE
), /*addManyImpl*/ MULTITARGET_FB((const Value * __restrict ptr, size_t start, size_t end) /// NOLINT
{
ptr += start;
size_t count = end - start;
Expand Down Expand Up @@ -95,11 +99,34 @@ struct AggregateFunctionSumData
++ptr;
}
Impl::add(sum, local_sum);
})
)

/// Runtime dispatcher: forwards (ptr, start, end) to the AVX2 build of addManyImpl when the CPU supports it,
/// otherwise to the SSE4.2 build, otherwise to the build without a target attribute.
template <typename Value>
void NO_INLINE addMany(const Value * __restrict ptr, size_t start, size_t end)
{
#if USE_MULTITARGET_CODE
    /// Widest instruction set is probed first.
    if (isArchSupported(TargetArch::AVX2))
        return addManyImplAVX2(ptr, start, end);

    if (isArchSupported(TargetArch::SSE42))
        return addManyImplSSE42(ptr, start, end);
#endif

    return addManyImpl(ptr, start, end);
}

MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(addManyConditionalInternalImpl,
MULTITARGET_FH(
template <typename Value, bool add_if_zero>
void NO_SANITIZE_UNDEFINED NO_INLINE
addManyConditionalInternal(const Value * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end)
), /*addManyConditionalInternalImpl*/ MULTITARGET_FB((const Value * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end) /// NOLINT
{
ptr += start;
size_t count = end - start;
Expand Down Expand Up @@ -163,6 +190,27 @@ struct AggregateFunctionSumData
++condition_map;
}
Impl::add(sum, local_sum);
})
)

/// Runtime dispatcher: forwards its arguments to the AVX2 build of addManyConditionalInternalImpl when the CPU
/// supports it, otherwise to the SSE4.2 build, otherwise to the build without a target attribute.
template <typename Value, bool add_if_zero>
void NO_INLINE addManyConditionalInternal(const Value * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end)
{
#if USE_MULTITARGET_CODE
    /// Widest instruction set is probed first.
    if (isArchSupported(TargetArch::AVX2))
        return addManyConditionalInternalImplAVX2<Value, add_if_zero>(ptr, condition_map, start, end);

    if (isArchSupported(TargetArch::SSE42))
        return addManyConditionalInternalImplSSE42<Value, add_if_zero>(ptr, condition_map, start, end);
#endif

    return addManyConditionalInternalImpl<Value, add_if_zero>(ptr, condition_map, start, end);
}

template <typename Value>
Expand Down
84 changes: 84 additions & 0 deletions src/Common/TargetSpecific.h
Expand Up @@ -93,6 +93,13 @@ String toString(TargetArch arch);
#define USE_MULTITARGET_CODE 1

#if defined(__clang__)

/// Per-function target attributes for Clang. Each level spells out the complete feature list it enables,
/// so an attribute can be attached to a single function independently of the translation unit's flags.
#define AVX512_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f")))
#define AVX2_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2")))
/// Fixed: the closing parenthesis of __attribute__(( ... )) was missing here.
#define AVX_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx")))
#define SSE42_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt")))
kitaisreal marked this conversation as resolved.
Show resolved Hide resolved
#define DEFAULT_FUNCTION_SPECIFIC_ATTRIBUTE

# define BEGIN_AVX512F_SPECIFIC_CODE \
_Pragma("clang attribute push(__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f\"))),apply_to=function)")
# define BEGIN_AVX2_SPECIFIC_CODE \
Expand All @@ -109,6 +116,13 @@ String toString(TargetArch arch);
*/
# define DUMMY_FUNCTION_DEFINITION [[maybe_unused]] void _dummy_function_definition();
#else

/// Per-function target attributes for non-Clang compilers. Each level spells out the complete feature list
/// it enables and appends tune=native inside the same target string.
#define AVX512_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,tune=native")))
#define AVX2_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,tune=native")))
#define AVX_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,tune=native")))
/// Fixed: tune=native was written outside the string literal and followed by one parenthesis too many.
#define SSE42_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,tune=native")))
#define DEFAULT_FUNCTION_SPECIFIC_ATTRIBUTE

# define BEGIN_AVX512F_SPECIFIC_CODE \
_Pragma("GCC push_options") \
_Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,tune=native\")")
Expand Down Expand Up @@ -212,4 +226,74 @@ DECLARE_AVX512F_SPECIFIC_CODE(
constexpr auto BuildArch = TargetArch::AVX512F; /// NOLINT
) // DECLARE_AVX512F_SPECIFIC_CODE

/** Runtime Dispatch helpers for class members.
*
* Example of usage:
*
* class TestClass
* {
* public:
* MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(testFunctionImpl,
* MULTITARGET_FH(int), /\*testFunction*\/ MULTITARGET_FB((int value)
* {
* return value;
* })
* )
*
* void testFunction(int value) {
* if (isArchSupported(TargetArch::AVX2))
* {
* testFunctionImplAVX2(value);
* }
* else if (isArchSupported(TargetArch::SSE42))
* {
* testFunctionImplSSE42(value);
* }
* else
* {
* testFunctionImpl(value);
* }
* }
*};
*
*/

/// Function header: the text that precedes the function name (template clause, return type, attributes).
/// Expands to its arguments unchanged; wrapping the text in this macro's parentheses keeps any commas in it
/// (e.g. in a template parameter list) from splitting the arguments of MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42.
#define MULTITARGET_FH(...) __VA_ARGS__

/// Function body: the parenthesised parameter list followed by the braced body.
/// Expands to its arguments unchanged; the wrapping parentheses serve the same purpose as for the header,
/// since commas inside braces would otherwise be treated as macro argument separators.
#define MULTITARGET_FB(...) __VA_ARGS__

#if ENABLE_MULTITARGET_CODE && defined(__GNUC__) && defined(__x86_64__)

/** Emits three copies of one function definition:
  *  - name##AVX2, annotated with AVX2_FUNCTION_SPECIFIC_ATTRIBUTE;
  *  - name##SSE42, annotated with SSE42_FUNCTION_SPECIFIC_ATTRIBUTE;
  *  - name, with no target attribute.
  * FUNCTION_HEADER is pasted before each name and FUNCTION_BODY after it.
  *
  * The SSE42 copy must carry the SSE42 attribute: with the AVX2 attribute it could contain AVX2
  * instructions while being selected on CPUs that only report SSE4.2 support.
  */
/// NOLINTNEXTLINE
#define MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(name, FUNCTION_HEADER, FUNCTION_BODY) \
    FUNCTION_HEADER \
    \
    AVX2_FUNCTION_SPECIFIC_ATTRIBUTE \
    name##AVX2 \
    FUNCTION_BODY \
    \
    FUNCTION_HEADER \
    \
    SSE42_FUNCTION_SPECIFIC_ATTRIBUTE \
    name##SSE42 \
    FUNCTION_BODY \
    \
    FUNCTION_HEADER \
    \
    name \
    FUNCTION_BODY

#else

/// Fallback used when the multitarget condition above is false: only the plain `name` definition is emitted,
/// built from FUNCTION_HEADER, the name and FUNCTION_BODY in that order.
/// NOLINTNEXTLINE
#define MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(name, FUNCTION_HEADER, FUNCTION_BODY) FUNCTION_HEADER name FUNCTION_BODY

#endif

}