Skip to content

Commit

Permalink
Merge pull request #58231 from Algunenano/new_minmax
Browse files Browse the repository at this point in the history
Speedup MIN and MAX for native types
  • Loading branch information
robot-ch-test-poll2 committed Dec 28, 2023
2 parents 1b6c5f6 + 68787ce commit 04178a9
Show file tree
Hide file tree
Showing 9 changed files with 784 additions and 12 deletions.
118 changes: 115 additions & 3 deletions src/AggregateFunctions/AggregateFunctionMax.cpp
@@ -1,7 +1,7 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/HelpersMinMaxAny.h>
#include <AggregateFunctions/FactoryHelpers.h>

#include <AggregateFunctions/HelpersMinMaxAny.h>
#include <AggregateFunctions/findNumeric.h>

namespace DB
{
Expand All @@ -10,10 +10,122 @@ struct Settings;
namespace
{

template <typename Data>
class AggregateFunctionsSingleValueMax final : public AggregateFunctionsSingleValue<Data>
{
using Parent = AggregateFunctionsSingleValue<Data>;

public:
explicit AggregateFunctionsSingleValueMax(const DataTypePtr & type) : Parent(type) { }

/// Specializations for native numeric types
ALWAYS_INLINE inline void addBatchSinglePlace(
size_t row_begin,
size_t row_end,
AggregateDataPtr __restrict place,
const IColumn ** __restrict columns,
Arena * arena,
ssize_t if_argument_pos) const override;

ALWAYS_INLINE inline void addBatchSinglePlaceNotNull(
size_t row_begin,
size_t row_end,
AggregateDataPtr __restrict place,
const IColumn ** __restrict columns,
const UInt8 * __restrict null_map,
Arena * arena,
ssize_t if_argument_pos) const override;
};

// NOLINTBEGIN(bugprone-macro-parentheses)
#define SPECIALIZE(TYPE) \
template <> \
void AggregateFunctionsSingleValueMax<typename DB::AggregateFunctionMaxData<SingleValueDataFixed<TYPE>>>::addBatchSinglePlace( \
size_t row_begin, \
size_t row_end, \
AggregateDataPtr __restrict place, \
const IColumn ** __restrict columns, \
Arena *, \
ssize_t if_argument_pos) const \
{ \
const auto & column = assert_cast<const DB::AggregateFunctionMaxData<SingleValueDataFixed<TYPE>>::ColVecType &>(*columns[0]); \
std::optional<TYPE> opt; \
if (if_argument_pos >= 0) \
{ \
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData(); \
opt = findNumericMaxIf(column.getData().data(), flags.data(), row_begin, row_end); \
} \
else \
opt = findNumericMax(column.getData().data(), row_begin, row_end); \
if (opt.has_value()) \
this->data(place).changeIfGreater(opt.value()); \
}
// NOLINTEND(bugprone-macro-parentheses)

FOR_BASIC_NUMERIC_TYPES(SPECIALIZE)
#undef SPECIALIZE

template <typename Data>
void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlace(
size_t row_begin,
size_t row_end,
AggregateDataPtr __restrict place,
const IColumn ** __restrict columns,
Arena * arena,
ssize_t if_argument_pos) const
{
return Parent::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos);
}

// NOLINTBEGIN(bugprone-macro-parentheses)
#define SPECIALIZE(TYPE) \
template <> \
void AggregateFunctionsSingleValueMax<typename DB::AggregateFunctionMaxData<SingleValueDataFixed<TYPE>>>::addBatchSinglePlaceNotNull( \
size_t row_begin, \
size_t row_end, \
AggregateDataPtr __restrict place, \
const IColumn ** __restrict columns, \
const UInt8 * __restrict null_map, \
Arena *, \
ssize_t if_argument_pos) const \
{ \
const auto & column = assert_cast<const DB::AggregateFunctionMaxData<SingleValueDataFixed<TYPE>>::ColVecType &>(*columns[0]); \
std::optional<TYPE> opt; \
if (if_argument_pos >= 0) \
{ \
const auto * if_flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData().data(); \
auto final_flags = std::make_unique<UInt8[]>(row_end); \
for (size_t i = row_begin; i < row_end; ++i) \
final_flags[i] = (!null_map[i]) & !!if_flags[i]; \
opt = findNumericMaxIf(column.getData().data(), final_flags.get(), row_begin, row_end); \
} \
else \
opt = findNumericMaxNotNull(column.getData().data(), null_map, row_begin, row_end); \
if (opt.has_value()) \
this->data(place).changeIfGreater(opt.value()); \
}
// NOLINTEND(bugprone-macro-parentheses)

FOR_BASIC_NUMERIC_TYPES(SPECIALIZE)
#undef SPECIALIZE

template <typename Data>
void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlaceNotNull(
size_t row_begin,
size_t row_end,
AggregateDataPtr __restrict place,
const IColumn ** __restrict columns,
const UInt8 * __restrict null_map,
Arena * arena,
ssize_t if_argument_pos) const
{
return Parent::addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, if_argument_pos);
}

AggregateFunctionPtr createAggregateFunctionMax(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionMaxData>(name, argument_types, parameters, settings));
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValueMax, AggregateFunctionMaxData>(name, argument_types, parameters, settings));
}

AggregateFunctionPtr createAggregateFunctionArgMax(
Expand Down
118 changes: 116 additions & 2 deletions src/AggregateFunctions/AggregateFunctionMin.cpp
@@ -1,6 +1,7 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/HelpersMinMaxAny.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/HelpersMinMaxAny.h>
#include <AggregateFunctions/findNumeric.h>


namespace DB
Expand All @@ -10,10 +11,123 @@ struct Settings;
namespace
{

template <typename Data>
class AggregateFunctionsSingleValueMin final : public AggregateFunctionsSingleValue<Data>
{
using Parent = AggregateFunctionsSingleValue<Data>;

public:
explicit AggregateFunctionsSingleValueMin(const DataTypePtr & type) : Parent(type) { }

/// Specializations for native numeric types
ALWAYS_INLINE inline void addBatchSinglePlace(
size_t row_begin,
size_t row_end,
AggregateDataPtr __restrict place,
const IColumn ** __restrict columns,
Arena * arena,
ssize_t if_argument_pos) const override;

ALWAYS_INLINE inline void addBatchSinglePlaceNotNull(
size_t row_begin,
size_t row_end,
AggregateDataPtr __restrict place,
const IColumn ** __restrict columns,
const UInt8 * __restrict null_map,
Arena * arena,
ssize_t if_argument_pos) const override;
};

// NOLINTBEGIN(bugprone-macro-parentheses)
#define SPECIALIZE(TYPE) \
template <> \
void AggregateFunctionsSingleValueMin<typename DB::AggregateFunctionMinData<SingleValueDataFixed<TYPE>>>::addBatchSinglePlace( \
size_t row_begin, \
size_t row_end, \
AggregateDataPtr __restrict place, \
const IColumn ** __restrict columns, \
Arena *, \
ssize_t if_argument_pos) const \
{ \
const auto & column = assert_cast<const DB::AggregateFunctionMinData<SingleValueDataFixed<TYPE>>::ColVecType &>(*columns[0]); \
std::optional<TYPE> opt; \
if (if_argument_pos >= 0) \
{ \
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData(); \
opt = findNumericMinIf(column.getData().data(), flags.data(), row_begin, row_end); \
} \
else \
opt = findNumericMin(column.getData().data(), row_begin, row_end); \
if (opt.has_value()) \
this->data(place).changeIfLess(opt.value()); \
}
// NOLINTEND(bugprone-macro-parentheses)

FOR_BASIC_NUMERIC_TYPES(SPECIALIZE)
#undef SPECIALIZE

template <typename Data>
void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlace(
size_t row_begin,
size_t row_end,
AggregateDataPtr __restrict place,
const IColumn ** __restrict columns,
Arena * arena,
ssize_t if_argument_pos) const
{
return Parent::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos);
}

// NOLINTBEGIN(bugprone-macro-parentheses)
#define SPECIALIZE(TYPE) \
template <> \
void AggregateFunctionsSingleValueMin<typename DB::AggregateFunctionMinData<SingleValueDataFixed<TYPE>>>::addBatchSinglePlaceNotNull( \
size_t row_begin, \
size_t row_end, \
AggregateDataPtr __restrict place, \
const IColumn ** __restrict columns, \
const UInt8 * __restrict null_map, \
Arena *, \
ssize_t if_argument_pos) const \
{ \
const auto & column = assert_cast<const DB::AggregateFunctionMinData<SingleValueDataFixed<TYPE>>::ColVecType &>(*columns[0]); \
std::optional<TYPE> opt; \
if (if_argument_pos >= 0) \
{ \
const auto * if_flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData().data(); \
auto final_flags = std::make_unique<UInt8[]>(row_end); \
for (size_t i = row_begin; i < row_end; ++i) \
final_flags[i] = (!null_map[i]) & !!if_flags[i]; \
opt = findNumericMinIf(column.getData().data(), final_flags.get(), row_begin, row_end); \
} \
else \
opt = findNumericMinNotNull(column.getData().data(), null_map, row_begin, row_end); \
if (opt.has_value()) \
this->data(place).changeIfLess(opt.value()); \
}
// NOLINTEND(bugprone-macro-parentheses)

FOR_BASIC_NUMERIC_TYPES(SPECIALIZE)
#undef SPECIALIZE

template <typename Data>
void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlaceNotNull(
size_t row_begin,
size_t row_end,
AggregateDataPtr __restrict place,
const IColumn ** __restrict columns,
const UInt8 * __restrict null_map,
Arena * arena,
ssize_t if_argument_pos) const
{
return Parent::addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, if_argument_pos);
}

AggregateFunctionPtr createAggregateFunctionMin(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionMinData>(name, argument_types, parameters, settings));
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValueMin, AggregateFunctionMinData>(
name, argument_types, parameters, settings));
}

AggregateFunctionPtr createAggregateFunctionArgMin(
Expand Down
30 changes: 24 additions & 6 deletions src/AggregateFunctions/AggregateFunctionMinMaxAny.h
Expand Up @@ -43,14 +43,12 @@ namespace ErrorCodes
template <typename T>
struct SingleValueDataFixed
{
private:
using Self = SingleValueDataFixed;
using ColVecType = ColumnVectorOrDecimal<T>;

bool has_value = false; /// We need to remember if at least one value has been passed. This is necessary for AggregateFunctionIf.
T value = T{};

public:
static constexpr bool result_is_nullable = false;
static constexpr bool should_skip_null_arguments = true;
static constexpr bool is_any = false;
Expand Down Expand Up @@ -157,6 +155,15 @@ struct SingleValueDataFixed
return false;
}

void changeIfLess(T from)
{
if (!has() || from < value)
{
has_value = true;
value = from;
}
}

bool changeIfGreater(const IColumn & column, size_t row_num, Arena * arena)
{
if (!has() || assert_cast<const ColVecType &>(column).getData()[row_num] > value)
Expand All @@ -179,6 +186,15 @@ struct SingleValueDataFixed
return false;
}

void changeIfGreater(T & from)
{
if (!has() || from > value)
{
has_value = true;
value = from;
}
}

bool isEqualTo(const Self & to) const
{
return has() && to.value == value;
Expand Down Expand Up @@ -448,7 +464,6 @@ struct SingleValueDataFixed
}

#endif

};

struct Compatibility
Expand Down Expand Up @@ -1214,7 +1229,7 @@ struct AggregateFunctionAnyHeavyData : Data


template <typename Data>
class AggregateFunctionsSingleValue final : public IAggregateFunctionDataHelper<Data, AggregateFunctionsSingleValue<Data>>
class AggregateFunctionsSingleValue : public IAggregateFunctionDataHelper<Data, AggregateFunctionsSingleValue<Data>>
{
static constexpr bool is_any = Data::is_any;

Expand All @@ -1230,8 +1245,11 @@ class AggregateFunctionsSingleValue final : public IAggregateFunctionDataHelper<
|| StringRef(Data::name()) == StringRef("max"))
{
if (!type->isComparable())
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of aggregate function {} "
"because the values of that data type are not comparable", type->getName(), getName());
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of argument of aggregate function {} because the values of that data type are not comparable",
type->getName(),
Data::name());
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/AggregateFunctions/AggregateFunctionSum.h
Expand Up @@ -504,7 +504,7 @@ class AggregateFunctionSum final : public IAggregateFunctionDataHelper<Data, Agg
const auto * if_flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData().data();
auto final_flags = std::make_unique<UInt8[]>(row_end);
for (size_t i = row_begin; i < row_end; ++i)
final_flags[i] = (!null_map[i]) & if_flags[i];
final_flags[i] = (!null_map[i]) & !!if_flags[i];

this->data(place).addManyConditional(column.getData().data(), final_flags.get(), row_begin, row_end);
}
Expand Down
15 changes: 15 additions & 0 deletions src/AggregateFunctions/findNumeric.cpp
@@ -0,0 +1,15 @@
#include <AggregateFunctions/findNumeric.h>

namespace DB
{
#define INSTANTIATION(T) \
template std::optional<T> findNumericMin(const T * __restrict ptr, size_t start, size_t end); \
template std::optional<T> findNumericMinNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
template std::optional<T> findNumericMinIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
template std::optional<T> findNumericMax(const T * __restrict ptr, size_t start, size_t end); \
template std::optional<T> findNumericMaxNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
template std::optional<T> findNumericMaxIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end);

FOR_BASIC_NUMERIC_TYPES(INSTANTIATION)
#undef INSTANTIATION
}

0 comments on commit 04178a9

Please sign in to comment.