Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move-away "uniqCombined" as a separate aggregated function #3406

Merged
merged 20 commits into from Nov 22, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion dbms/scripts/gen-bias-data.py
@@ -1,4 +1,4 @@
#!/usr/bin/python3.4
#!/usr/bin/python3
# -*- coding: utf-8 -*-

import sys
Expand Down
3 changes: 0 additions & 3 deletions dbms/src/AggregateFunctions/AggregateFunctionUniq.cpp
Expand Up @@ -130,9 +130,6 @@ void registerAggregateFunctionsUniq(AggregateFunctionFactory & factory)

factory.registerFunction("uniqExact",
createAggregateFunctionUniq<true, AggregateFunctionUniqExactData, AggregateFunctionUniqExactData<String>>);

factory.registerFunction("uniqCombined",
createAggregateFunctionUniq<false, AggregateFunctionUniqCombinedData, AggregateFunctionUniqCombinedData<UInt64>>);
}

}
88 changes: 0 additions & 88 deletions dbms/src/AggregateFunctions/AggregateFunctionUniq.h
Expand Up @@ -22,7 +22,6 @@
#include <Common/typeid_cast.h>

#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/UniqCombinedBiasData.h>
#include <AggregateFunctions/UniqVariadicHash.h>


Expand Down Expand Up @@ -124,46 +123,6 @@ struct AggregateFunctionUniqExactData<String>
static String getName() { return "uniqExact"; }
};

template <typename T>
struct AggregateFunctionUniqCombinedData
{
using Key = UInt32;
using Set = CombinedCardinalityEstimator<
Key,
HashSet<Key, TrivialHash, HashTableGrower<>>,
16,
14,
17,
TrivialHash,
UInt32,
HyperLogLogBiasEstimator<UniqCombinedBiasData>,
HyperLogLogMode::FullFeatured>;

Set set;

static String getName() { return "uniqCombined"; }
};

template <>
struct AggregateFunctionUniqCombinedData<String>
{
using Key = UInt64;
using Set = CombinedCardinalityEstimator<
Key,
HashSet<Key, TrivialHash, HashTableGrower<>>,
16,
14,
17,
TrivialHash,
UInt64,
HyperLogLogBiasEstimator<UniqCombinedBiasData>,
HyperLogLogMode::FullFeatured>;

Set set;

static String getName() { return "uniqCombined"; }
};


namespace detail
{
Expand Down Expand Up @@ -199,39 +158,6 @@ template <> struct AggregateFunctionUniqTraits<Float64>
}
};

/** Hash function for uniqCombined.
*/
template <typename T> struct AggregateFunctionUniqCombinedTraits
{
static UInt32 hash(T x) { return static_cast<UInt32>(intHash64(x)); }
};

template <> struct AggregateFunctionUniqCombinedTraits<UInt128>
{
static UInt32 hash(UInt128 x)
{
return sipHash64(x);
}
};

template <> struct AggregateFunctionUniqCombinedTraits<Float32>
{
static UInt32 hash(Float32 x)
{
UInt64 res = ext::bit_cast<UInt64>(x);
return static_cast<UInt32>(intHash64(res));
}
};

template <> struct AggregateFunctionUniqCombinedTraits<Float64>
{
static UInt32 hash(Float64 x)
{
UInt64 res = ext::bit_cast<UInt64>(x);
return static_cast<UInt32>(intHash64(res));
}
};


/** The structure for the delegation work to add one element to the `uniq` aggregate functions.
* Used for partial specialization to add strings.
Expand All @@ -255,19 +181,6 @@ struct OneAdder
data.set.insert(CityHash_v1_0_2::CityHash64(value.data, value.size));
}
}
else if constexpr (std::is_same_v<Data, AggregateFunctionUniqCombinedData<T>>)
{
if constexpr (!std::is_same_v<T, String>)
{
const auto & value = static_cast<const ColumnVector<T> &>(column).getData()[row_num];
data.set.insert(AggregateFunctionUniqCombinedTraits<T>::hash(value));
}
else
{
StringRef value = column.getDataAt(row_num);
data.set.insert(CityHash_v1_0_2::CityHash64(value.data, value.size));
}
}
else if constexpr (std::is_same_v<Data, AggregateFunctionUniqExactData<T>>)
{
if constexpr (!std::is_same_v<T, String>)
Expand Down Expand Up @@ -387,5 +300,4 @@ class AggregateFunctionUniqVariadic final : public IAggregateFunctionDataHelper<
const char * getHeaderFilePath() const override { return __FILE__; }
};


}
127 changes: 127 additions & 0 deletions dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp
@@ -0,0 +1,127 @@
#include <AggregateFunctions/AggregateFunctionUniqCombined.h>

#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/Helpers.h>

#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>

namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ARGUMENT_OUT_OF_BOUND;
}

namespace
{
template <UInt8 K>
struct WithK
{
template <typename T>
using AggregateFunction = AggregateFunctionUniqCombined<T, K>;

template <bool is_exact, bool argument_is_tuple>
using AggregateFunctionVariadic = AggregateFunctionUniqCombinedVariadic<is_exact, argument_is_tuple, K>;
};

template <UInt8 K>
AggregateFunctionPtr createAggregateFunctionWithK(const DataTypes & argument_types)
{
/// We use exact hash function if the arguments are not contiguous in memory, because only exact hash function has support for this case.
bool use_exact_hash_function = !isAllArgumentsContiguousInMemory(argument_types);

if (argument_types.size() == 1)
{
const IDataType & argument_type = *argument_types[0];

AggregateFunctionPtr res(createWithNumericType<WithK<K>::template AggregateFunction>(*argument_types[0]));

WhichDataType which(argument_type);
if (res)
return res;
else if (which.isDate())
return std::make_shared<typename WithK<K>::template AggregateFunction<DataTypeDate::FieldType>>();
else if (which.isDateTime())
return std::make_shared<typename WithK<K>::template AggregateFunction<DataTypeDateTime::FieldType>>();
else if (which.isStringOrFixedString())
return std::make_shared<typename WithK<K>::template AggregateFunction<String>>();
else if (which.isUUID())
return std::make_shared<typename WithK<K>::template AggregateFunction<DataTypeUUID::FieldType>>();
else if (which.isTuple())
{
if (use_exact_hash_function)
return std::make_shared<typename WithK<K>::template AggregateFunctionVariadic<true, true>>(argument_types);
else
return std::make_shared<typename WithK<K>::template AggregateFunctionVariadic<false, true>>(argument_types);
}
}

/// "Variadic" method also works as a fallback generic case for a single argument.
if (use_exact_hash_function)
return std::make_shared<typename WithK<K>::template AggregateFunctionVariadic<true, false>>(argument_types);
else
return std::make_shared<typename WithK<K>::template AggregateFunctionVariadic<false, false>>(argument_types);
}

AggregateFunctionPtr createAggregateFunctionUniqCombined(
const std::string & name, const DataTypes & argument_types, const Array & params)
{
/// log2 of the number of cells in HyperLogLog.
/// Reasonable default value, selected to be comparable in quality with "uniq" aggregate function.
UInt8 precision = 17;

if (!params.empty())
{
if (params.size() != 1)
throw Exception(
"Aggregate function " + name + " requires one parameter or less.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

UInt64 precision_param = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[0]);

// This range is hardcoded below
if (precision_param > 20 || precision_param < 12)
throw Exception(
"Parameter for aggregate function " + name + "is out or range: [12, 20].", ErrorCodes::ARGUMENT_OUT_OF_BOUND);

precision = precision_param;
}

if (argument_types.empty())
throw Exception("Incorrect number of arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

switch (precision)
{
case 12:
return createAggregateFunctionWithK<12>(argument_types);
case 13:
return createAggregateFunctionWithK<13>(argument_types);
case 14:
return createAggregateFunctionWithK<14>(argument_types);
case 15:
return createAggregateFunctionWithK<15>(argument_types);
case 16:
return createAggregateFunctionWithK<16>(argument_types);
case 17:
return createAggregateFunctionWithK<17>(argument_types);
case 18:
return createAggregateFunctionWithK<18>(argument_types);
case 19:
return createAggregateFunctionWithK<19>(argument_types);
case 20:
return createAggregateFunctionWithK<20>(argument_types);
}

__builtin_unreachable();
}

} // namespace

void registerAggregateFunctionUniqCombined(AggregateFunctionFactory & factory)
{
factory.registerFunction("uniqCombined", createAggregateFunctionUniqCombined);
}

} // namespace DB