Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move-away "uniqCombined" as a separate aggregated function #3406

Merged
merged 20 commits into from Nov 22, 2018
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion dbms/scripts/test_uniq_functions.sh
Expand Up @@ -6,6 +6,6 @@ do
do
n=$(( 10**p * i ))
echo -n "$n "
clickhouse-client -q "select uniqHLL12(number), uniq(number), uniqCombined(number) from numbers($n);"
clickhouse-client -q "select uniqHLL12(number), uniq(number), uniqCombined(17)(number) from numbers($n);"
done
done
3 changes: 0 additions & 3 deletions dbms/src/AggregateFunctions/AggregateFunctionUniq.cpp
Expand Up @@ -130,9 +130,6 @@ void registerAggregateFunctionsUniq(AggregateFunctionFactory & factory)

factory.registerFunction("uniqExact",
createAggregateFunctionUniq<true, AggregateFunctionUniqExactData, AggregateFunctionUniqExactData<String>>);

factory.registerFunction("uniqCombined",
createAggregateFunctionUniq<false, AggregateFunctionUniqCombinedData, AggregateFunctionUniqCombinedData<UInt64>>);
}

}
88 changes: 0 additions & 88 deletions dbms/src/AggregateFunctions/AggregateFunctionUniq.h
Expand Up @@ -22,7 +22,6 @@
#include <Common/typeid_cast.h>

#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/UniqCombinedBiasData.h>
#include <AggregateFunctions/UniqVariadicHash.h>


Expand Down Expand Up @@ -124,46 +123,6 @@ struct AggregateFunctionUniqExactData<String>
static String getName() { return "uniqExact"; }
};

template <typename T>
struct AggregateFunctionUniqCombinedData
{
using Key = UInt32;
using Set = CombinedCardinalityEstimator<
Key,
HashSet<Key, TrivialHash, HashTableGrower<>>,
16,
14,
17,
TrivialHash,
UInt32,
HyperLogLogBiasEstimator<UniqCombinedBiasData>,
HyperLogLogMode::FullFeatured>;

Set set;

static String getName() { return "uniqCombined"; }
};

template <>
struct AggregateFunctionUniqCombinedData<String>
{
using Key = UInt64;
using Set = CombinedCardinalityEstimator<
Key,
HashSet<Key, TrivialHash, HashTableGrower<>>,
16,
14,
17,
TrivialHash,
UInt64,
HyperLogLogBiasEstimator<UniqCombinedBiasData>,
HyperLogLogMode::FullFeatured>;

Set set;

static String getName() { return "uniqCombined"; }
};


namespace detail
{
Expand Down Expand Up @@ -199,39 +158,6 @@ template <> struct AggregateFunctionUniqTraits<Float64>
}
};

/** Hash function for uniqCombined.
*/
template <typename T> struct AggregateFunctionUniqCombinedTraits
{
static UInt32 hash(T x) { return static_cast<UInt32>(intHash64(x)); }
};

template <> struct AggregateFunctionUniqCombinedTraits<UInt128>
{
static UInt32 hash(UInt128 x)
{
return sipHash64(x);
}
};

template <> struct AggregateFunctionUniqCombinedTraits<Float32>
{
static UInt32 hash(Float32 x)
{
UInt64 res = ext::bit_cast<UInt64>(x);
return static_cast<UInt32>(intHash64(res));
}
};

template <> struct AggregateFunctionUniqCombinedTraits<Float64>
{
static UInt32 hash(Float64 x)
{
UInt64 res = ext::bit_cast<UInt64>(x);
return static_cast<UInt32>(intHash64(res));
}
};


/** The structure for the delegation work to add one element to the `uniq` aggregate functions.
* Used for partial specialization to add strings.
Expand All @@ -255,19 +181,6 @@ struct OneAdder
data.set.insert(CityHash_v1_0_2::CityHash64(value.data, value.size));
}
}
else if constexpr (std::is_same_v<Data, AggregateFunctionUniqCombinedData<T>>)
{
if constexpr (!std::is_same_v<T, String>)
{
const auto & value = static_cast<const ColumnVector<T> &>(column).getData()[row_num];
data.set.insert(AggregateFunctionUniqCombinedTraits<T>::hash(value));
}
else
{
StringRef value = column.getDataAt(row_num);
data.set.insert(CityHash_v1_0_2::CityHash64(value.data, value.size));
}
}
else if constexpr (std::is_same_v<Data, AggregateFunctionUniqExactData<T>>)
{
if constexpr (!std::is_same_v<T, String>)
Expand Down Expand Up @@ -387,5 +300,4 @@ class AggregateFunctionUniqVariadic final : public IAggregateFunctionDataHelper<
const char * getHeaderFilePath() const override { return __FILE__; }
};


}
90 changes: 90 additions & 0 deletions dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp
@@ -0,0 +1,90 @@
#include <AggregateFunctions/AggregateFunctionUniqCombined.h>

#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/Helpers.h>

#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>

namespace DB
{

namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ARGUMENT_OUT_OF_BOUND;
}

namespace
{

AggregateFunctionPtr createAggregateFunctionUniqCombined(
const std::string & name, const DataTypes & argument_types, const Array & params)
{
UInt8 precision = 17; /// default value - must correlate with default ctor of |AggregateFunctionUniqCombinedData|
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it MUST be correlated it's better to set named constant and use it both here and in ctor.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It correlates with a hardcoded union member initialization, the constant won't help - so I left the comment. The code now looks like: AggregateFunctionUniqCombinedDataWithKey() : set_17() {} - Also I can use a macro-definition as constant, if it's better.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My point is there should be only one place there magic number 17 set.


if (!params.empty())
{
if (params.size() != 1)
throw Exception(
"Aggregate function " + name + " requires one parameter or less.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

UInt64 precision_param = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[0]);

// This range is hardcoded into |AggregateFunctionUniqCombinedData|
if (precision_param > 20 || precision_param < 12)
throw Exception(
"Parameter for aggregate function " + name + "is out or range: [12, 20].", ErrorCodes::ARGUMENT_OUT_OF_BOUND);

precision = precision_param;
}

if (argument_types.empty())
throw Exception("Incorrect number of arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

/// We use exact hash function if the user wants it;
/// or if the arguments are not contiguous in memory, because only exact hash function have support for this case.
bool use_exact_hash_function = !isAllArgumentsContiguousInMemory(argument_types);

if (argument_types.size() == 1)
{
const IDataType & argument_type = *argument_types[0];

AggregateFunctionPtr res(createWithNumericType<AggregateFunctionUniqCombined>(*argument_types[0], precision));

WhichDataType which(argument_type);
if (res)
return res;
else if (which.isDate())
return std::make_shared<AggregateFunctionUniqCombined<DataTypeDate::FieldType>>(precision);
else if (which.isDateTime())
return std::make_shared<AggregateFunctionUniqCombined<DataTypeDateTime::FieldType>>(precision);
else if (which.isStringOrFixedString())
return std::make_shared<AggregateFunctionUniqCombined<String>>(precision);
else if (which.isUUID())
return std::make_shared<AggregateFunctionUniqCombined<DataTypeUUID::FieldType>>(precision);
else if (which.isTuple())
{
if (use_exact_hash_function)
return std::make_shared<AggregateFunctionUniqCombinedVariadic<true, true>>(argument_types, precision);
else
return std::make_shared<AggregateFunctionUniqCombinedVariadic<false, true>>(argument_types, precision);
}
}

/// "Variadic" method also works as a fallback generic case for single argument.
if (use_exact_hash_function)
return std::make_shared<AggregateFunctionUniqCombinedVariadic<true, false>>(argument_types, precision);
else
return std::make_shared<AggregateFunctionUniqCombinedVariadic<false, false>>(argument_types, precision);
}

} // namespace

void registerAggregateFunctionUniqCombined(AggregateFunctionFactory & factory)
{
factory.registerFunction("uniqCombined", createAggregateFunctionUniqCombined);
}

} // namespace DB