Skip to content

Commit

Permalink
Merge pull request #44147 from ClickHouse/backport/22.11/44067
Browse files Browse the repository at this point in the history
Backport #44067 to 22.11: Fix undefined behavior in `quantiles` function
  • Loading branch information
alexey-milovidov committed Dec 13, 2022
2 parents 6fa21c3 + 9551af8 commit fc59b12
Show file tree
Hide file tree
Showing 7 changed files with 93 additions and 8 deletions.
4 changes: 2 additions & 2 deletions src/AggregateFunctions/AggregateFunctionQuantile.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,15 +170,15 @@ class AggregateFunctionQuantile final
{
auto & data_to = assert_cast<ColumnVector<FloatReturnType> &>(arr_to.getData()).getData();
size_t old_size = data_to.size();
data_to.resize(data_to.size() + size);
data_to.resize(old_size + size);

data.getManyFloat(levels.levels.data(), levels.permutation.data(), size, data_to.data() + old_size);
}
else
{
auto & data_to = static_cast<ColVecType &>(arr_to.getData()).getData();
size_t old_size = data_to.size();
data_to.resize(data_to.size() + size);
data_to.resize(old_size + size);

data.getMany(levels.levels.data(), levels.permutation.data(), size, data_to.data() + old_size);
}
Expand Down
20 changes: 17 additions & 3 deletions src/AggregateFunctions/QuantileReservoirSampler.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ struct QuantileReservoirSampler
/// Get the value of the `level` quantile. The level must be between 0 and 1.
Value get(Float64 level)
{
if (data.empty())
return {};

if constexpr (is_decimal<Value>)
return Value(static_cast<typename Value::NativeType>(data.quantileInterpolated(level)));
else
Expand All @@ -65,11 +68,22 @@ struct QuantileReservoirSampler
/// indices - an array of index levels such that the corresponding elements will go in ascending order.
void getMany(const Float64 * levels, const size_t * indices, size_t size, Value * result)
{
bool is_empty = data.empty();

for (size_t i = 0; i < size; ++i)
if constexpr (is_decimal<Value>)
result[indices[i]] = Value(static_cast<typename Value::NativeType>(data.quantileInterpolated(levels[indices[i]])));
{
if (is_empty)
{
result[i] = Value{};
}
else
result[indices[i]] = Value(data.quantileInterpolated(levels[indices[i]]));
{
if constexpr (is_decimal<Value>)
result[indices[i]] = Value(static_cast<typename Value::NativeType>(data.quantileInterpolated(levels[indices[i]])));
else
result[indices[i]] = Value(data.quantileInterpolated(levels[indices[i]]));
}
}
}

/// The same, but in the case of an empty state, NaN is returned.
Expand Down
20 changes: 17 additions & 3 deletions src/AggregateFunctions/QuantileReservoirSamplerDeterministic.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ struct QuantileReservoirSamplerDeterministic
/// Get the value of the `level` quantile. The level must be between 0 and 1.
Value get(Float64 level)
{
if (data.empty())
return {};

if constexpr (is_decimal<Value>)
return static_cast<typename Value::NativeType>(data.quantileInterpolated(level));
else
Expand All @@ -65,11 +68,22 @@ struct QuantileReservoirSamplerDeterministic
/// indices - an array of index levels such that the corresponding elements will go in ascending order.
void getMany(const Float64 * levels, const size_t * indices, size_t size, Value * result)
{
bool is_empty = data.empty();

for (size_t i = 0; i < size; ++i)
if constexpr (is_decimal<Value>)
result[indices[i]] = static_cast<typename Value::NativeType>(data.quantileInterpolated(levels[indices[i]]));
{
if (is_empty)
{
result[i] = Value{};
}
else
result[indices[i]] = static_cast<Value>(data.quantileInterpolated(levels[indices[i]]));
{
if constexpr (is_decimal<Value>)
result[indices[i]] = static_cast<typename Value::NativeType>(data.quantileInterpolated(levels[indices[i]]));
else
result[indices[i]] = static_cast<Value>(data.quantileInterpolated(levels[indices[i]]));
}
}
}

/// The same, but in the case of an empty state, NaN is returned.
Expand Down
5 changes: 5 additions & 0 deletions src/AggregateFunctions/ReservoirSampler.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,11 @@ class ReservoirSampler
return total_values;
}

bool empty() const
{
return samples.empty();
}

T quantileNearest(double level)
{
if (samples.empty())
Expand Down
5 changes: 5 additions & 0 deletions src/AggregateFunctions/ReservoirSamplerDeterministic.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,11 @@ class ReservoirSamplerDeterministic
return total_values;
}

bool empty() const
{
return samples.empty();
}

T quantileNearest(double level)
{
if (samples.empty())
Expand Down
25 changes: 25 additions & 0 deletions tests/queries/0_stateless/02499_quantile_nan_ubsan_msan.reference
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
['1970-01-01 00:00:00']
['1970-01-01 00:00:00']

['1970-01-01 00:00:00']
['1970-01-01 00:00:00']
['1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00']

['1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00']
['1970-01-01 00:00:00']
['1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00']
[18446744073709552000]
['1970-01-01 00:00:00']
['1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00']
[1.157920892373162e77]
[nan]
1970-01-01 00:00:00
1970-01-01 00:00:00

1970-01-01 00:00:00
1970-01-01 00:00:00
1970-01-01 00:00:00
18446744073709552000
1970-01-01 00:00:00
1.157920892373162e77
nan
22 changes: 22 additions & 0 deletions tests/queries/0_stateless/02499_quantile_nan_ubsan_msan.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
SELECT quantiles(0.5)(now()::DateTime('UTC')) WHERE 0;
SELECT quantiles(0.5)(now()::DateTime('UTC')) WHERE 0 WITH TOTALS;
SELECT arrayReduce('quantiles(0.5)', []::Array(DateTime('UTC')));
SELECT quantiles(0.5, 1.1754943508222875e-38, 0.0001, -0., 0.0001, -0., 0.0001, 0., 0.5)(now()::DateTime('UTC')) WHERE 0 WITH TOTALS;

SELECT DISTINCT arrayReduce('quantiles(0.5)', materialize([]::Array(DateTime('UTC')))) FROM numbers(1000) LIMIT 10;
SELECT DISTINCT arrayReduce('quantiles(0, 0.5, 0.9, 1)', materialize([]::Array(DateTime('UTC')))) FROM numbers(1000) LIMIT 10;
SELECT DISTINCT arrayReduce('quantiles(0.5)', [0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFE]) FROM numbers(1000) LIMIT 10;
SELECT DISTINCT arrayReduce('quantilesDeterministic(0.5)', materialize([]::Array(DateTime('UTC'))), []::Array(UInt64)) FROM numbers(1000) LIMIT 10;
SELECT DISTINCT arrayReduce('quantilesDeterministic(0, 0.5, 0.9, 1)', materialize([]::Array(DateTime('UTC'))), []::Array(UInt64)) FROM numbers(1000) LIMIT 10;
SELECT DISTINCT arrayReduce('quantiles(0.5)', [CAST(-1, 'UInt256'), CAST(-2, 'UInt256')]) FROM numbers(1000) LIMIT 10;
SELECT DISTINCT arrayReduce('quantiles(0.5)', []::Array(Float64)) FROM numbers(1000) LIMIT 10;

SELECT quantile(0.5)(now()::DateTime('UTC')) WHERE 0;
SELECT quantile(0.5)(now()::DateTime('UTC')) WHERE 0 WITH TOTALS;
SELECT arrayReduce('quantile(0.5)', []::Array(DateTime('UTC')));

SELECT DISTINCT arrayReduce('quantile(0.5)', materialize([]::Array(DateTime('UTC')))) FROM numbers(1000) LIMIT 10;
SELECT DISTINCT arrayReduce('quantile(0.5)', [0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFE]) FROM numbers(1000) LIMIT 10;
SELECT DISTINCT arrayReduce('quantileDeterministic(0.5)', materialize([]::Array(DateTime('UTC'))), []::Array(UInt64)) FROM numbers(1000) LIMIT 10;
SELECT DISTINCT arrayReduce('quantile(0.5)', [CAST(-1, 'UInt256'), CAST(-2, 'UInt256')]) FROM numbers(1000) LIMIT 10;
SELECT DISTINCT arrayReduce('quantile(0.5)', []::Array(Float64)) FROM numbers(1000) LIMIT 10;

0 comments on commit fc59b12

Please sign in to comment.