Skip to content

Commit

Permalink
MDEV-26849: JSON Histograms: point selectivity estimates are off
Browse files Browse the repository at this point in the history
.. for non-existent values.

Handle this special case.
  • Loading branch information
spetrunia committed Jan 19, 2022
1 parent f3f78be commit 05877df
Show file tree
Hide file tree
Showing 8 changed files with 142 additions and 10 deletions.
1 change: 1 addition & 0 deletions mysql-test/main/statistics.result
Original file line number Diff line number Diff line change
Expand Up @@ -1885,6 +1885,7 @@ t1 id 1 17384 0.0000 4.0000 14.0000 0.15705,0.15711,0.21463,0.15705,0.15711,0.15
explain select * from t1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 229376
drop table t0;
drop table t1;
set analyze_sample_percentage=@save_analyze_sample_percentage;
set histogram_size=@save_histogram_size;
Expand Down
2 changes: 1 addition & 1 deletion mysql-test/main/statistics.test
Original file line number Diff line number Diff line change
Expand Up @@ -1019,7 +1019,7 @@ select table_name, column_name, min_value, max_value, nulls_ratio, avg_length, a
from mysql.column_stats;
explain select * from t1;


drop table t0;
drop table t1;
set analyze_sample_percentage=@save_analyze_sample_percentage;
set histogram_size=@save_histogram_size;
Expand Down
102 changes: 102 additions & 0 deletions mysql-test/main/statistics_json.result
Original file line number Diff line number Diff line change
Expand Up @@ -4148,6 +4148,7 @@ t1 id 1 17384 0.0000 4.0000 14.0000 {
explain select * from t1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 229376
drop table t0;
drop table t1;
set analyze_sample_percentage=@save_analyze_sample_percentage;
set histogram_size=@save_histogram_size;
Expand Down Expand Up @@ -7530,3 +7531,104 @@ select c from t1 where c > '1';
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 16 16.00 75.00 75.00 Using where
drop table t1;
#
# MDEV-26849: JSON Histograms: point selectivity estimates are off for non-existent values
#
#
create table t0(a int);
insert into t0 (a) values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1(a int);
insert into t1 select 100*A.a from t0 A, t0 B, t0 C;
select a, count(*) from t1 group by a order by a;
a count(*)
0 100
100 100
200 100
300 100
400 100
500 100
600 100
700 100
800 100
900 100
set histogram_type=json_hb, histogram_size=default;
analyze table t1 persistent for all;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK
select * from mysql.column_stats where table_name='t1';
db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram
test t1 a 0 900 0.0000 4.0000 100.0000 10 JSON_HB {
"histogram_hb_v2": [
{
"start": "0",
"size": 0.1,
"ndv": 1
},
{
"start": "100",
"size": 0.1,
"ndv": 1
},
{
"start": "200",
"size": 0.1,
"ndv": 1
},
{
"start": "300",
"size": 0.1,
"ndv": 1
},
{
"start": "400",
"size": 0.1,
"ndv": 1
},
{
"start": "500",
"size": 0.1,
"ndv": 1
},
{
"start": "600",
"size": 0.1,
"ndv": 1
},
{
"start": "700",
"size": 0.1,
"ndv": 1
},
{
"start": "800",
"size": 0.1,
"ndv": 1
},
{
"start": "900",
"end": "900",
"size": 0.1,
"ndv": 1
}
]
}
analyze select * from t1 where a=0;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 1000.00 10.00 10.00 Using where
analyze select * from t1 where a=50;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 1000.00 0.10 0.00 Using where
analyze select * from t1 where a=70;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 1000.00 0.10 0.00 Using where
analyze select * from t1 where a=100;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 1000.00 10.00 10.00 Using where
analyze select * from t1 where a=150;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 1000.00 0.10 0.00 Using where
analyze select * from t1 where a=200;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 1000.00 10.00 10.00 Using where
drop table t0,t1;
22 changes: 22 additions & 0 deletions mysql-test/main/statistics_json.test
Original file line number Diff line number Diff line change
Expand Up @@ -295,3 +295,25 @@ select c from t1 where c > '1';

drop table t1;

--echo #
--echo # MDEV-26849: JSON Histograms: point selectivity estimates are off for non-existent values
--echo #
--echo #

create table t0(a int);
insert into t0 (a) values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1(a int);
insert into t1 select 100*A.a from t0 A, t0 B, t0 C;
select a, count(*) from t1 group by a order by a;
set histogram_type=json_hb, histogram_size=default;
analyze table t1 persistent for all;
select * from mysql.column_stats where table_name='t1';
analyze select * from t1 where a=0;
analyze select * from t1 where a=50;
analyze select * from t1 where a=70;
analyze select * from t1 where a=100;
analyze select * from t1 where a=150;
analyze select * from t1 where a=200;

drop table t0,t1;

10 changes: 6 additions & 4 deletions sql/opt_histogram_json.cc
Original file line number Diff line number Diff line change
Expand Up @@ -616,7 +616,7 @@ double position_in_interval(Field *field, const uchar *key, uint key_len,


double Histogram_json_hb::point_selectivity(Field *field, key_range *endpoint,
double avg_sel)
double avg_sel, double total_rows)
{
const uchar *key = endpoint->key;
if (field->real_maybe_null())
Expand All @@ -631,9 +631,11 @@ double Histogram_json_hb::point_selectivity(Field *field, key_range *endpoint,

if (buckets[idx].ndv == 1 && !equal)
{
// The bucket has a single value and it doesn't match! Use the global
// average.
sel= avg_sel;
/*
The bucket has a single value and it doesn't match! Return a very
small value.
*/
sel= 1.0 / total_rows;
}
else
{
Expand Down
3 changes: 2 additions & 1 deletion sql/opt_histogram_json.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,8 @@ class Histogram_json_hb : public Histogram_base
ulonglong size) override;

double point_selectivity(Field *field, key_range *endpoint,
double avg_selection) override;
double avg_selection,
double total_rows) override;
double range_selectivity(Field *field, key_range *min_endp,
key_range *max_endp) override;

Expand Down
6 changes: 4 additions & 2 deletions sql/sql_statistics.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3882,7 +3882,8 @@ double get_column_range_cardinality(Field *field,
{
res= col_non_nulls *
hist->point_selectivity(field, min_endp,
avg_frequency / col_non_nulls);
avg_frequency / col_non_nulls,
tab_records);
}
}
else if (avg_frequency == 0.0)
Expand Down Expand Up @@ -3973,7 +3974,8 @@ double get_column_range_cardinality(Field *field,
*/

double Histogram_binary::point_selectivity(Field *field, key_range *endpoint,
double avg_sel)
double avg_sel,
double total_records)
{
double sel;
Column_statistics *col_stats= field->read_stats;
Expand Down
6 changes: 4 additions & 2 deletions sql/sql_statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,8 @@ class Histogram_base


virtual double point_selectivity(Field *field, key_range *endpoint,
double avg_selection)=0;
double avg_selectivity,
double total_rows)=0;
virtual double range_selectivity(Field *field, key_range *min_endp,
key_range *max_endp)=0;

Expand Down Expand Up @@ -355,7 +356,8 @@ class Histogram_binary : public Histogram_base
Estimate selectivity of "col=const" using a histogram
*/
double point_selectivity(Field *field, key_range *endpoint,
double avg_sel) override;
double avg_sel,
double total_rows) override;
};


Expand Down

0 comments on commit 05877df

Please sign in to comment.