Skip to content

Commit 4886d14

Browse files
author
Varun Gupta
committed
MDEV-17032: Estimates are higher for partitions of a table with @@use_stat_tables= PREFERABLY
The problem here is EITS statistics does not calculate statistics for the partitions of the table. So a temporary solution would be to not read EITS statistics for partitioned tables. Also disabling reading of EITS for columns that participate in the partition list of a table.
1 parent 12b1ba1 commit 4886d14

File tree

7 files changed

+235
-9
lines changed

7 files changed

+235
-9
lines changed

mysql-test/r/partition.result

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2645,3 +2645,103 @@ Warnings:
26452645
Note 1517 Duplicate partition name p2
26462646
DEALLOCATE PREPARE stmt;
26472647
DROP TABLE t1;
2648+
#
2649+
# MDEV-17032: Estimates are higher for partitions of a table with @@use_stat_tables= PREFERABLY
2650+
#
2651+
create table t0(a int);
2652+
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
2653+
create table t1(a int);
2654+
insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
2655+
create table t2 (
2656+
part_key int,
2657+
a int,
2658+
b int
2659+
) partition by list(part_key) (
2660+
partition p0 values in (0),
2661+
partition p1 values in (1),
2662+
partition p2 values in (2),
2663+
partition p3 values in (3),
2664+
partition p4 values in (4)
2665+
);
2666+
insert into t2
2667+
select mod(a,5), a/100, mod(a,5) from t1;
2668+
set @save_use_stat_tables= @@use_stat_tables;
2669+
set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity;
2670+
#
2671+
# Tests using stats provided by the storage engine
2672+
#
2673+
explain extended select * from t2 where part_key=1;
2674+
id select_type table type possible_keys key key_len ref rows filtered Extra
2675+
1 SIMPLE t2 ALL NULL NULL NULL NULL 200 100.00 Using where
2676+
Warnings:
2677+
Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`part_key` = 1)
2678+
explain partitions select * from t2 where part_key=1;
2679+
id select_type table partitions type possible_keys key key_len ref rows Extra
2680+
1 SIMPLE t2 p1 ALL NULL NULL NULL NULL 200 Using where
2681+
explain extended select * from t2 where part_key in (1,2);
2682+
id select_type table type possible_keys key key_len ref rows filtered Extra
2683+
1 SIMPLE t2 ALL NULL NULL NULL NULL 400 100.00 Using where
2684+
Warnings:
2685+
Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`part_key` in (1,2))
2686+
explain partitions select * from t2 where part_key in (1,2);
2687+
id select_type table partitions type possible_keys key key_len ref rows Extra
2688+
1 SIMPLE t2 p1,p2 ALL NULL NULL NULL NULL 400 Using where
2689+
explain extended select * from t2 where b=5;
2690+
id select_type table type possible_keys key key_len ref rows filtered Extra
2691+
1 SIMPLE t2 ALL NULL NULL NULL NULL 1000 100.00 Using where
2692+
Warnings:
2693+
Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`b` = 5)
2694+
explain partitions select * from t2 where b=5;
2695+
id select_type table partitions type possible_keys key key_len ref rows Extra
2696+
1 SIMPLE t2 p0,p1,p2,p3,p4 ALL NULL NULL NULL NULL 1000 Using where
2697+
explain extended select * from t2 partition(p0) where b=1;
2698+
id select_type table type possible_keys key key_len ref rows filtered Extra
2699+
1 SIMPLE t2 ALL NULL NULL NULL NULL 200 100.00 Using where
2700+
Warnings:
2701+
Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` PARTITION (`p0`) where (`test`.`t2`.`b` = 1)
2702+
set @save_histogram_size=@@histogram_size;
2703+
set @@histogram_size=100;
2704+
set @@use_stat_tables= PREFERABLY;
2705+
set @@optimizer_use_condition_selectivity=4;
2706+
analyze table t2;
2707+
Table Op Msg_type Msg_text
2708+
test.t2 analyze status Engine-independent statistics collected
2709+
test.t2 analyze status OK
2710+
#
2711+
# Tests using EITS
2712+
#
2713+
# filtered should be 100
2714+
explain extended select * from t2 where part_key=1;
2715+
id select_type table type possible_keys key key_len ref rows filtered Extra
2716+
1 SIMPLE t2 ALL NULL NULL NULL NULL 200 100.00 Using where
2717+
Warnings:
2718+
Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`part_key` = 1)
2719+
explain partitions select * from t2 where part_key=1;
2720+
id select_type table partitions type possible_keys key key_len ref rows Extra
2721+
1 SIMPLE t2 p1 ALL NULL NULL NULL NULL 200 Using where
2722+
# filtered should be 100
2723+
explain extended select * from t2 where part_key in (1,2);
2724+
id select_type table type possible_keys key key_len ref rows filtered Extra
2725+
1 SIMPLE t2 ALL NULL NULL NULL NULL 400 100.00 Using where
2726+
Warnings:
2727+
Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`part_key` in (1,2))
2728+
explain partitions select * from t2 where part_key in (1,2);
2729+
id select_type table partitions type possible_keys key key_len ref rows Extra
2730+
1 SIMPLE t2 p1,p2 ALL NULL NULL NULL NULL 400 Using where
2731+
explain extended select * from t2 where b=5;
2732+
id select_type table type possible_keys key key_len ref rows filtered Extra
2733+
1 SIMPLE t2 ALL NULL NULL NULL NULL 1000 19.80 Using where
2734+
Warnings:
2735+
Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`b` = 5)
2736+
explain partitions select * from t2 where b=5;
2737+
id select_type table partitions type possible_keys key key_len ref rows Extra
2738+
1 SIMPLE t2 p0,p1,p2,p3,p4 ALL NULL NULL NULL NULL 1000 Using where
2739+
explain extended select * from t2 partition(p0) where b=1;
2740+
id select_type table type possible_keys key key_len ref rows filtered Extra
2741+
1 SIMPLE t2 ALL NULL NULL NULL NULL 200 19.80 Using where
2742+
Warnings:
2743+
Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` PARTITION (`p0`) where (`test`.`t2`.`b` = 1)
2744+
set @@use_stat_tables= @save_use_stat_tables;
2745+
set @@optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
2746+
set @@histogram_size= @save_histogram_size;
2747+
drop table t0,t1,t2;

mysql-test/t/partition.test

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2897,3 +2897,64 @@ EXECUTE stmt;
28972897
DEALLOCATE PREPARE stmt;
28982898
DROP TABLE t1;
28992899

2900+
--echo #
2901+
--echo # MDEV-17032: Estimates are higher for partitions of a table with @@use_stat_tables= PREFERABLY
2902+
--echo #
2903+
2904+
create table t0(a int);
2905+
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
2906+
2907+
create table t1(a int);
2908+
insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
2909+
2910+
2911+
create table t2 (
2912+
part_key int,
2913+
a int,
2914+
b int
2915+
) partition by list(part_key) (
2916+
partition p0 values in (0),
2917+
partition p1 values in (1),
2918+
partition p2 values in (2),
2919+
partition p3 values in (3),
2920+
partition p4 values in (4)
2921+
);
2922+
insert into t2
2923+
select mod(a,5), a/100, mod(a,5) from t1;
2924+
2925+
set @save_use_stat_tables= @@use_stat_tables;
2926+
set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity;
2927+
--echo #
2928+
--echo # Tests using stats provided by the storage engine
2929+
--echo #
2930+
explain extended select * from t2 where part_key=1;
2931+
explain partitions select * from t2 where part_key=1;
2932+
explain extended select * from t2 where part_key in (1,2);
2933+
explain partitions select * from t2 where part_key in (1,2);
2934+
explain extended select * from t2 where b=5;
2935+
explain partitions select * from t2 where b=5;
2936+
explain extended select * from t2 partition(p0) where b=1;
2937+
2938+
2939+
set @save_histogram_size=@@histogram_size;
2940+
set @@histogram_size=100;
2941+
set @@use_stat_tables= PREFERABLY;
2942+
set @@optimizer_use_condition_selectivity=4;
2943+
analyze table t2;
2944+
--echo #
2945+
--echo # Tests using EITS
2946+
--echo #
2947+
--echo # filtered should be 100
2948+
explain extended select * from t2 where part_key=1;
2949+
explain partitions select * from t2 where part_key=1;
2950+
--echo # filtered should be 100
2951+
explain extended select * from t2 where part_key in (1,2);
2952+
explain partitions select * from t2 where part_key in (1,2);
2953+
explain extended select * from t2 where b=5;
2954+
explain partitions select * from t2 where b=5;
2955+
explain extended select * from t2 partition(p0) where b=1;
2956+
2957+
set @@use_stat_tables= @save_use_stat_tables;
2958+
set @@optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
2959+
set @@histogram_size= @save_histogram_size;
2960+
drop table t0,t1,t2;

sql/opt_range.cc

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3322,14 +3322,17 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param,
33223322
{
33233323
Field **field_ptr;
33243324
TABLE *table= param->table;
3325+
partition_info *part_info= NULL;
3326+
#ifdef WITH_PARTITION_STORAGE_ENGINE
3327+
part_info= table->part_info;
3328+
#endif
33253329
uint parts= 0;
33263330

33273331
for (field_ptr= table->field; *field_ptr; field_ptr++)
33283332
{
3329-
Column_statistics* col_stats= (*field_ptr)->read_stats;
3330-
if (bitmap_is_set(used_fields, (*field_ptr)->field_index)
3331-
&& col_stats && !col_stats->no_stat_values_provided()
3332-
&& !((*field_ptr)->type() == MYSQL_TYPE_GEOMETRY))
3333+
Field *field= *field_ptr;
3334+
if (bitmap_is_set(used_fields, field->field_index) &&
3335+
is_eits_usable(field))
33333336
parts++;
33343337
}
33353338

@@ -3347,12 +3350,10 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param,
33473350
uint max_key_len= 0;
33483351
for (field_ptr= table->field; *field_ptr; field_ptr++)
33493352
{
3350-
if (bitmap_is_set(used_fields, (*field_ptr)->field_index))
3353+
Field *field= *field_ptr;
3354+
if (bitmap_is_set(used_fields, field->field_index))
33513355
{
3352-
Field *field= *field_ptr;
3353-
Column_statistics* col_stats= field->read_stats;
3354-
if (field->type() == MYSQL_TYPE_GEOMETRY ||
3355-
!col_stats || col_stats->no_stat_values_provided())
3356+
if (!is_eits_usable(field))
33563357
continue;
33573358

33583359
uint16 store_length;

sql/partition_info.cc

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3164,6 +3164,23 @@ void partition_info::print_debug(const char *str, uint *value)
31643164
DBUG_PRINT("info", ("parser: %s", str));
31653165
DBUG_VOID_RETURN;
31663166
}
3167+
3168+
bool partition_info::field_in_partition_expr(Field *field) const
3169+
{
3170+
uint i;
3171+
for (i= 0; i < num_part_fields; i++)
3172+
{
3173+
if (field->eq(part_field_array[i]))
3174+
return TRUE;
3175+
}
3176+
for (i= 0; i < num_subpart_fields; i++)
3177+
{
3178+
if (field->eq(subpart_field_array[i]))
3179+
return TRUE;
3180+
}
3181+
return FALSE;
3182+
}
3183+
31673184
#else /* WITH_PARTITION_STORAGE_ENGINE */
31683185
/*
31693186
For builds without partitioning we need to define these functions

sql/partition_info.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,7 @@ class partition_info : public Sql_alloc
384384
bool is_full_part_expr_in_fields(List<Item> &fields);
385385
public:
386386
bool has_unique_name(partition_element *element);
387+
bool field_in_partition_expr(Field *field) const;
387388
};
388389

389390
uint32 get_next_partition_id_range(struct st_partition_iter* part_iter);

sql/sql_statistics.cc

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "opt_range.h"
3131
#include "my_atomic.h"
3232
#include "sql_show.h"
33+
#include "sql_partition.h"
3334

3435
/*
3536
The system variable 'use_stat_tables' can take one of the
@@ -3589,6 +3590,22 @@ void set_statistics_for_table(THD *thd, TABLE *table)
35893590
(use_stat_table_mode <= COMPLEMENTARY ||
35903591
!table->stats_is_read || read_stats->cardinality_is_null) ?
35913592
table->file->stats.records : read_stats->cardinality;
3593+
3594+
/*
3595+
For partitioned table, EITS statistics is based on data from all partitions.
3596+
3597+
On the other hand, Partition Pruning figures which partitions will be
3598+
accessed and then computes the estimate of rows in used_partitions.
3599+
3600+
Use the estimate from Partition Pruning as it is typically more precise.
3601+
Ideally, EITS should provide per-partition statistics but this is not
3602+
implemented currently.
3603+
*/
3604+
#ifdef WITH_PARTITION_STORAGE_ENGINE
3605+
if (table->part_info)
3606+
table->used_stat_records= table->file->stats.records;
3607+
#endif
3608+
35923609
KEY *key_info, *key_info_end;
35933610
for (key_info= table->key_info, key_info_end= key_info+table->s->keys;
35943611
key_info < key_info_end; key_info++)
@@ -3904,3 +3921,31 @@ bool is_stat_table(const char *db, const char *table)
39043921
}
39053922
return false;
39063923
}
3924+
3925+
/*
3926+
Check wheter we can use EITS statistics for a field or not
3927+
3928+
TRUE : Use EITS for the columns
3929+
FALSE: Otherwise
3930+
*/
3931+
3932+
bool is_eits_usable(Field *field)
3933+
{
3934+
partition_info *part_info= NULL;
3935+
#ifdef WITH_PARTITION_STORAGE_ENGINE
3936+
part_info= field->table->part_info;
3937+
#endif
3938+
/*
3939+
(1): checks if we have EITS statistics for a particular column
3940+
(2): Don't use EITS for GEOMETRY columns
3941+
(3): Disabling reading EITS statistics for columns involved in the
3942+
partition list of a table. We assume the selecticivity for
3943+
such columns would be handled during partition pruning.
3944+
*/
3945+
Column_statistics* col_stats= field->read_stats;
3946+
if (col_stats && !col_stats->no_stat_values_provided() && //(1)
3947+
field->type() != MYSQL_TYPE_GEOMETRY && //(2)
3948+
(!part_info || !part_info->field_in_partition_expr(field))) //(3)
3949+
return TRUE;
3950+
return FALSE;
3951+
}

sql/sql_statistics.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ double get_column_range_cardinality(Field *field,
109109
key_range *max_endp,
110110
uint range_flag);
111111
bool is_stat_table(const char *db, const char *table);
112+
bool is_eits_usable(Field* field);
112113

113114
class Histogram
114115
{

0 commit comments

Comments
 (0)