Skip to content

Commit c630e23

Browse files
committed
MDEV-34894: Poor query plan, because range estimates are not reused for ref(const)
(Variant 4, with @@optimizer_adjust_secondary_key_costs, reuse in two places, and conditions are replaced with equivalent simpler forms in two more) In best_access_path(), ReuseRangeEstimateForRef-3, the check for whether "all used key_part_i used key_part_i=const" was incorrect: it may produced a "NO" answer for cases when we had: key_part1= const // some key parts are usable key_part2= value_not_in_join_prefix //present but unusable key_part3= non_const_value // unusable due to gap in key parts. This caused the optimizer to fail to apply ReuseRangeEstimateForRef heuristics. The consequence is poor query plan choice when the index in question has very skewed data distribution. The fix is enabled if its @@optimizer_adjust_secondary_key_costs flag is set.
1 parent c41ab95 commit c630e23

File tree

9 files changed

+214
-22
lines changed

9 files changed

+214
-22
lines changed

mysql-test/main/join.result

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3495,3 +3495,87 @@ a b c
34953495
SET OPTIMIZER_USE_CONDITION_SELECTIVITY=@tmp;
34963496
DROP TABLE t1,t2;
34973497
# End of 10.6 tests
3498+
#
3499+
# MDEV-34894: Poor query plan, because range estimates are not reused for ref(const)
3500+
#
3501+
create table t0 (
3502+
a int,
3503+
b int,
3504+
dummy int
3505+
);
3506+
insert into t0 select seq,seq,seq from seq_1_to_10;
3507+
create table t1 (
3508+
pk1 int,
3509+
pk2 int,
3510+
pk3 int,
3511+
key1 int,
3512+
key(key1),
3513+
filler char(100),
3514+
primary key(pk1,pk2,pk3)
3515+
);
3516+
insert into t1
3517+
select
3518+
seq, seq, seq,
3519+
FLOOR(seq/2),
3520+
'filler-data'
3521+
from seq_1_to_10000;
3522+
analyze table t1;
3523+
Table Op Msg_type Msg_text
3524+
test.t1 analyze status Engine-independent statistics collected
3525+
test.t1 analyze status OK
3526+
update t1 set pk1=1 where pk1 between 1 and 200;
3527+
explain select * from t1 where pk1=1;
3528+
id select_type table type possible_keys key key_len ref rows Extra
3529+
1 SIMPLE t1 ref PRIMARY PRIMARY 4 const 231
3530+
explain select * from t0,t1 where t1.pk1=t0.a;
3531+
id select_type table type possible_keys key key_len ref rows Extra
3532+
1 SIMPLE t0 ALL NULL NULL NULL NULL 10 Using where
3533+
1 SIMPLE t1 ref PRIMARY PRIMARY 4 test.t0.a 1
3534+
create table t2 (
3535+
col int
3536+
);
3537+
insert into t2 select seq from seq_1_to_10000;
3538+
set optimizer_adjust_secondary_key_costs='fix_reuse_range_for_ref';
3539+
# This must use this good query plan:
3540+
# t0 - ALL
3541+
# t1 - ref, key=key1, not PRIMARY as pk1=1 is true for 20% of all rows
3542+
# t2 - ALL
3543+
explain select * from t0, t1, t2
3544+
where
3545+
t1.pk1=1 and t1.pk2=t2.col and t1.pk3=t0.dummy and
3546+
t1.key1=t0.b;
3547+
id select_type table type possible_keys key key_len ref rows Extra
3548+
1 SIMPLE t0 ALL NULL NULL NULL NULL 10 Using where
3549+
1 SIMPLE t1 ref PRIMARY,key1 key1 5 test.t0.b 1 Using where
3550+
1 SIMPLE t2 ALL NULL NULL NULL NULL 10000 Using where; Using join buffer (flat, BNL join)
3551+
set optimizer_adjust_secondary_key_costs='';
3552+
# Bad query:
3553+
# t0 - ALL
3554+
# t1 - ref, key=PRIMARY
3555+
# t2 - ALL
3556+
explain select * from t0, t1, t2
3557+
where
3558+
t1.pk1=1 and t1.pk2=t2.col and t1.pk3=t0.dummy and
3559+
t1.key1=t0.b;
3560+
id select_type table type possible_keys key key_len ref rows Extra
3561+
1 SIMPLE t0 ALL NULL NULL NULL NULL 10
3562+
1 SIMPLE t1 ref PRIMARY,key1 PRIMARY 4 const 1 Using index condition; Using where
3563+
1 SIMPLE t2 ALL NULL NULL NULL NULL 10000 Using where; Using join buffer (flat, BNL join)
3564+
drop table t0,t1,t2;
3565+
set @@optimizer_adjust_secondary_key_costs="fix_reuse_range_for_ref";
3566+
CREATE OR REPLACE TABLE t1 (a INT NOT NULL, b INT NOT NULL, c INT, key(a,b,c)) ENGINE=Aria;
3567+
INSERT INTO t1 select seq/10,mod(seq,2),seq from seq_1_to_1000;
3568+
update t1 set a=10 WHERE c < 100;
3569+
update t1 set a=12 WHERE a=11;
3570+
insert into t1 values (11,1,11), (11,2,11);
3571+
create or replace table t2 select seq from seq_1_to_10;
3572+
explain select count(*) from t1, t2 as seq where a=10 and b=seq.seq;
3573+
id select_type table type possible_keys key key_len ref rows Extra
3574+
1 SIMPLE seq ALL NULL NULL NULL NULL 10
3575+
1 SIMPLE t1 ref a a 8 const,test.seq.seq 5 Using where; Using index
3576+
explain select count(*) from t1, t2 as seq where a=11 and b=seq.seq;
3577+
id select_type table type possible_keys key key_len ref rows Extra
3578+
1 SIMPLE t1 ref a a 4 const 2 Using index
3579+
1 SIMPLE seq ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
3580+
drop table t1,t2;
3581+
set @@optimizer_adjust_secondary_key_costs=default;

mysql-test/main/join.test

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1904,3 +1904,81 @@ SELECT * FROM
19041904
SET OPTIMIZER_USE_CONDITION_SELECTIVITY=@tmp;
19051905
DROP TABLE t1,t2;
19061906
--echo # End of 10.6 tests
1907+
1908+
--source include/have_sequence.inc
1909+
1910+
--echo #
1911+
--echo # MDEV-34894: Poor query plan, because range estimates are not reused for ref(const)
1912+
--echo #
1913+
create table t0 (
1914+
a int,
1915+
b int,
1916+
dummy int
1917+
);
1918+
insert into t0 select seq,seq,seq from seq_1_to_10;
1919+
1920+
create table t1 (
1921+
pk1 int,
1922+
pk2 int,
1923+
pk3 int,
1924+
key1 int,
1925+
key(key1),
1926+
filler char(100),
1927+
primary key(pk1,pk2,pk3)
1928+
);
1929+
1930+
insert into t1
1931+
select
1932+
seq, seq, seq,
1933+
FLOOR(seq/2),
1934+
'filler-data'
1935+
from seq_1_to_10000;
1936+
analyze table t1;
1937+
1938+
update t1 set pk1=1 where pk1 between 1 and 200;
1939+
1940+
explain select * from t1 where pk1=1;
1941+
1942+
explain select * from t0,t1 where t1.pk1=t0.a;
1943+
1944+
create table t2 (
1945+
col int
1946+
);
1947+
insert into t2 select seq from seq_1_to_10000;
1948+
1949+
set optimizer_adjust_secondary_key_costs='fix_reuse_range_for_ref';
1950+
--echo # This must use this good query plan:
1951+
--echo # t0 - ALL
1952+
--echo # t1 - ref, key=key1, not PRIMARY as pk1=1 is true for 20% of all rows
1953+
--echo # t2 - ALL
1954+
explain select * from t0, t1, t2
1955+
where
1956+
t1.pk1=1 and t1.pk2=t2.col and t1.pk3=t0.dummy and
1957+
t1.key1=t0.b;
1958+
1959+
set optimizer_adjust_secondary_key_costs='';
1960+
--echo # Bad query:
1961+
--echo # t0 - ALL
1962+
--echo # t1 - ref, key=PRIMARY
1963+
--echo # t2 - ALL
1964+
explain select * from t0, t1, t2
1965+
where
1966+
t1.pk1=1 and t1.pk2=t2.col and t1.pk3=t0.dummy and
1967+
t1.key1=t0.b;
1968+
1969+
drop table t0,t1,t2;
1970+
1971+
set @@optimizer_adjust_secondary_key_costs="fix_reuse_range_for_ref";
1972+
CREATE OR REPLACE TABLE t1 (a INT NOT NULL, b INT NOT NULL, c INT, key(a,b,c)) ENGINE=Aria;
1973+
INSERT INTO t1 select seq/10,mod(seq,2),seq from seq_1_to_1000;
1974+
update t1 set a=10 WHERE c < 100;
1975+
update t1 set a=12 WHERE a=11;
1976+
insert into t1 values (11,1,11), (11,2,11);
1977+
create or replace table t2 select seq from seq_1_to_10;
1978+
1979+
explain select count(*) from t1, t2 as seq where a=10 and b=seq.seq;
1980+
# This will execute code in ReuseRangeEstimateForRef-4
1981+
explain select count(*) from t1, t2 as seq where a=11 and b=seq.seq;
1982+
drop table t1,t2;
1983+
1984+
set @@optimizer_adjust_secondary_key_costs=default;

mysql-test/main/mysqld--help.result

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -721,8 +721,10 @@ The following specify which files/extra groups are read (specified before remain
721721
disable_forced_index_in_group_by = Disable automatic
722722
forced index in GROUP BY. fix_innodb_cardinality =
723723
Disable doubling of the Cardinality for InnoDB secondary
724-
keys. This variable will be deleted in MariaDB 11.0 as it
725-
is not needed with the new 11.0 optimizer.
724+
keys. fix_reuse_range_for_ref = Do a better job at
725+
reusing range access estimates when estimating ref
726+
access. This variable will be deleted in MariaDB 11.0 as
727+
it is not needed with the new 11.0 optimizer.
726728
Use 'ALL' to set all combinations.
727729
--optimizer-join-limit-pref-ratio=#
728730
For queries with JOIN and ORDER BY LIMIT : make the
@@ -1705,7 +1707,7 @@ old-alter-table DEFAULT
17051707
old-mode UTF8_IS_UTF8MB3
17061708
old-passwords FALSE
17071709
old-style-user-limits FALSE
1708-
optimizer-adjust-secondary-key-costs
1710+
optimizer-adjust-secondary-key-costs fix_reuse_range_for_ref
17091711
optimizer-join-limit-pref-ratio 0
17101712
optimizer-max-sel-arg-weight 32000
17111713
optimizer-max-sel-args 16000

mysql-test/main/secondary_key_costs.result

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ json_detailed(json_extract(@trace, '$**.considered_access_paths'))
8282
drop table t1, name, flag2;
8383
select @@optimizer_adjust_secondary_key_costs;
8484
@@optimizer_adjust_secondary_key_costs
85-
85+
fix_reuse_range_for_ref
8686
set @@optimizer_adjust_secondary_key_costs=7;
8787
select @@optimizer_adjust_secondary_key_costs;
8888
@@optimizer_adjust_secondary_key_costs

mysql-test/suite/sys_vars/r/sysvars_server_embedded.result

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2275,11 +2275,11 @@ COMMAND_LINE_ARGUMENT REQUIRED
22752275
VARIABLE_NAME OPTIMIZER_ADJUST_SECONDARY_KEY_COSTS
22762276
VARIABLE_SCOPE SESSION
22772277
VARIABLE_TYPE SET
2278-
VARIABLE_COMMENT A bit field with the following values: adjust_secondary_key_cost = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight adjustment of filter cost. disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. fix_innodb_cardinality = Disable doubling of the Cardinality for InnoDB secondary keys. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer.
2278+
VARIABLE_COMMENT A bit field with the following values: adjust_secondary_key_cost = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight adjustment of filter cost. disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. fix_innodb_cardinality = Disable doubling of the Cardinality for InnoDB secondary keys. fix_reuse_range_for_ref = Do a better job at reusing range access estimates when estimating ref access. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer.
22792279
NUMERIC_MIN_VALUE NULL
22802280
NUMERIC_MAX_VALUE NULL
22812281
NUMERIC_BLOCK_SIZE NULL
2282-
ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by,fix_innodb_cardinality
2282+
ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by,fix_innodb_cardinality,fix_reuse_range_for_ref
22832283
READ_ONLY NO
22842284
COMMAND_LINE_ARGUMENT REQUIRED
22852285
VARIABLE_NAME OPTIMIZER_JOIN_LIMIT_PREF_RATIO

mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2435,11 +2435,11 @@ COMMAND_LINE_ARGUMENT REQUIRED
24352435
VARIABLE_NAME OPTIMIZER_ADJUST_SECONDARY_KEY_COSTS
24362436
VARIABLE_SCOPE SESSION
24372437
VARIABLE_TYPE SET
2438-
VARIABLE_COMMENT A bit field with the following values: adjust_secondary_key_cost = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight adjustment of filter cost. disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. fix_innodb_cardinality = Disable doubling of the Cardinality for InnoDB secondary keys. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer.
2438+
VARIABLE_COMMENT A bit field with the following values: adjust_secondary_key_cost = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight adjustment of filter cost. disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. fix_innodb_cardinality = Disable doubling of the Cardinality for InnoDB secondary keys. fix_reuse_range_for_ref = Do a better job at reusing range access estimates when estimating ref access. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer.
24392439
NUMERIC_MIN_VALUE NULL
24402440
NUMERIC_MAX_VALUE NULL
24412441
NUMERIC_BLOCK_SIZE NULL
2442-
ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by,fix_innodb_cardinality
2442+
ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by,fix_innodb_cardinality,fix_reuse_range_for_ref
24432443
READ_ONLY NO
24442444
COMMAND_LINE_ARGUMENT REQUIRED
24452445
VARIABLE_NAME OPTIMIZER_JOIN_LIMIT_PREF_RATIO

sql/sql_priv.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,7 @@
275275
#define OPTIMIZER_ADJ_DISABLE_MAX_SEEKS (2)
276276
#define OPTIMIZER_ADJ_DISABLE_FORCE_INDEX_GROUP_BY (4)
277277
#define OPTIMIZER_FIX_INNODB_CARDINALITY (8)
278+
#define OPTIMIZER_ADJ_FIX_REUSE_RANGE_FOR_REF (16)
278279

279280
/*
280281
Replication uses 8 bytes to store SQL_MODE in the binary log. The day you

sql/sql_select.cc

Lines changed: 36 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8125,6 +8125,7 @@ best_access_path(JOIN *join,
81258125
key_part_map notnull_part=0; // key parts which won't have NULL in lookup tuple.
81268126
table_map found_ref= 0;
81278127
uint key= keyuse->key;
8128+
uint max_const_parts;
81288129
filter= 0;
81298130
bool ft_key= (keyuse->keypart == FT_KEYPART);
81308131
/* Bitmap of keyparts where the ref access is over 'keypart=const': */
@@ -8228,6 +8229,8 @@ best_access_path(JOIN *join,
82288229
rec= MATCHING_ROWS_IN_OTHER_TABLE; // Fix for small tables
82298230

82308231
Json_writer_object trace_access_idx(thd);
8232+
max_const_parts= max_part_bit(const_part);
8233+
82318234
/*
82328235
full text keys require special treatment
82338236
*/
@@ -8344,9 +8347,7 @@ best_access_path(JOIN *join,
83448347
in ReuseRangeEstimateForRef-3.
83458348
*/
83468349
if (table->opt_range_keys.is_set(key) &&
8347-
(const_part &
8348-
(((key_part_map)1 << table->opt_range[key].key_parts)-1)) ==
8349-
(((key_part_map)1 << table->opt_range[key].key_parts)-1) &&
8350+
table->opt_range[key].key_parts <= max_const_parts &&
83508351
table->opt_range[key].ranges == 1 &&
83518352
records > (double) table->opt_range[key].rows)
83528353
{
@@ -8394,6 +8395,17 @@ best_access_path(JOIN *join,
83948395
found_part == PREV_BITS(uint,keyinfo->user_defined_key_parts)))
83958396
{
83968397
max_key_part= max_part_bit(found_part);
8398+
bool all_used_equalities_are_const;
8399+
if ((thd->variables.optimizer_adjust_secondary_key_costs &
8400+
OPTIMIZER_ADJ_FIX_REUSE_RANGE_FOR_REF))
8401+
{
8402+
all_used_equalities_are_const= (max_key_part == max_const_parts);
8403+
}
8404+
else
8405+
{
8406+
// Old, incorrect check:
8407+
all_used_equalities_are_const= !found_ref;
8408+
}
83978409
/*
83988410
ReuseRangeEstimateForRef-3:
83998411
We're now considering a ref[or_null] access via
@@ -8408,7 +8420,7 @@ best_access_path(JOIN *join,
84088420
create quick select over another index), so we can't compare
84098421
them to (**). We'll make indirect judgements instead.
84108422
The sufficient conditions for re-use are:
8411-
(C1) All e_i in (**) are constants, i.e. found_ref==FALSE. (if
8423+
(C1) All e_i in (**) are constants (if
84128424
this is not satisfied we have no way to know which ranges
84138425
will be actually scanned by 'ref' until we execute the
84148426
join)
@@ -8418,7 +8430,7 @@ best_access_path(JOIN *join,
84188430
We also have a property that "range optimizer produces equal or
84198431
tighter set of scan intervals than ref(const) optimizer". Each
84208432
of the intervals in (**) are "tightest possible" intervals when
8421-
one limits itself to using keyparts 1..K (which we do in #2).
8433+
one limits itself to using keyparts 1..K (which we do in #2).
84228434
From here it follows that range access used either one, or
84238435
both of the (I1) and (I2) intervals:
84248436

@@ -8433,7 +8445,8 @@ best_access_path(JOIN *join,
84338445

84348446
(C3) "range optimizer used (have ref_or_null?2:1) intervals"
84358447
*/
8436-
if (table->opt_range_keys.is_set(key) && !found_ref && //(C1)
8448+
if (table->opt_range_keys.is_set(key) &&
8449+
all_used_equalities_are_const && // (C1)
84378450
table->opt_range[key].key_parts == max_key_part && //(C2)
84388451
table->opt_range[key].ranges == 1 + MY_TEST(ref_or_null_part)) //(C3)
84398452
{
@@ -8466,10 +8479,10 @@ best_access_path(JOIN *join,
84668479
*/
84678480
if (table->opt_range_keys.is_set(key))
84688481
{
8482+
double rows= (double) table->opt_range[key].rows;
84698483
if (table->opt_range[key].key_parts >= max_key_part) // (2)
84708484
{
8471-
double rows= (double) table->opt_range[key].rows;
8472-
if (!found_ref && // (1)
8485+
if (all_used_equalities_are_const && // (1)
84738486
records < rows) // (3)
84748487
{
84758488
trace_access_idx.add("used_range_estimates", "clipped up");
@@ -8537,15 +8550,26 @@ best_access_path(JOIN *join,
85378550
*/
85388551
if (table->opt_range_keys.is_set(key) &&
85398552
table->opt_range[key].key_parts <= max_key_part &&
8540-
const_part &
8541-
((key_part_map)1 << table->opt_range[key].key_parts) &&
85428553
table->opt_range[key].ranges == (1 +
85438554
MY_TEST(ref_or_null_part &
85448555
const_part)) &&
85458556
records > (double) table->opt_range[key].rows)
85468557
{
8547-
trace_access_idx.add("used_range_estimates", true);
8548-
records= (double) table->opt_range[key].rows;
8558+
bool all_parts_used;
8559+
if ((thd->variables.optimizer_adjust_secondary_key_costs &
8560+
OPTIMIZER_ADJ_FIX_REUSE_RANGE_FOR_REF))
8561+
{
8562+
all_parts_used= table->opt_range[key].key_parts <= max_const_parts;
8563+
}
8564+
else
8565+
all_parts_used= (bool) (const_part &
8566+
((key_part_map)1
8567+
<< table->opt_range[key].key_parts));
8568+
if (all_parts_used)
8569+
{
8570+
trace_access_idx.add("used_range_estimates", true);
8571+
records= (double) table->opt_range[key].rows;
8572+
}
85498573
}
85508574
}
85518575

sql/sys_vars.cc

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2837,7 +2837,8 @@ static Sys_var_ulong Sys_optimizer_trace_max_mem_size(
28372837
*/
28382838
static const char *adjust_secondary_key_cost[]=
28392839
{
2840-
"adjust_secondary_key_cost", "disable_max_seek", "disable_forced_index_in_group_by", "fix_innodb_cardinality",0
2840+
"adjust_secondary_key_cost", "disable_max_seek", "disable_forced_index_in_group_by",
2841+
"fix_innodb_cardinality", "fix_reuse_range_for_ref", 0
28412842
};
28422843

28432844

@@ -2852,10 +2853,12 @@ static Sys_var_set Sys_optimizer_adjust_secondary_key_costs(
28522853
"GROUP BY. "
28532854
"fix_innodb_cardinality = Disable doubling of the Cardinality for InnoDB "
28542855
"secondary keys. "
2856+
"fix_reuse_range_for_ref = Do a better job at reusing range access estimates "
2857+
"when estimating ref access. "
28552858
"This variable will be deleted in MariaDB 11.0 as it is not needed with the "
28562859
"new 11.0 optimizer.",
28572860
SESSION_VAR(optimizer_adjust_secondary_key_costs), CMD_LINE(REQUIRED_ARG),
2858-
adjust_secondary_key_cost, DEFAULT(0));
2861+
adjust_secondary_key_cost, DEFAULT(OPTIMIZER_ADJ_FIX_REUSE_RANGE_FOR_REF));
28592862

28602863

28612864
static Sys_var_charptr_fscs Sys_pid_file(

0 commit comments

Comments
 (0)