Skip to content

Commit

Permalink
MDEV-16374: Filtered shows 0 for materialization scan for a semi join,…
Browse files Browse the repository at this point in the history
… which makes the optimizer always pick

materialization scan over materialization lookup

For non-mergeable semi-joins we don't store the estimates of the IN subquery in table->file->stats.records.
In the function TABLE_LIST::fetch_number_of_rows, we store the number of rows in the tables
(estimates in case of derived table/views).
Currently we don't store the estimates for non-mergeable semi-joins, which leads the optimizer to select
materialization scan over materialization lookup.
Fixed this by storing these estimates appropriately.
  • Loading branch information
varunraiko committed Jun 9, 2018
1 parent 15155ec commit cd33280
Show file tree
Hide file tree
Showing 5 changed files with 161 additions and 11 deletions.
70 changes: 66 additions & 4 deletions mysql-test/r/selectivity.result
Original file line number Diff line number Diff line change
Expand Up @@ -356,13 +356,13 @@ and o_orderkey = l_orderkey
group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice
order by o_totalprice desc, o_orderdate;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 6005 0.00 Using temporary; Using filesort
1 PRIMARY orders eq_ref PRIMARY,i_o_custkey PRIMARY 4 <subquery2>.l_orderkey 1 100.00 Using where
1 PRIMARY orders ALL PRIMARY,i_o_custkey NULL NULL NULL 1500 100.00 Using where; Using temporary; Using filesort
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 dbt3_s001.orders.o_orderkey 1 100.00
1 PRIMARY customer eq_ref PRIMARY PRIMARY 4 dbt3_s001.orders.o_custkey 1 100.00
1 PRIMARY lineitem ref PRIMARY,i_l_orderkey,i_l_orderkey_quantity PRIMARY 4 <subquery2>.l_orderkey 4 100.00
1 PRIMARY lineitem ref PRIMARY,i_l_orderkey,i_l_orderkey_quantity i_l_orderkey_quantity 4 dbt3_s001.orders.o_orderkey 4 100.00 Using index
2 MATERIALIZED lineitem index NULL i_l_orderkey_quantity 13 NULL 6005 100.00 Using index
Warnings:
Note 1003 select `dbt3_s001`.`customer`.`c_name` AS `c_name`,`dbt3_s001`.`customer`.`c_custkey` AS `c_custkey`,`dbt3_s001`.`orders`.`o_orderkey` AS `o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE` AS `o_orderdate`,`dbt3_s001`.`orders`.`o_totalprice` AS `o_totalprice`,sum(`dbt3_s001`.`lineitem`.`l_quantity`) AS `sum(l_quantity)` from <materialize> (select `dbt3_s001`.`lineitem`.`l_orderkey` from `dbt3_s001`.`lineitem` group by `dbt3_s001`.`lineitem`.`l_orderkey` having (sum(`dbt3_s001`.`lineitem`.`l_quantity`) > 250)) join `dbt3_s001`.`customer` join `dbt3_s001`.`orders` join `dbt3_s001`.`lineitem` where ((`dbt3_s001`.`customer`.`c_custkey` = `dbt3_s001`.`orders`.`o_custkey`) and (`dbt3_s001`.`orders`.`o_orderkey` = `<subquery2>`.`l_orderkey`) and (`dbt3_s001`.`lineitem`.`l_orderkey` = `<subquery2>`.`l_orderkey`)) group by `dbt3_s001`.`customer`.`c_name`,`dbt3_s001`.`customer`.`c_custkey`,`dbt3_s001`.`orders`.`o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE`,`dbt3_s001`.`orders`.`o_totalprice` order by `dbt3_s001`.`orders`.`o_totalprice` desc,`dbt3_s001`.`orders`.`o_orderDATE`
Note 1003 select `dbt3_s001`.`customer`.`c_name` AS `c_name`,`dbt3_s001`.`customer`.`c_custkey` AS `c_custkey`,`dbt3_s001`.`orders`.`o_orderkey` AS `o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE` AS `o_orderdate`,`dbt3_s001`.`orders`.`o_totalprice` AS `o_totalprice`,sum(`dbt3_s001`.`lineitem`.`l_quantity`) AS `sum(l_quantity)` from <materialize> (select `dbt3_s001`.`lineitem`.`l_orderkey` from `dbt3_s001`.`lineitem` group by `dbt3_s001`.`lineitem`.`l_orderkey` having (sum(`dbt3_s001`.`lineitem`.`l_quantity`) > 250)) join `dbt3_s001`.`customer` join `dbt3_s001`.`orders` join `dbt3_s001`.`lineitem` where ((`dbt3_s001`.`customer`.`c_custkey` = `dbt3_s001`.`orders`.`o_custkey`) and (`<subquery2>`.`l_orderkey` = `dbt3_s001`.`orders`.`o_orderkey`) and (`dbt3_s001`.`lineitem`.`l_orderkey` = `dbt3_s001`.`orders`.`o_orderkey`)) group by `dbt3_s001`.`customer`.`c_name`,`dbt3_s001`.`customer`.`c_custkey`,`dbt3_s001`.`orders`.`o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE`,`dbt3_s001`.`orders`.`o_totalprice` order by `dbt3_s001`.`orders`.`o_totalprice` desc,`dbt3_s001`.`orders`.`o_orderDATE`
select
c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice, sum(l_quantity)
from customer, orders, lineitem
Expand Down Expand Up @@ -1535,6 +1535,68 @@ t
10:00:00
11:00:00
DROP TABLE t1;
#
# MDEV-16374: filtered shows 0 for materilization scan for a semi join, which makes optimizer
# always pick materialization scan over materialization lookup
#
create table t0(a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1 (a int, b int);
insert into t1 values (0,0),(1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9),(10,10),
(11,11),(12,12),(13,13),(14,14),(15,15);
set @@optimizer_use_condition_selectivity=2;
explain extended select * from t1 where a in (select max(a) from t1 group by b);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 16 100.00 Using where
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 test.t1.a 1 100.00
2 MATERIALIZED t1 ALL NULL NULL NULL NULL 16 100.00 Using temporary
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from <materialize> (select max(`test`.`t1`.`a`) from `test`.`t1` group by `test`.`t1`.`b`) join `test`.`t1` where (`<subquery2>`.`max(a)` = `test`.`t1`.`a`)
select * from t1 where a in (select max(a) from t1 group by b);
a b
0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
10 10
11 11
12 12
13 13
14 14
15 15
set @@optimizer_use_condition_selectivity=1;
explain extended select * from t1 where a in (select max(a) from t1 group by b);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 16 100.00 Using where
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 test.t1.a 1 100.00
2 MATERIALIZED t1 ALL NULL NULL NULL NULL 16 100.00 Using temporary
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from <materialize> (select max(`test`.`t1`.`a`) from `test`.`t1` group by `test`.`t1`.`b`) join `test`.`t1` where (`<subquery2>`.`max(a)` = `test`.`t1`.`a`)
select * from t1 where a in (select max(a) from t1 group by b);
a b
0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
10 10
11 11
12 12
13 13
14 14
15 15
drop table t1,t0;
set histogram_size=@save_histogram_size;
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
set use_stat_tables=@save_use_stat_tables;
70 changes: 66 additions & 4 deletions mysql-test/r/selectivity_innodb.result
Original file line number Diff line number Diff line change
Expand Up @@ -359,13 +359,13 @@ and o_orderkey = l_orderkey
group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice
order by o_totalprice desc, o_orderdate;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 6005 0.00 Using temporary; Using filesort
1 PRIMARY orders eq_ref PRIMARY,i_o_custkey PRIMARY 4 <subquery2>.l_orderkey 1 100.00 Using where
1 PRIMARY orders ALL PRIMARY,i_o_custkey NULL NULL NULL 1500 100.00 Using where; Using temporary; Using filesort
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 dbt3_s001.orders.o_orderkey 1 100.00
1 PRIMARY customer eq_ref PRIMARY PRIMARY 4 dbt3_s001.orders.o_custkey 1 100.00
1 PRIMARY lineitem ref PRIMARY,i_l_orderkey,i_l_orderkey_quantity PRIMARY 4 <subquery2>.l_orderkey 4 100.00
1 PRIMARY lineitem ref PRIMARY,i_l_orderkey,i_l_orderkey_quantity i_l_orderkey_quantity 4 dbt3_s001.orders.o_orderkey 4 100.00 Using index
2 MATERIALIZED lineitem index NULL PRIMARY 8 NULL 6005 100.00
Warnings:
Note 1003 select `dbt3_s001`.`customer`.`c_name` AS `c_name`,`dbt3_s001`.`customer`.`c_custkey` AS `c_custkey`,`dbt3_s001`.`orders`.`o_orderkey` AS `o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE` AS `o_orderdate`,`dbt3_s001`.`orders`.`o_totalprice` AS `o_totalprice`,sum(`dbt3_s001`.`lineitem`.`l_quantity`) AS `sum(l_quantity)` from <materialize> (select `dbt3_s001`.`lineitem`.`l_orderkey` from `dbt3_s001`.`lineitem` group by `dbt3_s001`.`lineitem`.`l_orderkey` having (sum(`dbt3_s001`.`lineitem`.`l_quantity`) > 250)) join `dbt3_s001`.`customer` join `dbt3_s001`.`orders` join `dbt3_s001`.`lineitem` where ((`dbt3_s001`.`customer`.`c_custkey` = `dbt3_s001`.`orders`.`o_custkey`) and (`dbt3_s001`.`orders`.`o_orderkey` = `<subquery2>`.`l_orderkey`) and (`dbt3_s001`.`lineitem`.`l_orderkey` = `<subquery2>`.`l_orderkey`)) group by `dbt3_s001`.`customer`.`c_name`,`dbt3_s001`.`customer`.`c_custkey`,`dbt3_s001`.`orders`.`o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE`,`dbt3_s001`.`orders`.`o_totalprice` order by `dbt3_s001`.`orders`.`o_totalprice` desc,`dbt3_s001`.`orders`.`o_orderDATE`
Note 1003 select `dbt3_s001`.`customer`.`c_name` AS `c_name`,`dbt3_s001`.`customer`.`c_custkey` AS `c_custkey`,`dbt3_s001`.`orders`.`o_orderkey` AS `o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE` AS `o_orderdate`,`dbt3_s001`.`orders`.`o_totalprice` AS `o_totalprice`,sum(`dbt3_s001`.`lineitem`.`l_quantity`) AS `sum(l_quantity)` from <materialize> (select `dbt3_s001`.`lineitem`.`l_orderkey` from `dbt3_s001`.`lineitem` group by `dbt3_s001`.`lineitem`.`l_orderkey` having (sum(`dbt3_s001`.`lineitem`.`l_quantity`) > 250)) join `dbt3_s001`.`customer` join `dbt3_s001`.`orders` join `dbt3_s001`.`lineitem` where ((`dbt3_s001`.`customer`.`c_custkey` = `dbt3_s001`.`orders`.`o_custkey`) and (`<subquery2>`.`l_orderkey` = `dbt3_s001`.`orders`.`o_orderkey`) and (`dbt3_s001`.`lineitem`.`l_orderkey` = `dbt3_s001`.`orders`.`o_orderkey`)) group by `dbt3_s001`.`customer`.`c_name`,`dbt3_s001`.`customer`.`c_custkey`,`dbt3_s001`.`orders`.`o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE`,`dbt3_s001`.`orders`.`o_totalprice` order by `dbt3_s001`.`orders`.`o_totalprice` desc,`dbt3_s001`.`orders`.`o_orderDATE`
select
c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice, sum(l_quantity)
from customer, orders, lineitem
Expand Down Expand Up @@ -1539,6 +1539,68 @@ t
10:00:00
11:00:00
DROP TABLE t1;
#
# MDEV-16374: filtered shows 0 for materilization scan for a semi join, which makes optimizer
# always pick materialization scan over materialization lookup
#
create table t0(a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1 (a int, b int);
insert into t1 values (0,0),(1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9),(10,10),
(11,11),(12,12),(13,13),(14,14),(15,15);
set @@optimizer_use_condition_selectivity=2;
explain extended select * from t1 where a in (select max(a) from t1 group by b);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 16 100.00 Using where
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 test.t1.a 1 100.00
2 MATERIALIZED t1 ALL NULL NULL NULL NULL 16 100.00 Using temporary
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from <materialize> (select max(`test`.`t1`.`a`) from `test`.`t1` group by `test`.`t1`.`b`) join `test`.`t1` where (`<subquery2>`.`max(a)` = `test`.`t1`.`a`)
select * from t1 where a in (select max(a) from t1 group by b);
a b
0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
10 10
11 11
12 12
13 13
14 14
15 15
set @@optimizer_use_condition_selectivity=1;
explain extended select * from t1 where a in (select max(a) from t1 group by b);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 16 100.00 Using where
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 test.t1.a 1 100.00
2 MATERIALIZED t1 ALL NULL NULL NULL NULL 16 100.00 Using temporary
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from <materialize> (select max(`test`.`t1`.`a`) from `test`.`t1` group by `test`.`t1`.`b`) join `test`.`t1` where (`<subquery2>`.`max(a)` = `test`.`t1`.`a`)
select * from t1 where a in (select max(a) from t1 group by b);
a b
0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
10 10
11 11
12 12
13 13
14 14
15 15
drop table t1,t0;
set histogram_size=@save_histogram_size;
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
set use_stat_tables=@save_use_stat_tables;
Expand Down
6 changes: 3 additions & 3 deletions mysql-test/r/subselect_sj_nonmerged.result
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,9 @@ explain select * from t4 where
t4.a in (select max(t2.a) from t1, t2 group by t2.b) and
t4.b in (select max(t2.a) from t1, t2 group by t2.b);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY <subquery3> ALL distinct_key NULL NULL NULL 5
1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 5 Using join buffer (flat, BNL join)
1 PRIMARY t4 ref a a 10 <subquery2>.max(t2.a),<subquery3>.max(t2.a) 12
1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 5
1 PRIMARY t4 ref a a 5 <subquery2>.max(t2.a) 12 Using index condition
1 PRIMARY <subquery3> eq_ref distinct_key distinct_key 4 test.t4.b 1
3 MATERIALIZED t2 ALL NULL NULL NULL NULL 5 Using temporary
3 MATERIALIZED t1 ALL NULL NULL NULL NULL 10 Using join buffer (flat, BNL join)
2 MATERIALIZED t2 ALL NULL NULL NULL NULL 5 Using temporary
Expand Down
18 changes: 18 additions & 0 deletions mysql-test/t/selectivity.test
Original file line number Diff line number Diff line change
Expand Up @@ -1043,6 +1043,24 @@ SELECT * FROM (SELECT t FROM t1 WHERE d IS NULL) sq;

DROP TABLE t1;

--echo #
--echo # MDEV-16374: filtered shows 0 for materilization scan for a semi join, which makes optimizer
--echo # always pick materialization scan over materialization lookup
--echo #

# Test case for MDEV-16374: EXPLAIN EXTENDED and execution of an IN subquery
# with GROUP BY, run under two optimizer_use_condition_selectivity settings.
# The recorded result expects <subquery2> to be accessed via eq_ref with
# filtered=100.00 in both runs.

# t0 holds the ten values 0..9.
# NOTE(review): t0 is not referenced by any query in this test case; confirm
# whether it is needed.
create table t0(a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
# t1 holds 16 rows (0,0)..(15,15); a equals b in every row, so each
# GROUP BY b group contains one row and max(a) returns every value of a.
create table t1 (a int, b int);
insert into t1 values (0,0),(1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9),(10,10),
(11,11),(12,12),(13,13),(14,14),(15,15);
# First run: optimizer_use_condition_selectivity=2.
set @@optimizer_use_condition_selectivity=2;
explain extended select * from t1 where a in (select max(a) from t1 group by b);
# NOTE(review): no ORDER BY, so the recorded row order depends on how the
# server returns rows; confirm this is stable across engines.
select * from t1 where a in (select max(a) from t1 group by b);
# Second run: same statements with optimizer_use_condition_selectivity=1.
set @@optimizer_use_condition_selectivity=1;
explain extended select * from t1 where a in (select max(a) from t1 group by b);
select * from t1 where a in (select max(a) from t1 group by b);
# Clean up both fixture tables.
drop table t1,t0;

set histogram_size=@save_histogram_size;
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
set use_stat_tables=@save_use_stat_tables;
8 changes: 8 additions & 0 deletions sql/table.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7099,7 +7099,15 @@ int TABLE_LIST::fetch_number_of_rows()
{
int error= 0;
if (jtbm_subselect)
{
if (jtbm_subselect->is_jtbm_merged)
{
table->file->stats.records= jtbm_subselect->jtbm_record_count;
set_if_bigger(table->file->stats.records, 2);
table->used_stat_records= table->file->stats.records;
}
return 0;
}
if (is_materialized_derived() && !fill_me)

{
Expand Down

0 comments on commit cd33280

Please sign in to comment.