Skip to content

Commit cd33280

Browse files
committed
MDEV-16374: Filtered shows 0 for materilization scan for a semi join, which makes optimizer always picks
materialization scan over materialization lookup For non-mergeable semi-joins we don't store the estimates of the IN subquery in table->file->stats.records. In the function TABLE_LIST::fetch_number_of_rows, we store the number of rows in the tables (estimates in case of derived table/views). Currently we don't store the estimates for non-mergeable semi-joins, which leads to a problem of selecting materialization scan over materialization lookup. Fixed this by storing these estimated appropriately
1 parent 15155ec commit cd33280

File tree

5 files changed

+161
-11
lines changed

5 files changed

+161
-11
lines changed

mysql-test/r/selectivity.result

Lines changed: 66 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -356,13 +356,13 @@ and o_orderkey = l_orderkey
356356
group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice
357357
order by o_totalprice desc, o_orderdate;
358358
id select_type table type possible_keys key key_len ref rows filtered Extra
359-
1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 6005 0.00 Using temporary; Using filesort
360-
1 PRIMARY orders eq_ref PRIMARY,i_o_custkey PRIMARY 4 <subquery2>.l_orderkey 1 100.00 Using where
359+
1 PRIMARY orders ALL PRIMARY,i_o_custkey NULL NULL NULL 1500 100.00 Using where; Using temporary; Using filesort
360+
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 dbt3_s001.orders.o_orderkey 1 100.00
361361
1 PRIMARY customer eq_ref PRIMARY PRIMARY 4 dbt3_s001.orders.o_custkey 1 100.00
362-
1 PRIMARY lineitem ref PRIMARY,i_l_orderkey,i_l_orderkey_quantity PRIMARY 4 <subquery2>.l_orderkey 4 100.00
362+
1 PRIMARY lineitem ref PRIMARY,i_l_orderkey,i_l_orderkey_quantity i_l_orderkey_quantity 4 dbt3_s001.orders.o_orderkey 4 100.00 Using index
363363
2 MATERIALIZED lineitem index NULL i_l_orderkey_quantity 13 NULL 6005 100.00 Using index
364364
Warnings:
365-
Note 1003 select `dbt3_s001`.`customer`.`c_name` AS `c_name`,`dbt3_s001`.`customer`.`c_custkey` AS `c_custkey`,`dbt3_s001`.`orders`.`o_orderkey` AS `o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE` AS `o_orderdate`,`dbt3_s001`.`orders`.`o_totalprice` AS `o_totalprice`,sum(`dbt3_s001`.`lineitem`.`l_quantity`) AS `sum(l_quantity)` from <materialize> (select `dbt3_s001`.`lineitem`.`l_orderkey` from `dbt3_s001`.`lineitem` group by `dbt3_s001`.`lineitem`.`l_orderkey` having (sum(`dbt3_s001`.`lineitem`.`l_quantity`) > 250)) join `dbt3_s001`.`customer` join `dbt3_s001`.`orders` join `dbt3_s001`.`lineitem` where ((`dbt3_s001`.`customer`.`c_custkey` = `dbt3_s001`.`orders`.`o_custkey`) and (`dbt3_s001`.`orders`.`o_orderkey` = `<subquery2>`.`l_orderkey`) and (`dbt3_s001`.`lineitem`.`l_orderkey` = `<subquery2>`.`l_orderkey`)) group by `dbt3_s001`.`customer`.`c_name`,`dbt3_s001`.`customer`.`c_custkey`,`dbt3_s001`.`orders`.`o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE`,`dbt3_s001`.`orders`.`o_totalprice` order by `dbt3_s001`.`orders`.`o_totalprice` desc,`dbt3_s001`.`orders`.`o_orderDATE`
365+
Note 1003 select `dbt3_s001`.`customer`.`c_name` AS `c_name`,`dbt3_s001`.`customer`.`c_custkey` AS `c_custkey`,`dbt3_s001`.`orders`.`o_orderkey` AS `o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE` AS `o_orderdate`,`dbt3_s001`.`orders`.`o_totalprice` AS `o_totalprice`,sum(`dbt3_s001`.`lineitem`.`l_quantity`) AS `sum(l_quantity)` from <materialize> (select `dbt3_s001`.`lineitem`.`l_orderkey` from `dbt3_s001`.`lineitem` group by `dbt3_s001`.`lineitem`.`l_orderkey` having (sum(`dbt3_s001`.`lineitem`.`l_quantity`) > 250)) join `dbt3_s001`.`customer` join `dbt3_s001`.`orders` join `dbt3_s001`.`lineitem` where ((`dbt3_s001`.`customer`.`c_custkey` = `dbt3_s001`.`orders`.`o_custkey`) and (`<subquery2>`.`l_orderkey` = `dbt3_s001`.`orders`.`o_orderkey`) and (`dbt3_s001`.`lineitem`.`l_orderkey` = `dbt3_s001`.`orders`.`o_orderkey`)) group by `dbt3_s001`.`customer`.`c_name`,`dbt3_s001`.`customer`.`c_custkey`,`dbt3_s001`.`orders`.`o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE`,`dbt3_s001`.`orders`.`o_totalprice` order by `dbt3_s001`.`orders`.`o_totalprice` desc,`dbt3_s001`.`orders`.`o_orderDATE`
366366
select
367367
c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice, sum(l_quantity)
368368
from customer, orders, lineitem
@@ -1535,6 +1535,68 @@ t
15351535
10:00:00
15361536
11:00:00
15371537
DROP TABLE t1;
1538+
#
1539+
# MDEV-16374: filtered shows 0 for materilization scan for a semi join, which makes optimizer
1540+
# always pick materialization scan over materialization lookup
1541+
#
1542+
create table t0(a int);
1543+
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
1544+
create table t1 (a int, b int);
1545+
insert into t1 values (0,0),(1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9),(10,10),
1546+
(11,11),(12,12),(13,13),(14,14),(15,15);
1547+
set @@optimizer_use_condition_selectivity=2;
1548+
explain extended select * from t1 where a in (select max(a) from t1 group by b);
1549+
id select_type table type possible_keys key key_len ref rows filtered Extra
1550+
1 PRIMARY t1 ALL NULL NULL NULL NULL 16 100.00 Using where
1551+
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 test.t1.a 1 100.00
1552+
2 MATERIALIZED t1 ALL NULL NULL NULL NULL 16 100.00 Using temporary
1553+
Warnings:
1554+
Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from <materialize> (select max(`test`.`t1`.`a`) from `test`.`t1` group by `test`.`t1`.`b`) join `test`.`t1` where (`<subquery2>`.`max(a)` = `test`.`t1`.`a`)
1555+
select * from t1 where a in (select max(a) from t1 group by b);
1556+
a b
1557+
0 0
1558+
1 1
1559+
2 2
1560+
3 3
1561+
4 4
1562+
5 5
1563+
6 6
1564+
7 7
1565+
8 8
1566+
9 9
1567+
10 10
1568+
11 11
1569+
12 12
1570+
13 13
1571+
14 14
1572+
15 15
1573+
set @@optimizer_use_condition_selectivity=1;
1574+
explain extended select * from t1 where a in (select max(a) from t1 group by b);
1575+
id select_type table type possible_keys key key_len ref rows filtered Extra
1576+
1 PRIMARY t1 ALL NULL NULL NULL NULL 16 100.00 Using where
1577+
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 test.t1.a 1 100.00
1578+
2 MATERIALIZED t1 ALL NULL NULL NULL NULL 16 100.00 Using temporary
1579+
Warnings:
1580+
Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from <materialize> (select max(`test`.`t1`.`a`) from `test`.`t1` group by `test`.`t1`.`b`) join `test`.`t1` where (`<subquery2>`.`max(a)` = `test`.`t1`.`a`)
1581+
select * from t1 where a in (select max(a) from t1 group by b);
1582+
a b
1583+
0 0
1584+
1 1
1585+
2 2
1586+
3 3
1587+
4 4
1588+
5 5
1589+
6 6
1590+
7 7
1591+
8 8
1592+
9 9
1593+
10 10
1594+
11 11
1595+
12 12
1596+
13 13
1597+
14 14
1598+
15 15
1599+
drop table t1,t0;
15381600
set histogram_size=@save_histogram_size;
15391601
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
15401602
set use_stat_tables=@save_use_stat_tables;

mysql-test/r/selectivity_innodb.result

Lines changed: 66 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -359,13 +359,13 @@ and o_orderkey = l_orderkey
359359
group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice
360360
order by o_totalprice desc, o_orderdate;
361361
id select_type table type possible_keys key key_len ref rows filtered Extra
362-
1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 6005 0.00 Using temporary; Using filesort
363-
1 PRIMARY orders eq_ref PRIMARY,i_o_custkey PRIMARY 4 <subquery2>.l_orderkey 1 100.00 Using where
362+
1 PRIMARY orders ALL PRIMARY,i_o_custkey NULL NULL NULL 1500 100.00 Using where; Using temporary; Using filesort
363+
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 dbt3_s001.orders.o_orderkey 1 100.00
364364
1 PRIMARY customer eq_ref PRIMARY PRIMARY 4 dbt3_s001.orders.o_custkey 1 100.00
365-
1 PRIMARY lineitem ref PRIMARY,i_l_orderkey,i_l_orderkey_quantity PRIMARY 4 <subquery2>.l_orderkey 4 100.00
365+
1 PRIMARY lineitem ref PRIMARY,i_l_orderkey,i_l_orderkey_quantity i_l_orderkey_quantity 4 dbt3_s001.orders.o_orderkey 4 100.00 Using index
366366
2 MATERIALIZED lineitem index NULL PRIMARY 8 NULL 6005 100.00
367367
Warnings:
368-
Note 1003 select `dbt3_s001`.`customer`.`c_name` AS `c_name`,`dbt3_s001`.`customer`.`c_custkey` AS `c_custkey`,`dbt3_s001`.`orders`.`o_orderkey` AS `o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE` AS `o_orderdate`,`dbt3_s001`.`orders`.`o_totalprice` AS `o_totalprice`,sum(`dbt3_s001`.`lineitem`.`l_quantity`) AS `sum(l_quantity)` from <materialize> (select `dbt3_s001`.`lineitem`.`l_orderkey` from `dbt3_s001`.`lineitem` group by `dbt3_s001`.`lineitem`.`l_orderkey` having (sum(`dbt3_s001`.`lineitem`.`l_quantity`) > 250)) join `dbt3_s001`.`customer` join `dbt3_s001`.`orders` join `dbt3_s001`.`lineitem` where ((`dbt3_s001`.`customer`.`c_custkey` = `dbt3_s001`.`orders`.`o_custkey`) and (`dbt3_s001`.`orders`.`o_orderkey` = `<subquery2>`.`l_orderkey`) and (`dbt3_s001`.`lineitem`.`l_orderkey` = `<subquery2>`.`l_orderkey`)) group by `dbt3_s001`.`customer`.`c_name`,`dbt3_s001`.`customer`.`c_custkey`,`dbt3_s001`.`orders`.`o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE`,`dbt3_s001`.`orders`.`o_totalprice` order by `dbt3_s001`.`orders`.`o_totalprice` desc,`dbt3_s001`.`orders`.`o_orderDATE`
368+
Note 1003 select `dbt3_s001`.`customer`.`c_name` AS `c_name`,`dbt3_s001`.`customer`.`c_custkey` AS `c_custkey`,`dbt3_s001`.`orders`.`o_orderkey` AS `o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE` AS `o_orderdate`,`dbt3_s001`.`orders`.`o_totalprice` AS `o_totalprice`,sum(`dbt3_s001`.`lineitem`.`l_quantity`) AS `sum(l_quantity)` from <materialize> (select `dbt3_s001`.`lineitem`.`l_orderkey` from `dbt3_s001`.`lineitem` group by `dbt3_s001`.`lineitem`.`l_orderkey` having (sum(`dbt3_s001`.`lineitem`.`l_quantity`) > 250)) join `dbt3_s001`.`customer` join `dbt3_s001`.`orders` join `dbt3_s001`.`lineitem` where ((`dbt3_s001`.`customer`.`c_custkey` = `dbt3_s001`.`orders`.`o_custkey`) and (`<subquery2>`.`l_orderkey` = `dbt3_s001`.`orders`.`o_orderkey`) and (`dbt3_s001`.`lineitem`.`l_orderkey` = `dbt3_s001`.`orders`.`o_orderkey`)) group by `dbt3_s001`.`customer`.`c_name`,`dbt3_s001`.`customer`.`c_custkey`,`dbt3_s001`.`orders`.`o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE`,`dbt3_s001`.`orders`.`o_totalprice` order by `dbt3_s001`.`orders`.`o_totalprice` desc,`dbt3_s001`.`orders`.`o_orderDATE`
369369
select
370370
c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice, sum(l_quantity)
371371
from customer, orders, lineitem
@@ -1539,6 +1539,68 @@ t
15391539
10:00:00
15401540
11:00:00
15411541
DROP TABLE t1;
1542+
#
1543+
# MDEV-16374: filtered shows 0 for materilization scan for a semi join, which makes optimizer
1544+
# always pick materialization scan over materialization lookup
1545+
#
1546+
create table t0(a int);
1547+
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
1548+
create table t1 (a int, b int);
1549+
insert into t1 values (0,0),(1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9),(10,10),
1550+
(11,11),(12,12),(13,13),(14,14),(15,15);
1551+
set @@optimizer_use_condition_selectivity=2;
1552+
explain extended select * from t1 where a in (select max(a) from t1 group by b);
1553+
id select_type table type possible_keys key key_len ref rows filtered Extra
1554+
1 PRIMARY t1 ALL NULL NULL NULL NULL 16 100.00 Using where
1555+
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 test.t1.a 1 100.00
1556+
2 MATERIALIZED t1 ALL NULL NULL NULL NULL 16 100.00 Using temporary
1557+
Warnings:
1558+
Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from <materialize> (select max(`test`.`t1`.`a`) from `test`.`t1` group by `test`.`t1`.`b`) join `test`.`t1` where (`<subquery2>`.`max(a)` = `test`.`t1`.`a`)
1559+
select * from t1 where a in (select max(a) from t1 group by b);
1560+
a b
1561+
0 0
1562+
1 1
1563+
2 2
1564+
3 3
1565+
4 4
1566+
5 5
1567+
6 6
1568+
7 7
1569+
8 8
1570+
9 9
1571+
10 10
1572+
11 11
1573+
12 12
1574+
13 13
1575+
14 14
1576+
15 15
1577+
set @@optimizer_use_condition_selectivity=1;
1578+
explain extended select * from t1 where a in (select max(a) from t1 group by b);
1579+
id select_type table type possible_keys key key_len ref rows filtered Extra
1580+
1 PRIMARY t1 ALL NULL NULL NULL NULL 16 100.00 Using where
1581+
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 test.t1.a 1 100.00
1582+
2 MATERIALIZED t1 ALL NULL NULL NULL NULL 16 100.00 Using temporary
1583+
Warnings:
1584+
Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from <materialize> (select max(`test`.`t1`.`a`) from `test`.`t1` group by `test`.`t1`.`b`) join `test`.`t1` where (`<subquery2>`.`max(a)` = `test`.`t1`.`a`)
1585+
select * from t1 where a in (select max(a) from t1 group by b);
1586+
a b
1587+
0 0
1588+
1 1
1589+
2 2
1590+
3 3
1591+
4 4
1592+
5 5
1593+
6 6
1594+
7 7
1595+
8 8
1596+
9 9
1597+
10 10
1598+
11 11
1599+
12 12
1600+
13 13
1601+
14 14
1602+
15 15
1603+
drop table t1,t0;
15421604
set histogram_size=@save_histogram_size;
15431605
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
15441606
set use_stat_tables=@save_use_stat_tables;

mysql-test/r/subselect_sj_nonmerged.result

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,9 @@ explain select * from t4 where
7777
t4.a in (select max(t2.a) from t1, t2 group by t2.b) and
7878
t4.b in (select max(t2.a) from t1, t2 group by t2.b);
7979
id select_type table type possible_keys key key_len ref rows Extra
80-
1 PRIMARY <subquery3> ALL distinct_key NULL NULL NULL 5
81-
1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 5 Using join buffer (flat, BNL join)
82-
1 PRIMARY t4 ref a a 10 <subquery2>.max(t2.a),<subquery3>.max(t2.a) 12
80+
1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 5
81+
1 PRIMARY t4 ref a a 5 <subquery2>.max(t2.a) 12 Using index condition
82+
1 PRIMARY <subquery3> eq_ref distinct_key distinct_key 4 test.t4.b 1
8383
3 MATERIALIZED t2 ALL NULL NULL NULL NULL 5 Using temporary
8484
3 MATERIALIZED t1 ALL NULL NULL NULL NULL 10 Using join buffer (flat, BNL join)
8585
2 MATERIALIZED t2 ALL NULL NULL NULL NULL 5 Using temporary

mysql-test/t/selectivity.test

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1043,6 +1043,24 @@ SELECT * FROM (SELECT t FROM t1 WHERE d IS NULL) sq;
10431043

10441044
DROP TABLE t1;
10451045

1046+
--echo #
1047+
--echo # MDEV-16374: filtered shows 0 for materilization scan for a semi join, which makes optimizer
1048+
--echo # always pick materialization scan over materialization lookup
1049+
--echo #
1050+
1051+
create table t0(a int);
1052+
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
1053+
create table t1 (a int, b int);
1054+
insert into t1 values (0,0),(1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9),(10,10),
1055+
(11,11),(12,12),(13,13),(14,14),(15,15);
1056+
set @@optimizer_use_condition_selectivity=2;
1057+
explain extended select * from t1 where a in (select max(a) from t1 group by b);
1058+
select * from t1 where a in (select max(a) from t1 group by b);
1059+
set @@optimizer_use_condition_selectivity=1;
1060+
explain extended select * from t1 where a in (select max(a) from t1 group by b);
1061+
select * from t1 where a in (select max(a) from t1 group by b);
1062+
drop table t1,t0;
1063+
10461064
set histogram_size=@save_histogram_size;
10471065
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
10481066
set use_stat_tables=@save_use_stat_tables;

sql/table.cc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7099,7 +7099,15 @@ int TABLE_LIST::fetch_number_of_rows()
70997099
{
71007100
int error= 0;
71017101
if (jtbm_subselect)
7102+
{
7103+
if (jtbm_subselect->is_jtbm_merged)
7104+
{
7105+
table->file->stats.records= jtbm_subselect->jtbm_record_count;
7106+
set_if_bigger(table->file->stats.records, 2);
7107+
table->used_stat_records= table->file->stats.records;
7108+
}
71027109
return 0;
7110+
}
71037111
if (is_materialized_derived() && !fill_me)
71047112

71057113
{

0 commit comments

Comments
 (0)