From cd33280b682692f0517a26178d7b5337db648751 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Sat, 9 Jun 2018 11:26:52 +0530 Subject: [PATCH] MDEV-16374: Filtered shows 0 for materilization scan for a semi join, which makes optimizer always picks materialization scan over materialization lookup For non-mergeable semi-joins we don't store the estimates of the IN subquery in table->file->stats.records. In the function TABLE_LIST::fetch_number_of_rows, we store the number of rows in the tables (estimates in case of derived table/views). Currently we don't store the estimates for non-mergeable semi-joins, which leads to a problem of selecting materialization scan over materialization lookup. Fixed this by storing these estimated appropriately --- mysql-test/r/selectivity.result | 70 ++++++++++++++++++++-- mysql-test/r/selectivity_innodb.result | 70 ++++++++++++++++++++-- mysql-test/r/subselect_sj_nonmerged.result | 6 +- mysql-test/t/selectivity.test | 18 ++++++ sql/table.cc | 8 +++ 5 files changed, 161 insertions(+), 11 deletions(-) diff --git a/mysql-test/r/selectivity.result b/mysql-test/r/selectivity.result index 61a77d135e719..3e8fb8e2e4185 100644 --- a/mysql-test/r/selectivity.result +++ b/mysql-test/r/selectivity.result @@ -356,13 +356,13 @@ and o_orderkey = l_orderkey group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice order by o_totalprice desc, o_orderdate; id select_type table type possible_keys key key_len ref rows filtered Extra -1 PRIMARY ALL distinct_key NULL NULL NULL 6005 0.00 Using temporary; Using filesort -1 PRIMARY orders eq_ref PRIMARY,i_o_custkey PRIMARY 4 .l_orderkey 1 100.00 Using where +1 PRIMARY orders ALL PRIMARY,i_o_custkey NULL NULL NULL 1500 100.00 Using where; Using temporary; Using filesort +1 PRIMARY eq_ref distinct_key distinct_key 4 dbt3_s001.orders.o_orderkey 1 100.00 1 PRIMARY customer eq_ref PRIMARY PRIMARY 4 dbt3_s001.orders.o_custkey 1 100.00 -1 PRIMARY lineitem ref PRIMARY,i_l_orderkey,i_l_orderkey_quantity PRIMARY 4 .l_orderkey 4 100.00 +1 PRIMARY lineitem ref PRIMARY,i_l_orderkey,i_l_orderkey_quantity i_l_orderkey_quantity 4 dbt3_s001.orders.o_orderkey 4 100.00 Using index 2 MATERIALIZED lineitem index NULL i_l_orderkey_quantity 13 NULL 6005 100.00 Using index Warnings: -Note 1003 select `dbt3_s001`.`customer`.`c_name` AS `c_name`,`dbt3_s001`.`customer`.`c_custkey` AS `c_custkey`,`dbt3_s001`.`orders`.`o_orderkey` AS `o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE` AS `o_orderdate`,`dbt3_s001`.`orders`.`o_totalprice` AS `o_totalprice`,sum(`dbt3_s001`.`lineitem`.`l_quantity`) AS `sum(l_quantity)` from (select `dbt3_s001`.`lineitem`.`l_orderkey` from `dbt3_s001`.`lineitem` group by `dbt3_s001`.`lineitem`.`l_orderkey` having (sum(`dbt3_s001`.`lineitem`.`l_quantity`) > 250)) join `dbt3_s001`.`customer` join `dbt3_s001`.`orders` join `dbt3_s001`.`lineitem` where ((`dbt3_s001`.`customer`.`c_custkey` = `dbt3_s001`.`orders`.`o_custkey`) and (`dbt3_s001`.`orders`.`o_orderkey` = ``.`l_orderkey`) and (`dbt3_s001`.`lineitem`.`l_orderkey` = ``.`l_orderkey`)) group by `dbt3_s001`.`customer`.`c_name`,`dbt3_s001`.`customer`.`c_custkey`,`dbt3_s001`.`orders`.`o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE`,`dbt3_s001`.`orders`.`o_totalprice` order by `dbt3_s001`.`orders`.`o_totalprice` desc,`dbt3_s001`.`orders`.`o_orderDATE` +Note 1003 select `dbt3_s001`.`customer`.`c_name` AS `c_name`,`dbt3_s001`.`customer`.`c_custkey` AS `c_custkey`,`dbt3_s001`.`orders`.`o_orderkey` AS `o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE` AS `o_orderdate`,`dbt3_s001`.`orders`.`o_totalprice` AS `o_totalprice`,sum(`dbt3_s001`.`lineitem`.`l_quantity`) AS `sum(l_quantity)` from (select `dbt3_s001`.`lineitem`.`l_orderkey` from `dbt3_s001`.`lineitem` group by `dbt3_s001`.`lineitem`.`l_orderkey` having (sum(`dbt3_s001`.`lineitem`.`l_quantity`) > 250)) join `dbt3_s001`.`customer` join `dbt3_s001`.`orders` join `dbt3_s001`.`lineitem` where ((`dbt3_s001`.`customer`.`c_custkey` = `dbt3_s001`.`orders`.`o_custkey`) and (``.`l_orderkey` = `dbt3_s001`.`orders`.`o_orderkey`) and (`dbt3_s001`.`lineitem`.`l_orderkey` = `dbt3_s001`.`orders`.`o_orderkey`)) group by `dbt3_s001`.`customer`.`c_name`,`dbt3_s001`.`customer`.`c_custkey`,`dbt3_s001`.`orders`.`o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE`,`dbt3_s001`.`orders`.`o_totalprice` order by `dbt3_s001`.`orders`.`o_totalprice` desc,`dbt3_s001`.`orders`.`o_orderDATE` select c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice, sum(l_quantity) from customer, orders, lineitem @@ -1535,6 +1535,68 @@ t 10:00:00 11:00:00 DROP TABLE t1; +# +# MDEV-16374: filtered shows 0 for materilization scan for a semi join, which makes optimizer +# always pick materialization scan over materialization lookup +# +create table t0(a int); +insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t1 (a int, b int); +insert into t1 values (0,0),(1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9),(10,10), +(11,11),(12,12),(13,13),(14,14),(15,15); +set @@optimizer_use_condition_selectivity=2; +explain extended select * from t1 where a in (select max(a) from t1 group by b); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 16 100.00 Using where +1 PRIMARY eq_ref distinct_key distinct_key 4 test.t1.a 1 100.00 +2 MATERIALIZED t1 ALL NULL NULL NULL NULL 16 100.00 Using temporary +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from (select max(`test`.`t1`.`a`) from `test`.`t1` group by `test`.`t1`.`b`) join `test`.`t1` where (``.`max(a)` = `test`.`t1`.`a`) +select * from t1 where a in (select max(a) from t1 group by b); +a b +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +11 11 +12 12 +13 13 +14 14 +15 15 +set @@optimizer_use_condition_selectivity=1; +explain extended select * from t1 where a in (select max(a) from t1 group by b); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 16 100.00 Using where +1 PRIMARY eq_ref distinct_key distinct_key 4 test.t1.a 1 100.00 +2 MATERIALIZED t1 ALL NULL NULL NULL NULL 16 100.00 Using temporary +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from (select max(`test`.`t1`.`a`) from `test`.`t1` group by `test`.`t1`.`b`) join `test`.`t1` where (``.`max(a)` = `test`.`t1`.`a`) +select * from t1 where a in (select max(a) from t1 group by b); +a b +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +11 11 +12 12 +13 13 +14 14 +15 15 +drop table t1,t0; set histogram_size=@save_histogram_size; set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; set use_stat_tables=@save_use_stat_tables; diff --git a/mysql-test/r/selectivity_innodb.result b/mysql-test/r/selectivity_innodb.result index a026c2e6d929b..748ef0cb6ca75 100644 --- a/mysql-test/r/selectivity_innodb.result +++ b/mysql-test/r/selectivity_innodb.result @@ -359,13 +359,13 @@ and o_orderkey = l_orderkey group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice order by o_totalprice desc, o_orderdate; id select_type table type possible_keys key key_len ref rows filtered Extra -1 PRIMARY ALL distinct_key NULL NULL NULL 6005 0.00 Using temporary; Using filesort -1 PRIMARY orders eq_ref PRIMARY,i_o_custkey PRIMARY 4 .l_orderkey 1 100.00 Using where +1 PRIMARY orders ALL PRIMARY,i_o_custkey NULL NULL NULL 1500 100.00 Using where; Using temporary; Using filesort +1 PRIMARY eq_ref distinct_key distinct_key 4 dbt3_s001.orders.o_orderkey 1 100.00 1 PRIMARY customer eq_ref PRIMARY PRIMARY 4 dbt3_s001.orders.o_custkey 1 100.00 -1 PRIMARY lineitem ref PRIMARY,i_l_orderkey,i_l_orderkey_quantity PRIMARY 4 .l_orderkey 4 100.00 +1 PRIMARY lineitem ref PRIMARY,i_l_orderkey,i_l_orderkey_quantity i_l_orderkey_quantity 4 dbt3_s001.orders.o_orderkey 4 100.00 Using index 2 MATERIALIZED lineitem index NULL PRIMARY 8 NULL 6005 100.00 Warnings: -Note 1003 select `dbt3_s001`.`customer`.`c_name` AS `c_name`,`dbt3_s001`.`customer`.`c_custkey` AS `c_custkey`,`dbt3_s001`.`orders`.`o_orderkey` AS `o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE` AS `o_orderdate`,`dbt3_s001`.`orders`.`o_totalprice` AS `o_totalprice`,sum(`dbt3_s001`.`lineitem`.`l_quantity`) AS `sum(l_quantity)` from (select `dbt3_s001`.`lineitem`.`l_orderkey` from `dbt3_s001`.`lineitem` group by `dbt3_s001`.`lineitem`.`l_orderkey` having (sum(`dbt3_s001`.`lineitem`.`l_quantity`) > 250)) join `dbt3_s001`.`customer` join `dbt3_s001`.`orders` join `dbt3_s001`.`lineitem` where ((`dbt3_s001`.`customer`.`c_custkey` = `dbt3_s001`.`orders`.`o_custkey`) and (`dbt3_s001`.`orders`.`o_orderkey` = ``.`l_orderkey`) and (`dbt3_s001`.`lineitem`.`l_orderkey` = ``.`l_orderkey`)) group by `dbt3_s001`.`customer`.`c_name`,`dbt3_s001`.`customer`.`c_custkey`,`dbt3_s001`.`orders`.`o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE`,`dbt3_s001`.`orders`.`o_totalprice` order by `dbt3_s001`.`orders`.`o_totalprice` desc,`dbt3_s001`.`orders`.`o_orderDATE` +Note 1003 select `dbt3_s001`.`customer`.`c_name` AS `c_name`,`dbt3_s001`.`customer`.`c_custkey` AS `c_custkey`,`dbt3_s001`.`orders`.`o_orderkey` AS `o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE` AS `o_orderdate`,`dbt3_s001`.`orders`.`o_totalprice` AS `o_totalprice`,sum(`dbt3_s001`.`lineitem`.`l_quantity`) AS `sum(l_quantity)` from (select `dbt3_s001`.`lineitem`.`l_orderkey` from `dbt3_s001`.`lineitem` group by `dbt3_s001`.`lineitem`.`l_orderkey` having (sum(`dbt3_s001`.`lineitem`.`l_quantity`) > 250)) join `dbt3_s001`.`customer` join `dbt3_s001`.`orders` join `dbt3_s001`.`lineitem` where ((`dbt3_s001`.`customer`.`c_custkey` = `dbt3_s001`.`orders`.`o_custkey`) and (``.`l_orderkey` = `dbt3_s001`.`orders`.`o_orderkey`) and (`dbt3_s001`.`lineitem`.`l_orderkey` = `dbt3_s001`.`orders`.`o_orderkey`)) group by `dbt3_s001`.`customer`.`c_name`,`dbt3_s001`.`customer`.`c_custkey`,`dbt3_s001`.`orders`.`o_orderkey`,`dbt3_s001`.`orders`.`o_orderDATE`,`dbt3_s001`.`orders`.`o_totalprice` order by `dbt3_s001`.`orders`.`o_totalprice` desc,`dbt3_s001`.`orders`.`o_orderDATE` select c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice, sum(l_quantity) from customer, orders, lineitem @@ -1539,6 +1539,68 @@ t 10:00:00 11:00:00 DROP TABLE t1; +# +# MDEV-16374: filtered shows 0 for materilization scan for a semi join, which makes optimizer +# always pick materialization scan over materialization lookup +# +create table t0(a int); +insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t1 (a int, b int); +insert into t1 values (0,0),(1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9),(10,10), +(11,11),(12,12),(13,13),(14,14),(15,15); +set @@optimizer_use_condition_selectivity=2; +explain extended select * from t1 where a in (select max(a) from t1 group by b); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 16 100.00 Using where +1 PRIMARY eq_ref distinct_key distinct_key 4 test.t1.a 1 100.00 +2 MATERIALIZED t1 ALL NULL NULL NULL NULL 16 100.00 Using temporary +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from (select max(`test`.`t1`.`a`) from `test`.`t1` group by `test`.`t1`.`b`) join `test`.`t1` where (``.`max(a)` = `test`.`t1`.`a`) +select * from t1 where a in (select max(a) from t1 group by b); +a b +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +11 11 +12 12 +13 13 +14 14 +15 15 +set @@optimizer_use_condition_selectivity=1; +explain extended select * from t1 where a in (select max(a) from t1 group by b); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 16 100.00 Using where +1 PRIMARY eq_ref distinct_key distinct_key 4 test.t1.a 1 100.00 +2 MATERIALIZED t1 ALL NULL NULL NULL NULL 16 100.00 Using temporary +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from (select max(`test`.`t1`.`a`) from `test`.`t1` group by `test`.`t1`.`b`) join `test`.`t1` where (``.`max(a)` = `test`.`t1`.`a`) +select * from t1 where a in (select max(a) from t1 group by b); +a b +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +11 11 +12 12 +13 13 +14 14 +15 15 +drop table t1,t0; set histogram_size=@save_histogram_size; set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; set use_stat_tables=@save_use_stat_tables; diff --git a/mysql-test/r/subselect_sj_nonmerged.result b/mysql-test/r/subselect_sj_nonmerged.result index c7e04225ffea4..47970668ae578 100644 --- a/mysql-test/r/subselect_sj_nonmerged.result +++ b/mysql-test/r/subselect_sj_nonmerged.result @@ -77,9 +77,9 @@ explain select * from t4 where t4.a in (select max(t2.a) from t1, t2 group by t2.b) and t4.b in (select max(t2.a) from t1, t2 group by t2.b); id select_type table type possible_keys key key_len ref rows Extra -1 PRIMARY ALL distinct_key NULL NULL NULL 5 -1 PRIMARY ALL distinct_key NULL NULL NULL 5 Using join buffer (flat, BNL join) -1 PRIMARY t4 ref a a 10 .max(t2.a),.max(t2.a) 12 +1 PRIMARY ALL distinct_key NULL NULL NULL 5 +1 PRIMARY t4 ref a a 5 .max(t2.a) 12 Using index condition +1 PRIMARY eq_ref distinct_key distinct_key 4 test.t4.b 1 3 MATERIALIZED t2 ALL NULL NULL NULL NULL 5 Using temporary 3 MATERIALIZED t1 ALL NULL NULL NULL NULL 10 Using join buffer (flat, BNL join) 2 MATERIALIZED t2 ALL NULL NULL NULL NULL 5 Using temporary diff --git a/mysql-test/t/selectivity.test b/mysql-test/t/selectivity.test index 548ef295fb210..afaa937c36039 100644 --- a/mysql-test/t/selectivity.test +++ b/mysql-test/t/selectivity.test @@ -1043,6 +1043,24 @@ SELECT * FROM (SELECT t FROM t1 WHERE d IS NULL) sq; DROP TABLE t1; +--echo # +--echo # MDEV-16374: filtered shows 0 for materilization scan for a semi join, which makes optimizer +--echo # always pick materialization scan over materialization lookup +--echo # + +create table t0(a int); +insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t1 (a int, b int); +insert into t1 values (0,0),(1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9),(10,10), +(11,11),(12,12),(13,13),(14,14),(15,15); +set @@optimizer_use_condition_selectivity=2; +explain extended select * from t1 where a in (select max(a) from t1 group by b); +select * from t1 where a in (select max(a) from t1 group by b); +set @@optimizer_use_condition_selectivity=1; +explain extended select * from t1 where a in (select max(a) from t1 group by b); +select * from t1 where a in (select max(a) from t1 group by b); +drop table t1,t0; + set histogram_size=@save_histogram_size; set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; set use_stat_tables=@save_use_stat_tables; diff --git a/sql/table.cc b/sql/table.cc index bc6e1e754ee1a..b5082df7076bb 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -7099,7 +7099,15 @@ int TABLE_LIST::fetch_number_of_rows() { int error= 0; if (jtbm_subselect) + { + if (jtbm_subselect->is_jtbm_merged) + { + table->file->stats.records= jtbm_subselect->jtbm_record_count; + set_if_bigger(table->file->stats.records, 2); + table->used_stat_records= table->file->stats.records; + } return 0; + } if (is_materialized_derived() && !fill_me) {