MDEV-20519: Query plan regression with optimizer_use_condition_select…

…ivity > 1 The issue here is the wrong estimate of the cardinality of a partial join, the cardinality is too high because the function table_cond_selectivity() returns an absurd number 100 while selectivity cannot be greater than 1. When accessing table t by outer reference t1.a via index we do not perform any range analysis for t. Yet we see TABLE::quick_key_parts[key] and TABLE->quick_rows[key] contain a non-zero value though these should have been remained untouched and equal to 0. Thus real cause of the problem is that TABLE::init does not clean the arrays TABLE::quick_key_parts[] and TABLE::>quick_rows[]. It should have done it because the TABLE structure created for any instance of a table can be reused for many queries.
MariaDB · Nov 7, 2019 · b1ab2ba · b1ab2ba
1 parent 4e99e67
commit b1ab2ba
Show file tree

Hide file tree

Showing 6 changed files with 166 additions and 4 deletions.
diff --git a/mysql-test/r/selectivity.result b/mysql-test/r/selectivity.result
@@ -1810,4 +1810,60 @@ b	a	a	b
 9	9	10	10
 set optimizer_use_condition_selectivity= @@optimizer_use_condition_selectivity;
 drop table t1,t2,t3;
+#
+# MDEV-20519: Query plan regression with optimizer_use_condition_selectivity=4
+#
+create table t1 (id int, a int, PRIMARY KEY(id), key(a));
+insert into t1 select seq,seq from seq_1_to_100;
+create table t2 (id int, a int, b int, PRIMARY KEY(id), key(a), key(b));
+insert into t2 select seq,seq,seq from seq_1_to_100;
+set optimizer_switch='exists_to_in=off';
+set optimizer_use_condition_selectivity=2;
+SELECT * FROM t1
+WHERE
+EXISTS (SELECT * FROM t1 A INNER JOIN t2 ON t2.a = A.id
+WHERE A.a=t1.a AND t2.b < 20);
+id	a
+1	1
+2	2
+3	3
+4	4
+5	5
+6	6
+7	7
+8	8
+9	9
+10	10
+11	11
+12	12
+13	13
+14	14
+15	15
+16	16
+17	17
+18	18
+19	19
+explain SELECT * FROM t1
+WHERE
+EXISTS (SELECT * FROM t1 A INNER JOIN t2 ON t2.a = A.id
+WHERE A.a=t1.a AND t2.b < 20);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	100	Using where
+2	DEPENDENT SUBQUERY	A	ref	PRIMARY,a	a	5	test.t1.a	1	
+2	DEPENDENT SUBQUERY	t2	ref	a,b	a	5	test.A.id	1	Using where
+EXPLAIN  SELECT * FROM t1 A, t1 B WHERE A.a = B.a and A.id = 65;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	A	const	PRIMARY,a	PRIMARY	4	const	1	
+1	SIMPLE	B	ref	a	a	5	const	1	
+explain SELECT * FROM t1
+WHERE
+EXISTS (SELECT * FROM t1 A INNER JOIN t2 ON t2.a = A.id
+WHERE A.a=t1.a AND t2.b < 20);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	100	Using where
+2	DEPENDENT SUBQUERY	A	ref	PRIMARY,a	a	5	test.t1.a	1	
+2	DEPENDENT SUBQUERY	t2	ref	a,b	a	5	test.A.id	1	Using where
+set optimizer_switch= @save_optimizer_switch;
+set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
+drop table t1,t2;
 # End of 10.1 tests
diff --git a/mysql-test/r/selectivity_innodb.result b/mysql-test/r/selectivity_innodb.result
@@ -1820,6 +1820,62 @@ b	a	a	b
 9	9	10	10
 set optimizer_use_condition_selectivity= @@optimizer_use_condition_selectivity;
 drop table t1,t2,t3;
+#
+# MDEV-20519: Query plan regression with optimizer_use_condition_selectivity=4
+#
+create table t1 (id int, a int, PRIMARY KEY(id), key(a));
+insert into t1 select seq,seq from seq_1_to_100;
+create table t2 (id int, a int, b int, PRIMARY KEY(id), key(a), key(b));
+insert into t2 select seq,seq,seq from seq_1_to_100;
+set optimizer_switch='exists_to_in=off';
+set optimizer_use_condition_selectivity=2;
+SELECT * FROM t1
+WHERE
+EXISTS (SELECT * FROM t1 A INNER JOIN t2 ON t2.a = A.id
+WHERE A.a=t1.a AND t2.b < 20);
+id	a
+1	1
+2	2
+3	3
+4	4
+5	5
+6	6
+7	7
+8	8
+9	9
+10	10
+11	11
+12	12
+13	13
+14	14
+15	15
+16	16
+17	17
+18	18
+19	19
+explain SELECT * FROM t1
+WHERE
+EXISTS (SELECT * FROM t1 A INNER JOIN t2 ON t2.a = A.id
+WHERE A.a=t1.a AND t2.b < 20);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	index	NULL	a	5	NULL	100	Using where; Using index
+2	DEPENDENT SUBQUERY	A	ref	PRIMARY,a	a	5	test.t1.a	1	Using index
+2	DEPENDENT SUBQUERY	t2	ref	a,b	a	5	test.A.id	1	Using where
+EXPLAIN  SELECT * FROM t1 A, t1 B WHERE A.a = B.a and A.id = 65;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	A	const	PRIMARY,a	PRIMARY	4	const	1	
+1	SIMPLE	B	ref	a	a	5	const	1	Using index
+explain SELECT * FROM t1
+WHERE
+EXISTS (SELECT * FROM t1 A INNER JOIN t2 ON t2.a = A.id
+WHERE A.a=t1.a AND t2.b < 20);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	index	NULL	a	5	NULL	100	Using where; Using index
+2	DEPENDENT SUBQUERY	A	ref	PRIMARY,a	a	5	test.t1.a	1	Using index
+2	DEPENDENT SUBQUERY	t2	ref	a,b	a	5	test.A.id	1	Using where
+set optimizer_switch= @save_optimizer_switch;
+set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
+drop table t1,t2;
 # End of 10.1 tests
 set optimizer_switch=@save_optimizer_switch_for_selectivity_test;
 set @tmp_ust= @@use_stat_tables;

diff --git a/mysql-test/t/selectivity.test b/mysql-test/t/selectivity.test
@@ -1234,5 +1234,37 @@ set optimizer_use_condition_selectivity= @@optimizer_use_condition_selectivity;
 
 drop table t1,t2,t3;
 
+
+--echo #
+--echo # MDEV-20519: Query plan regression with optimizer_use_condition_selectivity=4
+--echo #
+
+
+
+create table t1 (id int, a int, PRIMARY KEY(id), key(a));
+insert into t1 select seq,seq from seq_1_to_100;
+
+create table t2 (id int, a int, b int, PRIMARY KEY(id), key(a), key(b));
+insert into t2 select seq,seq,seq from seq_1_to_100;
+
+set optimizer_switch='exists_to_in=off';
+set optimizer_use_condition_selectivity=2;
+
+let $query= SELECT * FROM t1
+            WHERE
+              EXISTS (SELECT * FROM t1 A INNER JOIN t2 ON t2.a = A.id
+                      WHERE A.a=t1.a AND t2.b < 20);
+
+eval $query;
+eval explain $query;
+
+EXPLAIN  SELECT * FROM t1 A, t1 B WHERE A.a = B.a and A.id = 65;
+
+eval explain $query;
+
+set optimizer_switch= @save_optimizer_switch;
+set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
+drop table t1,t2;
+
 --echo # End of 10.1 tests
 
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
@@ -7654,7 +7654,6 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
           something went wrong.
 	*/
         sel /= (double)table->quick_rows[key] / (double) table->stat_records();
-        DBUG_ASSERT(0 < sel && sel <= 2.0);
         set_if_smaller(sel, 1.0);
         used_range_selectivity= true;
       }
@@ -7703,7 +7702,6 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
               if (table->field[fldno]->cond_selectivity > 0)
 	      {            
                 sel /= table->field[fldno]->cond_selectivity;
-                DBUG_ASSERT(0 < sel && sel <= 2.0);
                 set_if_smaller(sel, 1.0);
               }
               /* 
@@ -7761,7 +7759,6 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
           if (field->cond_selectivity > 0)
 	  {
             sel/= field->cond_selectivity;
-            DBUG_ASSERT(0 < sel && sel <= 2.0);  
             set_if_smaller(sel, 1.0);
           }
           break;
@@ -7773,7 +7770,6 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
   sel*= table_multi_eq_cond_selectivity(join, idx, s, rem_tables,
                                         keyparts, ref_keyuse_steps);
 
-  DBUG_ASSERT(0.0 < sel && sel <= 1.0);
   return sel;
 }
 

diff --git a/sql/table.cc b/sql/table.cc
@@ -4162,6 +4162,8 @@ void TABLE::init(THD *thd, TABLE_LIST *tl)
   created= TRUE;
   cond_selectivity= 1.0;
   cond_selectivity_sampling_explain= NULL;
+  quick_condition_rows=0;
+  initialize_quick_structures();
 #ifdef HAVE_REPLICATION
   /* used in RBR Triggers */
   master_had_triggers= 0;
@@ -7546,3 +7548,22 @@ bool fk_modifies_child(enum_fk_option opt)
   static bool can_write[]= { false, false, true, true, false, true };
   return can_write[opt];
 }
+
+
+/*
+  @brief
+    Initialize all the quick structures that are used to stored the
+    estimates when the range optimizer is run.
+  @details
+    This is specifically needed when we read the TABLE structure from the
+    table cache. There can be some garbage data from previous queries
+    that need to be reset here.
+*/
+
+void TABLE::initialize_quick_structures()
+{
+  bzero(quick_rows, sizeof(quick_rows));
+  bzero(quick_key_parts, sizeof(quick_key_parts));
+  bzero(quick_costs, sizeof(quick_costs));
+  bzero(quick_n_ranges, sizeof(quick_n_ranges));
+}
diff --git a/sql/table.h b/sql/table.h
@@ -1450,6 +1450,7 @@ struct TABLE
   }
 
   bool update_const_key_parts(COND *conds);
+  void initialize_quick_structures();
 
   my_ptrdiff_t default_values_offset() const
   { return (my_ptrdiff_t) (s->default_values - record[0]); }