Skip to content

Commit

Permalink
MDEV-32351 Significant slowdown for query with many outer joins
Browse files Browse the repository at this point in the history
This patch fixes a performance regression introduced in the patch for the
bug MDEV-21104. The performance regression could affect queries for which
join buffer was used for an outer join such that its on expression from
which a conjunctive condition depended only on outer tables can be
extracted. If the number of records in the join buffer for which this
condition was false greatly exceeded the number of other records the
slowdown could be significant.

If there is a conjunctive condition extracted from the ON expression
depending only on outer tables this condition is evaluated when interesting
fields of each survived record of outer tables are put into the join buffer.
Each such set of fields for any join operation is supplied with a match
flag field used to generate null complemented rows. If the result of the
evaluation of the condition is false the flag is set to MATCH_IMPOSSIBLE.
When looking in the join buffer for records matching a record of the
right operand of the outer join operation the records with such flags
are not needed to be unpacked into record buffers for evaluation of on
expressions.

The patch for MDEV-21104 fixing some problem of wrong results when
'not exists' optimization by mistake broke the code that allowed to
ignore records with the match flag set to MATCH_IMPOSSIBLE when looking
for matching records. As a result such records were unpacked for each
record of the right operand of the outer join operation. This caused
significant execution penalty in some cases.

One of the test cases added in the patch can be used only for demonstration
of the restored performance for the reported query. The second test case is
needed to demonstrate the validity of the fix.
  • Loading branch information
igorbabaev authored and sanja-byelkin committed Oct 27, 2023
1 parent 11abc21 commit 954a6de
Show file tree
Hide file tree
Showing 4 changed files with 202 additions and 12 deletions.
126 changes: 126 additions & 0 deletions mysql-test/main/join_cache.result
Expand Up @@ -6270,5 +6270,131 @@ ERROR HY001: Could not create a join buffer. Please check and adjust the value o
SET JOIN_buffer_size=16384;
SELECT * FROM information_schema.statistics JOIN information_schema.COLUMNS USING (table_name,column_name);
#
# MDEV-32351: Join buffer used for outer join with ON condition
# depending only on outer tables
#
CREATE TABLE t1 (b int NOT NULL, PRIMARY KEY (b)) ENGINE=MYISAM;
INSERT INTO t1 select seq from seq_1_to_10000;
CREATE TABLE t2 (b int NOT NULL, d varchar(255), KEY (b)) ENGINE=MYISAM ;
INSERT INTO t2 VALUES (1,1),(2,2),(3,3);
CREATE TABLE t3 (c int NOT NULL, PRIMARY KEY (c)) ENGINE=MYISAM ;
INSERT INTO t3 select seq from seq_1_to_3000;
CREATE TABLE t4 (c int NOT NULL, PRIMARY KEY (c)) ENGINE=MYISAM;
INSERT INTO t4 select seq from seq_1_to_3000;
ANALYZE TABLE t1,t2,t3,t4;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK
test.t2 analyze status Engine-independent statistics collected
test.t2 analyze status OK
test.t3 analyze status Engine-independent statistics collected
test.t3 analyze status OK
test.t4 analyze status Engine-independent statistics collected
test.t4 analyze status OK
set join_cache_level=0;
EXPLAIN SELECT COUNT(*)
FROM t1
LEFT JOIN t2 ON t1.b = t2.b
LEFT JOIN t3 ON t2.d = t3.c
LEFT JOIN t4 ON t3.c=1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index NULL PRIMARY 4 NULL 10000 Using index
1 SIMPLE t2 ref b b 4 test.t1.b 1
1 SIMPLE t3 eq_ref PRIMARY PRIMARY 4 test.t2.d 1 Using where; Using index
1 SIMPLE t4 index NULL PRIMARY 4 NULL 3000 Using where; Using index
SELECT COUNT(*)
FROM t1
LEFT JOIN t2 ON t1.b = t2.b
LEFT JOIN t3 ON t2.d = t3.c
LEFT JOIN t4 ON t3.c=1;
COUNT(*)
12999
set join_cache_level=default;
EXPLAIN SELECT COUNT(*)
FROM t1
LEFT JOIN t2 ON t1.b = t2.b
LEFT JOIN t3 ON t2.d = t3.c
LEFT JOIN t4 ON t3.c=1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index NULL PRIMARY 4 NULL 10000 Using index
1 SIMPLE t2 ref b b 4 test.t1.b 1
1 SIMPLE t3 eq_ref PRIMARY PRIMARY 4 test.t2.d 1 Using where; Using index
1 SIMPLE t4 index NULL PRIMARY 4 NULL 3000 Using where; Using index; Using join buffer (flat, BNL join)
SELECT COUNT(*)
FROM t1
LEFT JOIN t2 ON t1.b = t2.b
LEFT JOIN t3 ON t2.d = t3.c
LEFT JOIN t4 ON t3.c=1;
COUNT(*)
12999
DROP TABLE t1,t2,t3,t4;
CREATE TABLE t1 (b int NOT NULL, PRIMARY KEY (b));
INSERT INTO t1 select seq from seq_1_to_10;
CREATE TABLE t2 (b int NOT NULL, d varchar(255), KEY (b)) ;
INSERT INTO t2 VALUES (1,1),(2,2),(3,3);
CREATE TABLE t3 (c int NOT NULL, PRIMARY KEY (c)) ;
INSERT INTO t3 select seq from seq_1_to_3;
CREATE TABLE t4 (c int NOT NULL, PRIMARY KEY (c)) ;
INSERT INTO t4 select seq from seq_1_to_3;
set join_cache_level=0;
EXPLAIN SELECT *
FROM t1
LEFT JOIN t2 ON t1.b = t2.b
LEFT JOIN t3 ON t2.d = t3.c
LEFT JOIN t4 ON t3.c=1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index NULL PRIMARY 4 NULL 10 Using index
1 SIMPLE t2 ALL b NULL NULL NULL 3 Using where
1 SIMPLE t3 eq_ref PRIMARY PRIMARY 4 test.t2.d 1 Using where; Using index
1 SIMPLE t4 index NULL PRIMARY 4 NULL 3 Using where; Using index
SELECT *
FROM t1
LEFT JOIN t2 ON t1.b = t2.b
LEFT JOIN t3 ON t2.d = t3.c
LEFT JOIN t4 ON t3.c=1;
b b d c c
1 1 1 1 1
1 1 1 1 2
1 1 1 1 3
2 2 2 2 NULL
3 3 3 3 NULL
4 NULL NULL NULL NULL
5 NULL NULL NULL NULL
6 NULL NULL NULL NULL
7 NULL NULL NULL NULL
8 NULL NULL NULL NULL
9 NULL NULL NULL NULL
10 NULL NULL NULL NULL
set join_cache_level=default;
EXPLAIN SELECT *
FROM t1
LEFT JOIN t2 ON t1.b = t2.b
LEFT JOIN t3 ON t2.d = t3.c
LEFT JOIN t4 ON t3.c=1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index NULL PRIMARY 4 NULL 10 Using index
1 SIMPLE t2 ALL b NULL NULL NULL 3 Using where; Using join buffer (flat, BNL join)
1 SIMPLE t3 eq_ref PRIMARY PRIMARY 4 test.t2.d 1 Using where; Using index
1 SIMPLE t4 index NULL PRIMARY 4 NULL 3 Using where; Using index; Using join buffer (flat, BNL join)
SELECT *
FROM t1
LEFT JOIN t2 ON t1.b = t2.b
LEFT JOIN t3 ON t2.d = t3.c
LEFT JOIN t4 ON t3.c=1;
b b d c c
1 1 1 1 1
1 1 1 1 2
1 1 1 1 3
2 2 2 2 NULL
3 3 3 3 NULL
4 NULL NULL NULL NULL
5 NULL NULL NULL NULL
6 NULL NULL NULL NULL
7 NULL NULL NULL NULL
8 NULL NULL NULL NULL
9 NULL NULL NULL NULL
10 NULL NULL NULL NULL
DROP TABLE t1,t2,t3,t4;
#
# End of 10.4 tests
#
58 changes: 58 additions & 0 deletions mysql-test/main/join_cache.test
Expand Up @@ -4247,6 +4247,64 @@ SET JOIN_buffer_size=16384;
SELECT * FROM information_schema.statistics JOIN information_schema.COLUMNS USING (table_name,column_name);
--enable_result_log

--echo #
--echo # MDEV-32351: Join buffer used for outer join with ON condition
--echo # depending only on outer tables
--echo #

--source include/have_sequence.inc

CREATE TABLE t1 (b int NOT NULL, PRIMARY KEY (b)) ENGINE=MYISAM;
INSERT INTO t1 select seq from seq_1_to_10000;
CREATE TABLE t2 (b int NOT NULL, d varchar(255), KEY (b)) ENGINE=MYISAM ;
INSERT INTO t2 VALUES (1,1),(2,2),(3,3);
CREATE TABLE t3 (c int NOT NULL, PRIMARY KEY (c)) ENGINE=MYISAM ;
INSERT INTO t3 select seq from seq_1_to_3000;
CREATE TABLE t4 (c int NOT NULL, PRIMARY KEY (c)) ENGINE=MYISAM;
INSERT INTO t4 select seq from seq_1_to_3000;
ANALYZE TABLE t1,t2,t3,t4;

let $q1=
SELECT COUNT(*)
FROM t1
LEFT JOIN t2 ON t1.b = t2.b
LEFT JOIN t3 ON t2.d = t3.c
LEFT JOIN t4 ON t3.c=1;

set join_cache_level=0;
eval EXPLAIN $q1;
eval $q1;
set join_cache_level=default;
eval EXPLAIN $q1;
eval $q1;

DROP TABLE t1,t2,t3,t4;

CREATE TABLE t1 (b int NOT NULL, PRIMARY KEY (b));
INSERT INTO t1 select seq from seq_1_to_10;
CREATE TABLE t2 (b int NOT NULL, d varchar(255), KEY (b)) ;
INSERT INTO t2 VALUES (1,1),(2,2),(3,3);
CREATE TABLE t3 (c int NOT NULL, PRIMARY KEY (c)) ;
INSERT INTO t3 select seq from seq_1_to_3;
CREATE TABLE t4 (c int NOT NULL, PRIMARY KEY (c)) ;
INSERT INTO t4 select seq from seq_1_to_3;

let $q2=
SELECT *
FROM t1
LEFT JOIN t2 ON t1.b = t2.b
LEFT JOIN t3 ON t2.d = t3.c
LEFT JOIN t4 ON t3.c=1;

set join_cache_level=0;
eval EXPLAIN $q2;
eval $q2;
set join_cache_level=default;
eval EXPLAIN $q2;
eval $q2;

DROP TABLE t1,t2,t3,t4;

--echo #
--echo # End of 10.4 tests
--echo #
26 changes: 14 additions & 12 deletions sql/sql_join_cache.cc
Expand Up @@ -2054,10 +2054,11 @@ bool JOIN_CACHE::skip_if_matched()
- In the case of a semi-nest the match flag may be in two states
{MATCH_NOT_FOUND, MATCH_FOUND}. The record is skipped if the flag is set
to MATCH_FOUND.
- In the case of a outer join nest when not_exists optimization is applied
the match may be in three states {MATCH_NOT_FOUND, MATCH_IMPOSSIBLE,
MATCH_FOUND. The record is skipped if the flag is set to MATCH_FOUND or
to MATCH_IMPOSSIBLE.
- In the case of an outer join the match may be in three states
{MATCH_NOT_FOUND, MATCH_IMPOSSIBLE, MATCH_FOUND}.
If not_exists optimization is applied the record is skipped when
the flag is set to MATCH_FOUND or to MATCH_IMPOSSIBLE. Otherwise
the record is skipped only when the flag is set to MATCH_IMPOSSIBLE.
If the record is skipped the value of 'pos' is set to point to the position
right after the record.
Expand All @@ -2080,13 +2081,13 @@ bool JOIN_CACHE::skip_if_not_needed_match()
if (prev_cache)
offset+= prev_cache->get_size_of_rec_offset();

if (!join_tab->check_only_first_match())
return FALSE;

match_fl= get_match_flag_by_pos(pos+offset);
skip= join_tab->first_sj_inner_tab ?
match_fl == MATCH_FOUND : // the case of semi-join
match_fl != MATCH_NOT_FOUND; // the case of outer-join
match_fl == MATCH_FOUND : // the case of semi-join
not_exists_opt_is_applicable &&
join_tab->table->reginfo.not_exists_optimize ?
match_fl != MATCH_NOT_FOUND : // the case of not exist opt
match_fl == MATCH_IMPOSSIBLE;

if (skip)
{
Expand Down Expand Up @@ -2383,7 +2384,7 @@ enum_nested_loop_state JOIN_CACHE::join_matching_records(bool skip_last)
as candidates for matches.
*/

bool not_exists_opt_is_applicable= true;
not_exists_opt_is_applicable= true;
if (check_only_first_match && join_tab->first_inner)
{
/*
Expand All @@ -2408,8 +2409,9 @@ enum_nested_loop_state JOIN_CACHE::join_matching_records(bool skip_last)
}
}

if (!check_only_first_match ||
(join_tab->first_inner && !not_exists_opt_is_applicable) ||
if ((!join_tab->on_precond &&
(!check_only_first_match ||
(join_tab->first_inner && !not_exists_opt_is_applicable))) ||
!skip_next_candidate_for_match(rec_ptr))
{
read_next_candidate_for_match(rec_ptr);
Expand Down
4 changes: 4 additions & 0 deletions sql/sql_join_cache.h
Expand Up @@ -340,6 +340,9 @@ class JOIN_CACHE :public Sql_alloc
/* The length of an embedded key value */
uint emb_key_length;

/* This flag is used only when 'not exists' optimization can be applied */
bool not_exists_opt_is_applicable;

/*
This object provides the methods to iterate over records of
the joined table join_tab when looking for join matches between
Expand Down Expand Up @@ -536,6 +539,7 @@ class JOIN_CACHE :public Sql_alloc
prev_cache= next_cache= 0;
buff= 0;
min_buff_size= max_buff_size= 0; // Caches
not_exists_opt_is_applicable= false;
}

/*
Expand Down

0 comments on commit 954a6de

Please sign in to comment.