Skip to content

Commit 90780bb

Browse files
committed
MDEV-21104 Wrong result (extra rows and wrong values) with incremental BNLH
This bug could affect multi-way join queries with embedded outer joins that contained a conjunctive IS NULL predicate over a non-nullable column from inner table of an outer join. The predicate could occur in WHERE condition or in ON condition. Due to this bug a wrong result set could be returned by the query. The bug manifested itself only when join buffers were employed for join operations. The problem appeared because - a bug in the function JOIN_CACHE::get_match_flag_by_pos that not always returned proper match flags for embedding outer joins stored together with table rows put a join buffer. - bug in the function JOIN_CACHE::join_matching_records that not always correctly determined that a row from the buffer could be skipped due to applied 'not_exists' optimization. Example: SELECT * FROM t1 LEFT JOIN ((t2 LEFT JOIN t3 ON c = d) JOIN t4) ON b = e WHERE e IS NULL; The patch introduces a new function that finds the match flag for a record from join buffer specifying the buffer where this flag has to be found. The function is called JOIN_CACHE::get_match_flag_by_pos_from_join_buffer(). Now this function rather than JOIN_CACHE::get_match_flag_by_pos() is used in JOIN_CACHE::skip_if_matched() to check whether a record from the join buffer must be ignored when extending the record by null complements. Also the code of the function JOIN_CACHE::skip_if_not_needed_match() has been changed. The function checks whether a record from the join buffer still may produce some useful extensions. Also some clarifying comments has been added. Approved by monty@mariadb.com.
1 parent 1af8558 commit 90780bb

File tree

4 files changed

+177
-11
lines changed

4 files changed

+177
-11
lines changed

mysql-test/r/join_cache.result

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6054,4 +6054,57 @@ select f2 from t2,t1 where f2 = 0;
60546054
f2
60556055
drop table t1, t2;
60566056
set join_buffer_size=@save_join_buffer_size;
6057+
#
6058+
# MDEV-21104: BNLH used for multi-join query with embedded outer join
6059+
# and possible 'not exists' optimization
6060+
#
6061+
set join_cache_level=4;
6062+
CREATE TABLE t1 (a int) ENGINE=MyISAM;
6063+
INSERT INTO t1 VALUES (1),(2);
6064+
CREATE TABLE t2 (b int, c int) ENGINE=MyISAM;
6065+
INSERT INTO t2 VALUES (1,2),(2,4);
6066+
CREATE TABLE t3 (d int, KEY(d)) ENGINE=MyISAM;
6067+
INSERT INTO t3 VALUES (1),(2);
6068+
CREATE TABLE t4 (e int primary key) ENGINE=MyISAM;
6069+
INSERT INTO t4 VALUES (1),(2);
6070+
ANALYZE TABLE t1,t2,t3,t4;
6071+
Table Op Msg_type Msg_text
6072+
test.t1 analyze status OK
6073+
test.t2 analyze status OK
6074+
test.t3 analyze status OK
6075+
test.t4 analyze status OK
6076+
SELECT * FROM t2 LEFT JOIN t3 ON c = d;
6077+
b c d
6078+
1 2 2
6079+
2 4 NULL
6080+
SELECT * FROM (t2 LEFT JOIN t3 ON c = d ) JOIN t4;
6081+
b c d e
6082+
1 2 2 1
6083+
2 4 NULL 1
6084+
1 2 2 2
6085+
2 4 NULL 2
6086+
EXPLAIN SELECT * FROM t1 LEFT JOIN ( ( t2 LEFT JOIN t3 ON c = d ) JOIN t4 ) ON b = e;
6087+
id select_type table type possible_keys key key_len ref rows Extra
6088+
1 SIMPLE t1 ALL NULL NULL NULL NULL 2
6089+
1 SIMPLE t2 ALL NULL NULL NULL NULL 2 Using where; Using join buffer (flat, BNL join)
6090+
1 SIMPLE t3 hash_index d #hash#d:d 5:5 test.t2.c 2 Using where; Using index; Using join buffer (incremental, BNLH join)
6091+
1 SIMPLE t4 hash_index PRIMARY #hash#PRIMARY:PRIMARY 4:4 test.t2.b 2 Using index; Using join buffer (incremental, BNLH join)
6092+
SELECT * FROM t1 LEFT JOIN ( ( t2 LEFT JOIN t3 ON c = d ) JOIN t4 ) ON b = e;
6093+
a b c d e
6094+
1 1 2 2 1
6095+
2 1 2 2 1
6096+
1 2 4 NULL 2
6097+
2 2 4 NULL 2
6098+
EXPLAIN SELECT * FROM t1 LEFT JOIN ( ( t2 LEFT JOIN t3 ON c = d ) JOIN t4 ) ON b = e
6099+
WHERE e IS NULL;
6100+
id select_type table type possible_keys key key_len ref rows Extra
6101+
1 SIMPLE t1 ALL NULL NULL NULL NULL 2
6102+
1 SIMPLE t2 ALL NULL NULL NULL NULL 2 Using where; Using join buffer (flat, BNL join)
6103+
1 SIMPLE t3 hash_index d #hash#d:d 5:5 test.t2.c 2 Using where; Using index; Using join buffer (incremental, BNLH join)
6104+
1 SIMPLE t4 hash_index PRIMARY #hash#PRIMARY:PRIMARY 4:4 test.t2.b 2 Using where; Using index; Not exists; Using join buffer (incremental, BNLH join)
6105+
SELECT * FROM t1 LEFT JOIN ( ( t2 LEFT JOIN t3 ON c = d ) JOIN t4 ) ON b = e
6106+
WHERE e IS NULL;
6107+
a b c d e
6108+
DROP TABLE t1,t2,t3,t4;
6109+
set join_cache_level=@save_join_cache_level;
60576110
set @@optimizer_switch=@save_optimizer_switch;

mysql-test/t/join_cache.test

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4014,5 +4014,41 @@ select f2 from t2,t1 where f2 = 0;
40144014
drop table t1, t2;
40154015
set join_buffer_size=@save_join_buffer_size;
40164016

4017+
4018+
--echo #
4019+
--echo # MDEV-21104: BNLH used for multi-join query with embedded outer join
4020+
--echo # and possible 'not exists' optimization
4021+
--echo #
4022+
4023+
set join_cache_level=4;
4024+
4025+
CREATE TABLE t1 (a int) ENGINE=MyISAM;
4026+
INSERT INTO t1 VALUES (1),(2);
4027+
CREATE TABLE t2 (b int, c int) ENGINE=MyISAM;
4028+
INSERT INTO t2 VALUES (1,2),(2,4);
4029+
CREATE TABLE t3 (d int, KEY(d)) ENGINE=MyISAM;
4030+
INSERT INTO t3 VALUES (1),(2);
4031+
CREATE TABLE t4 (e int primary key) ENGINE=MyISAM;
4032+
INSERT INTO t4 VALUES (1),(2);
4033+
ANALYZE TABLE t1,t2,t3,t4;
4034+
4035+
SELECT * FROM t2 LEFT JOIN t3 ON c = d;
4036+
SELECT * FROM (t2 LEFT JOIN t3 ON c = d ) JOIN t4;
4037+
4038+
let $q1=
4039+
SELECT * FROM t1 LEFT JOIN ( ( t2 LEFT JOIN t3 ON c = d ) JOIN t4 ) ON b = e;
4040+
eval EXPLAIN $q1;
4041+
eval $q1;
4042+
4043+
let $q2=
4044+
SELECT * FROM t1 LEFT JOIN ( ( t2 LEFT JOIN t3 ON c = d ) JOIN t4 ) ON b = e
4045+
WHERE e IS NULL;
4046+
eval EXPLAIN $q2;
4047+
eval $q2;
4048+
4049+
DROP TABLE t1,t2,t3,t4;
4050+
4051+
set join_cache_level=@save_join_cache_level;
4052+
40174053
# The following command must be the last one in the file
40184054
set @@optimizer_switch=@save_optimizer_switch;

sql/sql_join_cache.cc

Lines changed: 78 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1649,7 +1649,7 @@ void JOIN_CACHE::get_record_by_pos(uchar *rec_ptr)
16491649
}
16501650

16511651

1652-
/*
1652+
/*
16531653
Get the match flag from the referenced record: the default implementation
16541654
16551655
SYNOPSIS
@@ -1661,6 +1661,7 @@ void JOIN_CACHE::get_record_by_pos(uchar *rec_ptr)
16611661
get the match flag for the record pointed by the reference at the position
16621662
rec_ptr. If the match flag is placed in one of the previous buffers the
16631663
function first reaches the linked record fields in this buffer.
1664+
The function returns the value of the first encountered match flag.
16641665
16651666
RETURN VALUE
16661667
match flag for the record at the position rec_ptr
@@ -1685,6 +1686,39 @@ enum JOIN_CACHE::Match_flag JOIN_CACHE::get_match_flag_by_pos(uchar *rec_ptr)
16851686

16861687

16871688
/*
1689+
Get the match flag for the referenced record from specified join buffer
1690+
1691+
SYNOPSIS
1692+
get_match_flag_by_pos_from_join_buffer()
1693+
rec_ptr position of the first field of the record in the join buffer
1694+
tab join table with join buffer where to look for the match flag
1695+
1696+
DESCRIPTION
1697+
This default implementation of the get_match_flag_by_pos_from_join_buffer
1698+
method gets the match flag for the record pointed by the reference at the
1699+
position rec_ptr from the join buffer attached to the join table tab.
1700+
1701+
RETURN VALUE
1702+
match flag for the record at the position rec_ptr from the join
1703+
buffer attached to the table tab.
1704+
*/
1705+
1706+
enum JOIN_CACHE::Match_flag
1707+
JOIN_CACHE::get_match_flag_by_pos_from_join_buffer(uchar *rec_ptr,
1708+
JOIN_TAB *tab)
1709+
{
1710+
DBUG_ASSERT(tab->cache && tab->cache->with_match_flag);
1711+
for (JOIN_CACHE *cache= this; ; )
1712+
{
1713+
if (cache->join_tab == tab)
1714+
return (enum Match_flag) rec_ptr[0];
1715+
cache= cache->prev_cache;
1716+
rec_ptr= cache->get_rec_ref(rec_ptr);
1717+
}
1718+
}
1719+
1720+
1721+
/*
16881722
Calculate the increment of the auxiliary buffer for a record write
16891723
16901724
SYNOPSIS
@@ -1954,6 +1988,10 @@ bool JOIN_CACHE::read_referenced_field(CACHE_FIELD *copy,
19541988
If the record is skipped the value of 'pos' is set to point to the position
19551989
right after the record.
19561990
1991+
NOTE
1992+
Currently this function is called only when generating null complemented
1993+
records for outer joins (=> only when join_tab->first_unmatched != NULL).
1994+
19571995
RETURN VALUE
19581996
TRUE the match flag is set to MATCH_FOUND and the record has been skipped
19591997
FALSE otherwise
@@ -1966,7 +2004,9 @@ bool JOIN_CACHE::skip_if_matched()
19662004
if (prev_cache)
19672005
offset+= prev_cache->get_size_of_rec_offset();
19682006
/* Check whether the match flag is MATCH_FOUND */
1969-
if (get_match_flag_by_pos(pos+offset) == MATCH_FOUND)
2007+
if (get_match_flag_by_pos_from_join_buffer(pos+offset,
2008+
join_tab->first_unmatched) ==
2009+
MATCH_FOUND)
19702010
{
19712011
pos+= size_of_rec_len + get_rec_length(pos);
19722012
return TRUE;
@@ -1983,13 +2023,23 @@ bool JOIN_CACHE::skip_if_matched()
19832023
19842024
DESCRIPTION
19852025
This default implementation of the virtual function skip_if_not_needed_match
1986-
skips the next record from the join buffer if its match flag is not
1987-
MATCH_NOT_FOUND, and, either its value is MATCH_FOUND and join_tab is the
1988-
first inner table of an inner join, or, its value is MATCH_IMPOSSIBLE
1989-
and join_tab is the first inner table of an outer join.
2026+
skips the next record from the join when generating join extensions
2027+
for the records in the join buffer depending on the value of the match flag.
2028+
- In the case of a semi-nest the match flag may be in two states
2029+
{MATCH_NOT_FOUND, MATCH_FOUND}. The record is skipped if the flag is set
2030+
to MATCH_FOUND.
2031+
- In the case of a outer join nest when not_exists optimization is applied
2032+
the match may be in three states {MATCH_NOT_FOUND, MATCH_IMPOSSIBLE,
2033+
MATCH_FOUND. The record is skipped if the flag is set to MATCH_FOUND or
2034+
to MATCH_IMPOSSIBLE.
2035+
19902036
If the record is skipped the value of 'pos' is set to point to the position
19912037
right after the record.
19922038
2039+
NOTE
2040+
Currently the function is called only when generating non-null complemented
2041+
extensions for records in the join buffer.
2042+
19932043
RETURN VALUE
19942044
TRUE the record has to be skipped
19952045
FALSE otherwise
@@ -2000,11 +2050,19 @@ bool JOIN_CACHE::skip_if_not_needed_match()
20002050
DBUG_ASSERT(with_length);
20012051
enum Match_flag match_fl;
20022052
uint offset= size_of_rec_len;
2053+
bool skip= FALSE;
20032054
if (prev_cache)
20042055
offset+= prev_cache->get_size_of_rec_offset();
20052056

2006-
if ((match_fl= get_match_flag_by_pos(pos+offset)) != MATCH_NOT_FOUND &&
2007-
(join_tab->check_only_first_match() == (match_fl == MATCH_FOUND)) )
2057+
if (!join_tab->check_only_first_match())
2058+
return FALSE;
2059+
2060+
match_fl= get_match_flag_by_pos(pos+offset);
2061+
skip= join_tab->first_sj_inner_tab ?
2062+
match_fl == MATCH_FOUND : // the case of semi-join
2063+
match_fl != MATCH_NOT_FOUND; // the case of outer-join
2064+
2065+
if (skip)
20082066
{
20092067
pos+= size_of_rec_len + get_rec_length(pos);
20102068
return TRUE;
@@ -2104,7 +2162,14 @@ enum_nested_loop_state JOIN_CACHE::join_records(bool skip_last)
21042162
goto finish;
21052163
}
21062164
join_tab->not_null_compl= FALSE;
2107-
/* Prepare for generation of null complementing extensions */
2165+
/*
2166+
Prepare for generation of null complementing extensions.
2167+
For all inner tables of the outer join operation for which
2168+
regular matches have been just found the field 'first_unmatched'
2169+
is set to point the the first inner table. After all null
2170+
complement rows are generated for this outer join this field
2171+
is set back to NULL.
2172+
*/
21082173
for (tab= join_tab->first_inner; tab <= join_tab->last_inner; tab++)
21092174
tab->first_unmatched= join_tab->first_inner;
21102175
}
@@ -2221,7 +2286,10 @@ enum_nested_loop_state JOIN_CACHE::join_matching_records(bool skip_last)
22212286
int error;
22222287
enum_nested_loop_state rc= NESTED_LOOP_OK;
22232288
join_tab->table->null_row= 0;
2224-
bool check_only_first_match= join_tab->check_only_first_match();
2289+
bool check_only_first_match=
2290+
join_tab->check_only_first_match() &&
2291+
(!join_tab->first_inner || // semi-join case
2292+
join_tab->first_inner == join_tab->first_unmatched); // outer join case
22252293
bool outer_join_first_inner= join_tab->is_first_inner_for_outer_join();
22262294
DBUG_ENTER("JOIN_CACHE::join_matching_records");
22272295

sql/sql_join_cache.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,9 @@ class JOIN_CACHE :public Sql_alloc
206206

207207
/*
208208
This flag indicates that records written into the join buffer contain
209-
a match flag field. The flag must be set by the init method.
209+
a match flag field. The flag must be set by the init method.
210+
Currently any implementation of the virtial init method calls
211+
the function JOIN_CACHE::calc_record_fields() to set this flag.
210212
*/
211213
bool with_match_flag;
212214
/*
@@ -646,6 +648,13 @@ class JOIN_CACHE :public Sql_alloc
646648
/* Shall return the value of the match flag for the positioned record */
647649
virtual enum Match_flag get_match_flag_by_pos(uchar *rec_ptr);
648650

651+
/*
652+
Shall return the value of the match flag for the positioned record
653+
from the join buffer attached to the specified table
654+
*/
655+
virtual enum Match_flag
656+
get_match_flag_by_pos_from_join_buffer(uchar *rec_ptr, JOIN_TAB *tab);
657+
649658
/* Shall return the position of the current record */
650659
virtual uchar *get_curr_rec() { return curr_rec_pos; }
651660

0 commit comments

Comments
 (0)