Skip to content

Commit

Permalink
MDEV-31496: Make optimizer handle UCASE(varchar_col)=...
Browse files Browse the repository at this point in the history
(Review input addressed)
(Added handling of UPDATE/DELETE and partitioning w/o index)

If the properties of the collation in use allow it, perform the following
equivalent rewrites:

1. UPPER(key_col)=expr  ->  key_col=expr
   expr=UPPER(key_col)  ->  expr=key_col
   (also rewrite both sides of the equality at the same time)

2. UPPER(key_col) IN (constant-list)  -> key_col IN (constant-list)

- Mark utf8mb{3,4}_general_ci as collations that allow this.
- Add optimizer_switch='sargable_casefold=ON' to control this.
  (ON by default in this patch)
- Cover the rewrite in the Optimizer Trace; the rewrite name is
  "sargable_casefold_removal".
  • Loading branch information
spetrunia committed Sep 12, 2023
1 parent 8ad1e26 commit e987b93
Show file tree
Hide file tree
Showing 25 changed files with 858 additions and 24 deletions.
1 change: 1 addition & 0 deletions include/m_ctype.h
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,7 @@ extern MY_UNI_CTYPE my_uni_ctype[256];
#define MY_CS_NON1TO1 0x40000 /* Has a complex mapping from characters
to weights, e.g. contractions, expansions,
ignorable characters */
#define MY_CS_UPPER_EQUAL_AS_EQUAL 0x80000 /* (UPPER(x)=UPPER(y)) <=> (x=y)*/
#define MY_CHARSET_UNDEFINED 0

/* Character repertoire flags */
Expand Down
1 change: 1 addition & 0 deletions libmysqld/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ SET(SQL_EMBEDDED_SOURCES emb_qcache.cc libmysqld.c lib_sql.cc
../sql/mf_iocache.cc ../sql/my_decimal.cc
../sql/net_serv.cc ../sql/opt_range.cc
../sql/opt_rewrite_date_cmp.cc
../sql/opt_rewrite_remove_casefold.cc
../sql/opt_sum.cc
../sql/parse_file.cc ../sql/procedure.cc ../sql/protocol.cc
../sql/records.cc ../sql/repl_failsafe.cc ../sql/rpl_filter.cc
Expand Down
44 changes: 44 additions & 0 deletions mysql-test/include/sargable_casefold.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Check sargable_casefold rewrite for $collation
#
# Include-file fragment for mysqltest. It reads $collation, which is not
# assigned anywhere in this file, so the including test must set it before
# sourcing this file.
#
# The fragment creates and populates table t1, then prints EXPLAIN output (and,
# for the first two predicates, the query results) for equalities of the form
# UPPER(col)=const / UCASE(col)=const, so the recorded result shows which
# access method the optimizer picked.
#
# NOTE: comments are only placed between statements; every SQL statement below
# is echoed into the recorded result and must stay byte-identical.

# t1 has four string columns (two VARCHAR, one CHAR, one TEXT), each with its
# own single-column key; the TEXT column is indexed on a 32-character prefix.
# `eval` is needed so that $collation is substituted into the statement.
eval create table t1 (
col1 varchar(32),
col2 varchar(32),
col3 char(32),
col4 text,
key(col1),
key(col2),
key(col3),
key(col4(32))
) collate $collation;

# Fill all four columns of each row with the same value 'A-<seq>', one row per
# value produced by seq_1_to_100.
insert into t1
select
concat('A-', seq),
concat('A-', seq),
concat('A-', seq),
concat('A-', seq)
from seq_1_to_100;

# Collect persistent statistics for all columns and indexes before the EXPLAINs.
analyze table t1 persistent for all;

--echo # Basic examples. All should use ref(col1):
explain
select * from t1 where upper(col1)='A-3';
select * from t1 where upper(col1)='A-3';

# Same check spelled with ucase() and compared against a lowercase constant.
explain
select * from t1 where ucase(col1)='a-3';
select * from t1 where ucase(col1)='a-3';

# Constant on the left-hand side of the equality, function call on the right.
explain select * from t1 where 'abc'=upper(col1);
explain select * from t1 where 'xyz'=ucase(col1);

# The same predicate applied through a view over t1.
create view v1 as select * from t1;
explain select * from v1 where 'abc'=upper(col1);
drop view v1;

# NOTE(review): despite the "ref(col1)" wording in the echoed header above,
# these two statements filter on col3 (CHAR) and col4 (TEXT with a prefix key),
# so any index they use would be on those columns, not col1.
explain select * from t1 where upper(col3)='a-3';
explain select * from t1 where upper(col4)='a-3';

# DROP TABLE t1 is missing intentionally here.
# t1 is left in place when this file ends, so the including test is
# responsible for dropping it (presumably after running its own additional
# queries against t1 -- verify against the includer).

4 changes: 2 additions & 2 deletions mysql-test/main/mysqld--help.result
Original file line number Diff line number Diff line change
Expand Up @@ -804,7 +804,7 @@ The following specify which files/extra groups are read (specified before remain
condition_pushdown_for_derived, split_materialized,
condition_pushdown_for_subquery, rowid_filter,
condition_pushdown_from_having, not_null_range_scan,
hash_join_cardinality
hash_join_cardinality, sargable_casefold
--optimizer-trace=name
Controls tracing of the Optimizer:
optimizer_trace=option=val[,option=val...], where option
Expand Down Expand Up @@ -1764,7 +1764,7 @@ optimizer-rowid-copy-cost 0.002653
optimizer-scan-setup-cost 10
optimizer-search-depth 62
optimizer-selectivity-sampling-limit 100
optimizer-switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off,hash_join_cardinality=on
optimizer-switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off,hash_join_cardinality=on,sargable_casefold=on
optimizer-trace
optimizer-trace-max-mem-size 1048576
optimizer-use-condition-selectivity 4
Expand Down
2 changes: 1 addition & 1 deletion mysql-test/main/mysqltest_tracking_info.result
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ SET @@session.session_track_system_variables='optimizer_switch';
set optimizer_switch='index_merge=off,index_merge_union=off,index_merge_sort_union=off,index_merge_intersection=off,index_merge_sort_intersection=on,engine_condition_pushdown=on,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=on,mrr_cost_based=on,mrr_sort_keys=on,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=on,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off';
-- Tracker : SESSION_TRACK_SYSTEM_VARIABLES
-- optimizer_switch
-- index_merge=off,index_merge_union=off,index_merge_sort_union=off,index_merge_intersection=off,index_merge_sort_intersection=on,engine_condition_pushdown=on,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=on,mrr_cost_based=on,mrr_sort_keys=on,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=on,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off,hash_join_cardinality=on
-- index_merge=off,index_merge_union=off,index_merge_sort_union=off,index_merge_intersection=off,index_merge_sort_intersection=on,engine_condition_pushdown=on,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=on,mrr_cost_based=on,mrr_sort_keys=on,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=on,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off,hash_join_cardinality=on,sargable_casefold=on

Warnings:
Warning 1681 'engine_condition_pushdown=on' is deprecated and will be removed in a future release
Expand Down
278 changes: 278 additions & 0 deletions mysql-test/main/sargable_casefold.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,278 @@
set
@tmp_switch_sarg_casefold=@@optimizer_switch,
optimizer_switch='sargable_casefold=on';
create table t1 (
col1 varchar(32),
col2 varchar(32),
col3 char(32),
col4 text,
key(col1),
key(col2),
key(col3),
key(col4(32))
) collate utf8mb3_general_ci;
insert into t1
select
concat('A-', seq),
concat('A-', seq),
concat('A-', seq),
concat('A-', seq)
from seq_1_to_100;
analyze table t1 persistent for all;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze Warning Engine-independent statistics are not collected for column 'col4'
test.t1 analyze status Table is already up to date
# Basic examples. All should use ref(col1):
explain
select * from t1 where upper(col1)='A-3';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref col1 col1 99 const 1 Using index condition
select * from t1 where upper(col1)='A-3';
col1 col2 col3 col4
A-3 A-3 A-3 A-3
explain
select * from t1 where ucase(col1)='a-3';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref col1 col1 99 const 1 Using index condition
select * from t1 where ucase(col1)='a-3';
col1 col2 col3 col4
A-3 A-3 A-3 A-3
explain select * from t1 where 'abc'=upper(col1);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref col1 col1 99 const 1 Using index condition
explain select * from t1 where 'xyz'=ucase(col1);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref col1 col1 99 const 1 Using index condition
create view v1 as select * from t1;
explain select * from v1 where 'abc'=upper(col1);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref col1 col1 99 const 1 Using index condition
drop view v1;
explain select * from t1 where upper(col3)='a-3';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref col3 col3 97 const 1 Using index condition
explain select * from t1 where upper(col4)='a-3';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref col4 col4 99 const 1 Using where
# must not be rewritten:
explain select * from t1 where ucase(col1 collate utf8mb3_bin)='a-3';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
# Will not do the rewrite due to collation mismatch:
explain select * from t1 where ucase(col1)=_utf8mb3'abc' COLLATE utf8mb3_bin;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
drop table t1;
create table t1 (
col1 varchar(32),
col2 varchar(32),
col3 char(32),
col4 text,
key(col1),
key(col2),
key(col3),
key(col4(32))
) collate utf8mb4_general_ci;
insert into t1
select
concat('A-', seq),
concat('A-', seq),
concat('A-', seq),
concat('A-', seq)
from seq_1_to_100;
analyze table t1 persistent for all;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze Warning Engine-independent statistics are not collected for column 'col4'
test.t1 analyze status Table is already up to date
# Basic examples. All should use ref(col1):
explain
select * from t1 where upper(col1)='A-3';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref col1 col1 131 const 1 Using index condition
select * from t1 where upper(col1)='A-3';
col1 col2 col3 col4
A-3 A-3 A-3 A-3
explain
select * from t1 where ucase(col1)='a-3';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref col1 col1 131 const 1 Using index condition
select * from t1 where ucase(col1)='a-3';
col1 col2 col3 col4
A-3 A-3 A-3 A-3
explain select * from t1 where 'abc'=upper(col1);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref col1 col1 131 const 1 Using index condition
explain select * from t1 where 'xyz'=ucase(col1);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref col1 col1 131 const 1 Using index condition
create view v1 as select * from t1;
explain select * from v1 where 'abc'=upper(col1);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref col1 col1 131 const 1 Using index condition
drop view v1;
explain select * from t1 where upper(col3)='a-3';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref col3 col3 129 const 1 Using index condition
explain select * from t1 where upper(col4)='a-3';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref col4 col4 131 const 1 Using where
# must not be rewritten:
explain select * from t1 where ucase(col1 collate utf8mb4_bin)='a-3';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
# Will not do the rewrite due to collation mismatch:
explain select * from t1 where ucase(col1)=_utf8mb4'abc' COLLATE utf8mb4_bin;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
#
# Check if optimizer_switch turns the rewrite off:
#
set
@save_os=@@optimizer_switch,
optimizer_switch='sargable_casefold=off';
explain select * from t1 where upper(col1)='A-3';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
explain select * from t1 where ucase(col1)='a-3';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
set optimizer_switch=@save_os;
# The following will not do the rewrite because the comparison
# is done as DOUBLEs. Come to think of it, it won't harm to do
# the rewrite but it is outside of the scope of this patch:
explain select * from t1 where ucase(col1)=123.456;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
select
coercibility(upper(col1))
from t1 limit 1;
coercibility(upper(col1))
2
select coercibility(_utf8mb3'abc' COLLATE utf8mb3_bin);
coercibility(_utf8mb3'abc' COLLATE utf8mb3_bin)
0
# This is transformed too even if it doesn't create any new
# [potential] access paths:
explain format=json select * from t1 where upper(col1)=upper(col2);
EXPLAIN
{
"query_block": {
"select_id": 1,
"cost": 0.0256761,
"nested_loop": [
{
"table": {
"table_name": "t1",
"access_type": "ALL",
"loops": 1,
"rows": 100,
"cost": 0.0256761,
"filtered": 100,
"attached_condition": "t1.col2 = t1.col1"
}
}
]
}
}
#
# Check if ref access works
#
create table t2 (
a varchar(32),
non_key varchar(32),
key(a)
) collate utf8mb4_general_ci;
insert into t2
select
concat('A-', seq),
concat('A-', seq)
from seq_1_to_10;
# Must use ref access for t1:
explain select * from t1, t2 where upper(t1.col1)= t2.non_key;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 10 Using where
1 SIMPLE t1 ref col1 col1 131 test.t2.non_key 1
create table t3 (
a varchar(32),
b varchar(32),
key(a),
key(b)
) collate utf8mb3_general_ci;
insert into t3 values ('abc','ABC'), ('xyz','XYZ');
explain extended
select a from t3 ignore index(a) where a=b and upper(b)='ABC';
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t3 ref b b 99 const 1 100.00 Using index condition; Using where
Warnings:
Note 1003 select `test`.`t3`.`a` AS `a` from `test`.`t3` IGNORE INDEX (`a`) where `test`.`t3`.`a` = `test`.`t3`.`b` and `test`.`t3`.`b` = 'ABC'
#
# Check that rewrite isn't applied for non-applicable collations
#
create table t4 (
col1 varchar(32) collate utf8mb3_bin,
col2 varchar(32) collate utf8mb3_czech_ci,
col3 varchar(32) collate latin1_bin,
key(col1),
key(col2),
key(col3)
);
insert into t4
select
concat('A-', seq),
concat('A-', seq),
concat('A-', seq)
from seq_1_to_100;
analyze table t4 persistent for all;
Table Op Msg_type Msg_text
test.t4 analyze status Engine-independent statistics collected
test.t4 analyze status Table is already up to date
# None should use ref access:
explain select * from t4 where upper(col1)='A-3';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t4 ALL NULL NULL NULL NULL 100 Using where
explain select * from t4 where upper(col2)='a-3';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t4 ALL NULL NULL NULL NULL 100 Using where
explain select * from t4 where upper(col3)='a-3';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t4 ALL NULL NULL NULL NULL 100 Using where
#
# Check that rewrite works for UPPER(col) IN (const-list)
#
set
@tmp_ot= @@optimizer_trace,
optimizer_trace=1;
# must use range:
explain
select * from t1 where upper(col1) IN ('A-3','A-4','a-5');
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range col1 col1 131 NULL 3 Using index condition
select * from t1 where upper(col1) IN ('A-3','A-4','a-5');
col1 col2 col3 col4
A-3 A-3 A-3 A-3
A-4 A-4 A-4 A-4
A-5 A-5 A-5 A-5
# Will not use the rewrite:
explain
select * from t1 where upper(col1) IN ('A-3','A-4',col2);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
#
# MDEV-31946: Optimizer handle UCASE(varchar_col)=... does not work for UPDATE/DELETE
#
explain delete from t1 where upper(col1)='A';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range col1 col1 131 NULL 1 Using where
explain delete from t1 where upper(col1) IN ('A','B');
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range col1 col1 131 NULL 2 Using where
explain update t1 set col2='ABC' where upper(col1)='A';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range col1 col1 131 NULL 1 Using where
explain update t1 set col2='ABC' where upper(col1) IN ('A','B');
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range col1 col1 131 NULL 2 Using where
drop table t1,t2,t3,t4;
set optimizer_switch=@tmp_switch_sarg_casefold;

0 comments on commit e987b93

Please sign in to comment.