diff --git a/mysql-test/main/range_vs_index_merge.result b/mysql-test/main/range_vs_index_merge.result
index 59c093ff010d9..1e969112db0d2 100644
--- a/mysql-test/main/range_vs_index_merge.result
+++ b/mysql-test/main/range_vs_index_merge.result
@@ -1320,7 +1320,7 @@ WHERE ((Name > 'Ca' AND Name < 'Cf') OR (Country > 'E' AND Country < 'H'))
 AND (Population >= 100000 AND Population < 120000)
 ORDER BY Population LIMIT 5;
 id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE City index_merge Country,Name,Population Name,Country 35,3 NULL # Using sort_union(Name,Country); Using where; Using filesort
+1 SIMPLE City range Country,Name,Population Population 4 NULL # Using where
 FLUSH STATUS;
 SELECT * FROM City
 WHERE ((Name > 'Ca' AND Name < 'Cf') OR (Country > 'E' AND Country < 'H'))
@@ -1335,12 +1335,12 @@ ID Name Country Population
 SHOW STATUS LIKE 'Handler_read_%';
 Variable_name Value
 Handler_read_first 0
-Handler_read_key 2
+Handler_read_key 1
 Handler_read_last 0
-Handler_read_next 385
+Handler_read_next 59
 Handler_read_prev 0
 Handler_read_retry 0
-Handler_read_rnd 377
+Handler_read_rnd 0
 Handler_read_rnd_deleted 0
 Handler_read_rnd_next 0
 set @tmp_mdev585=@@optimizer_use_condition_selectivity;
diff --git a/mysql-test/main/range_vs_index_merge_innodb.result b/mysql-test/main/range_vs_index_merge_innodb.result
index 2ecd4d4abaa15..9e85973474cce 100644
--- a/mysql-test/main/range_vs_index_merge_innodb.result
+++ b/mysql-test/main/range_vs_index_merge_innodb.result
@@ -1344,6 +1344,38 @@ Handler_read_retry 0
 Handler_read_rnd 0
 Handler_read_rnd_deleted 0
 Handler_read_rnd_next 0
+set @tmp_mdev585=@@optimizer_use_condition_selectivity;
+set optimizer_use_condition_selectivity=1;
+EXPLAIN
+SELECT * FROM City
+WHERE ((Name > 'Ca' AND Name < 'Cf') OR (Country > 'E' AND Country < 'H'))
+AND (Population >= 100000 AND Population < 120000)
+ORDER BY Population LIMIT 5;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE City range Country,Name,Population Population 4 NULL # Using where
+FLUSH STATUS;
+SELECT * FROM City
+WHERE ((Name > 'Ca' AND Name < 'Cf') OR (Country > 'E' AND Country < 'H'))
+AND (Population >= 100000 AND Population < 120000)
+ORDER BY Population LIMIT 5;
+ID Name Country Population
+519 Worthing GBR 100000
+638 al-Arish EGY 100447
+518 Basildon GBR 100924
+707 Marbella ESP 101144
+3792 Tartu EST 101246
+SHOW STATUS LIKE 'Handler_read_%';
+Variable_name Value
+Handler_read_first 0
+Handler_read_key 1
+Handler_read_last 0
+Handler_read_next 59
+Handler_read_prev 0
+Handler_read_retry 0
+Handler_read_rnd 0
+Handler_read_rnd_deleted 0
+Handler_read_rnd_next 0
+set optimizer_use_condition_selectivity=@tmp_mdev585;
 set optimizer_switch='index_merge=off';
 EXPLAIN
 SELECT * FROM City
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 4da7cd1a0048e..d547ff311444b 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -26685,16 +26685,22 @@ void JOIN::cache_const_exprs()
 
 
 /*
-  Get a cost of reading rows_limit rows through index keynr.
+  Get the cost of using index keynr to read #LIMIT matching rows
 
   @detail
   - If there is a quick select, we try to use it.
   - if there is a ref(const) access, we try to use it, too.
   - quick and ref(const) use different cost formulas, so if both are possible
     we should make a cost-based choice.
-
+
+  rows_limit is the number of rows we would need to read when using a full
+  index scan. This is generally higher than the N from "LIMIT N" clause,
+  because there's a WHERE condition (a part of which is used to construct a
+  range access we are considering using here)
+
   @param  tab              JOIN_TAB with table access (is NULL for single-table
                            UPDATE/DELETE)
+  @param  rows_limit       See explanation above
   @param  read_time OUT    Cost of reading using quick or ref(const) access.
 
 
@@ -26707,6 +26713,7 @@ void JOIN::cache_const_exprs()
 
 static bool get_range_limit_read_cost(const JOIN_TAB *tab,
                                       const TABLE *table,
+                                      ha_rows table_records,
                                       uint keynr,
                                       ha_rows rows_limit,
                                       double *read_time)
@@ -26773,8 +26780,32 @@ static bool get_range_limit_read_cost(const JOIN_TAB *tab,
       }
     }
   }
+
+  /*
+    Consider an example:
+
+      SELECT *
+      FROM t1
+      WHERE key1 BETWEEN 10 AND 20 AND col2='foo'
+      ORDER BY key1 LIMIT 10
+
+    If we were using a full index scan on key1, we would need to read this
+    many rows to get 10 matches:
+
+      10 / selectivity(key1 BETWEEN 10 AND 20 AND col2='foo')
+
+    This is the number we get in rows_limit.
+    But we intend to use range access on key1. The rows returned by quick
+    select will satisfy the range part of the condition,
+    "key1 BETWEEN 10 and 20". We will still need to filter them with
+    the remainder condition, (col2='foo').
+
+    The selectivity of the range access is (best_rows/table_records). We need
+    to discount it from the rows_limit:
+  */
+  double rows_limit_for_quick= rows_limit * (best_rows / table_records);
 
-  if (best_rows > rows_limit)
+  if (best_rows > rows_limit_for_quick)
   {
     /*
       LIMIT clause specifies that we will need to read fewer records than
@@ -26783,7 +26814,7 @@ static bool get_range_limit_read_cost(const JOIN_TAB *tab,
       only need 1/3rd of records, it will cost us 1/3rd of quick select's
       read time)
     */
-    best_cost *= rows_limit / best_rows;
+    best_cost *= rows_limit_for_quick / best_rows;
   }
   *read_time= best_cost;
   res= true;
@@ -27076,8 +27107,8 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
           index_scan_time= select_limit/rec_per_key *
                            MY_MIN(rec_per_key, table->file->scan_time());
           double range_scan_time;
-          if (get_range_limit_read_cost(tab, table, nr, select_limit,
-                                        &range_scan_time))
+          if (get_range_limit_read_cost(tab, table, table_records, nr,
+                                        select_limit, &range_scan_time))
           {
             if (range_scan_time < index_scan_time)
               index_scan_time= range_scan_time;